[patch v2, kernel version 3.2.1] net/ipv4/ip_gre: Ethernet multipoint GRE over IP

January 16th, 2012 - 02:50 pm ET by ©tefan Gula | Report spam
From: Stefan Gula <steweg@gmail.com

This patch is an extension for current Ethernet over GRE
implementation, which allows user to create virtual bridge (multipoint
VPN) and forward traffic based on Ethernet MAC address information in
it. It simulates the Bridge behavior learning mechanism, but instead
of learning port ID from which given MAC address comes, it learns IP
address of peer which encapsulated given packet. Multicast, Broadcast
and unknown-multicast traffic is send over network as multicast
encapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be
represented as one single virtual switch on logical level and be also
represented as one multicast IPv4 address on network level.

Signed-off-by: Stefan Gula <steweg@gmail.com>



code was merged with latest bridge code and should be easily comparable

diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h
linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100
@@ -27,6 +27,14 @@ struct ip_tunnel {
__u32 o_seqno; /* The last output seqno */
int hlen; /* Precalculated GRE header length */
int mlink;
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+#define GRETAP_BR_HASH_BITS 8
+#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS)
+ struct hlist_head hash[GRETAP_BR_HASH_SIZE];
+ spinlock_t hash_lock;
+ unsigned long ageing_time;
+ struct timer_list gc_timer;
+#endif

struct ip_tunnel_parm parms;

diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig
linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100
@@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.

+config NET_IPGRE_BRIDGE
+ bool "IP: Ethernet over multipoint GRE over IP"
+ depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST
+ help
+ Allows you to use multipoint GRE VPN as virtual switch and interconnect
+ several L2 endpoints over L3 routed infrastructure. It is useful for
+ creating multipoint L2 VPNs which can be later used inside bridge
+ interfaces If you want to use. GRE multipoint L2 VPN feature say Y.
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c
linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100
@@ -52,6 +52,11 @@
#include <net/ip6_route.h>
#endif

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+#include <linux/jhash.h>
+#include <asm/unaligned.h>
+#endif
+
/*
Problems & solutions
@@ -134,6 +139,191 @@ struct ipgre_net {
struct net_device *fb_tunnel_dev;
};

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ /*
+ * This part of code includes codes to enable L2 ethernet
+ * switch virtualization over IP routed infrastructure with
+ * utilization of multicast capable endpoint using Ethernet
+ * over GRE
+ *
+ * Author: Stefan Gula
+ * Signed-off-by: Stefan Gula <steweg@gmail.com>
+ */
+struct mac_addr {
+ unsigned char addr[6];
+};
+
+struct ipgre_tap_bridge_entry {
+ struct hlist_node hlist;
+ u32 raddr;
+ struct mac_addr addr;
+ struct net_device *dev;
+ struct rcu_head rcu;
+ unsigned long updated;
+};
+
+static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly;
+static u32 ipgre_salt __read_mostly;
+
+int __net_init ipgre_tap_bridge_init(void)
+{
+ ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache",
+ sizeof(struct ipgre_tap_bridge_entry),
+ 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!ipgre_tap_bridge_cache)
+ return -ENOMEM;
+ get_random_bytes(&ipgre_salt, sizeof(ipgre_salt));
+ return 0;
+}
+
+void ipgre_tap_bridge_fini(void)
+{
+ kmem_cache_destroy(ipgre_tap_bridge_cache);
+}
+
+static inline int ipgre_tap_bridge_hash(const unsigned char *mac)
+{
+ u32 key = get_unaligned((u32 *)(mac + 2));
+ return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1);
+}
+
+static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel,
+ const struct ipgre_tap_bridge_entry *entry)
+{
+ return time_before_eq(entry->updated + tunnel->ageing_time,
+ jiffies);
+}
+
+static void ipgre_tap_bridge_rcu_free(struct rcu_head *head)
+{
+ struct ipgre_tap_bridge_entry *entry
+ = container_of(head, struct ipgre_tap_bridge_entry, rcu);
+ kmem_cache_free(ipgre_tap_bridge_cache, entry);
+}
+
+static inline void ipgre_tap_bridge_delete(struct
ipgre_tap_bridge_entry *entry)
+{
+ hlist_del_rcu(&entry->hlist);
+ call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free);
+}
+
+void ipgre_tap_bridge_cleanup(unsigned long _data)
+{
+ struct ip_tunnel *tunnel = (struct ip_tunnel *)_data;
+ unsigned long delay = tunnel->ageing_time;
+ unsigned long next_timer = jiffies + tunnel->ageing_time;
+ int i;
+ spin_lock(&tunnel->hash_lock);
+ for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) {
+ struct ipgre_tap_bridge_entry *entry;
+ struct hlist_node *h, *n;
+ hlist_for_each_entry_safe(entry, h, n,
+ &tunnel->hash[i], hlist)
+ {
+ unsigned long this_timer;
+ this_timer = entry->updated + delay;
+ if (time_before_eq(this_timer, jiffies))
+ ipgre_tap_bridge_delete(entry);
+ else if (time_before(this_timer, next_timer))
+ next_timer = this_timer;
+ }
+ }
+ spin_unlock(&tunnel->hash_lock);
+ mod_timer(&tunnel->gc_timer, round_jiffies_up(next_timer));
+}
+
+void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel)
+{
+ int i;
+ spin_lock_bh(&tunnel->hash_lock);
+ for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) {
+ struct ipgre_tap_bridge_entry *entry;
+ struct hlist_node *h, *n;
+ hlist_for_each_entry_safe(entry, h, n,
+ &tunnel->hash[i], hlist)
+ {
+ ipgre_tap_bridge_delete(entry);
+ }
+ }
+ spin_unlock_bh(&tunnel->hash_lock);
+}
+
+struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(struct ip_tunnel *tunnel,
+ const unsigned char *addr)
+{
+ struct hlist_node *h;
+ struct ipgre_tap_bridge_entry *entry;
+ hlist_for_each_entry_rcu(entry, h,
+ &tunnel->hash[ipgre_tap_bridge_hash(addr)], hlist)
+ {
+ if (!compare_ether_addr(entry->addr.addr, addr)) {
+ if (unlikely(ipgre_tap_bridge_has_expired(tunnel,
+ entry)))
+ break;
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find(
+ struct hlist_head *head,
+ const unsigned char *addr)
+{
+ struct hlist_node *h;
+ struct ipgre_tap_bridge_entry *entry;
+ hlist_for_each_entry(entry, h, head, hlist) {
+ if (!compare_ether_addr(entry->addr.addr, addr))
+ return entry;
+ }
+ return NULL;
+}
+
+
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu(
+ struct hlist_head *head,
+ const unsigned char *addr)
+{
+ struct hlist_node *h;
+ struct ipgre_tap_bridge_entry *entry;
+ hlist_for_each_entry_rcu(entry, h, head, hlist) {
+ if (!compare_ether_addr(entry->addr.addr, addr))
+ return entry;
+ }
+ return NULL;
+}
+
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create(
+ struct hlist_head *head,
+ u32 source,
+ const unsigned char *addr, struct net_device *dev)
+{
+ struct ipgre_tap_bridge_entry *entry;
+ entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC);
+ if (entry) {
+ memcpy(entry->addr.addr, addr, ETH_ALEN);
+ hlist_add_head_rcu(&entry->hlist, head);
+ entry->raddr = source;
+ entry->dev = dev;
+ entry->updated = jiffies;
+ }
+ return entry;
+}
+
+__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
+ const unsigned char *addr)
+{
+ struct ipgre_tap_bridge_entry *entry;
+ entry = __ipgre_tap_bridge_get(tunnel, addr);
+ if (entry == NULL)
+ return 0;
+ else
+ return entry->raddr;
+}
+
+#endif
/* Tunnel hash table */

/*
@@ -563,6 +753,13 @@ static int ipgre_rcv(struct sk_buff *skb
int offset = 4;
__be16 gre_proto;

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ u32 orig_source;
+ struct hlist_head *head;
+ struct ipgre_tap_bridge_entry *entry;
+ struct ethhdr *tethhdr;
+#endif
+
if (!pskb_may_pull(skb, 16))
goto drop_nolock;

@@ -659,10 +856,39 @@ static int ipgre_rcv(struct sk_buff *skb
tunnel->dev->stats.rx_errors++;
goto drop;
}
-
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ orig_source = iph->saddr;
+#endif
iph = ip_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ if (ipv4_is_multicast(tunnel->parms.iph.daddr)) {
+ tethhdr = eth_hdr(skb);
+ if (!is_multicast_ether_addr(
+ tethhdr->h_source)) {
+ head = &tunnel->hash[
+ ipgre_tap_bridge_hash(
+ tethhdr->h_source)];
+ entry = ipgre_tap_bridge_find_rcu(head,
+ tethhdr->h_source);
+ if (likely(entry)) {
+ entry->raddr = orig_source;
+ entry->updated = jiffies;
+ } else {
+ spin_lock_bh(&tunnel->hash_lock);
+ if (!ipgre_tap_bridge_find(head,
+ tethhdr->h_source))
+ ipgre_tap_bridge_create(
+ head,
+ orig_source,
+ tethhdr->h_source,
+ tunnel->dev);
+ spin_unlock_bh(&tunnel->hash_lock);
+ }
+ }
+ }
+#endif
}

tstats = this_cpu_ptr(tunnel->dev->tstats);
@@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str
tiph = &tunnel->parms.iph;
}

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ rcu_read_lock();
+ if ((dev->type == ARPHRD_ETHER) &&
+ ipv4_is_multicast(tunnel->parms.iph.daddr))
+ dst = ipgre_tap_bridge_get_raddr(tunnel,
+ ((struct ethhdr *)skb->data)->h_dest);
+ rcu_read_unlock();
+ if (dst == 0)
+ dst = tiph->daddr;
+ if (dst == 0) {
+#else
if ((dst = tiph->daddr) == 0) {
+#endif
/* NBMA tunnel */

if (skb_dst(skb) == NULL) {
@@ -1209,6 +1447,16 @@ static int ipgre_open(struct net_device
return -EADDRNOTAVAIL;
t->mlink = dev->ifindex;
ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ if (t->dev->type == ARPHRD_ETHER) {
+ INIT_HLIST_HEAD(t->hash);
+ spin_lock_init(&t->hash_lock);
+ t->ageing_time = 300 * HZ;
+ setup_timer(&t->gc_timer, ipgre_tap_bridge_cleanup,
+ (unsigned long) t);
+ mod_timer(&t->gc_timer, jiffies + t->ageing_time);
+ }
+#endif
}
return 0;
}
@@ -1219,6 +1467,12 @@ static int ipgre_close(struct net_device

if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev;
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ if (t->dev->type == ARPHRD_ETHER) {
+ ipgre_tap_bridge_flush(t);
+ del_timer_sync(&t->gc_timer);
+ }
+#endif
in_dev = inetdev_by_index(dev_net(dev), t->mlink);
if (in_dev)
ip_mc_dec_group(in_dev, t->parms.iph.daddr);
@@ -1341,6 +1595,12 @@ static int __net_init ipgre_init_net(str
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
int err;

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ err = ipgre_tap_bridge_init();
+ if (err)
+ goto err_out;
+#endif
+
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
ipgre_tunnel_setup);
if (!ign->fb_tunnel_dev) {
@@ -1362,6 +1622,10 @@ static int __net_init ipgre_init_net(str
err_reg_dev:
ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ ipgre_tap_bridge_fini();
+err_out:
+#endif
return err;
}

@@ -1375,6 +1639,9 @@ static void __net_exit ipgre_exit_net(st
ipgre_destroy_tunnels(ign, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ ipgre_tap_bridge_fini();
+#endif
}

static struct pernet_operations ipgre_net_ops = {
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
email Follow the discussionReplies 14 repliesReplies Make a reply

Similar topics

Replies

#1 David Lamparter
January 16th, 2012 - 03:30 pm ET | Report spam
At the risk of repeating myself, Linux GRE support already has
provisions for multipoint tunnels. And unlike your code, those reuse the
existing neighbor table infrastructure, including all of its user
interface and introspection capabilities.

It's actually slightly visible in your patch:

On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote:
+++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100
@@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str


[...]
/* NBMA tunnel */

if (skb_dst(skb) == NULL) {




-David
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#2 Eric Dumazet
January 16th, 2012 - 03:30 pm ET | Report spam
Le lundi 16 janvier 2012 à 20:45 +0100, Štefan Gula a écrit :
From: Stefan Gula

This patch is an extension for current Ethernet over GRE
implementation, which allows user to create virtual bridge (multipoint
VPN) and forward traffic based on Ethernet MAC address information in
it. It simulates the Bridge behavior learning mechanism, but instead
of learning port ID from which given MAC address comes, it learns IP
address of peer which encapsulated given packet. Multicast, Broadcast
and unknown-multicast traffic is send over network as multicast
encapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be
represented as one single virtual switch on logical level and be also
represented as one multicast IPv4 address on network level.

Signed-off-by: Stefan Gula



code was merged with latest bridge code and should be easily comparable




Please make sure it applies properly on net-next tree.

That is mandatory.

diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h
linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100
@@ -27,6 +27,14 @@ struct ip_tunnel {
__u32 o_seqno; /* The last output seqno */
int hlen; /* Precalculated GRE header length */
int mlink;
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+#define GRETAP_BR_HASH_BITS 8
+#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS)
+ struct hlist_head hash[GRETAP_BR_HASH_SIZE];
+ spinlock_t hash_lock;
+ unsigned long ageing_time;
+ struct timer_list gc_timer;
+#endif

struct ip_tunnel_parm parms;

diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig
linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100
@@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.

+config NET_IPGRE_BRIDGE
+ bool "IP: Ethernet over multipoint GRE over IP"
+ depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST
+ help
+ Allows you to use multipoint GRE VPN as virtual switch and interconnect
+ several L2 endpoints over L3 routed infrastructure. It is useful for
+ creating multipoint L2 VPNs which can be later used inside bridge
+ interfaces If you want to use. GRE multipoint L2 VPN feature say Y.
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c
linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 20:42:03.000000000 +0100
@@ -52,6 +52,11 @@
#include <net/ip6_route.h>
#endif

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+#include <linux/jhash.h>
+#include <asm/unaligned.h>
+#endif
+
/*
Problems & solutions
@@ -134,6 +139,191 @@ struct ipgre_net {
struct net_device *fb_tunnel_dev;
};

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ /*
+ * This part of code includes codes to enable L2 ethernet
+ * switch virtualization over IP routed infrastructure with
+ * utilization of multicast capable endpoint using Ethernet
+ * over GRE
+ *
+ * Author: Stefan Gula
+ * Signed-off-by: Stefan Gula
+ */
+struct mac_addr {
+ unsigned char addr[6];



Did I mention : ETH_ALEN ?

+};
+
+struct ipgre_tap_bridge_entry {
+ struct hlist_node hlist;
+ u32 raddr;



__be32 raddr ?

+ struct mac_addr addr;
+ struct net_device *dev;
+ struct rcu_head rcu;
+ unsigned long updated;
+};
+
+static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly;
+static u32 ipgre_salt __read_mostly;
+
+int __net_init ipgre_tap_bridge_init(void)
+{
+ ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache",
+ sizeof(struct ipgre_tap_bridge_entry),
+ 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!ipgre_tap_bridge_cache)
+ return -ENOMEM;
+ get_random_bytes(&ipgre_salt, sizeof(ipgre_salt));
+ return 0;
+}
+
+void ipgre_tap_bridge_fini(void)
+{
+ kmem_cache_destroy(ipgre_tap_bridge_cache);
+}
+
+static inline int ipgre_tap_bridge_hash(const unsigned char *mac)
+{
+ u32 key = get_unaligned((u32 *)(mac + 2));
+ return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1);
+}
+
+static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel,
+ const struct ipgre_tap_bridge_entry *entry)
+{
+ return time_before_eq(entry->updated + tunnel->ageing_time,
+ jiffies);
+}
+
+static void ipgre_tap_bridge_rcu_free(struct rcu_head *head)
+{
+ struct ipgre_tap_bridge_entry *entry
+ = container_of(head, struct ipgre_tap_bridge_entry, rcu);
+ kmem_cache_free(ipgre_tap_bridge_cache, entry);
+}
+
+static inline void ipgre_tap_bridge_delete(struct
ipgre_tap_bridge_entry *entry)
+{
+ hlist_del_rcu(&entry->hlist);
+ call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free);
+}



Did I mention : kfree_rcu() ?

+
+
+
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find(
+ struct hlist_head *head,
+ const unsigned char *addr)
+{
+ struct hlist_node *h;
+ struct ipgre_tap_bridge_entry *entry;
+ hlist_for_each_entry(entry, h, head, hlist) {
+ if (!compare_ether_addr(entry->addr.addr, addr))
+ return entry;
+ }
+ return NULL;
+}
+
+
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu(
+ struct hlist_head *head,
+ const unsigned char *addr)
+{
+ struct hlist_node *h;
+ struct ipgre_tap_bridge_entry *entry;
+ hlist_for_each_entry_rcu(entry, h, head, hlist) {
+ if (!compare_ether_addr(entry->addr.addr, addr))
+ return entry;
+ }
+ return NULL;
+}
+
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create(
+ struct hlist_head *head,
+ u32 source,



__be32 source,

+ const unsigned char *addr, struct net_device *dev)
+{
+ struct ipgre_tap_bridge_entry *entry;
+ entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC);
+ if (entry) {
+ memcpy(entry->addr.addr, addr, ETH_ALEN);
+ hlist_add_head_rcu(&entry->hlist, head);



Thats bogus.

You must init all entry fields _before_ inserting entry in hash table.

+ entry->raddr = source;
+ entry->dev = dev;
+ entry->updated = jiffies;
+ }
+ return entry;
+}
+
+__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
+ const unsigned char *addr)
+{
+ struct ipgre_tap_bridge_entry *entry;
+ entry = __ipgre_tap_bridge_get(tunnel, addr);
+ if (entry == NULL)
+ return 0;
+ else
+ return entry->raddr;
+}
+
+#endif
/* Tunnel hash table */

/*
@@ -563,6 +753,13 @@ static int ipgre_rcv(struct sk_buff *skb
int offset = 4;
__be16 gre_proto;

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ u32 orig_source;



Please run sparse on your code... (make C=2 ...)

orig_source is not u32, but __be32

+ struct hlist_head *head;
+ struct ipgre_tap_bridge_entry *entry;
+ struct ethhdr *tethhdr;
+#endif
+
if (!pskb_may_pull(skb, 16))
goto drop_nolock;

@@ -659,10 +856,39 @@ static int ipgre_rcv(struct sk_buff *skb
tunnel->dev->stats.rx_errors++;
goto drop;
}
-
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ orig_source = iph->saddr;
+#endif
iph = ip_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ if (ipv4_is_multicast(tunnel->parms.iph.daddr)) {
+ tethhdr = eth_hdr(skb);
+ if (!is_multicast_ether_addr(
+ tethhdr->h_source)) {
+ head = &tunnel->hash[
+ ipgre_tap_bridge_hash(
+ tethhdr->h_source)];
+ entry = ipgre_tap_bridge_find_rcu(head,
+ tethhdr->h_source);
+ if (likely(entry)) {
+ entry->raddr = orig_source;
+ entry->updated = jiffies;
+ } else {
+ spin_lock_bh(&tunnel->hash_lock);



You dont need the _bh() variant here, since we run from softirq handler.

+ if (!ipgre_tap_bridge_find(head,
+ tethhdr->h_source))
+ ipgre_tap_bridge_create(
+ head,
+ orig_source,
+ tethhdr->h_source,
+ tunnel->dev);
+ spin_unlock_bh(&tunnel->hash_lock);
+ }
+ }
+ }
+#endif
}

tstats = this_cpu_ptr(tunnel->dev->tstats);
@@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str
tiph = &tunnel->parms.iph;
}

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ rcu_read_lock();
+ if ((dev->type == ARPHRD_ETHER) &&
+ ipv4_is_multicast(tunnel->parms.iph.daddr))
+ dst = ipgre_tap_bridge_get_raddr(tunnel,
+ ((struct ethhdr *)skb->data)->h_dest);
+ rcu_read_unlock();



this rcu_read_lock()/rcu_read_unlock() pair should be done in
ipgre_tap_bridge_get_raddr() instead... so we dont hit this for all
packets.

+ if (dst == 0)
+ dst = tiph->daddr;
+ if (dst == 0) {
+#else
if ((dst = tiph->daddr) == 0) {
+#endif
/* NBMA tunnel */




General comment :

It would be nice this stuff is installed on a new "ip tunnel add"
option...

Hash table is 2048 bytes long... and an "ip tunnel " option would permit
to size the hash table eventually, instead of fixed 256 slots.



To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#3 Anonym
January 16th, 2012 - 06:00 pm ET | Report spam
Dňa 16. januára 2012 21:29, David Lamparter napísal/a:
At the risk of repeating myself, Linux GRE support already has
provisions for multipoint tunnels. And unlike your code, those reuse the
existing neighbor table infrastructure, including all of its user
interface and introspection capabilities.

It's actually slightly visible in your patch:

On Mon, Jan 16, 2012 at 08:45:14PM +0100, Štefan Gula wrote:
+++ linux-3.2.1-my/net/ipv4/ip_gre.c  2012-01-16 20:42:03.000000000 +0100
@@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str


[...]
              /* NBMA tunnel */

              if (skb_dst(skb) == NULL) {




-David



That code you are referring to is used only for routed traffic inside
GRE - L3 traffic over L3 routed infrastructure. My patch is dealing
with L2 traffic over L3 routed infrastructure - so the decision here
is based on destination MAC addresses and not based on IPv4/IPv6
addresses.

Stefan Gula
CCNP, CCIP
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#4 Eric Dumazet
January 16th, 2012 - 11:50 pm ET | Report spam
Le mardi 17 janvier 2012 à 00:11 +0100, Štefan Gula a écrit :

I agree with you, but for the start of this feature I believe static
slots size is enough here - same limitation is inside the original
linux bridge code. I have merged hopefully all your comments and here
is the newest patch:





1) Before sending a new version of your patch, please fix your mailer,
you can read Documentation/email-clients.txt for hints.

Send it to you and check you can apply it.
Then, once you are confident its OK, you can send it to netdev.

Right now, it doesnt apply, because of unexpected line wraps.

# cd next-next
# cat /tmp/patch | patch -p1
patching file include/net/ipip.h
patching file net/ipv4/Kconfig
patching file net/ipv4/ip_gre.c
patch: **** malformed patch at line 495: ipgre_tap_bridge_entry *entry)


2) You call ipgre_tap_bridge_fini() from ipgre_exit_net() and
ipgre_init_net(), thats completely bogus if CONFIG_NET_NS=y

Just remove the struct kmem_cache *ipgre_tap_bridge_cache
and use instead kmalloc(sizeof(...))/kfree(ptr) instead.

3) ipgre_tap_bridge_init() should not be __net_init, but __init


4) I cant find why you store 'struct net_device *dev;' in a
ipgre_tap_bridge_entry, it seems you never read it ?

5) Also please add an empty line between variable declaration and
function body. Also, we prefer an alignement of parameters.

For example :

static __be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
const unsigned char *addr)
{
__be32 raddr;
struct ipgre_tap_bridge_entry *entry;
rcu_read_lock();
entry = __ipgre_tap_bridge_get(tunnel, addr);
if (entry == NULL)
raddr = 0;
else
raddr = entry->raddr;
rcu_read_unlock();
return raddr;
}

should be :

static __be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
const unsigned char *addr)
{
__be32 raddr = 0;
struct ipgre_tap_bridge_entry *entry;

rcu_read_lock();
entry = __ipgre_tap_bridge_get(tunnel, addr);
if (entry)
raddr = entry->raddr;
rcu_read_unlock();

return raddr;
}






To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
#5 Anonym
January 17th, 2012 - 03:10 am ET | Report spam
Dňa 17. januára 2012 5:47, Eric Dumazet napísal/a:
Le mardi 17 janvier 2012 à 00:11 +0100, Štefan Gula a écrit :

I agree with you, but for the start of this feature I believe static
slots size is enough here - same limitation is inside the original
linux bridge code. I have merged hopefully all your comments and here
is the newest patch:





1) Before sending a new version of your patch, please fix your mailer,
you can read Documentation/email-clients.txt for hints.



Sorry, I have to test it as I am using clasical gmail web GUI to send emails

Send it to you and check you can apply it.
Then, once you are confident its OK, you can send it to netdev.

Right now, it doesnt apply, because of unexpected line wraps.

# cd next-next
# cat /tmp/patch | patch -p1
patching file include/net/ipip.h
patching file net/ipv4/Kconfig
patching file net/ipv4/ip_gre.c
patch: **** malformed patch at line 495: ipgre_tap_bridge_entry *entry)


2) You call ipgre_tap_bridge_fini() from ipgre_exit_net() and
ipgre_init_net(), thats completely bogus if CONFIG_NET_NS=y

Just remove the struct kmem_cache *ipgre_tap_bridge_cache
and use instead kmalloc(sizeof(...))/kfree(ptr) instead.



As this is completely the same part of code from net/bridge/br_fdb.c,
can you give me a hint about how change that as I believe it should be
changed also there?

3) ipgre_tap_bridge_init() should not be __net_init, but __init



Ok


4) I cant find why you store 'struct net_device *dev;' in a
ipgre_tap_bridge_entry, it seems you never read it ?



yes you are right, it is not needed - removed from code

5) Also please add an empty line between variable declaration and
function body. Also, we prefer an alignement of parameters.



I used scripts/checkpatch.pl to check my coding styles, but apparently
this is missing from it as it never complains before about this.
Anyway hopefully done ok based your comments

For example :

static __be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
       const unsigned char *addr)
{
       __be32 raddr;
       struct ipgre_tap_bridge_entry *entry;
       rcu_read_lock();
       entry = __ipgre_tap_bridge_get(tunnel, addr);
       if (entry == NULL)
               raddr = 0;
       else
               raddr = entry->raddr;
       rcu_read_unlock();
       return raddr;
}

should be :

static __be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
                                        const unsigned char *addr)
{
       __be32 raddr = 0;
       struct ipgre_tap_bridge_entry *entry;

       rcu_read_lock();
       entry = __ipgre_tap_bridge_get(tunnel, addr);
       if (entry)
               raddr = entry->raddr;
       rcu_read_unlock();

       return raddr;
}








To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Replies Reply to this message
Help Create a new topicNext page Replies Make a reply
Search Make your own search