Commit e1a80002 authored by Herbert Xu, committed by David S. Miller

gre: Add Transparent Ethernet Bridging

This patch adds support for Ethernet over GRE encapsulation.
This is exposed to user-space with a new link type of "gretap"
instead of "gre".  It will create an ARPHRD_ETHER device in
lieu of the usual ARPHRD_IPGRE.

Note that to preserve backwards compatibility, all Transparent
Ethernet Bridging packets are passed to an ARPHRD_IPGRE tunnel
if its key matches and there is no ARPHRD_ETHER device whose
key matches more closely.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c19e654d
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */ #define ETH_P_DIAG 0x6005 /* DEC Diagnostics */
#define ETH_P_CUST 0x6006 /* DEC Customer use */ #define ETH_P_CUST 0x6006 /* DEC Customer use */
#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */ #define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */
#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */
#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */ #define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */
#define ETH_P_ATALK 0x809B /* Appletalk DDP */ #define ETH_P_ATALK 0x809B /* Appletalk DDP */
#define ETH_P_AARP 0x80F3 /* Appletalk AARP */ #define ETH_P_AARP 0x80F3 /* Appletalk AARP */
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/inetdevice.h> #include <linux/inetdevice.h>
#include <linux/igmp.h> #include <linux/igmp.h>
#include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h> #include <linux/if_ether.h>
#include <net/sock.h> #include <net/sock.h>
...@@ -166,38 +167,64 @@ static DEFINE_RWLOCK(ipgre_lock); ...@@ -166,38 +167,64 @@ static DEFINE_RWLOCK(ipgre_lock);
/* Given src, dst and key, find appropriate for input tunnel. */ /* Given src, dst and key, find appropriate for input tunnel. */
static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
__be32 remote, __be32 local, __be32 key) __be32 remote, __be32 local,
__be32 key, __be16 gre_proto)
{ {
unsigned h0 = HASH(remote); unsigned h0 = HASH(remote);
unsigned h1 = HASH(key); unsigned h1 = HASH(key);
struct ip_tunnel *t; struct ip_tunnel *t;
struct ip_tunnel *t2 = NULL;
struct ipgre_net *ign = net_generic(net, ipgre_net_id); struct ipgre_net *ign = net_generic(net, ipgre_net_id);
int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
ARPHRD_ETHER : ARPHRD_IPGRE;
for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
return t; if (t->dev->type == dev_type)
return t;
if (t->dev->type == ARPHRD_IPGRE && !t2)
t2 = t;
}
} }
} }
for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
if (remote == t->parms.iph.daddr) { if (remote == t->parms.iph.daddr) {
if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
return t; if (t->dev->type == dev_type)
return t;
if (t->dev->type == ARPHRD_IPGRE && !t2)
t2 = t;
}
} }
} }
for (t = ign->tunnels_l[h1]; t; t = t->next) { for (t = ign->tunnels_l[h1]; t; t = t->next) {
if (local == t->parms.iph.saddr || if (local == t->parms.iph.saddr ||
(local == t->parms.iph.daddr && (local == t->parms.iph.daddr &&
ipv4_is_multicast(local))) { ipv4_is_multicast(local))) {
if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
return t; if (t->dev->type == dev_type)
return t;
if (t->dev->type == ARPHRD_IPGRE && !t2)
t2 = t;
}
} }
} }
for (t = ign->tunnels_wc[h1]; t; t = t->next) { for (t = ign->tunnels_wc[h1]; t; t = t->next) {
if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
return t; if (t->dev->type == dev_type)
return t;
if (t->dev->type == ARPHRD_IPGRE && !t2)
t2 = t;
}
} }
if (t2)
return t2;
if (ign->fb_tunnel_dev->flags&IFF_UP) if (ign->fb_tunnel_dev->flags&IFF_UP)
return netdev_priv(ign->fb_tunnel_dev); return netdev_priv(ign->fb_tunnel_dev);
return NULL; return NULL;
...@@ -252,25 +279,37 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) ...@@ -252,25 +279,37 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
} }
} }
static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
struct ip_tunnel_parm *parms, int create) struct ip_tunnel_parm *parms,
int type)
{ {
__be32 remote = parms->iph.daddr; __be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr; __be32 local = parms->iph.saddr;
__be32 key = parms->i_key; __be32 key = parms->i_key;
struct ip_tunnel *t, **tp, *nt; struct ip_tunnel *t, **tp;
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
key == t->parms.i_key &&
type == t->dev->type)
break;
return t;
}
static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
struct ip_tunnel_parm *parms, int create)
{
struct ip_tunnel *t, *nt;
struct net_device *dev; struct net_device *dev;
char name[IFNAMSIZ]; char name[IFNAMSIZ];
struct ipgre_net *ign = net_generic(net, ipgre_net_id); struct ipgre_net *ign = net_generic(net, ipgre_net_id);
for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) { t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { if (t || !create)
if (key == t->parms.i_key) return t;
return t;
}
}
if (!create)
return NULL;
if (parms->name[0]) if (parms->name[0])
strlcpy(name, parms->name, IFNAMSIZ); strlcpy(name, parms->name, IFNAMSIZ);
...@@ -385,8 +424,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) ...@@ -385,8 +424,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
read_lock(&ipgre_lock); read_lock(&ipgre_lock);
t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
(flags&GRE_KEY) ? flags & GRE_KEY ?
*(((__be32*)p) + (grehlen>>2) - 1) : 0); *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
p[1]);
if (t == NULL || t->parms.iph.daddr == 0 || if (t == NULL || t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr)) ipv4_is_multicast(t->parms.iph.daddr))
goto out; goto out;
...@@ -436,6 +476,7 @@ static int ipgre_rcv(struct sk_buff *skb) ...@@ -436,6 +476,7 @@ static int ipgre_rcv(struct sk_buff *skb)
u32 seqno = 0; u32 seqno = 0;
struct ip_tunnel *tunnel; struct ip_tunnel *tunnel;
int offset = 4; int offset = 4;
__be16 gre_proto;
if (!pskb_may_pull(skb, 16)) if (!pskb_may_pull(skb, 16))
goto drop_nolock; goto drop_nolock;
...@@ -475,20 +516,22 @@ static int ipgre_rcv(struct sk_buff *skb) ...@@ -475,20 +516,22 @@ static int ipgre_rcv(struct sk_buff *skb)
} }
} }
gre_proto = *(__be16 *)(h + 2);
read_lock(&ipgre_lock); read_lock(&ipgre_lock);
if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
iph->saddr, iph->daddr, key)) != NULL) { iph->saddr, iph->daddr, key,
gre_proto))) {
struct net_device_stats *stats = &tunnel->dev->stats; struct net_device_stats *stats = &tunnel->dev->stats;
secpath_reset(skb); secpath_reset(skb);
skb->protocol = *(__be16*)(h + 2); skb->protocol = gre_proto;
/* WCCP version 1 and 2 protocol decoding. /* WCCP version 1 and 2 protocol decoding.
* - Change protocol to IP * - Change protocol to IP
* - When dealing with WCCPv2, Skip extra 4 bytes in GRE header * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
*/ */
if (flags == 0 && if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
skb->protocol == htons(ETH_P_WCCP)) {
skb->protocol = htons(ETH_P_IP); skb->protocol = htons(ETH_P_IP);
if ((*(h + offset) & 0xF0) != 0x40) if ((*(h + offset) & 0xF0) != 0x40)
offset += 4; offset += 4;
...@@ -496,7 +539,6 @@ static int ipgre_rcv(struct sk_buff *skb) ...@@ -496,7 +539,6 @@ static int ipgre_rcv(struct sk_buff *skb)
skb->mac_header = skb->network_header; skb->mac_header = skb->network_header;
__pskb_pull(skb, offset); __pskb_pull(skb, offset);
skb_reset_network_header(skb);
skb_postpull_rcsum(skb, skb_transport_header(skb), offset); skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
skb->pkt_type = PACKET_HOST; skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST #ifdef CONFIG_NET_IPGRE_BROADCAST
...@@ -524,13 +566,30 @@ static int ipgre_rcv(struct sk_buff *skb) ...@@ -524,13 +566,30 @@ static int ipgre_rcv(struct sk_buff *skb)
} }
tunnel->i_seqno = seqno + 1; tunnel->i_seqno = seqno + 1;
} }
/* Warning: All skb pointers will be invalidated! */
if (tunnel->dev->type == ARPHRD_ETHER) {
if (!pskb_may_pull(skb, ETH_HLEN)) {
stats->rx_length_errors++;
stats->rx_errors++;
goto drop;
}
iph = ip_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
}
stats->rx_packets++; stats->rx_packets++;
stats->rx_bytes += skb->len; stats->rx_bytes += skb->len;
skb->dev = tunnel->dev; skb->dev = tunnel->dev;
dst_release(skb->dst); dst_release(skb->dst);
skb->dst = NULL; skb->dst = NULL;
nf_reset(skb); nf_reset(skb);
skb_reset_network_header(skb);
ipgre_ecn_decapsulate(iph, skb); ipgre_ecn_decapsulate(iph, skb);
netif_rx(skb); netif_rx(skb);
read_unlock(&ipgre_lock); read_unlock(&ipgre_lock);
return(0); return(0);
...@@ -565,7 +624,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -565,7 +624,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_error; goto tx_error;
} }
if (dev->header_ops) { if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
gre_hlen = 0; gre_hlen = 0;
tiph = (struct iphdr*)skb->data; tiph = (struct iphdr*)skb->data;
} else { } else {
...@@ -741,8 +803,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -741,8 +803,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
} }
((__be16*)(iph+1))[0] = tunnel->parms.o_flags; ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
((__be16*)(iph+1))[1] = skb->protocol; ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
htons(ETH_P_TEB) : skb->protocol;
if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
...@@ -804,7 +867,9 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) ...@@ -804,7 +867,9 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
tdev = rt->u.dst.dev; tdev = rt->u.dst.dev;
ip_rt_put(rt); ip_rt_put(rt);
} }
dev->flags |= IFF_POINTOPOINT;
if (dev->type != ARPHRD_ETHER)
dev->flags |= IFF_POINTOPOINT;
} }
if (!tdev && tunnel->parms.link) if (!tdev && tunnel->parms.link)
...@@ -1250,6 +1315,30 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) ...@@ -1250,6 +1315,30 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
return 0; return 0;
} }
/*
 * Validate the netlink attributes supplied for a "gretap" link.
 *
 * Checks performed here, in order:
 *   - if a link-layer address (IFLA_ADDRESS) is given, it must be exactly
 *     ETH_ALEN bytes long (-EINVAL otherwise) and must pass
 *     is_valid_ether_addr() (-EADDRNOTAVAIL otherwise);
 *   - if GRE-specific data is present and carries IFLA_GRE_REMOTE, the
 *     remote address must be non-zero (-EINVAL otherwise).
 *
 * When those checks pass, the result of ipgre_tunnel_validate(tb, data)
 * is returned.
 */
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
	struct nlattr *hwaddr = tb[IFLA_ADDRESS];

	if (hwaddr) {
		if (nla_len(hwaddr) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(hwaddr)))
			return -EADDRNOTAVAIL;
	}

	if (data && data[IFLA_GRE_REMOTE]) {
		__be32 remote;

		/* Copy out rather than dereference the attribute payload in place. */
		memcpy(&remote, nla_data(data[IFLA_GRE_REMOTE]), 4);
		if (!remote)
			return -EINVAL;
	}

	return ipgre_tunnel_validate(tb, data);
}
static void ipgre_netlink_parms(struct nlattr *data[], static void ipgre_netlink_parms(struct nlattr *data[],
struct ip_tunnel_parm *parms) struct ip_tunnel_parm *parms)
{ {
...@@ -1291,6 +1380,35 @@ static void ipgre_netlink_parms(struct nlattr *data[], ...@@ -1291,6 +1380,35 @@ static void ipgre_netlink_parms(struct nlattr *data[],
parms->iph.frag_off = htons(IP_DF); parms->iph.frag_off = htons(IP_DF);
} }
/*
 * Initialise the tunnel private data of a gretap device.
 *
 * Records the owning net_device in the tunnel, copies the device name into
 * the tunnel parameters and calls ipgre_tunnel_bind_dev() on the device.
 * The value returned by ipgre_tunnel_bind_dev() is not used here.
 *
 * Always returns 0.
 */
static int ipgre_tap_init(struct net_device *dev)
{
	struct ip_tunnel *priv = netdev_priv(dev);

	priv->dev = dev;
	strcpy(priv->parms.name, dev->name);

	ipgre_tunnel_bind_dev(dev);

	return 0;
}
/*
 * Set up a net_device as a gretap device.
 *
 * ether_setup() runs first; every assignment below is applied on top of
 * whatever it configured, so it must stay ahead of them.
 */
static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);

	/* Lifecycle hooks. */
	dev->init		= ipgre_tap_init;
	dev->uninit		= ipgre_tunnel_uninit;
	dev->destructor		= free_netdev;

	/* Transmit and MTU changes reuse the ipgre_tunnel_* handlers. */
	dev->hard_start_xmit	= ipgre_tunnel_xmit;
	dev->change_mtu		= ipgre_tunnel_change_mtu;

	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->iflink		= 0;
}
static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[]) struct nlattr *data[])
{ {
...@@ -1303,9 +1421,12 @@ static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], ...@@ -1303,9 +1421,12 @@ static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
nt = netdev_priv(dev); nt = netdev_priv(dev);
ipgre_netlink_parms(data, &nt->parms); ipgre_netlink_parms(data, &nt->parms);
if (ipgre_tunnel_locate(net, &nt->parms, 0)) if (ipgre_tunnel_find(net, &nt->parms, dev->type))
return -EEXIST; return -EEXIST;
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
random_ether_addr(dev->dev_addr);
mtu = ipgre_tunnel_bind_dev(dev); mtu = ipgre_tunnel_bind_dev(dev);
if (!tb[IFLA_MTU]) if (!tb[IFLA_MTU])
dev->mtu = mtu; dev->mtu = mtu;
...@@ -1455,6 +1576,19 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { ...@@ -1455,6 +1576,19 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
.fill_info = ipgre_fill_info, .fill_info = ipgre_fill_info,
}; };
/*
 * rtnl link operations for the "gretap" link kind.
 *
 * Uses its own setup and validate callbacks (ipgre_tap_setup,
 * ipgre_tap_validate); the remaining callbacks and the attribute policy
 * are the generic ipgre_* ones.
 */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.priv_size	= sizeof(struct ip_tunnel),

	/* Netlink attribute description. */
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,

	/* gretap-specific callbacks. */
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,

	/* Callbacks taken from the generic ipgre_* set. */
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
/* /*
* And now the modules code and kernel interface. * And now the modules code and kernel interface.
*/ */
...@@ -1478,9 +1612,15 @@ static int __init ipgre_init(void) ...@@ -1478,9 +1612,15 @@ static int __init ipgre_init(void)
if (err < 0) if (err < 0)
goto rtnl_link_failed; goto rtnl_link_failed;
err = rtnl_link_register(&ipgre_tap_ops);
if (err < 0)
goto tap_ops_failed;
out: out:
return err; return err;
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed: rtnl_link_failed:
unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
gen_device_failed: gen_device_failed:
...@@ -1490,6 +1630,7 @@ static int __init ipgre_init(void) ...@@ -1490,6 +1630,7 @@ static int __init ipgre_init(void)
static void __exit ipgre_fini(void) static void __exit ipgre_fini(void)
{ {
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops); rtnl_link_unregister(&ipgre_link_ops);
unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
...@@ -1500,3 +1641,4 @@ module_init(ipgre_init); ...@@ -1500,3 +1641,4 @@ module_init(ipgre_init);
module_exit(ipgre_fini); module_exit(ipgre_fini);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_ALIAS("rtnl-link-gre"); MODULE_ALIAS("rtnl-link-gre");
MODULE_ALIAS("rtnl-link-gretap");
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment