Commit df0651f8 authored by David S. Miller

Merge branch 'ip6_tunnel-add-MPLS-support'

Vadim Fedorenko says:

====================
ip6_tunnel: add MPLS support

The support for MPLS-in-IPv4 was added earlier. This patchset adds
support for MPLS-in-IPv6.

Changes in v2:
- Eliminate IS_ENABLED(CONFIG_MPLS) ifdefs
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 87566b44 1515aa70
......@@ -89,6 +89,11 @@ struct ip6_tnl_net {
struct ip6_tnl __rcu *collect_md_tun;
};
/*
 * Tell callers whether the MPLS-in-IPv6 code paths should be used.
 *
 * Returns the value of IS_ENABLED(CONFIG_MPLS). Wrapping the check in a
 * function lets callers test it with a plain `if` instead of preprocessor
 * conditionals.
 */
static inline int ip6_tnl_mpls_supported(void)
{
	const int mpls_enabled = IS_ENABLED(CONFIG_MPLS);

	return mpls_enabled;
}
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
struct pcpu_sw_netstats tmp, sum = { 0 };
......@@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
/*
 * ICMPv6 error handler for MPLS-in-IPv6 tunnels.
 *
 * Passes the error on to ip6_tnl_err() with IPPROTO_MPLS and returns that
 * function's result unchanged. The type, code and info values are handed
 * over through local copies (info converted to host byte order), together
 * with a zero-initialised message slot. Whatever ip6_tnl_err() writes back
 * into those locals is not consumed here.
 */
static int
mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	    u8 type, u8 code, int offset, __be32 info)
{
	u8 relay_type = type;
	u8 relay_code = code;
	int relay_msg = 0;
	__u32 relay_info = ntohl(info);

	return ip6_tnl_err(skb, IPPROTO_MPLS, opt, &relay_type, &relay_code,
			   &relay_msg, &relay_info, offset);
}
static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
const struct ipv6hdr *ipv6h,
struct sk_buff *skb)
......@@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
return IP6_ECN_decapsulate(ipv6h, skb);
}
/*
 * DSCP/ECN decapsulation hook for MPLS-in-IPv6.
 *
 * ECN is not supported in AF_MPLS, so none of the arguments are inspected
 * and the function always returns 0.
 */
static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
					       const struct ipv6hdr *ipv6h,
					       struct sk_buff *skb)
{
	const int no_ecn_result = 0;

	return no_ecn_result;
}
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
const struct in6_addr *laddr,
const struct in6_addr *raddr)
......@@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = {
.proto = htons(ETH_P_IP),
};
/*
 * Packet info handed to ipxip6_rcv() by mplsip6_rcv() for MPLS-in-IPv6.
 * Only .proto is set explicitly: ETH_P_MPLS_UC in network byte order.
 */
static const struct tnl_ptk_info tpi_mpls = {
/* No tunnel info is required for mplsip6; only the protocol is filled in. */
.proto = htons(ETH_P_MPLS_UC),
};
static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
const struct tnl_ptk_info *tpi,
int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
......@@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb)
ip6ip6_dscp_ecn_decapsulate);
}
/*
 * Receive entry point for MPLS-in-IPv6 packets.
 *
 * Forwards the skb to the shared ipxip6_rcv() path, selecting IPPROTO_MPLS,
 * the MPLS packet info (tpi_mpls) and the MPLS DSCP/ECN hook, and returns
 * whatever ipxip6_rcv() returns.
 */
static int mplsip6_rcv(struct sk_buff *skb)
{
	const struct tnl_ptk_info *mpls_tpi = &tpi_mpls;

	return ipxip6_rcv(skb, IPPROTO_MPLS, mpls_tpi,
			  mplsip6_dscp_ecn_decapsulate);
}
struct ipv6_tel_txoption {
struct ipv6_txoptions ops;
__u8 dst_opt[8];
......@@ -1232,6 +1270,8 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
ipv6_push_frag_opts(skb, &opt.ops, &proto);
}
skb_set_inner_ipproto(skb, proto);
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
......@@ -1253,22 +1293,22 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
EXPORT_SYMBOL(ip6_tnl_xmit);
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
u8 protocol)
{
struct ip6_tnl *t = netdev_priv(dev);
struct ipv6hdr *ipv6h;
const struct iphdr *iph;
int encap_limit = -1;
__u16 offset;
struct flowi6 fl6;
__u8 dsfield;
__u8 dsfield, orig_dsfield;
__u32 mtu;
u8 tproto;
int err;
iph = ip_hdr(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
tproto = READ_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
if (tproto != protocol && tproto != 0)
return -1;
if (t->parms.collect_md) {
......@@ -1281,87 +1321,33 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
return -1;
key = &tun_info->key;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPIP;
fl6.flowi6_proto = protocol;
fl6.saddr = key->u.ipv6.src;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
dsfield = key->tos;
switch (protocol) {
case IPPROTO_IPIP:
iph = ip_hdr(skb);
orig_dsfield = ipv4_get_dsfield(iph);
break;
case IPPROTO_IPV6:
ipv6h = ipv6_hdr(skb);
orig_dsfield = ipv6_get_dsfield(ipv6h);
break;
default:
orig_dsfield = dsfield;
break;
}
} else {
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPIP;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
dsfield = ipv4_get_dsfield(iph);
else
dsfield = ip6_tclass(t->parms.flowinfo);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
else
fl6.flowi6_mark = t->parms.fwmark;
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
IPPROTO_IPIP);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
return -1;
}
return 0;
}
static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct ipv6hdr *ipv6h;
int encap_limit = -1;
__u16 offset;
struct flowi6 fl6;
__u8 dsfield;
__u32 mtu;
u8 tproto;
int err;
ipv6h = ipv6_hdr(skb);
tproto = READ_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
ip6_tnl_addr_conflict(t, ipv6h))
return -1;
if (t->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET6))
return -1;
key = &tun_info->key;
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPV6;
fl6.saddr = key->u.ipv6.src;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
dsfield = key->tos;
} else {
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
ipv6h = ipv6_hdr(skb);
if (protocol == IPPROTO_IPV6) {
offset = ip6_tnl_parse_tlv_enc_lim(skb,
skb_network_header(skb));
/* ip6_tnl_parse_tlv_enc_lim() might have
* reallocated skb->head
*/
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
......@@ -1372,38 +1358,63 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
return -1;
}
encap_limit = tel->encap_limit - 1;
} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
encap_limit = t->parms.encap_limit;
}
}
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_IPV6;
fl6.flowi6_proto = protocol;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
else
fl6.flowi6_mark = t->parms.fwmark;
switch (protocol) {
case IPPROTO_IPIP:
iph = ip_hdr(skb);
orig_dsfield = ipv4_get_dsfield(iph);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
dsfield = ipv6_get_dsfield(ipv6h);
dsfield = orig_dsfield;
else
dsfield = ip6_tclass(t->parms.flowinfo);
break;
case IPPROTO_IPV6:
ipv6h = ipv6_hdr(skb);
orig_dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
dsfield = orig_dsfield;
else
dsfield = ip6_tclass(t->parms.flowinfo);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
else
fl6.flowi6_mark = t->parms.fwmark;
break;
default:
orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo);
break;
}
}
fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield);
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
IPPROTO_IPV6);
protocol);
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
switch (protocol) {
case IPPROTO_IPIP:
icmp_send(skb, ICMP_DEST_UNREACH,
ICMP_FRAG_NEEDED, htonl(mtu));
break;
case IPPROTO_IPV6:
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
break;
default:
break;
}
return -1;
}
......@@ -1415,6 +1426,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
u8 ipproto;
int ret;
if (!pskb_inet_may_pull(skb))
......@@ -1422,15 +1434,21 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
ret = ip4ip6_tnl_xmit(skb, dev);
ipproto = IPPROTO_IPIP;
break;
case htons(ETH_P_IPV6):
ret = ip6ip6_tnl_xmit(skb, dev);
if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb)))
goto tx_err;
ipproto = IPPROTO_IPV6;
break;
case htons(ETH_P_MPLS_UC):
ipproto = IPPROTO_MPLS;
break;
default:
goto tx_err;
}
ret = ipxip6_tnl_xmit(skb, dev, ipproto);
if (ret < 0)
goto tx_err;
......@@ -2218,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
.priority = 1,
};
/*
 * xfrm6 tunnel handler for MPLS-in-IPv6: receive and ICMPv6-error callbacks
 * at priority 1. ip6_tunnel_init() registers it under AF_MPLS when
 * ip6_tnl_mpls_supported() is true.
 */
static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
	.priority	= 1,
	.handler	= mplsip6_rcv,
	.err_handler	= mplsip6_err,
};
static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
......@@ -2332,6 +2356,15 @@ static int __init ip6_tunnel_init(void)
pr_err("%s: can't register ip6ip6\n", __func__);
goto out_ip6ip6;
}
if (ip6_tnl_mpls_supported()) {
err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS);
if (err < 0) {
pr_err("%s: can't register mplsip6\n", __func__);
goto out_mplsip6;
}
}
err = rtnl_link_register(&ip6_link_ops);
if (err < 0)
goto rtnl_link_failed;
......@@ -2339,6 +2372,9 @@ static int __init ip6_tunnel_init(void)
return 0;
rtnl_link_failed:
if (ip6_tnl_mpls_supported())
xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS);
out_mplsip6:
xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
......@@ -2361,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void)
if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
pr_info("%s: can't deregister ip6ip6\n", __func__);
if (ip6_tnl_mpls_supported() &&
xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS))
pr_info("%s: can't deregister mplsip6\n", __func__);
unregister_pernet_device(&ip6_tnl_net_ops);
}
......
......@@ -21,8 +21,14 @@
static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly;
static DEFINE_MUTEX(tunnel6_mutex);
/*
 * Tell callers whether the MPLS tunnel protocol should be handled.
 *
 * Returns the value of IS_ENABLED(CONFIG_MPLS). tunnel6_init() and
 * tunnel6_fini() use it to decide whether to add or remove the
 * IPPROTO_MPLS protocol handler.
 */
static inline int xfrm6_tunnel_mpls_supported(void)
{
	const int mpls_enabled = IS_ENABLED(CONFIG_MPLS);

	return mpls_enabled;
}
int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
{
struct xfrm6_tunnel __rcu **pprev;
......@@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
mutex_lock(&tunnel6_mutex);
for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
(t = rcu_dereference_protected(*pprev,
switch (family) {
case AF_INET6:
pprev = &tunnel6_handlers;
break;
case AF_INET:
pprev = &tunnel46_handlers;
break;
case AF_MPLS:
pprev = &tunnelmpls6_handlers;
break;
default:
goto err;
}
for (; (t = rcu_dereference_protected(*pprev,
lockdep_is_held(&tunnel6_mutex))) != NULL;
pprev = &t->next) {
if (t->priority > priority)
......@@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
mutex_lock(&tunnel6_mutex);
for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
(t = rcu_dereference_protected(*pprev,
switch (family) {
case AF_INET6:
pprev = &tunnel6_handlers;
break;
case AF_INET:
pprev = &tunnel46_handlers;
break;
case AF_MPLS:
pprev = &tunnelmpls6_handlers;
break;
default:
goto err;
}
for (; (t = rcu_dereference_protected(*pprev,
lockdep_is_held(&tunnel6_mutex))) != NULL;
pprev = &t->next) {
if (t == handler) {
......@@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
}
}
err:
mutex_unlock(&tunnel6_mutex);
synchronize_net();
......@@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister);
handler != NULL; \
handler = rcu_dereference(handler->next)) \
static int tunnelmpls6_rcv(struct sk_buff *skb)
{
struct xfrm6_tunnel *handler;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
for_each_tunnel_rcu(tunnelmpls6_handlers, handler)
if (!handler->handler(skb))
return 0;
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
drop:
kfree_skb(skb);
return 0;
}
static int tunnel6_rcv(struct sk_buff *skb)
{
struct xfrm6_tunnel *handler;
......@@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return -ENOENT;
}
/*
 * Protocol error handler for IPPROTO_MPLS over IPv6.
 *
 * Offers the error to each registered MPLS tunnel handler in list order.
 * Returns 0 as soon as one ->err_handler() returns 0, or -ENOENT when none
 * does (including when the list is empty).
 */
static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
			   u8 type, u8 code, int offset, __be32 info)
{
	struct xfrm6_tunnel *tun;

	for_each_tunnel_rcu(tunnelmpls6_handlers, tun) {
		if (tun->err_handler(skb, opt, type, code, offset, info) == 0)
			return 0;
	}

	return -ENOENT;
}
static const struct inet6_protocol tunnel6_protocol = {
.handler = tunnel6_rcv,
.err_handler = tunnel6_err,
......@@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = {
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
/*
 * inet6 protocol descriptor for MPLS-in-IPv6. tunnel6_init() adds it for
 * IPPROTO_MPLS when xfrm6_tunnel_mpls_supported() is true.
 */
static const struct inet6_protocol tunnelmpls6_protocol = {
	.flags		= INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	.handler	= tunnelmpls6_rcv,
	.err_handler	= tunnelmpls6_err,
};
static int __init tunnel6_init(void)
{
if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
......@@ -169,6 +238,13 @@ static int __init tunnel6_init(void)
inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
return -EAGAIN;
}
if (xfrm6_tunnel_mpls_supported() &&
inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) {
pr_err("%s: can't add protocol\n", __func__);
inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP);
return -EAGAIN;
}
return 0;
}
......@@ -178,6 +254,9 @@ static void __exit tunnel6_fini(void)
pr_err("%s: can't remove protocol\n", __func__);
if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
pr_err("%s: can't remove protocol\n", __func__);
if (xfrm6_tunnel_mpls_supported() &&
inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS))
pr_err("%s: can't remove protocol\n", __func__);
}
module_init(tunnel6_init);
......
......@@ -1593,7 +1593,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
dev->type == ARPHRD_IPGRE ||
dev->type == ARPHRD_IP6GRE ||
dev->type == ARPHRD_SIT ||
dev->type == ARPHRD_TUNNEL) {
dev->type == ARPHRD_TUNNEL ||
dev->type == ARPHRD_TUNNEL6) {
mdev = mpls_add_dev(dev);
if (IS_ERR(mdev))
return notifier_from_errno(PTR_ERR(mdev));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment