Commit efedce33 authored by Steffen Klassert

Merge branch 'xfrm: policy: replace session decode with flow dissector'

Florian Westphal says:

============
Remove the ipv4+ipv6 session decode functions and use generic flow
dissector to populate the flowi for the policy lookup.

Changes since v2:
- first patch broke CONFIG_XFRM=n builds

Changes since v1:
- Can't use skb_flow_dissect(), we might see skbs that have neither
  skb->sk nor skb->dev set. Flow dissector WARN()s in this case, it
  tries to check for a bpf program assigned in that net namespace.

Add a preparation patch to pass down 'struct net' in
xfrm_decode_session so it's available for use in patch 3.

Changes since RFC:

- Drop mobility header support.  I don't think that anyone uses
  this.  MOBIKE doesn't appear to need this either.
- Drop fl6->flowlabel assignment, original code leaves it as 0.

There is no reason for this change other than to remove code.
============
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
parents e377240a 7a020709
......@@ -1207,20 +1207,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
}
/*
 * Fill @fl from @skb for the given address @family.  A non-zero @reverse
 * asks for the flow in the opposite direction.  The two inline wrappers
 * below pass reverse = 0 and reverse = 1 respectively.
 */
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse);
/*
 * Forward-direction decode: thin wrapper that calls
 * __xfrm_decode_session() with reverse = 0 and returns its result.
 */
static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family)
{
return __xfrm_decode_session(skb, fl, family, 0);
return __xfrm_decode_session(net, skb, fl, family, 0);
}
/*
 * Reverse-direction decode: thin wrapper that calls
 * __xfrm_decode_session() with reverse = 1 and returns its result.
 */
static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
return __xfrm_decode_session(skb, fl, family, 1);
return __xfrm_decode_session(net, skb, fl, family, 1);
}
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
......@@ -1296,7 +1296,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
{
return 1;
}
static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
......
......@@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
} else
return rt;
err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
......
......@@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
xfrm_decode_session(skb, &fl, AF_INET);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
xfrm_decode_session(skb, &fl, AF_INET6);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
default:
goto tx_err;
......
......@@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
#ifdef CONFIG_XFRM
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
......
......@@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
return dst;
}
err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
if (err)
goto relookup_failed;
......
......@@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
xfrm_decode_session(skb, &fl, AF_INET6);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
xfrm_decode_session(skb, &fl, AF_INET);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
default:
goto tx_err;
......
......@@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
......
......@@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
struct flowi fl;
int err;
err = xfrm_decode_session(skb, &fl, family);
err = xfrm_decode_session(net, skb, &fl, family);
if (err < 0)
return err;
......
......@@ -538,7 +538,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
xfrm_decode_session(skb, &fl, AF_INET6);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
if (!dst) {
fl.u.ip6.flowi6_oif = dev->ifindex;
fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
......@@ -553,7 +553,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
xfrm_decode_session(skb, &fl, AF_INET);
xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
if (!dst) {
struct rtable *rt;
......
......@@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates {
struct hlist_head *res[XFRM_POL_CAND_MAX];
};
/*
 * Key block handed to __skb_flow_dissect() by __xfrm_decode_session().
 * Every member is the target of one entry in xfrm_flow_dissector_keys[],
 * which records its offset within this structure.
 */
struct xfrm_flow_keys {
/* ip_proto is copied into the flow's protocol field. */
struct flow_dissector_key_basic basic;
/* Registered with the dissector; not read by the decode helpers here. */
struct flow_dissector_key_control control;
/* Source/destination addresses; the member used depends on the family. */
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
} addrs;
/* tos is copied into the IPv4 flow's TOS field. */
struct flow_dissector_key_ip ip;
/* ICMP type and code. */
struct flow_dissector_key_icmp icmp;
/* Layer-4 source and destination ports. */
struct flow_dissector_key_ports ports;
/* GRE key id. */
struct flow_dissector_key_keyid gre;
};
static struct flow_dissector xfrm_session_dissector __ro_after_init;
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
......@@ -2853,7 +2868,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
xfrm_decode_session(skb, &fl, dst->ops->family);
xfrm_decode_session(net, skb, &fl, dst->ops->family);
skb->mark = skb_mark;
spin_unlock(&pq->hold_queue.lock);
......@@ -2889,7 +2904,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family);
skb->mark = skb_mark;
dst_hold(xfrm_dst_path(skb_dst(skb)));
......@@ -3367,209 +3382,92 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
}
static void
decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
const struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl;
u8 *xprth = skb_network_header(skb) + ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
if (skb_dst(skb) && skb_dst(skb)->dev)
oif = skb_dst(skb)->dev->ifindex;
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
fl4->flowi4_proto = iph->protocol;
fl4->daddr = reverse ? iph->saddr : iph->daddr;
fl4->saddr = reverse ? iph->daddr : iph->saddr;
fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
if (xprth + 4 < skb->data ||
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be16 *ports;
xprth = skb_network_header(skb) + ihl * 4;
ports = (__be16 *)xprth;
fl4->fl4_sport = ports[!!reverse];
fl4->fl4_dport = ports[!reverse];
}
break;
case IPPROTO_ICMP:
if (xprth + 2 < skb->data ||
pskb_may_pull(skb, xprth + 2 - skb->data)) {
u8 *icmp;
xprth = skb_network_header(skb) + ihl * 4;
icmp = xprth;
fl4->fl4_icmp_type = icmp[0];
fl4->fl4_icmp_code = icmp[1];
if (reverse) {
fl4->saddr = flkeys->addrs.ipv4.dst;
fl4->daddr = flkeys->addrs.ipv4.src;
fl4->fl4_sport = flkeys->ports.dst;
fl4->fl4_dport = flkeys->ports.src;
} else {
fl4->saddr = flkeys->addrs.ipv4.src;
fl4->daddr = flkeys->addrs.ipv4.dst;
fl4->fl4_sport = flkeys->ports.src;
fl4->fl4_dport = flkeys->ports.dst;
}
break;
case IPPROTO_GRE:
if (xprth + 12 < skb->data ||
pskb_may_pull(skb, xprth + 12 - skb->data)) {
__be16 *greflags;
__be32 *gre_hdr;
xprth = skb_network_header(skb) + ihl * 4;
greflags = (__be16 *)xprth;
gre_hdr = (__be32 *)xprth;
if (greflags[0] & GRE_KEY) {
if (greflags[0] & GRE_CSUM)
gre_hdr++;
fl4->fl4_gre_key = gre_hdr[1];
}
}
break;
default:
break;
}
}
fl4->flowi4_proto = flkeys->basic.ip_proto;
fl4->flowi4_tos = flkeys->ip.tos;
fl4->fl4_icmp_type = flkeys->icmp.type;
fl4->fl4_icmp_type = flkeys->icmp.code;
fl4->fl4_gre_key = flkeys->gre.keyid;
}
#if IS_ENABLED(CONFIG_IPV6)
static void
decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
struct flowi6 *fl6 = &fl->u.ip6;
int onlyproto = 0;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
u32 offset = sizeof(*hdr);
struct ipv6_opt_hdr *exthdr;
const unsigned char *nh = skb_network_header(skb);
u16 nhoff = IP6CB(skb)->nhoff;
int oif = 0;
u8 nexthdr;
if (!nhoff)
nhoff = offsetof(struct ipv6hdr, nexthdr);
nexthdr = nh[nhoff];
if (skb_dst(skb) && skb_dst(skb)->dev)
oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
while (nh + offset + sizeof(*exthdr) < skb->data ||
pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
nh = skb_network_header(skb);
exthdr = (struct ipv6_opt_hdr *)(nh + offset);
switch (nexthdr) {
case NEXTHDR_FRAGMENT:
onlyproto = 1;
fallthrough;
case NEXTHDR_ROUTING:
case NEXTHDR_HOP:
case NEXTHDR_DEST:
offset += ipv6_optlen(exthdr);
nexthdr = exthdr->nexthdr;
break;
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
case IPPROTO_DCCP:
if (!onlyproto && (nh + offset + 4 < skb->data ||
pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
__be16 *ports;
nh = skb_network_header(skb);
ports = (__be16 *)(nh + offset);
fl6->fl6_sport = ports[!!reverse];
fl6->fl6_dport = ports[!reverse];
}
fl6->flowi6_proto = nexthdr;
return;
case IPPROTO_ICMPV6:
if (!onlyproto && (nh + offset + 2 < skb->data ||
pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
u8 *icmp;
nh = skb_network_header(skb);
icmp = (u8 *)(nh + offset);
fl6->fl6_icmp_type = icmp[0];
fl6->fl6_icmp_code = icmp[1];
}
fl6->flowi6_proto = nexthdr;
return;
case IPPROTO_GRE:
if (!onlyproto &&
(nh + offset + 12 < skb->data ||
pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
struct gre_base_hdr *gre_hdr;
__be32 *gre_key;
nh = skb_network_header(skb);
gre_hdr = (struct gre_base_hdr *)(nh + offset);
gre_key = (__be32 *)(gre_hdr + 1);
if (gre_hdr->flags & GRE_KEY) {
if (gre_hdr->flags & GRE_CSUM)
gre_key++;
fl6->fl6_gre_key = *gre_key;
}
if (reverse) {
fl6->saddr = flkeys->addrs.ipv6.dst;
fl6->daddr = flkeys->addrs.ipv6.src;
fl6->fl6_sport = flkeys->ports.dst;
fl6->fl6_dport = flkeys->ports.src;
} else {
fl6->saddr = flkeys->addrs.ipv6.src;
fl6->daddr = flkeys->addrs.ipv6.dst;
fl6->fl6_sport = flkeys->ports.src;
fl6->fl6_dport = flkeys->ports.dst;
}
fl6->flowi6_proto = nexthdr;
return;
#if IS_ENABLED(CONFIG_IPV6_MIP6)
case IPPROTO_MH:
offset += ipv6_optlen(exthdr);
if (!onlyproto && (nh + offset + 3 < skb->data ||
pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
struct ip6_mh *mh;
nh = skb_network_header(skb);
mh = (struct ip6_mh *)(nh + offset);
fl6->fl6_mh_type = mh->ip6mh_type;
}
fl6->flowi6_proto = nexthdr;
return;
#endif
default:
fl6->flowi6_proto = nexthdr;
return;
}
}
fl6->flowi6_proto = flkeys->basic.ip_proto;
fl6->fl6_icmp_type = flkeys->icmp.type;
fl6->fl6_icmp_type = flkeys->icmp.code;
fl6->fl6_gre_key = flkeys->gre.keyid;
}
#endif
/*
 * Build the flow used for the policy lookup from @skb.
 *
 * The packet is run through xfrm_session_dissector and the resulting keys
 * are handed to the per-family decoder.  Mark, interface index and security
 * id are then filled in from the skb itself.
 *
 * Returns -EAFNOSUPPORT for a family without a decoder, otherwise the
 * return value of security_xfrm_decode_session().
 */
int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse)
{
struct xfrm_flow_keys flkeys;
/* Zero the keys first so anything the dissector does not fill reads as 0. */
memset(&flkeys, 0, sizeof(flkeys));
/* The dissector's return value is not checked here. */
__skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys,
NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
switch (family) {
case AF_INET:
decode_session4(skb, fl, reverse);
decode_session4(&flkeys, fl, reverse);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
decode_session6(skb, fl, reverse);
decode_session6(&flkeys, fl, reverse);
break;
#endif
default:
return -EAFNOSUPPORT;
}
/*
 * Mark and oif are set after the family decoder has run because the
 * decoder zeroes the flow before filling it.
 */
fl->flowi_mark = skb->mark;
if (reverse) {
/* Reverse direction: use the interface the skb arrived on. */
fl->flowi_oif = skb->skb_iif;
} else {
/* Forward direction: use the dst device's ifindex, or 0 if there is none. */
int oif = 0;
if (skb_dst(skb) && skb_dst(skb)->dev)
oif = skb_dst(skb)->dev->ifindex;
fl->flowi_oif = oif;
}
return security_xfrm_decode_session(skb, &fl->flowi_secid);
}
EXPORT_SYMBOL(__xfrm_decode_session);
......@@ -3618,7 +3516,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
return 0;
}
......@@ -3774,7 +3672,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
struct dst_entry *dst;
int res = 1;
if (xfrm_decode_session(skb, &fl, family) < 0) {
if (xfrm_decode_session(net, skb, &fl, family) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
return 0;
}
......@@ -4253,8 +4151,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
.exit = xfrm_net_exit,
};
/* One table row: dissector key @id stores its result in xfrm_flow_keys.@member. */
#define XFRM_FLOW_KEY(id, member) \
	{ .key_id = (id), .offset = offsetof(struct xfrm_flow_keys, member) }

/*
 * Keys registered for xfrm_session_dissector by xfrm_init(), each paired
 * with the offset of the struct xfrm_flow_keys member that receives it.
 */
static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_CONTROL, control),
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_BASIC, basic),
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_IPV4_ADDRS, addrs.ipv4),
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_IPV6_ADDRS, addrs.ipv6),
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_PORTS, ports),
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_GRE_KEYID, gre),
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_IP, ip),
	XFRM_FLOW_KEY(FLOW_DISSECTOR_KEY_ICMP, icmp),
};

#undef XFRM_FLOW_KEY
void __init xfrm_init(void)
{
skb_flow_dissector_init(&xfrm_session_dissector,
xfrm_flow_dissector_keys,
ARRAY_SIZE(xfrm_flow_dissector_keys));
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
xfrm_input_init();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment