Commit 43d0035b authored by David S. Miller

Merge branch 'unmask-dscp-bits'

Ido Schimmel says:

====================
Unmask upper DSCP bits - part 2

tl;dr - This patchset continues to unmask the upper DSCP bits in the
IPv4 flow key in preparation for allowing IPv4 FIB rules to match on
DSCP. No functional changes are expected. Part 1 was merged in the merge
commit titled "Merge branch 'unmask-upper-dscp-bits-part-1'".

The TOS field in the IPv4 flow key ('flowi4_tos') is used during FIB
lookup to match against the TOS selector in FIB rules and routes.

It is currently impossible for user space to configure FIB rules that
match on the DSCP value as the upper DSCP bits are either masked in the
various call sites that initialize the IPv4 flow key or along the path
to the FIB core.

In preparation for adding a DSCP selector to IPv4 and IPv6 FIB rules, we
need to make sure the entire DSCP value is present in the IPv4 flow key.
This patchset continues to unmask the upper DSCP bits, but this time in
the output route path.

Patches #1-#3 unmask the upper DSCP bits in the various places that
invoke the core output route lookup functions directly.

Patches #4-#6 do the same in three helpers that are widely used in the
output path to initialize the TOS field in the IPv4 flow key.

The rest of the patches continue to unmask these bits in call sites that
invoke the following wrappers around the core lookup functions:

Patch #7 - __ip_route_output_key()
Patches #8-#12 - ip_route_output_flow()

The next patchset will handle the callers of ip_route_output_ports() and
ip_route_output_key().

No functional changes are expected as commit 1fa3314c ("ipv4:
Centralize TOS matching") moved the masking of the upper DSCP bits to
the core where 'flowi4_tos' is matched against the TOS selector.

Changes since v1 [1]:

* Remove IPTOS_RT_MASK in patch #7 instead of in patch #6

[1] https://lore.kernel.org/netdev/20240827111813.2115285-1-idosch@nvidia.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents cff69f72 50033400
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
*/ */
#include <net/inet_dscp.h>
#include "ipvlan.h" #include "ipvlan.h"
static u32 ipvlan_jhash_secret __read_mostly; static u32 ipvlan_jhash_secret __read_mostly;
...@@ -420,7 +422,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) ...@@ -420,7 +422,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
int err, ret = NET_XMIT_DROP; int err, ret = NET_XMIT_DROP;
struct flowi4 fl4 = { struct flowi4 fl4 = {
.flowi4_oif = dev->ifindex, .flowi4_oif = dev->ifindex,
.flowi4_tos = RT_TOS(ip4h->tos), .flowi4_tos = ip4h->tos & INET_DSCP_MASK,
.flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark, .flowi4_mark = skb->mark,
.daddr = ip4h->daddr, .daddr = ip4h->daddr,
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <net/sch_generic.h> #include <net/sch_generic.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack.h>
#include <net/inet_dscp.h>
#define DRV_NAME "vrf" #define DRV_NAME "vrf"
#define DRV_VERSION "1.1" #define DRV_VERSION "1.1"
...@@ -520,7 +521,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, ...@@ -520,7 +521,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
/* needed to match OIF rule */ /* needed to match OIF rule */
fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_l3mdev = vrf_dev->ifindex;
fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.flowi4_iif = LOOPBACK_IFINDEX;
fl4.flowi4_tos = RT_TOS(ip4h->tos); fl4.flowi4_tos = ip4h->tos & INET_DSCP_MASK;
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = ip4h->protocol; fl4.flowi4_proto = ip4h->protocol;
fl4.daddr = ip4h->daddr; fl4.daddr = ip4h->daddr;
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <net/flow_dissector.h> #include <net/flow_dissector.h>
#include <net/netns/hash.h> #include <net/netns/hash.h>
#include <net/lwtunnel.h> #include <net/lwtunnel.h>
#include <net/inet_dscp.h>
#define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */
#define IPV4_MIN_MTU 68 /* RFC 791 */ #define IPV4_MIN_MTU 68 /* RFC 791 */
...@@ -258,7 +259,9 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet, ...@@ -258,7 +259,9 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet) static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
{ {
return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos)); u8 dsfield = ipc->tos != -1 ? ipc->tos : READ_ONCE(inet->tos);
return dsfield & INET_DSCP_MASK;
} }
/* datagram.c */ /* datagram.c */
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <net/ip_fib.h> #include <net/ip_fib.h>
#include <net/arp.h> #include <net/arp.h>
#include <net/ndisc.h> #include <net/ndisc.h>
#include <net/inet_dscp.h>
#include <linux/in_route.h> #include <linux/in_route.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
...@@ -45,7 +46,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk) ...@@ -45,7 +46,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk)
static inline __u8 ip_sock_rt_tos(const struct sock *sk) static inline __u8 ip_sock_rt_tos(const struct sock *sk)
{ {
return RT_TOS(READ_ONCE(inet_sk(sk)->tos)); return READ_ONCE(inet_sk(sk)->tos) & INET_DSCP_MASK;
} }
struct ip_tunnel_info; struct ip_tunnel_info;
...@@ -265,8 +266,6 @@ static inline void ip_rt_put(struct rtable *rt) ...@@ -265,8 +266,6 @@ static inline void ip_rt_put(struct rtable *rt)
dst_release(&rt->dst); dst_release(&rt->dst);
} }
#define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3)
extern const __u8 ip_tos2prio[16]; extern const __u8 ip_tos2prio[16];
static inline char rt_tos2priority(u8 tos) static inline char rt_tos2priority(u8 tos)
......
...@@ -2372,7 +2372,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, ...@@ -2372,7 +2372,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4 = { struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark, .flowi4_mark = skb->mark,
.flowi4_tos = RT_TOS(ip4h->tos), .flowi4_tos = ip4h->tos & INET_DSCP_MASK,
.flowi4_oif = dev->ifindex, .flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol, .flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr, .daddr = ip4h->daddr,
......
...@@ -93,6 +93,7 @@ ...@@ -93,6 +93,7 @@
#include <net/ip_fib.h> #include <net/ip_fib.h>
#include <net/l3mdev.h> #include <net/l3mdev.h>
#include <net/addrconf.h> #include <net/addrconf.h>
#include <net/inet_dscp.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/icmp.h> #include <trace/events/icmp.h>
...@@ -497,7 +498,7 @@ static struct rtable *icmp_route_lookup(struct net *net, ...@@ -497,7 +498,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr; fl4->saddr = saddr;
fl4->flowi4_mark = mark; fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL); fl4->flowi4_uid = sock_net_uid(net, NULL);
fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_proto = IPPROTO_ICMP; fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type; fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code; fl4->fl4_icmp_code = code;
......
...@@ -77,6 +77,7 @@ ...@@ -77,6 +77,7 @@
#include <net/inetpeer.h> #include <net/inetpeer.h>
#include <net/inet_ecn.h> #include <net/inet_ecn.h>
#include <net/lwtunnel.h> #include <net/lwtunnel.h>
#include <net/inet_dscp.h>
#include <linux/bpf-cgroup.h> #include <linux/bpf-cgroup.h>
#include <linux/igmp.h> #include <linux/igmp.h>
#include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4.h>
...@@ -1621,7 +1622,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, ...@@ -1621,7 +1622,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
flowi4_init_output(&fl4, oif, flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark, IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
RT_TOS(arg->tos), arg->tos & INET_DSCP_MASK,
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg), ip_reply_arg_flowi_flags(arg),
daddr, saddr, daddr, saddr,
......
...@@ -512,7 +512,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4, ...@@ -512,7 +512,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
sk->sk_protocol; sk->sk_protocol;
} }
flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope, flowi4_init_output(fl4, oif, mark, tos & INET_DSCP_MASK, scope,
prot, flow_flags, iph->daddr, iph->saddr, 0, 0, prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk)); sock_net_uid(net, sk));
} }
...@@ -541,7 +541,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) ...@@ -541,7 +541,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr) if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr; daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK, ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), ip_sock_rt_scope(sk),
inet_test_bit(HDRINCL, sk) ? inet_test_bit(HDRINCL, sk) ?
IPPROTO_RAW : sk->sk_protocol, IPPROTO_RAW : sk->sk_protocol,
...@@ -2618,7 +2618,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, ...@@ -2618,7 +2618,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth; struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos &= IPTOS_RT_MASK; fl4->flowi4_tos &= INET_DSCP_MASK;
rcu_read_lock(); rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
...@@ -3261,7 +3261,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, ...@@ -3261,7 +3261,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst; fl4.daddr = dst;
fl4.saddr = src; fl4.saddr = src;
fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK; fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark; fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid; fl4.flowi4_uid = uid;
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#include <net/dsfield.h> #include <net/dsfield.h>
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/inet_dscp.h>
/* /*
This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
...@@ -935,8 +936,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ...@@ -935,8 +936,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
} }
flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark, flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6, tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE,
0, dst, tiph->saddr, 0, 0, IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0,
sock_net_uid(tunnel->net, NULL)); sock_net_uid(tunnel->net, NULL));
rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr); rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
......
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#ifdef CONFIG_XFRM_ESPINTCP #ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h> #include <net/espintcp.h>
#endif #endif
#include <net/inet_dscp.h>
#include "xfrm_hash.h" #include "xfrm_hash.h"
...@@ -2561,7 +2562,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, ...@@ -2561,7 +2562,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
static int xfrm_get_tos(const struct flowi *fl, int family) static int xfrm_get_tos(const struct flowi *fl, int family)
{ {
if (family == AF_INET) if (family == AF_INET)
return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; return fl->u.ip4.flowi4_tos & INET_DSCP_MASK;
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment