Commit eac1b93c authored by Coco Li, committed by David S. Miller

gro: add ability to control gro max packet size

Eric Dumazet suggested allowing users to modify the max GRO packet size.

We have seen GRO being disabled by users of appliances (such as
wifi access points) because of claimed bufferbloat issues,
or some workarounds in sch_cake, to split GRO/GSO packets.

Instead of disabling GRO completely, one can choose to limit
the maximum packet size of GRO packets, depending on their
latency constraints.

This patch adds a per-device gro_max_size attribute
that can be changed with the ip link command.

ip link set dev eth0 gro_max_size 16000
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Coco Li <lixiaoyan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 007747a9
...@@ -1942,6 +1942,8 @@ enum netdev_ml_priv_type { ...@@ -1942,6 +1942,8 @@ enum netdev_ml_priv_type {
* dev->addr_list_lock. * dev->addr_list_lock.
* @unlink_list: As netif_addr_lock() can be called recursively, * @unlink_list: As netif_addr_lock() can be called recursively,
* keep a list of interfaces to be deleted. * keep a list of interfaces to be deleted.
* @gro_max_size: Maximum size of aggregated packet in generic
* receive offload (GRO)
* *
* @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @dev_addr_shadow: Copy of @dev_addr to catch direct writes.
* @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @linkwatch_dev_tracker: refcount tracker used by linkwatch.
...@@ -2131,6 +2133,8 @@ struct net_device { ...@@ -2131,6 +2133,8 @@ struct net_device {
struct bpf_prog __rcu *xdp_prog; struct bpf_prog __rcu *xdp_prog;
unsigned long gro_flush_timeout; unsigned long gro_flush_timeout;
int napi_defer_hard_irqs; int napi_defer_hard_irqs;
#define GRO_MAX_SIZE 65536
unsigned int gro_max_size;
rx_handler_func_t __rcu *rx_handler; rx_handler_func_t __rcu *rx_handler;
void __rcu *rx_handler_data; void __rcu *rx_handler_data;
...@@ -4806,6 +4810,13 @@ static inline void netif_set_gso_max_segs(struct net_device *dev, ...@@ -4806,6 +4810,13 @@ static inline void netif_set_gso_max_segs(struct net_device *dev,
WRITE_ONCE(dev->gso_max_segs, segs); WRITE_ONCE(dev->gso_max_segs, segs);
} }
/**
 * netif_set_gro_max_size - set the GRO aggregated-packet size limit of a device
 * @dev: network device whose gro_max_size field is written
 * @size: new value stored in @dev->gro_max_size
 *
 * The value is stored as-is; this helper performs no range check on @size.
 */
static inline void netif_set_gro_max_size(struct net_device *dev,
unsigned int size)
{
/* This pairs with the READ_ONCE() in skb_gro_receive() */
WRITE_ONCE(dev->gro_max_size, size);
}
static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
int pulled_hlen, u16 mac_offset, int pulled_hlen, u16 mac_offset,
int mac_len) int mac_len)
......
...@@ -347,6 +347,7 @@ enum { ...@@ -347,6 +347,7 @@ enum {
*/ */
IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_NAME,
IFLA_PARENT_DEV_BUS_NAME, IFLA_PARENT_DEV_BUS_NAME,
IFLA_GRO_MAX_SIZE,
__IFLA_MAX __IFLA_MAX
}; };
......
...@@ -10180,6 +10180,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, ...@@ -10180,6 +10180,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE; dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS; dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_MAX_SIZE;
dev->upper_level = 1; dev->upper_level = 1;
dev->lower_level = 1; dev->lower_level = 1;
#ifdef CONFIG_LOCKDEP #ifdef CONFIG_LOCKDEP
......
...@@ -132,10 +132,14 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) ...@@ -132,10 +132,14 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int headlen = skb_headlen(skb); unsigned int headlen = skb_headlen(skb);
unsigned int len = skb_gro_len(skb); unsigned int len = skb_gro_len(skb);
unsigned int delta_truesize; unsigned int delta_truesize;
unsigned int gro_max_size;
unsigned int new_truesize; unsigned int new_truesize;
struct sk_buff *lp; struct sk_buff *lp;
if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
gro_max_size = READ_ONCE(p->dev->gro_max_size);
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG; return -E2BIG;
lp = NAPI_GRO_CB(p)->last; lp = NAPI_GRO_CB(p)->last;
......
...@@ -1026,6 +1026,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, ...@@ -1026,6 +1026,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
+ nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ nla_total_size(4) /* IFLA_GSO_MAX_SIZE */ + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
+ nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
...@@ -1728,6 +1729,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1728,6 +1729,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
#ifdef CONFIG_RPS #ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif #endif
...@@ -1880,6 +1882,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { ...@@ -1880,6 +1882,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
[IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
[IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 },
}; };
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
...@@ -2299,6 +2302,14 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], ...@@ -2299,6 +2302,14 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
} }
} }
if (tb[IFLA_GRO_MAX_SIZE]) {
u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]);
if (gro_max_size > GRO_MAX_SIZE) {
NL_SET_ERR_MSG(extack, "too big gro_max_size");
return -EINVAL;
}
}
return 0; return 0;
} }
...@@ -2772,6 +2783,15 @@ static int do_setlink(const struct sk_buff *skb, ...@@ -2772,6 +2783,15 @@ static int do_setlink(const struct sk_buff *skb,
} }
} }
if (tb[IFLA_GRO_MAX_SIZE]) {
u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]);
if (dev->gro_max_size ^ gro_max_size) {
netif_set_gro_max_size(dev, gro_max_size);
status |= DO_SETLINK_MODIFIED;
}
}
if (tb[IFLA_OPERSTATE]) if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
...@@ -3222,6 +3242,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, ...@@ -3222,6 +3242,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
if (tb[IFLA_GSO_MAX_SEGS]) if (tb[IFLA_GSO_MAX_SEGS])
netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS])); netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
if (tb[IFLA_GRO_MAX_SIZE])
netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
return dev; return dev;
} }
......
...@@ -347,6 +347,7 @@ enum { ...@@ -347,6 +347,7 @@ enum {
*/ */
IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_NAME,
IFLA_PARENT_DEV_BUS_NAME, IFLA_PARENT_DEV_BUS_NAME,
IFLA_GRO_MAX_SIZE,
__IFLA_MAX __IFLA_MAX
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment