Commit 8788260e authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'raw-add-drop-reasons-and-use-another-hash-function'

Eric Dumazet says:

====================
raw: add drop reasons and use another hash function

Two first patches add drop reasons to raw input processing.

Last patch spreads RAW sockets in the shared hash tables
to avoid long hash buckets in some cases.
====================

Link: https://lore.kernel.org/r/20230202094100.3083177-1-edumazet@google.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents dfefcb0c 6579f5ba
......@@ -15,6 +15,8 @@
#include <net/inet_sock.h>
#include <net/protocol.h>
#include <net/netns/hash.h>
#include <linux/hash.h>
#include <linux/icmp.h>
extern struct proto raw_prot;
......@@ -29,13 +31,20 @@ int raw_local_deliver(struct sk_buff *, int);
int raw_rcv(struct sock *, struct sk_buff *);
#define RAW_HTABLE_SIZE MAX_INET_PROTOS
#define RAW_HTABLE_LOG 8
#define RAW_HTABLE_SIZE (1U << RAW_HTABLE_LOG)
struct raw_hashinfo {
spinlock_t lock;
struct hlist_nulls_head ht[RAW_HTABLE_SIZE];
struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
};
static inline u32 raw_hashfunc(const struct net *net, u32 proto)
{
return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG);
}
static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
{
int i;
......
......@@ -93,7 +93,7 @@ int raw_hash_sk(struct sock *sk)
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_nulls_head *hlist;
hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
spin_lock(&h->lock);
__sk_nulls_add_node_rcu(sk, hlist);
......@@ -160,9 +160,9 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
* RFC 1122: SHOULD pass TOS value up to the transport layer.
* -> It does. And not only TOS, but all IP header.
*/
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
static int raw_v4_input(struct net *net, struct sk_buff *skb,
const struct iphdr *iph, int hash)
{
struct net *net = dev_net(skb->dev);
struct hlist_nulls_head *hlist;
struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
......@@ -193,9 +193,10 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
int hash = protocol & (RAW_HTABLE_SIZE - 1);
struct net *net = dev_net(skb->dev);
return raw_v4_input(skb, ip_hdr(skb), hash);
return raw_v4_input(net, skb, ip_hdr(skb),
raw_hashfunc(net, protocol));
}
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
......@@ -271,7 +272,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
struct sock *sk;
int hash;
hash = protocol & (RAW_HTABLE_SIZE - 1);
hash = raw_hashfunc(net, protocol);
hlist = &raw_v4_hashinfo.ht[hash];
rcu_read_lock();
......@@ -287,11 +288,13 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
enum skb_drop_reason reason;
/* Charge it to the socket. */
ipv4_pktinfo_prepare(sk, skb);
if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
......@@ -302,7 +305,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
......
......@@ -152,7 +152,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
......@@ -338,7 +338,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
struct sock *sk;
int hash;
hash = nexthdr & (RAW_HTABLE_SIZE - 1);
hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
......@@ -355,17 +355,19 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
enum skb_drop_reason reason;
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
......@@ -386,7 +388,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
......@@ -410,7 +412,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
kfree_skb(skb);
kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment