Commit 6738fc77 authored by Jakub Kicinski

Merge branch 'net-skb-introduce-kfree_skb_with_reason'

Menglong Dong says:

====================
net: skb: introduce kfree_skb_with_reason()

In this patch series, the interface kfree_skb_with_reason() is
introduced, which is used to collect the skb drop reason and pass
it to the 'kfree_skb' tracepoint. Therefore, 'drop_monitor' or eBPF
is able to monitor abnormal skbs with a detailed reason.

In fact, this patch series comes out of the ideas of David and
Steve; I'm just the porter :/

Previous discussion is here:

https://lore.kernel.org/netdev/20211118105752.1d46e990@gandalf.local.home/
https://lore.kernel.org/netdev/67b36bd8-2477-88ac-83a0-35a1eeaf40c9@gmail.com/

In the first patch, kfree_skb_with_reason() is introduced and
the 'reason' field is added to the 'kfree_skb' tracepoint. In the
second patch, 'kfree_skb()' is replaced with 'kfree_skb_with_reason()'
in tcp_v4_rcv(). In the third patch, 'kfree_skb_with_reason()' is
used in __udp4_lib_rcv().

Changes since v3:
- fix some code style problems in skb.h

Changes since v2:
- rename kfree_skb_with_reason() to kfree_skb_reason()
- make kfree_skb() static inline, as Jakub suggested

Changes since v1:
- rename some drop reasons, as David suggested
- add the third patch
====================

Link: https://lore.kernel.org/r/20220109063628.526990-1-imagedong@tencent.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 342402c4 1c7fab70
...@@ -305,6 +305,22 @@ struct sk_buff_head { ...@@ -305,6 +305,22 @@ struct sk_buff_head {
struct sk_buff; struct sk_buff;
/* The reason why an skb was dropped, as passed to kfree_skb_reason()
 * and recorded in the 'reason' field of the 'kfree_skb' tracepoint.
 * TODO: maybe these should be split into groups?
 *
 * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is
 * used to translate the reason to a string.
 */
enum skb_drop_reason {
/* No specific reason given; this is what plain kfree_skb() reports. */
SKB_DROP_REASON_NOT_SPECIFIED,
/* No socket was found for the packet (set on the no-socket paths of
 * tcp_v4_rcv() and __udp4_lib_rcv()).
 */
SKB_DROP_REASON_NO_SOCKET,
/* Packet is too short: TCP data offset smaller than the TCP header in
 * tcp_v4_rcv(), or the short_packet path in __udp4_lib_rcv().
 */
SKB_DROP_REASON_PKT_TOO_SMALL,
/* TCP checksum error (csum_error path of tcp_v4_rcv()). */
SKB_DROP_REASON_TCP_CSUM,
/* tcp_filter() returned non-zero for the packet in tcp_v4_rcv(). */
SKB_DROP_REASON_TCP_FILTER,
/* UDP checksum error (bad checksum path of __udp4_lib_rcv()). */
SKB_DROP_REASON_UDP_CSUM,
/* Last entry; presumably an upper bound rather than a real drop
 * reason - new reasons should be added above it. TODO confirm.
 */
SKB_DROP_REASON_MAX,
};
/* To allow 64K frame to be packed as single skb without frag_list we /* To allow 64K frame to be packed as single skb without frag_list we
* require 64K/PAGE_SIZE pages plus 1 additional page to allow for * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
* buffers which do not start on a page boundary. * buffers which do not start on a page boundary.
...@@ -1085,8 +1101,18 @@ static inline bool skb_unref(struct sk_buff *skb) ...@@ -1085,8 +1101,18 @@ static inline bool skb_unref(struct sk_buff *skb)
return true; return true;
} }
/* Drops a reference to @skb, frees it once unreferenced, and hands
 * @reason to the 'kfree_skb' tracepoint. Defined out of line.
 */
void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason);
/**
 * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason
 * @skb: buffer to free
 *
 * Thin inline wrapper that forwards to kfree_skb_reason() with
 * SKB_DROP_REASON_NOT_SPECIFIED, so callers that do not state why the
 * buffer is dropped need no changes.
 */
static inline void kfree_skb(struct sk_buff *skb)
{
kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
}
void skb_release_head_state(struct sk_buff *skb); void skb_release_head_state(struct sk_buff *skb);
void kfree_skb(struct sk_buff *skb);
void kfree_skb_list(struct sk_buff *segs); void kfree_skb_list(struct sk_buff *segs);
void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
void skb_tx_error(struct sk_buff *skb); void skb_tx_error(struct sk_buff *skb);
......
...@@ -9,29 +9,56 @@ ...@@ -9,29 +9,56 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
/*
 * List of every skb drop reason, written in terms of two helper macros:
 * EM(value, name) for each entry and EMe(value, name) for the final one.
 * The list is expanded more than once below, with EM/EMe redefined in
 * between, so each enum skb_drop_reason value must appear here exactly
 * once.
 */
#define TRACE_SKB_DROP_REASON \
EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \
EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \
EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \
EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \
EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \
EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \
EMe(SKB_DROP_REASON_MAX, MAX)
/* Drop any earlier EM/EMe definitions before giving them a meaning here. */
#undef EM
#undef EMe
/*
 * First expansion: pass every enum value to TRACE_DEFINE_ENUM().
 * NOTE(review): presumably this exports the numeric values so the
 * symbolic names can be decoded outside the kernel - confirm against
 * the tracepoint documentation.
 */
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define EMe(a, b) TRACE_DEFINE_ENUM(a);
TRACE_SKB_DROP_REASON
/*
 * Second definition: each entry now becomes a { value, "name" } pair, the
 * form in which TRACE_SKB_DROP_REASON is handed to __print_symbolic() by
 * the kfree_skb event. EMe omits the trailing comma because it ends the
 * list.
 */
#undef EM
#undef EMe
#define EM(a, b) { a, #b },
#define EMe(a, b) { a, #b }
/* /*
* Tracepoint for free an sk_buff: * Tracepoint for free an sk_buff:
*/ */
TRACE_EVENT(kfree_skb, TRACE_EVENT(kfree_skb,
TP_PROTO(struct sk_buff *skb, void *location), TP_PROTO(struct sk_buff *skb, void *location,
enum skb_drop_reason reason),
TP_ARGS(skb, location), TP_ARGS(skb, location, reason),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( void *, skbaddr ) __field(void *, skbaddr)
__field( void *, location ) __field(void *, location)
__field( unsigned short, protocol ) __field(unsigned short, protocol)
__field(enum skb_drop_reason, reason)
), ),
TP_fast_assign( TP_fast_assign(
__entry->skbaddr = skb; __entry->skbaddr = skb;
__entry->location = location; __entry->location = location;
__entry->protocol = ntohs(skb->protocol); __entry->protocol = ntohs(skb->protocol);
__entry->reason = reason;
), ),
TP_printk("skbaddr=%p protocol=%u location=%p", TP_printk("skbaddr=%p protocol=%u location=%p reason: %s",
__entry->skbaddr, __entry->protocol, __entry->location) __entry->skbaddr, __entry->protocol, __entry->location,
__print_symbolic(__entry->reason,
TRACE_SKB_DROP_REASON))
); );
TRACE_EVENT(consume_skb, TRACE_EVENT(consume_skb,
......
...@@ -4899,7 +4899,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) ...@@ -4899,7 +4899,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
trace_consume_skb(skb); trace_consume_skb(skb);
else else
trace_kfree_skb(skb, net_tx_action); trace_kfree_skb(skb, net_tx_action,
SKB_DROP_REASON_NOT_SPECIFIED);
if (skb->fclone != SKB_FCLONE_UNAVAILABLE) if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb); __kfree_skb(skb);
......
...@@ -110,7 +110,8 @@ static u32 net_dm_queue_len = 1000; ...@@ -110,7 +110,8 @@ static u32 net_dm_queue_len = 1000;
struct net_dm_alert_ops { struct net_dm_alert_ops {
void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
void *location); void *location,
enum skb_drop_reason reason);
void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
int work, int budget); int work, int budget);
void (*work_item_func)(struct work_struct *work); void (*work_item_func)(struct work_struct *work);
...@@ -262,7 +263,9 @@ static void trace_drop_common(struct sk_buff *skb, void *location) ...@@ -262,7 +263,9 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
spin_unlock_irqrestore(&data->lock, flags); spin_unlock_irqrestore(&data->lock, flags);
} }
static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
void *location,
enum skb_drop_reason reason)
{ {
trace_drop_common(skb, location); trace_drop_common(skb, location);
} }
...@@ -490,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = { ...@@ -490,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
static void net_dm_packet_trace_kfree_skb_hit(void *ignore, static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
struct sk_buff *skb, struct sk_buff *skb,
void *location) void *location,
enum skb_drop_reason reason)
{ {
ktime_t tstamp = ktime_get_real(); ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *data; struct per_cpu_dm_data *data;
......
...@@ -759,21 +759,23 @@ void __kfree_skb(struct sk_buff *skb) ...@@ -759,21 +759,23 @@ void __kfree_skb(struct sk_buff *skb)
EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(__kfree_skb);
/** /**
* kfree_skb - free an sk_buff * kfree_skb_reason - free an sk_buff with special reason
* @skb: buffer to free * @skb: buffer to free
* @reason: reason why this skb is dropped
* *
* Drop a reference to the buffer and free it if the usage count has * Drop a reference to the buffer and free it if the usage count has
* hit zero. * hit zero. Meanwhile, pass the drop reason to 'kfree_skb'
* tracepoint.
*/ */
void kfree_skb(struct sk_buff *skb) void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{ {
if (!skb_unref(skb)) if (!skb_unref(skb))
return; return;
trace_kfree_skb(skb, __builtin_return_address(0)); trace_kfree_skb(skb, __builtin_return_address(0), reason);
__kfree_skb(skb); __kfree_skb(skb);
} }
EXPORT_SYMBOL(kfree_skb); EXPORT_SYMBOL(kfree_skb_reason);
void kfree_skb_list(struct sk_buff *segs) void kfree_skb_list(struct sk_buff *segs)
{ {
......
...@@ -1971,8 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -1971,8 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
const struct tcphdr *th; const struct tcphdr *th;
bool refcounted; bool refcounted;
struct sock *sk; struct sock *sk;
int drop_reason;
int ret; int ret;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST) if (skb->pkt_type != PACKET_HOST)
goto discard_it; goto discard_it;
...@@ -1984,8 +1986,10 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -1984,8 +1986,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data; th = (const struct tcphdr *)skb->data;
if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
goto bad_packet; goto bad_packet;
}
if (!pskb_may_pull(skb, th->doff * 4)) if (!pskb_may_pull(skb, th->doff * 4))
goto discard_it; goto discard_it;
...@@ -2090,8 +2094,10 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -2090,8 +2094,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
nf_reset_ct(skb); nf_reset_ct(skb);
if (tcp_filter(sk, skb)) if (tcp_filter(sk, skb)) {
drop_reason = SKB_DROP_REASON_TCP_FILTER;
goto discard_and_relse; goto discard_and_relse;
}
th = (const struct tcphdr *)skb->data; th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb); iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th); tcp_v4_fill_cb(skb, iph, th);
...@@ -2124,6 +2130,7 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -2124,6 +2130,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
return ret; return ret;
no_tcp_socket: no_tcp_socket:
drop_reason = SKB_DROP_REASON_NO_SOCKET;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it; goto discard_it;
...@@ -2131,6 +2138,7 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -2131,6 +2138,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
if (tcp_checksum_complete(skb)) { if (tcp_checksum_complete(skb)) {
csum_error: csum_error:
drop_reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb); trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet: bad_packet:
...@@ -2141,7 +2149,7 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -2141,7 +2149,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
discard_it: discard_it:
/* Discard frame. */ /* Discard frame. */
kfree_skb(skb); kfree_skb_reason(skb, drop_reason);
return 0; return 0;
discard_and_relse: discard_and_relse:
......
...@@ -2411,6 +2411,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ...@@ -2411,6 +2411,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
__be32 saddr, daddr; __be32 saddr, daddr;
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
bool refcounted; bool refcounted;
int drop_reason;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
/* /*
* Validate the packet. * Validate the packet.
...@@ -2466,6 +2469,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ...@@ -2466,6 +2469,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp_lib_checksum_complete(skb)) if (udp_lib_checksum_complete(skb))
goto csum_error; goto csum_error;
drop_reason = SKB_DROP_REASON_NO_SOCKET;
__UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
...@@ -2473,10 +2477,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ...@@ -2473,10 +2477,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
* Hmm. We got an UDP packet to a port to which we * Hmm. We got an UDP packet to a port to which we
* don't wanna listen. Ignore it. * don't wanna listen. Ignore it.
*/ */
kfree_skb(skb); kfree_skb_reason(skb, drop_reason);
return 0; return 0;
short_packet: short_packet:
drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
proto == IPPROTO_UDPLITE ? "Lite" : "", proto == IPPROTO_UDPLITE ? "Lite" : "",
&saddr, ntohs(uh->source), &saddr, ntohs(uh->source),
...@@ -2489,6 +2494,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ...@@ -2489,6 +2494,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
* RFC1122: OK. Discards the bad packet silently (as far as * RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST). * the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/ */
drop_reason = SKB_DROP_REASON_UDP_CSUM;
net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
proto == IPPROTO_UDPLITE ? "Lite" : "", proto == IPPROTO_UDPLITE ? "Lite" : "",
&saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
...@@ -2496,7 +2502,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, ...@@ -2496,7 +2502,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
__UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
drop: drop:
__UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb); kfree_skb_reason(skb, drop_reason);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment