Commit 14006f1d authored by Coco Li, committed by David S. Miller

Documentations: Analyze heavily used Networking related structs

Analyzed a few structs in the networking stack by looking at variables
within them that are used in the TCP/IP fast path.

Fast path is defined as TCP path where data is transferred from sender to
receiver unidirectionally. It doesn't include phases other than
TCP_ESTABLISHED, nor does it look at error paths.

We hope to reorganize variables that span many cachelines whose fast
path variables are also spread out, and this document can help future
developers keep networking fast path cachelines small.

Optimized_cacheline field is computed as
(Fastpath_Bytes/L3_cacheline_size_x86), and not the actual organized
results (see patches to come for these).

Investigation is done on 6.5

Name	                Struct_Cachelines  Cur_fastpath_cache Fastpath_Bytes Optimized_cacheline
tcp_sock	        42 (2664 Bytes)	   12   		396		8
net_device	        39 (2240 bytes)	   12			234		4
inet_sock	        15 (960 bytes)	   14			922		14
Inet_connection_sock	22 (1368 bytes)	   18			1166		18
Netns_ipv4 (sysctls)	12 (768 bytes)     4			77		2
linux_mib	        16 (1060)	   6			104		2

Note how there isn't much room for improvement in inet_sock and
inet_connection_sock because sk and icsk_inet, respectively, take up so
much of each struct that the rest of the variables make up only a small
portion of the struct size.

So, we decided to reorganize tcp_sock, net_device, and netns_ipv4.
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Coco Li <lixiaoyan@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 7453d7a6
......@@ -75,6 +75,7 @@ Contents:
mptcp-sysctl
multiqueue
napi
net_cachelines/index
netconsole
netdev-features
netdevices
......
.. SPDX-License-Identifier: GPL-2.0
.. Copyright (C) 2023 Google LLC
===================================
Common Networking Struct Cachelines
===================================
.. toctree::
:maxdepth: 1
inet_connection_sock
inet_sock
net_device
netns_ipv4_sysctl
snmp
tcp_sock
.. SPDX-License-Identifier: GPL-2.0
.. Copyright (C) 2023 Google LLC
=====================================================
inet_connection_sock struct fast path usage breakdown
=====================================================
Type Name fastpath_tx_access fastpath_rx_access comment
..struct ..inet_connection_sock
struct_inet_sock icsk_inet read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data
struct_request_sock_queue icsk_accept_queue - -
struct_inet_bind_bucket icsk_bind_hash read_mostly - tcp_set_state
struct_inet_bind2_bucket icsk_bind2_hash read_mostly - tcp_set_state,inet_put_port
unsigned_long icsk_timeout read_mostly - inet_csk_reset_xmit_timer,tcp_connect
struct_timer_list icsk_retransmit_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect
struct_timer_list icsk_delack_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect
u32 icsk_rto read_write - tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
u32 icsk_rto_min - -
u32 icsk_delack_max - -
u32 icsk_pmtu_cookie read_write - tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect
struct_tcp_congestion_ops icsk_ca_ops read_write - tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit
struct_inet_connection_sock_af_ops icsk_af_ops read_mostly - tcp_finish_connect,tcp_send_syn_data,tcp_mtup_init,tcp_mtu_check_reprobe,tcp_mtu_probe,tcp_connect_init,tcp_connect,__tcp_transmit_skb
struct_tcp_ulp_ops* icsk_ulp_ops - -
void* icsk_ulp_data - -
u8:5 icsk_ca_state read_write - tcp_cwnd_application_limited,tcp_set_ca_state,tcp_enter_cwr,tcp_tso_should_defer,tcp_mtu_probe,tcp_schedule_loss_probe,tcp_write_xmit,__tcp_transmit_skb
u8:1 icsk_ca_initialized read_write - tcp_init_transfer,tcp_init_congestion_control,tcp_finish_connect,tcp_connect
u8:1 icsk_ca_setsockopt - -
u8:1 icsk_ca_dst_locked write_mostly - tcp_ca_dst_init,tcp_connect_init,tcp_connect
u8 icsk_retransmits write_mostly - tcp_connect_init,tcp_connect
u8 icsk_pending read_write - inet_csk_reset_xmit_timer,tcp_connect,tcp_check_probe_timer,__tcp_push_pending_frames,tcp_rearm_rto,tcp_event_new_data_sent
u8 icsk_backoff write_mostly - tcp_write_queue_purge,tcp_connect_init
u8 icsk_syn_retries - -
u8 icsk_probes_out - -
u16 icsk_ext_hdr_len read_mostly - __tcp_mtu_to_mss,tcp_mtu_to_rss,tcp_mtu_probe,tcp_write_xmit,tcp_mtu_to_mss
struct_icsk_ack_u8 pending read_write read_write inet_csk_ack_scheduled,__tcp_cleanup_rbuf,tcp_cleanup_rbuf,inet_csk_clear_xmit_timer,tcp_event_ack_sent,inet_csk_reset_xmit_timer
struct_icsk_ack_u8 quick read_write write_mostly tcp_dec_quickack_mode,tcp_event_ack_sent,__tcp_transmit_skb,__tcp_select_window,__tcp_cleanup_rbuf
struct_icsk_ack_u8 pingpong - -
struct_icsk_ack_u8 retry write_mostly read_write inet_csk_clear_xmit_timer,tcp_rearm_rto,tcp_event_new_data_sent,tcp_write_xmit,__tcp_send_ack,tcp_send_ack
struct_icsk_ack_u8 ato read_mostly write_mostly tcp_dec_quickack_mode,tcp_event_ack_sent,__tcp_transmit_skb,__tcp_send_ack,tcp_send_ack
struct_icsk_ack_unsigned_long timeout read_write read_write inet_csk_reset_xmit_timer,tcp_connect
struct_icsk_ack_u32 lrcvtime read_write - tcp_finish_connect,tcp_connect,tcp_event_data_sent,__tcp_transmit_skb
struct_icsk_ack_u16 rcv_mss write_mostly read_mostly __tcp_select_window,__tcp_cleanup_rbuf,tcp_initialize_rcv_mss,tcp_connect_init
struct_icsk_mtup_int search_high read_write - tcp_mtup_init,tcp_sync_mss,tcp_connect_init,tcp_mtu_check_reprobe,tcp_write_xmit
struct_icsk_mtup_int search_low read_write - tcp_mtu_probe,tcp_mtu_check_reprobe,tcp_write_xmit,tcp_sync_mss,tcp_connect_init,tcp_mtup_init
struct_icsk_mtup_u32:31 probe_size read_write - tcp_mtup_init,tcp_connect_init,__tcp_transmit_skb
struct_icsk_mtup_u32:1 enabled read_write - tcp_mtup_init,tcp_sync_mss,tcp_connect_init,tcp_mtu_probe,tcp_write_xmit
struct_icsk_mtup_u32 probe_timestamp read_write - tcp_mtup_init,tcp_connect_init,tcp_mtu_check_reprobe,tcp_mtu_probe
u32 icsk_probes_tstamp - -
u32 icsk_user_timeout - -
u64[104/sizeof(u64)] icsk_ca_priv - -
.. SPDX-License-Identifier: GPL-2.0
.. Copyright (C) 2023 Google LLC
==========================================
inet_sock struct fast path usage breakdown
==========================================
Type Name fastpath_tx_access fastpath_rx_access comment
..struct ..inet_sock
struct_sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data
struct_ipv6_pinfo* pinet6 - -
be16 inet_sport read_mostly - __tcp_transmit_skb
be32 inet_daddr read_mostly - ip_select_ident_segs
be32 inet_rcv_saddr - -
be16 inet_dport read_mostly - __tcp_transmit_skb
u16 inet_num - -
be32 inet_saddr - -
s16 uc_ttl read_mostly - __ip_queue_xmit/ip_select_ttl
u16 cmsg_flags - -
struct_ip_options_rcu* inet_opt read_mostly - __ip_queue_xmit
u16 inet_id read_mostly - ip_select_ident_segs
u8 tos read_mostly - ip_queue_xmit
u8 min_ttl - -
u8 mc_ttl - -
u8 pmtudisc - -
u8:1 recverr - -
u8:1 is_icsk - -
u8:1 freebind - -
u8:1 hdrincl - -
u8:1 mc_loop - -
u8:1 transparent - -
u8:1 mc_all - -
u8:1 nodefrag - -
u8:1 bind_address_no_port - -
u8:1 recverr_rfc4884 - -
u8:1 defer_connect read_mostly - tcp_sendmsg_fastopen
u8 rcv_tos - -
u8 convert_csum - -
int uc_index - -
int mc_index - -
be32 mc_addr - -
struct_ip_mc_socklist* mc_list - -
struct_inet_cork_full cork read_mostly - __tcp_transmit_skb
struct local_port_range - -
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -14986,6 +14986,7 @@ Q: https://patchwork.kernel.org/project/netdevbpf/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/devicetree/bindings/net/
F: Documentation/networking/net_cachelines/net_device.rst
F: drivers/connector/
F: drivers/net/
F: include/dt-bindings/net/
......@@ -15041,6 +15042,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
F: Documentation/core-api/netlink.rst
F: Documentation/netlink/
F: Documentation/networking/
F: Documentation/networking/net_cachelines/
F: Documentation/process/maintainer-netdev.rst
F: Documentation/userspace-api/netlink/
F: include/linux/in.h
......@@ -15149,6 +15151,7 @@ NETWORKING [TCP]
M: Eric Dumazet <edumazet@google.com>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/net_cachelines/tcp_sock.rst
F: include/linux/tcp.h
F: include/net/tcp.h
F: include/trace/events/tcp.h
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment