Commit 9f120e76 authored by David S. Miller

Merge branch 'mptcp-prereq'

Mat Martineau says:

====================
Multipath TCP: Prerequisites

v6 -> v7: Rename/move ULP clone helper to make inline-friendly (patch 5)

v5 -> v6: Fix BPF accessors for sk_type and sk_protocol (patch 2), fix
the width of an __unused bitfield (patch 6), and add some commit message
and comment text (patches 5 & 7).

v4 -> v5: Cover letter subject fix. No changes to commits.

v3 -> v4: Update coalesce/collapse of incoming MPTCP skbs (patch 7)

v2 -> v3: Ensure sk_type alignment in struct sock (patch 2)

v1 -> v2: sk_pacing_shift left as a regular struct member (patch 2), and
modified SACK space check based on recent -net fix (patch 9).

The MPTCP upstreaming community has been collaborating on an
upstreamable MPTCP implementation that complies with RFC 8684. A minimal
set of features to comply with the specification involves a sizeable set
of code changes, so David requested that we split this work into
multiple, smaller patch sets to build up MPTCP infrastructure.

The minimal MPTCP feature set we are proposing for review in the v5.6
timeframe begins with these three parts:

Part 1 (this patch set): MPTCP prerequisites. Introduce some MPTCP
definitions, additional ULP and skb extension features, TCP option space
checking, and a few exported symbols.

Part 2: Single subflow implementation and self tests.

Part 3: Switch from MPTCP v0 (RFC 6824) to MPTCP v1 (new RFC 8684,
publication expected in the next few days).

Additional patches for multiple subflow support, path management, active
backup, and other features are in the pipeline for submission after
making progress with the above reviews.

Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v7-part1)

Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v7-part1

Thank you for your review. You can find us at mptcp@lists.01.org and
https://is.gd/mptcp_upstream
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 6b3acfc3 8b69a803
MAINTAINERS

@@ -11573,6 +11573,16 @@ F:	net/ipv6/calipso.c
 F:	net/netfilter/xt_CONNSECMARK.c
 F:	net/netfilter/xt_SECMARK.c
 
+NETWORKING [MPTCP]
+M:	Mat Martineau <mathew.j.martineau@linux.intel.com>
+M:	Matthieu Baerts <matthieu.baerts@tessares.net>
+L:	netdev@vger.kernel.org
+L:	mptcp@lists.01.org
+W:	https://github.com/multipath-tcp/mptcp_net-next/wiki
+B:	https://github.com/multipath-tcp/mptcp_net-next/issues
+S:	Maintained
+F:	include/net/mptcp.h
+
 NETWORKING [TCP]
 M:	Eric Dumazet <edumazet@google.com>
 L:	netdev@vger.kernel.org
include/linux/skbuff.h

@@ -4096,6 +4096,9 @@ enum skb_ext_id {
 #endif
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
 	TC_SKB_EXT,
+#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+	SKB_EXT_MPTCP,
 #endif
 	SKB_EXT_NUM, /* must be last */
 };
@@ -4117,6 +4120,9 @@ struct skb_ext {
 	char data[0] __aligned(8);
 };
 
+struct skb_ext *__skb_ext_alloc(void);
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
+		    struct skb_ext *ext);
 void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
 void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
 void __skb_ext_put(struct skb_ext *ext);
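For orientation, here is a sketch of how a consumer might use the new extension id on the receive path. Only skb_ext_add() and SKB_EXT_MPTCP come from this series; the helper name mptcp_attach_ext() and the field values are illustrative:

/* Illustrative only: attach an MPTCP data-sequence mapping to an skb
 * via the new extension id. The helper name is hypothetical.
 */
static struct mptcp_ext *mptcp_attach_ext(struct sk_buff *skb, u64 data_seq)
{
	struct mptcp_ext *mpext;

	/* allocates or COWs the extension area (GFP_ATOMIC internally) */
	mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
	if (!mpext)
		return NULL;

	memset(mpext, 0, sizeof(*mpext));	/* MPTCP clears the ext on add */
	mpext->data_seq = data_seq;
	mpext->use_map = 1;
	mpext->dsn64 = 1;
	return mpext;
}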
include/net/mptcp.h (new file)

/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */
#ifndef __NET_MPTCP_H
#define __NET_MPTCP_H

#include <linux/skbuff.h>
#include <linux/types.h>

/* MPTCP sk_buff extension data */
struct mptcp_ext {
	u64		data_ack;
	u64		data_seq;
	u32		subflow_seq;
	u16		data_len;
	u8		use_map:1,
			dsn64:1,
			data_fin:1,
			use_ack:1,
			ack64:1,
			__unused:3;
	/* one byte hole */
};

#ifdef CONFIG_MPTCP

/* move the skb extension ownership, with the assumption that 'to' is
 * newly allocated
 */
static inline void mptcp_skb_ext_move(struct sk_buff *to,
				      struct sk_buff *from)
{
	if (!skb_ext_exist(from, SKB_EXT_MPTCP))
		return;

	if (WARN_ON_ONCE(to->active_extensions))
		skb_ext_put(to);

	to->active_extensions = from->active_extensions;
	to->extensions = from->extensions;
	from->active_extensions = 0;
}

static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext,
				     const struct mptcp_ext *from_ext)
{
	/* MPTCP always clears the ext when adding it to the skb, so
	 * holes do not bother us here
	 */
	return !from_ext ||
	       (to_ext && from_ext &&
		!memcmp(from_ext, to_ext, sizeof(struct mptcp_ext)));
}

/* Check whether two skbs can be collapsed.
 * MPTCP collapse is allowed if neither @to nor @from carries an MPTCP data
 * mapping, or if the extension of @to is the same as that of @from.
 * Collapsing is not possible if @to lacks an extension but @from carries one.
 */
static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
					  const struct sk_buff *from)
{
	return mptcp_ext_matches(skb_ext_find(to, SKB_EXT_MPTCP),
				 skb_ext_find(from, SKB_EXT_MPTCP));
}

#else

static inline void mptcp_skb_ext_move(struct sk_buff *to,
				      const struct sk_buff *from)
{
}

static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
					  const struct sk_buff *from)
{
	return true;
}

#endif /* CONFIG_MPTCP */
#endif /* __NET_MPTCP_H */
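The collapse rule above reduces to four cases; spelled out for clarity (an editorial illustration, not part of the file):

/* mptcp_skb_can_collapse() case table (illustration only):
 *
 *	@to has ext	@from has ext	result
 *	-----------	-------------	--------------------------------------
 *	no		no		true  (plain TCP, nothing to lose)
 *	yes		no		true  (@from carries no mapping)
 *	no		yes		false (@from's mapping would be lost)
 *	yes		yes		true iff both exts are byte-identical
 */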
include/net/sock.h

@@ -436,31 +436,15 @@ struct sock {
 	 * Because of non atomicity rules, all
 	 * changes are protected by socket lock.
 	 */
-	unsigned int		__sk_flags_offset[0];
-#ifdef __BIG_ENDIAN_BITFIELD
-#define SK_FL_PROTO_SHIFT      16
-#define SK_FL_PROTO_MASK       0x00ff0000
-
-#define SK_FL_TYPE_SHIFT       0
-#define SK_FL_TYPE_MASK        0x0000ffff
-#else
-#define SK_FL_PROTO_SHIFT      8
-#define SK_FL_PROTO_MASK       0x0000ff00
-
-#define SK_FL_TYPE_SHIFT       16
-#define SK_FL_TYPE_MASK        0xffff0000
-#endif
-
-	unsigned int		sk_padding : 1,
+	u8			sk_padding : 1,
 				sk_kern_sock : 1,
 				sk_no_check_tx : 1,
 				sk_no_check_rx : 1,
-				sk_userlocks : 4,
-				sk_protocol  : 8,
-				sk_type      : 16;
-#define SK_PROTOCOL_MAX U8_MAX
-	u16			sk_gso_max_segs;
+				sk_userlocks : 4;
 	u8			sk_pacing_shift;
+	u16			sk_type;
+	u16			sk_protocol;
+	u16			sk_gso_max_segs;
 	unsigned long		sk_lingertime;
 	struct proto		*sk_prot_creator;
 	rwlock_t		sk_callback_lock;
@@ -1480,6 +1464,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 	sk_mem_uncharge(sk, skb->truesize);
 	if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
 	    !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
+		skb_ext_reset(skb);
 		skb_zcopy_clear(skb, true);
 		sk->sk_tx_skb_cache = skb;
 		return;
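To make the layout change concrete: sk_type and sk_protocol used to be packed into a bitfield word reachable only through __sk_flags_offset plus endian-dependent mask/shift pairs; they are now ordinary u16 members, so a read is a plain load. A sketch of before and after (sk_read_proto() is a made-up name):

/* Old pattern (removed above): endian-aware mask and shift on the word
 * overlapping the bitfield.
 *
 *	u32 flags = *(u32 *)((u8 *)sk + offsetof(struct sock, __sk_flags_offset));
 *	u16 proto = (flags & SK_FL_PROTO_MASK) >> SK_FL_PROTO_SHIFT;
 *
 * New pattern: a direct, endian-independent field access.
 */
static inline u16 sk_read_proto(const struct sock *sk)
{
	return sk->sk_protocol;
}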
include/net/tcp.h

@@ -39,6 +39,7 @@
 #include <net/tcp_states.h>
 #include <net/inet_ecn.h>
 #include <net/dst.h>
+#include <net/mptcp.h>
 
 #include <linux/seq_file.h>
 #include <linux/memcontrol.h>
@@ -182,6 +183,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK		5	/* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP		30	/* Multipath TCP (RFC6824) */
 #define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
 #define TCPOPT_EXP		254	/* Experimental */
 /* Magic number to be after the option value for sharing TCP
@@ -328,6 +330,9 @@ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
			 size_t size, int flags);
 ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
			 size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+	      int size_goal);
 void tcp_release_cb(struct sock *sk);
 void tcp_wfree(struct sk_buff *skb);
 void tcp_write_timer_handler(struct sock *sk);
@@ -977,6 +982,13 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
 	return likely(!TCP_SKB_CB(skb)->eor);
 }
 
+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+					const struct sk_buff *from)
+{
+	return likely(tcp_skb_can_collapse_to(to) &&
+		      mptcp_skb_can_collapse(to, from));
+}
+
 /* Events passed to congestion control interface */
 enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
@@ -2002,6 +2014,11 @@ struct tcp_request_sock_ops {
			   enum tcp_synack_type synack_type);
 };
 
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif
+
 #ifdef CONFIG_SYN_COOKIES
 static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
					 const struct sock *sk, struct sk_buff *skb,
@@ -2153,6 +2170,9 @@ struct tcp_ulp_ops {
	/* diagnostic */
	int (*get_info)(const struct sock *sk, struct sk_buff *skb);
	size_t (*get_info_size)(const struct sock *sk);
+	/* clone ulp */
+	void (*clone)(const struct request_sock *req, struct sock *newsk,
+		      const gfp_t priority);
 
	char name[TCP_ULP_NAME_MAX];
	struct module	*owner;
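The new clone hook runs while a listener's child socket is being set up in inet_csk_clone_lock() (see the net/ipv4/inet_connection_sock.c hunk below). A minimal sketch of a ULP implementing it; the "demo" ULP and its context struct are hypothetical, only the .clone member comes from this series:

struct demo_ulp_ctx { int some_state; };	/* hypothetical per-sock data */

static void demo_ulp_clone(const struct request_sock *req, struct sock *newsk,
			   const gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(newsk);

	/* the child inherited the listener's icsk_ulp_data pointer;
	 * give it a private copy instead of sharing the listener's
	 */
	if (icsk->icsk_ulp_data)
		icsk->icsk_ulp_data = kmemdup(icsk->icsk_ulp_data,
					      sizeof(struct demo_ulp_ctx),
					      priority);
}

static struct tcp_ulp_ops demo_ulp_ops __read_mostly = {
	.name	= "demo",
	.owner	= THIS_MODULE,
	/* .init and the other mandatory ops omitted for brevity */
	.clone	= demo_ulp_clone,
};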
include/trace/events/sock.h

@@ -19,7 +19,8 @@
 #define inet_protocol_names		\
		EM(IPPROTO_TCP)			\
		EM(IPPROTO_DCCP)		\
-		EMe(IPPROTO_SCTP)
+		EM(IPPROTO_SCTP)		\
+		EMe(IPPROTO_MPTCP)
 
 #define tcp_state_names			\
		EM(TCP_ESTABLISHED)		\
@@ -147,7 +148,7 @@ TRACE_EVENT(inet_sock_set_state,
		__field(__u16, sport)
		__field(__u16, dport)
		__field(__u16, family)
-		__field(__u8, protocol)
+		__field(__u16, protocol)
		__array(__u8, saddr, 4)
		__array(__u8, daddr, 4)
		__array(__u8, saddr_v6, 16)
include/uapi/linux/in.h

@@ -76,6 +76,8 @@ enum {
 #define IPPROTO_MPLS		IPPROTO_MPLS
   IPPROTO_RAW = 255,		/* Raw IP packets			*/
 #define IPPROTO_RAW		IPPROTO_RAW
+  IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
+#define IPPROTO_MPTCP		IPPROTO_MPTCP
   IPPROTO_MAX
 };
 #endif
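IPPROTO_MPTCP deliberately sits above the 8-bit protocol range, which is why sk_protocol is widened to u16 above. Userspace will eventually request an MPTCP socket with it; a sketch of that future call follows. With only this series applied it still fails with EPROTONOSUPPORT, since no protocol handler is registered yet:

#include <netinet/in.h>
#include <sys/socket.h>

/* Userspace sketch: request a Multipath TCP socket. Works only once the
 * later MPTCP parts add a protocol handler for IPPROTO_MPTCP (262).
 */
int open_mptcp_socket(void)
{
	return socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
}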
net/ax25/af_ax25.c

@@ -808,7 +808,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
 	struct sock *sk;
 	ax25_cb *ax25;
 
-	if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+	if (protocol < 0 || protocol > U8_MAX)
 		return -EINVAL;
 
 	if (!net_eq(net, &init_net))
net/core/filter.c

@@ -7607,21 +7607,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct bpf_sock, type):
-		BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
-		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, __sk_flags_offset));
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
-		*target_size = 2;
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock, sk_type),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock, sk_type,
+				       sizeof_field(struct sock, sk_type),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, protocol):
-		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
-		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, __sk_flags_offset));
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
-		*target_size = 1;
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock, sk_protocol),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock, sk_protocol,
+				       sizeof_field(struct sock, sk_protocol),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, src_ip4):
@@ -7903,20 +7903,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct bpf_sock_addr, type):
-		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
-			struct bpf_sock_addr_kern, struct sock, sk,
-			__sk_flags_offset, BPF_W, 0);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
					    struct sock, sk, sk_type);
 		break;
 
 	case offsetof(struct bpf_sock_addr, protocol):
-		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
-			struct bpf_sock_addr_kern, struct sock, sk,
-			__sk_flags_offset, BPF_W, 0);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
-					SK_FL_PROTO_SHIFT);
+		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
					    struct sock, sk, sk_protocol);
 		break;
 
 	case offsetof(struct bpf_sock_addr, msg_src_ip4):
@@ -8835,11 +8828,11 @@ sk_reuseport_is_valid_access(int off, int size,
					     skb,			\
					     SKB_FIELD)
 
-#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
-	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
-					     struct sock,		\
-					     sk,			\
-					     SK_FIELD, BPF_SIZE, EXTRA_OFF)
+#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD)				\
+	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
				    struct sock,			\
				    sk,					\
				    SK_FIELD)
 
 static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
					   const struct bpf_insn *si,
@@ -8863,16 +8856,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct sk_reuseport_md, ip_protocol):
-		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
-		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
-						    BPF_W, 0);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
-					SK_FL_PROTO_SHIFT);
-		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
-		 * aware. No further narrowing or masking is needed.
-		 */
-		*target_size = 1;
+		SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
 		break;
 
 	case offsetof(struct sk_reuseport_md, data_end):
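From the BPF program side nothing changes syntactically, but each context access above now compiles down to a single load of the real u16 field instead of a load-mask-shift sequence. A minimal sketch of a program exercising both accessors (assumes a libbpf-style build; numeric constants used to stay header-independent):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sock")
int allow_tcp_only(struct bpf_sock *sk)
{
	/* each field read below is rewritten by the verifier into one
	 * BPF_LDX_MEM of the corresponding u16 in struct sock
	 */
	if (sk->type == 1 /* SOCK_STREAM */ && sk->protocol == 6 /* IPPROTO_TCP */)
		return 1;	/* allow */
	return 0;		/* deny */
}

char LICENSE[] SEC("license") = "GPL";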
net/core/skbuff.c

@@ -68,6 +68,7 @@
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 #include <net/mpls.h>
+#include <net/mptcp.h>
 
 #include <linux/uaccess.h>
 #include <trace/events/skb.h>
@@ -4109,6 +4110,9 @@ static const u8 skb_ext_type_len[] = {
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
 	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
 #endif
+#if IS_ENABLED(CONFIG_MPTCP)
+	[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
+#endif
 };
 
 static __always_inline unsigned int skb_ext_total_length(void)
@@ -4122,6 +4126,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
 #endif
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
 		skb_ext_type_len[TC_SKB_EXT] +
+#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+		skb_ext_type_len[SKB_EXT_MPTCP] +
 #endif
 		0;
 }
@@ -5980,7 +5987,14 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
 	return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
 }
 
-static struct skb_ext *skb_ext_alloc(void)
+/**
+ * __skb_ext_alloc - allocate a new skb extensions storage
+ *
+ * Returns the newly allocated pointer. The pointer can later be attached
+ * to an skb via __skb_ext_set().
+ * Note: caller must handle the skb_ext as opaque data.
+ */
+struct skb_ext *__skb_ext_alloc(void)
 {
 	struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
 
@@ -6020,6 +6034,30 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
 	return new;
 }
 
+/**
+ * __skb_ext_set - attach the specified extension storage to this skb
+ * @skb: buffer
+ * @id: extension id
+ * @ext: extension storage previously allocated via __skb_ext_alloc()
+ *
+ * Existing extensions, if any, are cleared.
+ *
+ * Returns the pointer to the extension.
+ */
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
+		    struct skb_ext *ext)
+{
+	unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
+
+	skb_ext_put(skb);
+	newlen = newoff + skb_ext_type_len[id];
+	ext->chunks = newlen;
+	ext->offset[id] = newoff;
+	skb->extensions = ext;
+	skb->active_extensions = 1 << id;
+	return skb_ext_get_ptr(ext, id);
+}
+
 /**
  * skb_ext_add - allocate space for given extension, COW if needed
  * @skb: buffer
@@ -6053,7 +6091,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
 	} else {
 		newoff = SKB_EXT_CHUNKSIZEOF(*new);
 
-		new = skb_ext_alloc();
+		new = __skb_ext_alloc();
 		if (!new)
 			return NULL;
 	}
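The exported pair is meant for callers that own the skb outright and want to install freshly allocated extension storage without going through skb_ext_add()'s copy-on-write path. A sketch of the intended pairing (caller code assumed, not part of this patch):

/* Sketch: preallocate extension storage, then attach it to a private
 * skb in one step, replacing whatever extensions it carried.
 */
static struct mptcp_ext *attach_fresh_ext(struct sk_buff *skb)
{
	struct skb_ext *ext = __skb_ext_alloc();

	if (!ext)
		return NULL;

	return __skb_ext_set(skb, SKB_EXT_MPTCP, ext);
}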
net/decnet/af_decnet.c

@@ -670,7 +670,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct sock *sk;
 
-	if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+	if (protocol < 0 || protocol > U8_MAX)
 		return -EINVAL;
 
 	if (!net_eq(net, &init_net))
net/ipv4/inet_connection_sock.c

@@ -770,6 +770,18 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 }
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
+static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
+			   const gfp_t priority)
+{
+	struct inet_connection_sock *icsk = inet_csk(newsk);
+
+	if (!icsk->icsk_ulp_ops)
+		return;
+
+	if (icsk->icsk_ulp_ops->clone)
+		icsk->icsk_ulp_ops->clone(req, newsk, priority);
+}
+
 /**
  *	inet_csk_clone_lock - clone an inet socket, and lock its clone
  *	@sk: the socket to clone
@@ -810,6 +822,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		/* Deinitialize accept_queue to trap illegal accesses. */
 		memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
 
+		inet_clone_ulp(req, newsk, priority);
+
 		security_inet_csk_clone(newsk, req);
 	}
 	return newsk;
net/ipv4/tcp.c

@@ -690,7 +690,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
 }
 
-static void tcp_push(struct sock *sk, int flags, int mss_now,
-		     int nonagle, int size_goal)
+void tcp_push(struct sock *sk, int flags, int mss_now,
+	      int nonagle, int size_goal)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -925,7 +925,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 	return max(size_goal, mss_now);
 }
 
-static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
 {
 	int mss_now;
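Dropping the static qualifier lets the upcoming MPTCP layer drive a subflow's TCP send path directly. A rough sketch of the intended caller shape (hypothetical; the real MPTCP sendmsg lands in part 2):

/* Hypothetical: size a write against the subflow's MSS, queue data on
 * the subflow socket's write queue, then push it out.
 */
static void demo_subflow_push(struct sock *ssk, int flags)
{
	int size_goal, mss_now;

	mss_now = tcp_send_mss(ssk, &size_goal, flags);

	/* ... queue up to size_goal bytes on ssk's write queue ... */

	tcp_push(ssk, flags, mss_now, TCP_NAGLE_OFF, size_goal);
}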
net/ipv4/tcp_input.c

@@ -1422,7 +1422,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
 		goto fallback;
 
-	if (!tcp_skb_can_collapse_to(prev))
+	if (!tcp_skb_can_collapse(prev, skb))
 		goto fallback;
 
 	in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
@@ -4423,6 +4423,9 @@ static bool tcp_try_coalesce(struct sock *sk,
 	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
 		return false;
 
+	if (!mptcp_skb_can_collapse(to, from))
+		return false;
+
 #ifdef CONFIG_TLS_DEVICE
 	if (from->decrypted != to->decrypted)
 		return false;
@@ -4932,7 +4935,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 		/* The first skb to collapse is:
 		 * - not SYN/FIN and
 		 * - bloated or contains data before "start" or
-		 *   overlaps to the next one.
+		 *   overlaps to the next one and MPTCP allows collapsing.
 		 */
 		if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
 		    (tcp_win_from_space(sk, skb->truesize) > skb->len ||
@@ -4941,7 +4944,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 			break;
 		}
 
-		if (n && n != tail &&
+		if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
 		    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
 			end_of_skbs = false;
 			break;
@@ -4974,6 +4977,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 		else
 			__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
 		skb_set_owner_r(nskb, sk);
+		mptcp_skb_ext_move(nskb, skb);
 
 		/* Copy data, releasing collapsed skbs. */
 		while (copy > 0) {
@@ -4993,6 +4997,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 			skb = tcp_collapse_one(sk, skb, list, root);
 			if (!skb ||
 			    skb == tail ||
+			    !mptcp_skb_can_collapse(nskb, skb) ||
 			    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
 				goto end;
 #ifdef CONFIG_TLS_DEVICE
net/ipv4/tcp_ipv4.c

@@ -1426,7 +1426,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
 	.syn_ack_timeout =	tcp_syn_ack_timeout,
 };
 
-static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 	.mss_clamp	=	TCP_MSS_DEFAULT,
 #ifdef CONFIG_TCP_MD5SIG
 	.req_md5_lookup	=	tcp_v4_md5_lookup,
net/ipv4/tcp_output.c

@@ -754,11 +754,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
 	if (unlikely(eff_sacks)) {
 		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
+		if (unlikely(remaining < TCPOLEN_SACK_BASE_ALIGNED +
+					 TCPOLEN_SACK_PERBLOCK))
+			return size;
+
 		opts->num_sack_blocks =
 			min_t(unsigned int, eff_sacks,
 			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
 			      TCPOLEN_SACK_PERBLOCK);
-		size += TCPOLEN_SACK_BASE_ALIGNED +
-			opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+
+		if (likely(opts->num_sack_blocks))
+			size += TCPOLEN_SACK_BASE_ALIGNED +
+				opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
 	}
@@ -2865,7 +2869,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 		if (!tcp_can_collapse(sk, skb))
 			break;
 
-		if (!tcp_skb_can_collapse_to(to))
+		if (!tcp_skb_can_collapse(to, skb))
 			break;
 
 		space -= skb->len;
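A worked example of why the new guard matters once MPTCP options enter the picture, using the actual kernel constants (MAX_TCP_OPTION_SPACE = 40, TCPOLEN_SACK_BASE_ALIGNED = 4, TCPOLEN_SACK_PERBLOCK = 8); the 32-byte figure is an assumed example, not from this patch:

/* Example: suppose earlier options (timestamps plus a large MPTCP
 * option) already consumed 32 bytes:
 *
 *	remaining = 40 - 32 = 8;	// 8 < 4 + 8  ->  return size;
 *
 * Without the guard, (8 - 4) / 8 = 0 sack blocks would still have grown
 * size by the 4-byte SACK base, and with remaining < 4 the unsigned
 * subtraction would wrap, overfilling the TCP option space.
 */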
net/ipv6/tcp_ipv6.c

@@ -75,7 +75,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 
 static const struct inet_connection_sock_af_ops ipv6_mapped;
-static const struct inet_connection_sock_af_ops ipv6_specific;
+const struct inet_connection_sock_af_ops ipv6_specific;
 #ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
@@ -819,7 +819,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
 	.syn_ack_timeout =	tcp_syn_ack_timeout,
 };
 
-static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
 #ifdef CONFIG_TCP_MD5SIG
@@ -1794,7 +1794,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 	.twsk_destructor = tcp_twsk_destructor,
 };
 
-static const struct inet_connection_sock_af_ops ipv6_specific = {
+const struct inet_connection_sock_af_ops ipv6_specific = {
 	.queue_xmit	   = inet6_csk_xmit,
 	.send_check	   = tcp_v6_send_check,
 	.rebuild_header	   = inet6_sk_rebuild_header,
tools/include/uapi/linux/in.h

@@ -76,6 +76,8 @@ enum {
 #define IPPROTO_MPLS		IPPROTO_MPLS
   IPPROTO_RAW = 255,		/* Raw IP packets			*/
 #define IPPROTO_RAW		IPPROTO_RAW
+  IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
+#define IPPROTO_MPTCP		IPPROTO_MPTCP
   IPPROTO_MAX
 };
 #endif