Commit e3826f1e authored by Amerigo Wang, committed by David S. Miller

net: reserve ports for applications using fixed port numbers

(Dropped the infiniband part because Tetsuo modified the related code;
I will send a separate patch for it once this is accepted.)

This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
allows users to reserve ports for third-party applications.

The reserved ports will not be used by automatic port assignments
(e.g. when calling connect() or bind() with port number 0). Explicit
port allocation behavior is unchanged.
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 9f977fb7
...@@ -588,6 +588,37 @@ ip_local_port_range - 2 INTEGERS ...@@ -588,6 +588,37 @@ ip_local_port_range - 2 INTEGERS
(i.e. by default) range 1024-4999 is enough to issue up to (i.e. by default) range 1024-4999 is enough to issue up to
2000 connections per second to systems supporting timestamps. 2000 connections per second to systems supporting timestamps.
ip_local_reserved_ports - list of comma separated ranges
Specify the ports which are reserved for known third-party
applications. These ports will not be used by automatic port
assignments (e.g. when calling connect() or bind() with port
number 0). Explicit port allocation behavior is unchanged.
The format used for both input and output is a comma separated
list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
10). Writing to the file will clear all previously reserved
ports and update the current list with the one given in the
input.
Note that ip_local_port_range and ip_local_reserved_ports
settings are independent and both are considered by the kernel
when determining which ports are available for automatic port
assignments.
You can reserve ports which are not in the current
ip_local_port_range, e.g.:
$ cat /proc/sys/net/ipv4/ip_local_port_range
32000 61000
$ cat /proc/sys/net/ipv4/ip_local_reserved_ports
8080,9148
although this is redundant. However, such a setting is useful
if later the port range is changed to a value that will
include the reserved ports.
Default: Empty
ip_nonlocal_bind - BOOLEAN ip_nonlocal_bind - BOOLEAN
If set, allows processes to bind() to non-local IP addresses, If set, allows processes to bind() to non-local IP addresses,
which can be quite useful - but may break some applications. which can be quite useful - but may break some applications.
......
...@@ -184,6 +184,12 @@ extern struct local_ports { ...@@ -184,6 +184,12 @@ extern struct local_ports {
} sysctl_local_ports; } sysctl_local_ports;
extern void inet_get_local_port_range(int *low, int *high); extern void inet_get_local_port_range(int *low, int *high);
extern unsigned long *sysctl_local_reserved_ports;
/*
 * Tell whether @port is flagged in the sysctl_local_reserved_ports bitmap.
 * The value returned is exactly what test_bit() reports for that bit, so
 * callers should treat it as a truth value (zero means "not reserved").
 */
static inline int inet_is_reserved_local_port(int port)
{
	unsigned long *reserved_map = sysctl_local_reserved_ports;

	return test_bit(port, reserved_map);
}
extern int sysctl_ip_default_ttl; extern int sysctl_ip_default_ttl;
extern int sysctl_ip_nonlocal_bind; extern int sysctl_ip_nonlocal_bind;
......
...@@ -1573,9 +1573,13 @@ static int __init inet_init(void) ...@@ -1573,9 +1573,13 @@ static int __init inet_init(void)
BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
if (!sysctl_local_reserved_ports)
goto out;
rc = proto_register(&tcp_prot, 1); rc = proto_register(&tcp_prot, 1);
if (rc) if (rc)
goto out; goto out_free_reserved_ports;
rc = proto_register(&udp_prot, 1); rc = proto_register(&udp_prot, 1);
if (rc) if (rc)
...@@ -1674,6 +1678,8 @@ static int __init inet_init(void) ...@@ -1674,6 +1678,8 @@ static int __init inet_init(void)
proto_unregister(&udp_prot); proto_unregister(&udp_prot);
out_unregister_tcp_proto: out_unregister_tcp_proto:
proto_unregister(&tcp_prot); proto_unregister(&tcp_prot);
out_free_reserved_ports:
kfree(sysctl_local_reserved_ports);
goto out; goto out;
} }
......
...@@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = { ...@@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = {
.range = { 32768, 61000 }, .range = { 32768, 61000 },
}; };
/*
 * Bitmap indexed by port number; a set bit marks that port as reserved and
 * is what inet_is_reserved_local_port() tests. The storage is allocated
 * zero-filled (65536 bits) by inet_init().
 */
unsigned long *sysctl_local_reserved_ports;
EXPORT_SYMBOL(sysctl_local_reserved_ports);
void inet_get_local_port_range(int *low, int *high) void inet_get_local_port_range(int *low, int *high)
{ {
unsigned seq; unsigned seq;
...@@ -108,6 +111,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -108,6 +111,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
smallest_size = -1; smallest_size = -1;
do { do {
if (inet_is_reserved_local_port(rover))
goto next_nolock;
head = &hashinfo->bhash[inet_bhashfn(net, rover, head = &hashinfo->bhash[inet_bhashfn(net, rover,
hashinfo->bhash_size)]; hashinfo->bhash_size)];
spin_lock(&head->lock); spin_lock(&head->lock);
...@@ -130,6 +135,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -130,6 +135,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
break; break;
next: next:
spin_unlock(&head->lock); spin_unlock(&head->lock);
next_nolock:
if (++rover > high) if (++rover > high)
rover = low; rover = low;
} while (--remaining > 0); } while (--remaining > 0);
......
...@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, ...@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable(); local_bh_disable();
for (i = 1; i <= remaining; i++) { for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining; port = low + (i + offset) % remaining;
if (inet_is_reserved_local_port(port))
continue;
head = &hinfo->bhash[inet_bhashfn(net, port, head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)]; hinfo->bhash_size)];
spin_lock(&head->lock); spin_lock(&head->lock);
......
...@@ -299,6 +299,13 @@ static struct ctl_table ipv4_table[] = { ...@@ -299,6 +299,13 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = ipv4_local_port_range, .proc_handler = ipv4_local_port_range,
}, },
{
.procname = "ip_local_reserved_ports",
.data = NULL, /* initialized in sysctl_ipv4_init */
.maxlen = 65536,
.mode = 0644,
.proc_handler = proc_do_large_bitmap,
},
#ifdef CONFIG_IP_MULTICAST #ifdef CONFIG_IP_MULTICAST
{ {
.procname = "igmp_max_memberships", .procname = "igmp_max_memberships",
...@@ -736,6 +743,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { ...@@ -736,6 +743,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
static __init int sysctl_ipv4_init(void) static __init int sysctl_ipv4_init(void)
{ {
struct ctl_table_header *hdr; struct ctl_table_header *hdr;
struct ctl_table *i;
for (i = ipv4_table; i->procname; i++) {
if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
i->data = sysctl_local_reserved_ports;
break;
}
}
if (!i->procname)
return -EINVAL;
hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
if (hdr == NULL) if (hdr == NULL)
......
...@@ -233,7 +233,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, ...@@ -233,7 +233,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
*/ */
do { do {
if (low <= snum && snum <= high && if (low <= snum && snum <= high &&
!test_bit(snum >> udptable->log, bitmap)) !test_bit(snum >> udptable->log, bitmap) &&
!inet_is_reserved_local_port(snum))
goto found; goto found;
snum += rand; snum += rand;
} while (snum != first); } while (snum != first);
......
...@@ -5433,6 +5433,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) ...@@ -5433,6 +5433,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover++; rover++;
if ((rover < low) || (rover > high)) if ((rover < low) || (rover > high))
rover = low; rover = low;
if (inet_is_reserved_local_port(rover))
continue;
index = sctp_phashfn(rover); index = sctp_phashfn(rover);
head = &sctp_port_hashtable[index]; head = &sctp_port_hashtable[index];
sctp_spin_lock(&head->lock); sctp_spin_lock(&head->lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment