Commit f5dd3d0c authored by David Herrmann, committed by David S. Miller

net: introduce SO_BINDTOIFINDEX sockopt

This introduces a new generic SOL_SOCKET-level socket option called
SO_BINDTOIFINDEX. It behaves similarly to SO_BINDTODEVICE, but takes a
network interface index as its argument, rather than the network
interface name.

User-space often refers to network-interfaces via their index, but has
to temporarily resolve it to a name for a call into SO_BINDTODEVICE.
This might pose problems when the network-device is renamed
asynchronously by other parts of the system. When this happens, the
SO_BINDTODEVICE might either fail, or worse, it might bind to the wrong
device.

In most cases user-space only ever operates on devices which they
either manage themselves, or otherwise have a guarantee that the device
name will not change (e.g., devices that are UP cannot be renamed).
However, particularly in libraries this guarantee is non-obvious, and it
would be nice if that race-condition simply did not exist. It would
make it easier for those libraries to operate even in situations where
the device-name might change under the hood.

A real use-case that we recently hit is trying to start the network
stack early in the initrd but make it survive into the real system.
Existing distributions rename network-interfaces during the transition
from initrd into the real system. This, obviously, cannot affect
devices that are up and running (unless you also consider moving them
between network-namespaces). However, the network manager now has to
make sure its management engine for dormant devices will not run in
parallel to these renames. Particularly, when you offload operations
like DHCP into separate processes, these might set up their sockets
early, and thus have to resolve the device-name, possibly running into
this race-condition.

By avoiding a call to resolve the device-name, we no longer depend on
the name and can run network setup of dormant devices in parallel to
the transition off the initrd. The SO_BINDTOIFINDEX sockopt plugs this
race.
Reviewed-by: Tom Gundersen <teg@jklm.no>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 692d7b5d
...@@ -115,4 +115,6 @@ ...@@ -115,4 +115,6 @@
#define SO_TXTIME 61 #define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 62
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -117,4 +117,6 @@ ...@@ -117,4 +117,6 @@
#define SO_TXTIME 61 #define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 62
#endif /* _ASM_IA64_SOCKET_H */ #endif /* _ASM_IA64_SOCKET_H */
...@@ -126,4 +126,6 @@ ...@@ -126,4 +126,6 @@
#define SO_TXTIME 61 #define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 62
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -107,4 +107,6 @@ ...@@ -107,4 +107,6 @@
#define SO_TXTIME 0x4036 #define SO_TXTIME 0x4036
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 0x4037
#endif /* _UAPI_ASM_SOCKET_H */ #endif /* _UAPI_ASM_SOCKET_H */
...@@ -114,4 +114,6 @@ ...@@ -114,4 +114,6 @@
#define SO_TXTIME 61 #define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 62
#endif /* _ASM_SOCKET_H */ #endif /* _ASM_SOCKET_H */
...@@ -104,6 +104,8 @@ ...@@ -104,6 +104,8 @@
#define SO_TXTIME 0x003f #define SO_TXTIME 0x003f
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 0x0041
/* Security levels - as per NRL IPv6 - don't actually do anything */ /* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
......
...@@ -119,4 +119,6 @@ ...@@ -119,4 +119,6 @@
#define SO_TXTIME 61 #define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 62
#endif /* _XTENSA_SOCKET_H */ #endif /* _XTENSA_SOCKET_H */
...@@ -110,4 +110,6 @@ ...@@ -110,4 +110,6 @@
#define SO_TXTIME 61 #define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME #define SCM_TXTIME SO_TXTIME
#define SO_BINDTOIFINDEX 62
#endif /* __ASM_GENERIC_SOCKET_H */ #endif /* __ASM_GENERIC_SOCKET_H */
...@@ -520,20 +520,43 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) ...@@ -520,20 +520,43 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
} }
EXPORT_SYMBOL(sk_dst_check); EXPORT_SYMBOL(sk_dst_check);
static int sock_setbindtodevice(struct sock *sk, char __user *optval, static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
int optlen)
{ {
int ret = -ENOPROTOOPT; int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES #ifdef CONFIG_NETDEVICES
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
char devname[IFNAMSIZ];
int index;
/* Sorry... */ /* Sorry... */
ret = -EPERM; ret = -EPERM;
if (!ns_capable(net->user_ns, CAP_NET_RAW)) if (!ns_capable(net->user_ns, CAP_NET_RAW))
goto out; goto out;
ret = -EINVAL;
if (ifindex < 0)
goto out;
sk->sk_bound_dev_if = ifindex;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
sk_dst_reset(sk);
ret = 0;
out:
#endif
return ret;
}
static int sock_setbindtodevice(struct sock *sk, char __user *optval,
int optlen)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
struct net *net = sock_net(sk);
char devname[IFNAMSIZ];
int index;
ret = -EINVAL; ret = -EINVAL;
if (optlen < 0) if (optlen < 0)
goto out; goto out;
...@@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval, ...@@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
} }
lock_sock(sk); lock_sock(sk);
sk->sk_bound_dev_if = index; ret = sock_setbindtodevice_locked(sk, index);
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
sk_dst_reset(sk);
release_sock(sk); release_sock(sk);
ret = 0;
out: out:
#endif #endif
...@@ -1055,6 +1073,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, ...@@ -1055,6 +1073,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
} }
break; break;
case SO_BINDTOIFINDEX:
ret = sock_setbindtodevice_locked(sk, val);
break;
default: default:
ret = -ENOPROTOOPT; ret = -ENOPROTOOPT;
break; break;
...@@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, ...@@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
SOF_TXTIME_REPORT_ERRORS : 0; SOF_TXTIME_REPORT_ERRORS : 0;
break; break;
case SO_BINDTOIFINDEX:
v.val = sk->sk_bound_dev_if;
break;
default: default:
/* We implement the SO_SNDLOWAT etc to not be settable /* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7). * (1003.1g 7).
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment