Commit c742c59e, authored by Sunil Muthuswamy and committed by David S. Miller

hv_sock: Remove the accept port restriction

Currently, hv_sock restricts the port the guest socket can accept
connections on. hv_sock divides the socket port namespace into two parts
for server side (listening socket), 0-0x7FFFFFFF & 0x80000000-0xFFFFFFFF
(there are no restrictions on client port namespace). The first part
(0-0x7FFFFFFF) is reserved for sockets where connections can be accepted.
The second part (0x80000000-0xFFFFFFFF) is reserved for allocating ports
for the peer (host) socket, once a connection is accepted.
This reservation of the port namespace is specific to hv_sock and not
known by the generic vsock library (e.g. af_vsock). This is problematic
because auto-binds/ephemeral ports are handled by the generic vsock
library and it has no knowledge of this port reservation and could
allocate a port that is not compatible with hv_sock (and legitimately so).
The issue hasn't surfaced so far because the auto-bind code of vsock
(__vsock_bind_stream) prior to the change 'VSOCK: bind to random port for
VMADDR_PORT_ANY' would start walking up from LAST_RESERVED_PORT (1023) and
start assigning ports. That would take a large number of iterations to hit
0x7FFFFFFF. But, after the above change to randomize port selection, the
issue has started coming up more frequently.
There has really been no good reason to have this port reservation logic
in hv_sock from the get-go. Reserving a local port for peer ports is not
how things are handled generally. Peer ports should reflect the peer port.
This fixes the issue by lifting the port reservation, and also returns the
right peer port. Since the code converts the GUID to the peer port (by
using the first 4 bytes), there is a possibility of conflicts, but that
seems like a reasonable risk to take, given that this is limited to vsock,
which applies only to local sockets.
Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f8d7408a
......@@ -138,28 +138,15 @@ struct hvsock {
****************************************************************************
* The only valid Service GUIDs, from the perspectives of both the host and *
* Linux VM, that can be connected by the other end, must conform to this *
* format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in *
* this range [0, 0x7FFFFFFF]. *
* format: <port>-facb-11e6-bd58-64006a7986d3. *
****************************************************************************
*
* When we write apps on the host to connect(), the GUID ServiceID is used.
* When we write apps in Linux VM to connect(), we only need to specify the
* port and the driver will form the GUID and use that to request the host.
*
* From the perspective of Linux VM:
* 1. the local ephemeral port (i.e. the local auto-bound port when we call
* connect() without explicit bind()) is generated by __vsock_bind_stream(),
* and the range is [1024, 0xFFFFFFFF).
* 2. the remote ephemeral port (i.e. the auto-generated remote port for
* a connect request initiated by the host's connect()) is generated by
* hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
*/
#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF)
#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT
#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT
#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1)
/* 00000000-facb-11e6-bd58-64006a7986d3 */
static const guid_t srv_id_template =
GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
......@@ -184,34 +171,6 @@ static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id)
vsock_addr_init(addr, VMADDR_CID_ANY, port);
}
static void hvs_remote_addr_init(struct sockaddr_vm *remote,
struct sockaddr_vm *local)
{
static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
struct sock *sk;
/* Remote peer is always the host */
vsock_addr_init(remote, VMADDR_CID_HOST, VMADDR_PORT_ANY);
while (1) {
/* Wrap around ? */
if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
host_ephemeral_port == VMADDR_PORT_ANY)
host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
remote->svm_port = host_ephemeral_port++;
sk = vsock_find_connected_socket(remote, local);
if (!sk) {
/* Found an available ephemeral port */
return;
}
/* Release refcnt got in vsock_find_connected_socket */
sock_put(sk);
}
}
static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
{
set_channel_pending_send_size(chan,
......@@ -341,12 +300,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if_type = &chan->offermsg.offer.if_type;
if_instance = &chan->offermsg.offer.if_instance;
conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
/* The host or the VM should only listen on a port in
* [0, MAX_LISTEN_PORT]
*/
if (!is_valid_srv_id(if_type) ||
get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
if (!is_valid_srv_id(if_type))
return;
hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
......@@ -371,8 +325,11 @@ static void hvs_open_connection(struct vmbus_channel *chan)
vnew = vsock_sk(new);
hvs_addr_init(&vnew->local_addr, if_type);
hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
/* Remote peer is always the host */
vsock_addr_init(&vnew->remote_addr,
VMADDR_CID_HOST, VMADDR_PORT_ANY);
vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance);
ret = vsock_assign_transport(vnew, vsock_sk(sk));
/* Transport assigned (looking at remote_addr) must be the
* same where we received the request.
......@@ -766,16 +723,6 @@ static bool hvs_stream_is_active(struct vsock_sock *vsk)
static bool hvs_stream_allow(u32 cid, u32 port)
{
/* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
* reserved as ephemeral ports, which are used as the host's ports
* when the host initiates connections.
*
* Perform this check in the guest so an immediate error is produced
* instead of a timeout.
*/
if (port > MAX_HOST_LISTEN_PORT)
return false;
if (cid == VMADDR_CID_HOST)
return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment