Commit 43c9fc50 authored by Martin Wilck, committed by Doug Ledford

rdma_rxe: make rxe work over 802.1q VLAN devices

This patch fixes RDMA/rxe over 802.1q VLAN devices.

Without it, I observed the following behavior:

a) adding a VLAN device to RXE via rxe_net_add() creates a non-functional
   RDMA device. This is caused by the logic in enum_all_gids_of_dev_cb() /
   is_eth_port_of_netdev(), which only considers networks connected to
   "upper devices" of the configured network device, resulting in an empty
   set of gids for a VLAN interface that is an "upper device" itself.
   Later attempts to connect via this rdma device fail in cma_acquire_dev()
   because no gids can be resolved.

b) adding the master device of the VLAN device instead seems to work
   initially, target addresses via VLAN devices are resolved successfully.
   But the connection times out because no 802.1q VLAN headers are
   inserted in the ethernet packets, which are therefore never received.
   This happens because the RXE layer sends the packets via the master
   device rather than the VLAN device.

The problem could be solved by changing either a) or b). My thinking was
that the logic in a) was created deliberately, thus I decided to work on
b). It turns out that the information about the VLAN interface for the gid
at hand is available in the AV information. My patch converts the RXE code
to use this netdev instead of rxe->ndev. With this change, RXE over vlan
works on my test system.
Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent baa00fcd
...@@ -182,11 +182,39 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, ...@@ -182,11 +182,39 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif #endif
/*
 * Derive the net_device to transmit on from the address vector.
 *
 * The source GID index stored in the av is looked up in the cached GID
 * table of the rxe IB device. If the lookup succeeds and the GID entry
 * carries a net_device, that device is returned (for a GID that belongs
 * to a VLAN interface this is the VLAN device; for a GID on the physical
 * interface it is rxe->ndev itself). Otherwise rxe->ndev is returned.
 *
 * @rxe:      rxe device owning the GID table and the fallback netdev
 * @port_num: IB port number passed to the GID cache lookup
 * @av:       address vector; only av->grh.sgid_index is read
 *
 * Returns a net_device the caller must release with dev_put().
 *
 * NOTE(review): this relies on ib_get_cached_gid() returning attr.ndev
 * with a reference already held -- the original code depended on the
 * same thing, since it handed attr.ndev to callers that dev_put() it
 * without taking a reference here. Confirm against the GID cache code.
 */
static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe,
					     int port_num,
					     struct rxe_av *av)
{
	union ib_gid gid;
	struct ib_gid_attr attr;

	/*
	 * Hand the reference obtained by the lookup straight to the caller,
	 * even when attr.ndev happens to be rxe->ndev. Taking an additional
	 * dev_hold() in that case (as was done before) would leave one
	 * reference that nobody ever drops.
	 */
	if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index,
			      &gid, &attr) == 0 && attr.ndev)
		return attr.ndev;

	/* Lookup failed or GID has no netdev: hold the fallback device so
	 * that the caller can unconditionally dev_put() the result.
	 */
	dev_hold(rxe->ndev);
	return rxe->ndev;
}
static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
struct rxe_qp *qp, struct rxe_qp *qp,
struct rxe_av *av) struct rxe_av *av)
{ {
struct dst_entry *dst = NULL; struct dst_entry *dst = NULL;
struct net_device *ndev;
ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av);
if (qp_type(qp) == IB_QPT_RC) if (qp_type(qp) == IB_QPT_RC)
dst = sk_dst_get(qp->sk->sk); dst = sk_dst_get(qp->sk->sk);
...@@ -201,14 +229,14 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, ...@@ -201,14 +229,14 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
saddr = &av->sgid_addr._sockaddr_in.sin_addr; saddr = &av->sgid_addr._sockaddr_in.sin_addr;
daddr = &av->dgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr;
dst = rxe_find_route4(rxe->ndev, saddr, daddr); dst = rxe_find_route4(ndev, saddr, daddr);
} else if (av->network_type == RDMA_NETWORK_IPV6) { } else if (av->network_type == RDMA_NETWORK_IPV6) {
struct in6_addr *saddr6; struct in6_addr *saddr6;
struct in6_addr *daddr6; struct in6_addr *daddr6;
saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
dst = rxe_find_route6(rxe->ndev, saddr6, daddr6); dst = rxe_find_route6(ndev, saddr6, daddr6);
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
if (dst) if (dst)
qp->dst_cookie = qp->dst_cookie =
...@@ -217,6 +245,7 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, ...@@ -217,6 +245,7 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
} }
} }
dev_put(ndev);
return dst; return dst;
} }
...@@ -224,9 +253,14 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ...@@ -224,9 +253,14 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{ {
struct udphdr *udph; struct udphdr *udph;
struct net_device *ndev = skb->dev; struct net_device *ndev = skb->dev;
struct net_device *rdev = ndev;
struct rxe_dev *rxe = net_to_rxe(ndev); struct rxe_dev *rxe = net_to_rxe(ndev);
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
if (!rxe && is_vlan_dev(rdev)) {
rdev = vlan_dev_real_dev(ndev);
rxe = net_to_rxe(rdev);
}
if (!rxe) if (!rxe)
goto drop; goto drop;
...@@ -498,6 +532,10 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, ...@@ -498,6 +532,10 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
{ {
unsigned int hdr_len; unsigned int hdr_len;
struct sk_buff *skb; struct sk_buff *skb;
struct net_device *ndev;
const int port_num = 1;
ndev = rxe_netdev_from_av(rxe, port_num, av);
if (av->network_type == RDMA_NETWORK_IPV4) if (av->network_type == RDMA_NETWORK_IPV4)
hdr_len = ETH_HLEN + sizeof(struct udphdr) + hdr_len = ETH_HLEN + sizeof(struct udphdr) +
...@@ -506,26 +544,30 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, ...@@ -506,26 +544,30 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
hdr_len = ETH_HLEN + sizeof(struct udphdr) + hdr_len = ETH_HLEN + sizeof(struct udphdr) +
sizeof(struct ipv6hdr); sizeof(struct ipv6hdr);
skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(rxe->ndev), skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
GFP_ATOMIC); GFP_ATOMIC);
if (unlikely(!skb))
if (unlikely(!skb)) {
dev_put(ndev);
return NULL; return NULL;
}
skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));
skb->dev = rxe->ndev; skb->dev = ndev;
if (av->network_type == RDMA_NETWORK_IPV4) if (av->network_type == RDMA_NETWORK_IPV4)
skb->protocol = htons(ETH_P_IP); skb->protocol = htons(ETH_P_IP);
else else
skb->protocol = htons(ETH_P_IPV6); skb->protocol = htons(ETH_P_IPV6);
pkt->rxe = rxe; pkt->rxe = rxe;
pkt->port_num = 1; pkt->port_num = port_num;
pkt->hdr = skb_put(skb, paylen); pkt->hdr = skb_put(skb, paylen);
pkt->mask |= RXE_GRH_MASK; pkt->mask |= RXE_GRH_MASK;
memset(pkt->hdr, 0, paylen); memset(pkt->hdr, 0, paylen);
dev_put(ndev);
return skb; return skb;
} }
......
...@@ -347,7 +347,7 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) ...@@ -347,7 +347,7 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
IB_GID_TYPE_ROCE_UDP_ENCAP, IB_GID_TYPE_ROCE_UDP_ENCAP,
1, rxe->ndev, NULL); 1, skb->dev, NULL);
} }
/* rxe_rcv is called from the interface driver */ /* rxe_rcv is called from the interface driver */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment