Commit 859844e5 authored by David S. Miller

Merge branch 'net-ipv6-Address-checks-need-to-consider-the-L3-domain'

David Ahern says:

====================
net/ipv6: Address checks need to consider the L3 domain

IPv6 prohibits a local address from being used as a gateway for a route.
However, it is ok for the gateway to be a local address in a different L3
domain (e.g., VRF). This allows, for example, veth pairs to connect VRFs.

ip6_route_info_create calls ipv6_chk_addr_and_flags for gateway addresses
to determine if the address is a local one, but ipv6_chk_addr_and_flags
does not currently consider L3 domains. As a result routes can not be
added in one VRF with a nexthop that points to a local address in a
second VRF.

Resolve by comparing the l3mdev for the passed in device and requiring an
l3mdev match with the device containing an address. The intent of checking
for an address on the specified device versus any device in the domain is
maintained by a new argument to skip the check between the passed in device
and the device with the address.

Patch 1 moves the gateway validation from ip6_route_info_create into a
helper; the function is long enough and refactoring drops the indent
level.

Patch 2 adds a skip_dev_check argument to ipv6_chk_addr_and_flags to
allow a device to always be passed yet skip the device check when
looking at addresses and fixes up a few ipv6_chk_addr callers that
pass a NULL device.

Patch 3 adds l3mdev checks to ipv6_chk_addr_and_flags.

Patches 4 and 5 do some refactoring to the fib_tests script and then
patch 6 adds nexthop validation tests.

v4
- separated l3mdev check into a separate patch (patch 3 of this set)
  as suggested by Kirill
- consolidated dev and ipv6_chk_addr_and_flags call into 1 if (Kirill)
- added a temp variable for gw type (Kirill)

v3
- set skip_dev_check in ipv6_chk_addr based on dev == NULL (per
  comment from Ido)

v2
- handle 2 variations of route spec with sane error path
- add test cases
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1ad2ff02 654d3a78
......@@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict,
u32 banned_flags);
const struct net_device *dev, bool skip_dev_check,
int strict, u32 banned_flags);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
......
......@@ -1851,22 +1851,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
/* device argument is used to find the L3 domain of interest. If
* skip_dev_check is set, then the ifp device is not checked against
* the passed in dev argument. So the 2 cases for address checks are:
* 1. does the address exist in the L3 domain that dev is part of
* (skip_dev_check = true), or
*
* 2. does the address exist on the specific device
* (skip_dev_check = false)
*/
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict,
u32 banned_flags)
const struct net_device *dev, bool skip_dev_check,
int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
l3mdev = l3mdev_master_dev_rcu(dev);
if (skip_dev_check)
dev = NULL;
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
continue;
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
......
......@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
if (ipv6_chk_addr(net, addr, NULL, 0))
if (ifindex)
dev = __dev_get_by_index(net, ifindex);
if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
......@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
} else
dev = __dev_get_by_index(net, ifindex);
}
if (!dev) {
err = -ENODEV;
......
......@@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
!ipv6_chk_addr(net, &src_info->ipi6_addr,
strict ? dev : NULL, 0) &&
!ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
dev, !strict, 0,
IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
......
......@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
......@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
......
......@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
dev, 1,
dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
......
......@@ -2550,7 +2550,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
static int ip6_route_check_nh_onlink(struct net *net,
struct fib6_config *cfg,
struct net_device *dev,
const struct net_device *dev,
struct netlink_ext_ack *extack)
{
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
......@@ -2626,6 +2626,79 @@ static int ip6_route_check_nh(struct net *net,
return err;
}
/* Validate the gateway address of the route described by @cfg.
 *
 * @net:    network namespace the route is being added to
 * @cfg:    route configuration; fc_gateway and fc_flags are read here
 * @_dev:   in/out egress device; may be NULL on entry and is re-read
 *          after the nexthop check (which may change it)
 * @idev:   in/out inet6 device, handed through to ip6_route_check_nh()
 * @extack: netlink extended ack that receives the failure reason
 *
 * Returns 0 when the gateway is acceptable, the error reported by the
 * nexthop check when that fails, and -EINVAL for every other rejection.
 */
static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
			   struct net_device **_dev, struct inet6_dev **idev,
			   struct netlink_ext_ack *extack)
{
	const struct in6_addr *gw_addr = &cfg->fc_gateway;
	const struct net_device *dev = *_dev;
	int gwa_type = ipv6_addr_type(gw_addr);
	/* A link-local gateway is checked against the specific device;
	 * any other gateway is checked against the whole L3 domain that
	 * the device belongs to.
	 */
	bool skip_dev = !(gwa_type & IPV6_ADDR_LINKLOCAL);
	/* No device yet: the local address check has to wait until the
	 * egress device has been resolved below.
	 */
	bool need_addr_check = !dev;
	int err;

	/* If gw_addr is local we will fail to detect this while the
	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
	 * will return the already-added prefix route via the interface
	 * that prefix route was assigned to, which might be non-loopback.
	 */
	if (dev &&
	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
		return -EINVAL;
	}

	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
		/* IPv6 strictly prohibits using non-link-local addresses
		 * as a nexthop address; otherwise the router will not be
		 * able to send redirects. That is very good, but in some
		 * (rare!) circumstances (SIT, PtP, NBMA NOARP links) it
		 * is handy to allow some exceptions. --ANK
		 * IPv4-mapped nexthops are allowed to support
		 * RFC4798-type addressing.
		 */
		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
			NL_SET_ERR_MSG(extack, "Invalid gateway address");
			return -EINVAL;
		}

		if (cfg->fc_flags & RTNH_F_ONLINK)
			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
		else
			err = ip6_route_check_nh(net, cfg, _dev, idev);

		if (err)
			return err;
	}

	/* Pick up the device again in case the nexthop check changed it. */
	dev = *_dev;

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Egress device not specified");
		return -EINVAL;
	}

	if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack,
			       "Egress device can not be loopback device for this route");
		return -EINVAL;
	}

	/* The gateway could not be checked up front because no device
	 * was given; do it now that the egress device is known.
	 */
	if (need_addr_check &&
	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
		return -EINVAL;
	}

	return 0;
}
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
......@@ -2808,61 +2881,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
}
if (cfg->fc_flags & RTF_GATEWAY) {
const struct in6_addr *gw_addr;
int gwa_type;
gw_addr = &cfg->fc_gateway;
gwa_type = ipv6_addr_type(gw_addr);
/* if gw_addr is local we will fail to detect this in case
* address is still TENTATIVE (DAD in progress). rt6_lookup()
* will return already-added prefix route via interface that
* prefix route was assigned to, which might be non-loopback.
*/
err = -EINVAL;
if (ipv6_chk_addr_and_flags(net, gw_addr,
gwa_type & IPV6_ADDR_LINKLOCAL ?
dev : NULL, 0, 0)) {
NL_SET_ERR_MSG(extack, "Invalid gateway address");
goto out;
}
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
Otherwise, router will not able to send redirects.
It is very good, but in some (rare!) circumstances
(SIT, PtP, NBMA NOARP links) it is handy to allow
some exceptions. --ANK
We allow IPv4-mapped nexthops to support RFC4798-type
addressing
*/
if (!(gwa_type & (IPV6_ADDR_UNICAST |
IPV6_ADDR_MAPPED))) {
NL_SET_ERR_MSG(extack,
"Invalid gateway address");
goto out;
}
if (cfg->fc_flags & RTNH_F_ONLINK) {
err = ip6_route_check_nh_onlink(net, cfg, dev,
extack);
} else {
err = ip6_route_check_nh(net, cfg, &dev, &idev);
}
err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
if (err)
goto out;
}
err = -EINVAL;
if (!dev) {
NL_SET_ERR_MSG(extack, "Egress device not specified");
goto out;
} else if (dev->flags & IFF_LOOPBACK) {
NL_SET_ERR_MSG(extack,
"Egress device can not be loopback device for this route");
goto out;
}
rt->rt6i_gateway = cfg->fc_gateway;
}
err = -ENODEV;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment