/*
 * IPv6 Address Label subsystem
 * for the IPv6 "Default" Source Address Selection
 *
 * Copyright (C)2007 USAGI/WIDE Project
 */
/*
 * Author:
 *	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
 */

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/in6.h>
16
#include <linux/slab.h>
17 18 19 20
#include <net/addrconf.h>
#include <linux/if_addrlabel.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
21
#include <linux/refcount.h>
22 23 24 25

/*
 * Debug tracing hook: change "#if 0" to "#if 1" to send ADDRLABEL()
 * calls to printk(); otherwise each call expands to an empty statement.
 */
#if 0
#define ADDRLABEL(x...) printk(x)
#else
#define ADDRLABEL(x...) do { } while (0)
#endif

/*
 * Policy Table
 */
32
struct ip6addrlbl_entry {
33
	possible_net_t lbl_net;
34 35 36 37 38 39
	struct in6_addr prefix;
	int prefixlen;
	int ifindex;
	int addrtype;
	u32 label;
	struct hlist_node list;
40
	refcount_t refcnt;
41 42 43 44 45 46 47 48 49 50
	struct rcu_head rcu;
};

/*
 * The policy table: one list holding the entries of every network
 * namespace (each entry records its own namespace in lbl_net).
 */
static struct ip6addrlbl_table {
	struct hlist_head head;	/* entries; readers walk it with the _rcu iterator */
	spinlock_t lock;	/* held around every list modification in this file */
	u32 seq;		/* incremented on each successful add/replace */
} ip6addrlbl_table;

51 52 53
static inline
struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
{
Eric Dumazet's avatar
Eric Dumazet committed
54
	return read_pnet(&lbl->lbl_net);
55 56
}

57
/*
 * Default policy table (RFC6724 + extensions)
 *
 * prefix		addr_type	label
 * -------------------------------------------------------------------------
 * ::1/128		LOOPBACK	0
 * ::/0			N/A		1
 * 2002::/16		N/A		2
 * ::/96		COMPATv4	3
 * ::ffff:0:0/96	V4MAPPED	4
 * fc00::/7		N/A		5		ULA (RFC 4193)
 * 2001::/32		N/A		6		Teredo (RFC 4380)
 * 2001:10::/28		N/A		7		ORCHID (RFC 4843)
 * fec0::/10		N/A		11		Site-local
 *							(deprecated by RFC3879)
 * 3ffe::/16		N/A		12		6bone
 *
 * Note: 0xffffffff is used if we do not have any policies.
 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
 */

/*
 * Label returned by ipv6_addr_label() when no entry matches; the netlink
 * add/delete handler rejects requests that carry this value as a label.
 */
#define IPV6_ADDR_LABEL_DEFAULT	0xffffffffUL

80
static const __net_initconst struct ip6addrlbl_init_table
81 82 83 84 85 86 87 88
{
	const struct in6_addr *prefix;
	int prefixlen;
	u32 label;
} ip6addrlbl_init_table[] = {
	{	/* ::/0 */
		.prefix = &in6addr_any,
		.label = 1,
89 90
	}, {	/* fc00::/7 */
		.prefix = &(struct in6_addr){ { { 0xfc } } } ,
91 92
		.prefixlen = 7,
		.label = 5,
93 94
	}, {	/* fec0::/10 */
		.prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },
95 96
		.prefixlen = 10,
		.label = 11,
97 98
	}, {	/* 2002::/16 */
		.prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },
99 100
		.prefixlen = 16,
		.label = 2,
101 102
	}, {	/* 3ffe::/16 */
		.prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },
103 104
		.prefixlen = 16,
		.label = 12,
105 106
	}, {	/* 2001::/32 */
		.prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },
107 108
		.prefixlen = 32,
		.label = 6,
109 110
	}, {	/* 2001:10::/28 */
		.prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },
111 112
		.prefixlen = 28,
		.label = 7,
113 114
	}, {	/* ::ffff:0:0 */
		.prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },
115 116
		.prefixlen = 96,
		.label = 4,
117
	}, {	/* ::/96 */
118 119 120
		.prefix = &in6addr_any,
		.prefixlen = 96,
		.label = 3,
121
	}, {	/* ::1/128 */
122 123 124 125 126 127 128 129 130 131 132 133
		.prefix = &in6addr_loopback,
		.prefixlen = 128,
		.label = 0,
	}
};

/* Object management */
/* Free @entry right away, without waiting for an RCU grace period. */
static inline void ip6addrlbl_free(struct ip6addrlbl_entry *entry)
{
	kfree(entry);
}

134 135 136 137 138
/* call_rcu() callback: free the entry whose rcu member is @h. */
static void ip6addrlbl_free_rcu(struct rcu_head *h)
{
	struct ip6addrlbl_entry *entry;

	entry = container_of(h, struct ip6addrlbl_entry, rcu);
	ip6addrlbl_free(entry);
}

139
static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
140
{
141
	return refcount_inc_not_zero(&p->refcnt);
142 143 144 145
}

static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
{
146
	if (refcount_dec_and_test(&p->refcnt))
147
		call_rcu(&p->rcu, ip6addrlbl_free_rcu);
148 149 150
}

/* Find label */
151 152 153 154
static bool __ip6addrlbl_match(struct net *net,
			       const struct ip6addrlbl_entry *p,
			       const struct in6_addr *addr,
			       int addrtype, int ifindex)
155
{
156
	if (!net_eq(ip6addrlbl_net(p), net))
157
		return false;
158
	if (p->ifindex && p->ifindex != ifindex)
159
		return false;
160
	if (p->addrtype && p->addrtype != addrtype)
161
		return false;
162
	if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
163 164
		return false;
	return true;
165 166
}

167 168
static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
						  const struct in6_addr *addr,
169 170 171
						  int type, int ifindex)
{
	struct ip6addrlbl_entry *p;
172
	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
173
		if (__ip6addrlbl_match(net, p, addr, type, ifindex))
174 175 176 177 178
			return p;
	}
	return NULL;
}

179 180
u32 ipv6_addr_label(struct net *net,
		    const struct in6_addr *addr, int type, int ifindex)
181 182 183 184 185 186 187
{
	u32 label;
	struct ip6addrlbl_entry *p;

	type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;

	rcu_read_lock();
188
	p = __ipv6_addr_label(net, addr, type, ifindex);
189 190 191
	label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
	rcu_read_unlock();

Harvey Harrison's avatar
Harvey Harrison committed
192
	ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n",
193
		  __func__, addr, type, ifindex, label);
194 195 196 197 198

	return label;
}

/* allocate one entry */
199 200
/*
 * Build a new, unlinked entry with a reference count of one.
 *
 * The entry's addrtype is derived from @prefix: it is kept only for a
 * mapped or compat prefix of length 96 and a loopback prefix of length
 * 128, and cleared otherwise.  A mapped prefix longer than 96 bits is
 * rejected.
 *
 * Returns the entry, ERR_PTR(-EINVAL) or ERR_PTR(-ENOMEM).
 */
static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
						 const struct in6_addr *prefix,
						 int prefixlen, int ifindex,
						 u32 label)
{
	const int special = IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 |
			    IPV6_ADDR_LOOPBACK;
	struct ip6addrlbl_entry *entry;
	int addrtype;

	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n",
		  __func__, prefix, prefixlen, ifindex, (unsigned int)label);

	addrtype = ipv6_addr_type(prefix) & special;

	if (addrtype == IPV6_ADDR_MAPPED) {
		if (prefixlen > 96)
			return ERR_PTR(-EINVAL);
		if (prefixlen < 96)
			addrtype = 0;
	} else if (addrtype == IPV6_ADDR_COMPATv4) {
		if (prefixlen != 96)
			addrtype = 0;
	} else if (addrtype == IPV6_ADDR_LOOPBACK) {
		if (prefixlen != 128)
			addrtype = 0;
	}

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	write_pnet(&entry->lbl_net, net);
	ipv6_addr_prefix(&entry->prefix, prefix, prefixlen);
	entry->prefixlen = prefixlen;
	entry->ifindex = ifindex;
	entry->addrtype = addrtype;
	entry->label = label;
	INIT_HLIST_NODE(&entry->list);
	refcount_set(&entry->refcnt, 1);

	return entry;
}

/* add a label */
245
static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
246
{
247 248
	struct hlist_node *n;
	struct ip6addrlbl_entry *last = NULL, *p = NULL;
249 250
	int ret = 0;

251 252
	ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
		  replace);
253

254 255 256 257 258 259 260
	hlist_for_each_entry_safe(p, n,	&ip6addrlbl_table.head, list) {
		if (p->prefixlen == newp->prefixlen &&
		    net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
		    p->ifindex == newp->ifindex &&
		    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
			if (!replace) {
				ret = -EEXIST;
261 262
				goto out;
			}
263 264 265 266 267 268 269
			hlist_replace_rcu(&p->list, &newp->list);
			ip6addrlbl_put(p);
			goto out;
		} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
			   (p->prefixlen < newp->prefixlen)) {
			hlist_add_before_rcu(&newp->list, &p->list);
			goto out;
270
		}
271
		last = p;
272
	}
273
	if (last)
274
		hlist_add_behind_rcu(&newp->list, &last->list);
275 276
	else
		hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
277 278 279 280 281 282 283
out:
	if (!ret)
		ip6addrlbl_table.seq++;
	return ret;
}

/* add a label */
284 285
/*
 * Allocate an entry for the given prefix and insert it under the table
 * lock.  Returns 0, or the negative errno from allocation or insertion.
 */
static int ip6addrlbl_add(struct net *net,
			  const struct in6_addr *prefix, int prefixlen,
			  int ifindex, u32 label, int replace)
{
	struct ip6addrlbl_entry *entry;
	int err;

	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
		  __func__, prefix, prefixlen, ifindex, (unsigned int)label,
		  replace);

	entry = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	spin_lock(&ip6addrlbl_table.lock);
	err = __ip6addrlbl_add(entry, replace);
	spin_unlock(&ip6addrlbl_table.lock);

	/* On failure the entry was never linked, so free it directly. */
	if (err)
		ip6addrlbl_free(entry);

	return err;
}

/* remove a label */
307 308
static int __ip6addrlbl_del(struct net *net,
			    const struct in6_addr *prefix, int prefixlen,
309
			    int ifindex)
310 311
{
	struct ip6addrlbl_entry *p = NULL;
312
	struct hlist_node *n;
313 314
	int ret = -ESRCH;

Harvey Harrison's avatar
Harvey Harrison committed
315
	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
316
		  __func__, prefix, prefixlen, ifindex);
317

318
	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
319
		if (p->prefixlen == prefixlen &&
320
		    net_eq(ip6addrlbl_net(p), net) &&
321 322 323 324 325 326 327 328 329 330 331
		    p->ifindex == ifindex &&
		    ipv6_addr_equal(&p->prefix, prefix)) {
			hlist_del_rcu(&p->list);
			ip6addrlbl_put(p);
			ret = 0;
			break;
		}
	}
	return ret;
}

332 333
static int ip6addrlbl_del(struct net *net,
			  const struct in6_addr *prefix, int prefixlen,
334
			  int ifindex)
335 336 337 338
{
	struct in6_addr prefix_buf;
	int ret;

Harvey Harrison's avatar
Harvey Harrison committed
339
	ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
340
		  __func__, prefix, prefixlen, ifindex);
341 342 343

	ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
	spin_lock(&ip6addrlbl_table.lock);
344
	ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
345 346 347 348 349
	spin_unlock(&ip6addrlbl_table.lock);
	return ret;
}

/* add default label */
350
/*
 * Per-namespace init hook: install the default labels for @net.
 *
 * Stops at the first row that cannot be added and unlinks every entry
 * already installed for @net.  Nothing else in this file removes them
 * unless ip6addrlbl_net_exit() is run for @net, so a failed init must
 * not leave entries that refer to the namespace.
 *
 * Returns 0, or the negative errno reported by ip6addrlbl_add().
 */
static int __net_init ip6addrlbl_net_init(struct net *net)
{
	struct ip6addrlbl_entry *p;
	struct hlist_node *n;
	int err = 0;
	int i;

	ADDRLABEL(KERN_DEBUG "%s\n", __func__);

	for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
		err = ip6addrlbl_add(net,
				     ip6addrlbl_init_table[i].prefix,
				     ip6addrlbl_init_table[i].prefixlen,
				     0,
				     ip6addrlbl_init_table[i].label, 0);
		if (err)
			goto err_unwind;
	}
	return 0;

err_unwind:
	/* Same teardown as ip6addrlbl_net_exit(), limited to @net. */
	spin_lock(&ip6addrlbl_table.lock);
	hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
		if (net_eq(ip6addrlbl_net(p), net)) {
			hlist_del_rcu(&p->list);
			ip6addrlbl_put(p);
		}
	}
	spin_unlock(&ip6addrlbl_table.lock);
	return err;
}

370 371 372
/* Per-namespace exit hook: unlink and release every entry owned by @net. */
static void __net_exit ip6addrlbl_net_exit(struct net *net)
{
	struct ip6addrlbl_entry *cur = NULL;
	struct hlist_node *next;

	spin_lock(&ip6addrlbl_table.lock);
	hlist_for_each_entry_safe(cur, next, &ip6addrlbl_table.head, list) {
		/* Entries of other namespaces share the list; skip them. */
		if (!net_eq(ip6addrlbl_net(cur), net))
			continue;
		hlist_del_rcu(&cur->list);
		ip6addrlbl_put(cur);
	}
	spin_unlock(&ip6addrlbl_table.lock);
}

/*
 * Per-namespace hooks: registered by ipv6_addr_label_init() and
 * unregistered by ipv6_addr_label_cleanup().
 */
static struct pernet_operations ipv6_addr_label_ops = {
	.init = ip6addrlbl_net_init,
	.exit = ip6addrlbl_net_exit,
};

391 392 393 394
/*
 * Initialise the table lock and register the per-namespace hooks.
 * Returns the result of register_pernet_subsys().
 */
int __init ipv6_addr_label_init(void)
{
	int err;

	spin_lock_init(&ip6addrlbl_table.lock);
	err = register_pernet_subsys(&ipv6_addr_label_ops);

	return err;
}

398 399 400 401 402
/* Unregister the per-namespace hooks set up by ipv6_addr_label_init(). */
void ipv6_addr_label_cleanup(void)
{
	unregister_pernet_subsys(&ipv6_addr_label_ops);
}

403 404 405 406 407
/*
 * Attribute policy handed to nlmsg_parse() by the netlink handlers below:
 * IFAL_ADDRESS is sized as a struct in6_addr, IFAL_LABEL as a u32.
 * NOTE(review): how .len is enforced depends on nlmsg_parse() - confirm.
 */
static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
	[IFAL_LABEL]		= { .len = sizeof(u32), },
};

408 409
static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
410
{
411
	struct net *net = sock_net(skb->sk);
412 413 414 415 416 417
	struct ifaddrlblmsg *ifal;
	struct nlattr *tb[IFAL_MAX+1];
	struct in6_addr *pfx;
	u32 label;
	int err = 0;

418 419
	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
			  extack);
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
	if (err < 0)
		return err;

	ifal = nlmsg_data(nlh);

	if (ifal->ifal_family != AF_INET6 ||
	    ifal->ifal_prefixlen > 128)
		return -EINVAL;

	if (!tb[IFAL_ADDRESS])
		return -EINVAL;
	pfx = nla_data(tb[IFAL_ADDRESS]);

	if (!tb[IFAL_LABEL])
		return -EINVAL;
	label = nla_get_u32(tb[IFAL_LABEL]);
	if (label == IPV6_ADDR_LABEL_DEFAULT)
		return -EINVAL;

439
	switch (nlh->nlmsg_type) {
440
	case RTM_NEWADDRLABEL:
441 442 443 444
		if (ifal->ifal_index &&
		    !__dev_get_by_index(net, ifal->ifal_index))
			return -EINVAL;

445
		err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
446 447 448 449
				     ifal->ifal_index, label,
				     nlh->nlmsg_flags & NLM_F_REPLACE);
		break;
	case RTM_DELADDRLABEL:
450
		err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen,
451 452 453 454 455 456 457 458
				     ifal->ifal_index);
		break;
	default:
		err = -EOPNOTSUPP;
	}
	return err;
}

459 460
/*
 * Fill in the struct ifaddrlblmsg header carried by @nlh.
 *
 * Every member is written, including the reserved one, so that no
 * uninitialised byte of the message buffer is sent to userspace.
 */
static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
			      int prefixlen, int ifindex, u32 lseq)
{
	struct ifaddrlblmsg *ifal = nlmsg_data(nlh);

	ifal->ifal_family = AF_INET6;
	ifal->__ifal_reserved = 0;
	ifal->ifal_prefixlen = prefixlen;
	ifal->ifal_flags = 0;
	ifal->ifal_index = ifindex;
	ifal->ifal_seq = lseq;
}

/*
 * Append one address-label message describing @p to @skb: the
 * ifaddrlblmsg header followed by IFAL_ADDRESS and IFAL_LABEL.
 *
 * Returns 0, or -EMSGSIZE if the header or an attribute could not be
 * added; in the attribute case the partial message is cancelled.
 */
static int ip6addrlbl_fill(struct sk_buff *skb,
			   struct ip6addrlbl_entry *p,
			   u32 lseq,
			   u32 portid, u32 seq, int event,
			   unsigned int flags)
{
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, portid, seq, event,
			sizeof(struct ifaddrlblmsg), flags);
	if (!nlh)
		return -EMSGSIZE;

	ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);

	if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0)
		goto cancel;
	if (nla_put_u32(skb, IFAL_LABEL, p->label) < 0)
		goto cancel;

	nlmsg_end(skb, nlh);
	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
495
	struct net *net = sock_net(skb->sk);
496 497 498 499 500
	struct ip6addrlbl_entry *p;
	int idx = 0, s_idx = cb->args[0];
	int err;

	rcu_read_lock();
501
	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
502 503
		if (idx >= s_idx &&
		    net_eq(ip6addrlbl_net(p), net)) {
504 505 506 507 508 509
			err = ip6addrlbl_fill(skb, p,
					      ip6addrlbl_table.seq,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWADDRLABEL,
					      NLM_F_MULTI);
510
			if (err < 0)
511 512 513 514 515 516 517 518 519 520 521
				break;
		}
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;
	return skb->len;
}

static inline int ip6addrlbl_msgsize(void)
{
Eric Dumazet's avatar
Eric Dumazet committed
522
	return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
523
		+ nla_total_size(16)	/* IFAL_ADDRESS */
Eric Dumazet's avatar
Eric Dumazet committed
524
		+ nla_total_size(4);	/* IFAL_LABEL */
525 526
}

527 528
static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
529
{
530
	struct net *net = sock_net(in_skb->sk);
531 532 533 534 535 536 537 538
	struct ifaddrlblmsg *ifal;
	struct nlattr *tb[IFAL_MAX+1];
	struct in6_addr *addr;
	u32 lseq;
	int err = 0;
	struct ip6addrlbl_entry *p;
	struct sk_buff *skb;

539 540
	err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
			  extack);
541 542 543 544 545 546 547 548 549 550
	if (err < 0)
		return err;

	ifal = nlmsg_data(nlh);

	if (ifal->ifal_family != AF_INET6 ||
	    ifal->ifal_prefixlen != 128)
		return -EINVAL;

	if (ifal->ifal_index &&
551
	    !__dev_get_by_index(net, ifal->ifal_index))
552 553 554 555 556 557 558
		return -EINVAL;

	if (!tb[IFAL_ADDRESS])
		return -EINVAL;
	addr = nla_data(tb[IFAL_ADDRESS]);

	rcu_read_lock();
559
	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
560
	if (p && !ip6addrlbl_hold(p))
561 562 563 564 565 566 567 568 569
		p = NULL;
	lseq = ip6addrlbl_table.seq;
	rcu_read_unlock();

	if (!p) {
		err = -ESRCH;
		goto out;
	}

570 571
	skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
	if (!skb) {
572 573 574 575 576
		ip6addrlbl_put(p);
		return -ENOBUFS;
	}

	err = ip6addrlbl_fill(skb, p, lseq,
577
			      NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
578 579 580 581 582 583 584 585 586 587
			      RTM_NEWADDRLABEL, 0);

	ip6addrlbl_put(p);

	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto out;
	}

588
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
589 590 591 592 593 594
out:
	return err;
}

/* Register the rtnetlink handlers for the three address-label messages. */
void __init ipv6_addr_label_rtnl_register(void)
{
	/* Add and delete requests share one handler. */
	__rtnl_register(PF_INET6, RTM_NEWADDRLABEL,
			ip6addrlbl_newdel, NULL, 0);
	__rtnl_register(PF_INET6, RTM_DELADDRLABEL,
			ip6addrlbl_newdel, NULL, 0);

	/* Get requests are given both the lookup and the dump function. */
	__rtnl_register(PF_INET6, RTM_GETADDRLABEL,
			ip6addrlbl_get, ip6addrlbl_dump, 0);
}