Commit ac27bdc8 authored by David S. Miller

Merge nuts.ninka.net:/home/davem/src/BK/network-2.5

into nuts.ninka.net:/home/davem/src/BK/net-2.5
parents f7353c67 9af22f90
......@@ -301,7 +301,8 @@ enum
NET_IPV4_NONLOCAL_BIND=88,
NET_IPV4_ICMP_RATELIMIT=89,
NET_IPV4_ICMP_RATEMASK=90,
NET_TCP_TW_REUSE=91
NET_TCP_TW_REUSE=91,
NET_TCP_FRTO=92
};
enum {
......
......@@ -366,6 +366,9 @@ struct tcp_opt {
unsigned int keepalive_intvl; /* time interval between keep alive probes */
int linger2;
int frto_counter; /* Number of new acks after RTO */
__u32 frto_highmark; /* snd_nxt when RTO occurred */
unsigned long last_synq_overflow;
};
......
/*
*
* Flow based forwarding rules (usage: firewalling, etc)
* Generic internet FLOW.
*
*/
......@@ -8,12 +8,16 @@
#define _NET_FLOW_H
struct flowi {
int proto; /* {TCP, UDP, ICMP} */
int oif;
int iif;
union {
struct {
__u32 daddr;
__u32 saddr;
__u32 fwmark;
__u8 tos;
__u8 scope;
} ip4_u;
struct {
......@@ -27,9 +31,12 @@ struct flowi {
#define fl6_flowlabel nl_u.ip6_u.flowlabel
#define fl4_dst nl_u.ip4_u.daddr
#define fl4_src nl_u.ip4_u.saddr
#define fl4_fwmark nl_u.ip4_u.fwmark
#define fl4_tos nl_u.ip4_u.tos
#define fl4_scope nl_u.ip4_u.scope
int oif;
__u8 proto;
__u8 flags;
union {
struct {
__u16 sport;
......@@ -41,61 +48,8 @@ struct flowi {
__u8 code;
} icmpt;
unsigned long data;
__u32 spi;
} uli_u;
};
#define FLOWR_NODECISION 0 /* rule not applicable to flow */
#define FLOWR_SELECT 1 /* flow must follow this rule */
#define FLOWR_CLEAR 2 /* priority level clears flow */
#define FLOWR_ERROR 3
struct fl_acc_args {
int type;
#define FL_ARG_FORWARD 1
#define FL_ARG_ORIGIN 2
union {
struct sk_buff *skb;
struct {
struct sock *sk;
struct flowi *flow;
} fl_o;
} fl_u;
};
struct pkt_filter {
atomic_t refcnt;
unsigned int offset;
__u32 value;
__u32 mask;
struct pkt_filter *next;
};
#define FLR_INPUT 1
#define FLR_OUTPUT 2
struct flow_filter {
int type;
union {
struct pkt_filter *filter;
struct sock *sk;
} u;
};
struct flow_rule {
struct flow_rule_ops *ops;
unsigned char private[0];
};
struct flow_rule_ops {
int (*accept)(struct rt6_info *rt,
struct rt6_info *rule,
struct fl_acc_args *args,
struct rt6_info **nrt);
};
#endif
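The hunks that follow convert every IPv4 caller from struct rt_key to this struct flowi, built on the stack with designated initializers and read back through the fl4_* macros before being handed to ip_route_output_key(). A minimal, compilable userspace mimic of that pattern — the struct and macro names below only imitate the <net/flow.h> definitions above, they are not the kernel ones:

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for the kernel's struct flowi; only the IPv4 leg
 * of the union is mimicked here. */
struct flowi_demo {
        int oif;                        /* output interface index */
        int iif;                        /* input interface index */
        union {
                struct {
                        uint32_t daddr;
                        uint32_t saddr;
                        uint32_t fwmark;
                        uint8_t  tos;
                        uint8_t  scope;
                } ip4_u;
        } nl_u;
};

/* Accessor macros in the style of fl4_dst/fl4_src/fl4_tos above. */
#define fl4_dst nl_u.ip4_u.daddr
#define fl4_src nl_u.ip4_u.saddr
#define fl4_tos nl_u.ip4_u.tos

int main(void)
{
        /* Same initializer shape the converted callers use before
         * calling ip_route_output_key(&rt, &fl). */
        struct flowi_demo fl = { .nl_u = { .ip4_u = { .daddr = 0x0100007f,
                                                      .tos   = 0 } },
                                 .oif = 2 };

        fl.fl4_src = 0;         /* filled in later, as ip_route_connect() does */
        printf("dst=%#x src=%#x tos=%u oif=%d\n",
               (unsigned)fl.fl4_dst, (unsigned)fl.fl4_src,
               (unsigned)fl.fl4_tos, fl.oif);
        return 0;
}

The union keeps the IPv4 and IPv6 key fields overlaid, so one flow descriptor can serve both the IPv4 FIB lookups and the IPv6 routing paths touched later in this diff.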
......@@ -70,14 +70,6 @@ struct rt6_info
u8 rt6i_hoplimit;
atomic_t rt6i_ref;
union {
struct flow_rule *rt6iu_flowr;
struct flow_filter *rt6iu_filter;
} flow_u;
#define rt6i_flowr flow_u.rt6iu_flowr
#define rt6i_filter flow_u.rt6iu_filter
struct rt6key rt6i_dst;
struct rt6key rt6i_src;
};
......
#ifndef __NET_IP6_FW_H
#define __NET_IP6_FW_H
#define IP6_FW_LISTHEAD 0x1000
#define IP6_FW_ACCEPT 0x0001
#define IP6_FW_REJECT 0x0002
#define IP6_FW_DEBUG 2
#define IP6_FW_MSG_ADD 1
#define IP6_FW_MSG_DEL 2
#define IP6_FW_MSG_REPORT 3
/*
* Fast "hack" user interface
*/
struct ip6_fw_msg {
struct in6_addr dst;
struct in6_addr src;
int dst_len;
int src_len;
int action;
int policy;
int proto;
union {
struct {
__u16 sport;
__u16 dport;
} transp;
unsigned long data;
int icmp_type;
} u;
int msg_len;
};
#ifdef __KERNEL__
#include <net/flow.h>
struct ip6_fw_rule {
struct flow_rule flowr;
struct ip6_fw_rule *next;
struct ip6_fw_rule *prev;
struct flowi info;
unsigned long policy;
};
#endif
#endif
......@@ -17,6 +17,7 @@
#define _NET_IP_FIB_H
#include <linux/config.h>
#include <net/flow.h>
struct kern_rta
{
......@@ -117,7 +118,7 @@ struct fib_table
{
unsigned char tb_id;
unsigned tb_stamp;
int (*tb_lookup)(struct fib_table *tb, const struct rt_key *key, struct fib_result *res);
int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
int (*tb_insert)(struct fib_table *table, struct rtmsg *r,
struct kern_rta *rta, struct nlmsghdr *n,
struct netlink_skb_parms *req);
......@@ -130,7 +131,7 @@ struct fib_table
int (*tb_get_info)(struct fib_table *table, char *buf,
int first, int count);
void (*tb_select_default)(struct fib_table *table,
const struct rt_key *key, struct fib_result *res);
const struct flowi *flp, struct fib_result *res);
unsigned char tb_data[0];
};
......@@ -152,18 +153,18 @@ static inline struct fib_table *fib_new_table(int id)
return fib_get_table(id);
}
static inline int fib_lookup(const struct rt_key *key, struct fib_result *res)
static inline int fib_lookup(const struct flowi *flp, struct fib_result *res)
{
if (local_table->tb_lookup(local_table, key, res) &&
main_table->tb_lookup(main_table, key, res))
if (local_table->tb_lookup(local_table, flp, res) &&
main_table->tb_lookup(main_table, flp, res))
return -ENETUNREACH;
return 0;
}
static inline void fib_select_default(const struct rt_key *key, struct fib_result *res)
static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
{
if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
main_table->tb_select_default(main_table, key, res);
main_table->tb_select_default(main_table, flp, res);
}
#else /* CONFIG_IP_MULTIPLE_TABLES */
......@@ -171,7 +172,7 @@ static inline void fib_select_default(const struct rt_key *key, struct fib_resul
#define main_table (fib_tables[RT_TABLE_MAIN])
extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
extern int fib_lookup(const struct rt_key *key, struct fib_result *res);
extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
extern struct fib_table *__fib_new_table(int id);
extern void fib_rule_put(struct fib_rule *r);
......@@ -191,7 +192,7 @@ static inline struct fib_table *fib_new_table(int id)
return fib_tables[id] ? : __fib_new_table(id);
}
extern void fib_select_default(const struct rt_key *key, struct fib_result *res);
extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
#endif /* CONFIG_IP_MULTIPLE_TABLES */
......@@ -204,13 +205,13 @@ extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar
extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
struct net_device *dev, u32 *spec_dst, u32 *itag);
extern void fib_select_multipath(const struct rt_key *key, struct fib_result *res);
extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
/* Exported by fib_semantics.c */
extern int ip_fib_check_default(u32 gw, struct net_device *dev);
extern void fib_release_info(struct fib_info *);
extern int fib_semantic_match(int type, struct fib_info *,
const struct rt_key *, struct fib_result*);
const struct flowi *, struct fib_result*);
extern struct fib_info *fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
const struct nlmsghdr *, int *err);
extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, struct kern_rta *rta, struct fib_info *fi);
......
......@@ -27,6 +27,7 @@
#include <linux/config.h>
#include <net/dst.h>
#include <net/inetpeer.h>
#include <net/flow.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/route.h>
......@@ -45,19 +46,6 @@
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sk->localroute)
struct rt_key
{
__u32 dst;
__u32 src;
int iif;
int oif;
#ifdef CONFIG_IP_ROUTE_FWMARK
__u32 fwmark;
#endif
__u8 tos;
__u8 scope;
};
struct inet_peer;
struct rtable
{
......@@ -78,7 +66,7 @@ struct rtable
__u32 rt_gateway;
/* Cache lookup keys */
struct rt_key key;
struct flowi fl;
/* Miscellaneous cached information */
__u32 rt_spec_dst; /* RFC1122 specific destination */
......@@ -124,7 +112,7 @@ extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw,
u32 src, u8 tos, struct net_device *dev);
extern void ip_rt_advice(struct rtable **rp, int advice);
extern void rt_cache_flush(int how);
extern int ip_route_output_key(struct rtable **, const struct rt_key *key);
extern int ip_route_output_key(struct rtable **, const struct flowi *flp);
extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin);
extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
extern void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu);
......@@ -136,16 +124,6 @@ extern int ip_rt_ioctl(unsigned int cmd, void *arg);
extern void ip_rt_get_source(u8 *src, struct rtable *rt);
extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb);
/* Deprecated: use ip_route_output_key directly */
static inline int ip_route_output(struct rtable **rp,
u32 daddr, u32 saddr, u32 tos, int oif)
{
struct rt_key key = { dst:daddr, src:saddr, oif:oif, tos:tos };
return ip_route_output_key(rp, &key);
}
static inline void ip_rt_put(struct rtable * rt)
{
if (rt)
......@@ -163,15 +141,20 @@ static inline char rt_tos2priority(u8 tos)
static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif)
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
.saddr = src,
.tos = tos } },
.oif = oif };
int err;
err = ip_route_output(rp, dst, src, tos, oif);
err = ip_route_output_key(rp, &fl);
if (err || (dst && src))
return err;
dst = (*rp)->rt_dst;
src = (*rp)->rt_src;
fl.fl4_dst = (*rp)->rt_dst;
fl.fl4_src = (*rp)->rt_src;
ip_rt_put(*rp);
*rp = NULL;
return ip_route_output(rp, dst, src, tos, oif);
return ip_route_output_key(rp, &fl);
}
extern void rt_bind_peer(struct rtable *rt, int create);
......
......@@ -472,6 +472,7 @@ extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto;
extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
......@@ -1855,4 +1856,17 @@ static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
#define TCP_CHECK_TIMER(sk) do { } while (0)
static inline int tcp_use_frto(const struct sock *sk)
{
const struct tcp_opt *tp = tcp_sk(sk);
/* F-RTO must be activated in sysctl and there must be some
* unsent new data, and the advertised window should allow
* sending it.
*/
return (sysctl_tcp_frto && tp->send_head &&
!after(TCP_SKB_CB(tp->send_head)->end_seq,
tp->snd_una + tp->snd_wnd));
}
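tcp_use_frto() leans on the kernel's wrap-safe sequence macros for its window check. A small standalone sketch of that arithmetic, assuming the usual signed-difference definition of before()/after() from <net/tcp.h> (the seq_before/seq_after names here are local to the example):

#include <stdio.h>
#include <stdint.h>

/* Wrap-safe TCP sequence comparison in the style of before()/after():
 * the signed 32-bit difference handles sequence-number wrap-around. */
static int seq_before(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) < 0;
}
#define seq_after(seq2, seq1) seq_before(seq1, seq2)

int main(void)
{
        uint32_t snd_una = 0xfffffff0u;         /* close to wrap-around */
        uint32_t snd_wnd = 1000;
        uint32_t end_seq = 100;                 /* already wrapped */

        /* Mirrors the check in tcp_use_frto(): the queued segment may go
         * out only if its end does not pass snd_una + snd_wnd. */
        printf("fits in advertised window: %d\n",
               !seq_after(end_seq, snd_una + snd_wnd));
        return 0;
}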
#endif /* _TCP_H */
......@@ -509,6 +509,7 @@ int clip_setentry(struct atm_vcc *vcc,u32 ip)
struct atmarp_entry *entry;
int error;
struct clip_vcc *clip_vcc;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1 } } };
struct rtable *rt;
if (vcc->push != clip_push) {
......@@ -525,7 +526,7 @@ int clip_setentry(struct atm_vcc *vcc,u32 ip)
unlink_clip_vcc(clip_vcc);
return 0;
}
error = ip_route_output(&rt,ip,0,1,0);
error = ip_route_output_key(&rt,&fl);
if (error) return error;
neigh = __neigh_lookup(&clip_tbl,&ip,rt->u.dst.dev,1);
ip_rt_put(rt);
......
......@@ -563,13 +563,15 @@ int ip_route_me_harder(struct sk_buff **pskb)
{
struct iphdr *iph = (*pskb)->nh.iph;
struct rtable *rt;
struct rt_key key = { dst:iph->daddr,
src:iph->saddr,
oif:(*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
tos:RT_TOS(iph->tos)|RTO_CONN,
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = iph->daddr,
.saddr = iph->saddr,
.tos = RT_TOS(iph->tos)|RTO_CONN,
#ifdef CONFIG_IP_ROUTE_FWMARK
fwmark:(*pskb)->nfmark
.fwmark = (*pskb)->nfmark
#endif
} },
.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
};
struct net_device *dev_src = NULL;
int err;
......@@ -578,10 +580,10 @@ int ip_route_me_harder(struct sk_buff **pskb)
0 or a local address; however some non-standard hacks like
ipt_REJECT.c:send_reset() can cause packets with foreign
saddr to appear on the NF_IP_LOCAL_OUT hook -MB */
if(key.src && !(dev_src = ip_dev_find(key.src)))
key.src = 0;
if(fl.fl4_src && !(dev_src = ip_dev_find(fl.fl4_src)))
fl.fl4_src = 0;
if ((err=ip_route_output_key(&rt, &key)) != 0) {
if ((err=ip_route_output_key(&rt, &fl)) != 0) {
printk("route_me_harder: ip_route_output_key(dst=%u.%u.%u.%u, src=%u.%u.%u.%u, oif=%d, tos=0x%x, fwmark=0x%lx) error %d\n",
NIPQUAD(iph->daddr), NIPQUAD(iph->saddr),
(*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0,
......
......@@ -93,6 +93,7 @@
#include <linux/smp_lock.h>
#include <linux/inet.h>
#include <linux/igmp.h>
#include <linux/netdevice.h>
#include <linux/brlock.h>
#include <net/ip.h>
......
......@@ -347,11 +347,13 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev)
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
.saddr = tip } } };
struct rtable *rt;
int flag = 0;
/*unsigned long now; */
if (ip_route_output(&rt, sip, tip, 0, 0) < 0)
if (ip_route_output_key(&rt, &fl) < 0)
return 1;
if (rt->u.dst.dev != dev) {
NET_INC_STATS_BH(ArpFilter);
......@@ -890,8 +892,10 @@ int arp_req_set(struct arpreq *r, struct net_device * dev)
if (r->arp_flags & ATF_PERM)
r->arp_flags |= ATF_COM;
if (dev == NULL) {
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
.tos = RTO_ONLINK } } };
struct rtable * rt;
if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0)
if ((err = ip_route_output_key(&rt, &fl)) != 0)
return err;
dev = rt->u.dst.dev;
ip_rt_put(rt);
......@@ -974,8 +978,10 @@ int arp_req_delete(struct arpreq *r, struct net_device * dev)
}
if (dev == NULL) {
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
.tos = RTO_ONLINK } } };
struct rtable * rt;
if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0)
if ((err = ip_route_output_key(&rt, &fl)) != 0)
return err;
dev = rt->u.dst.dev;
ip_rt_put(rt);
......
......@@ -144,17 +144,15 @@ fib_get_procinfo(char *buffer, char **start, off_t offset, int length)
struct net_device * ip_dev_find(u32 addr)
{
struct rt_key key;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
struct fib_result res;
struct net_device *dev = NULL;
memset(&key, 0, sizeof(key));
key.dst = addr;
#ifdef CONFIG_IP_MULTIPLE_TABLES
res.r = NULL;
#endif
if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) {
return NULL;
}
if (res.type != RTN_LOCAL)
......@@ -170,7 +168,7 @@ struct net_device * ip_dev_find(u32 addr)
unsigned inet_addr_type(u32 addr)
{
struct rt_key key;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
struct fib_result res;
unsigned ret = RTN_BROADCAST;
......@@ -179,15 +177,13 @@ unsigned inet_addr_type(u32 addr)
if (MULTICAST(addr))
return RTN_MULTICAST;
memset(&key, 0, sizeof(key));
key.dst = addr;
#ifdef CONFIG_IP_MULTIPLE_TABLES
res.r = NULL;
#endif
if (local_table) {
ret = RTN_UNICAST;
if (local_table->tb_lookup(local_table, &key, &res) == 0) {
if (local_table->tb_lookup(local_table, &fl, &res) == 0) {
ret = res.type;
fib_res_put(&res);
}
......@@ -207,18 +203,15 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
struct net_device *dev, u32 *spec_dst, u32 *itag)
{
struct in_device *in_dev;
struct rt_key key;
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = src,
.saddr = dst,
.tos = tos } },
.iif = oif };
struct fib_result res;
int no_addr, rpf;
int ret;
key.dst = src;
key.src = dst;
key.tos = tos;
key.oif = 0;
key.iif = oif;
key.scope = RT_SCOPE_UNIVERSE;
no_addr = rpf = 0;
read_lock(&inetdev_lock);
in_dev = __in_dev_get(dev);
......@@ -231,7 +224,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
if (in_dev == NULL)
goto e_inval;
if (fib_lookup(&key, &res))
if (fib_lookup(&fl, &res))
goto last_resort;
if (res.type != RTN_UNICAST)
goto e_inval_res;
......@@ -252,10 +245,10 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
goto last_resort;
if (rpf)
goto e_inval;
key.oif = dev->ifindex;
fl.oif = dev->ifindex;
ret = 0;
if (fib_lookup(&key, &res) == 0) {
if (fib_lookup(&fl, &res) == 0) {
if (res.type == RTN_UNICAST) {
*spec_dst = FIB_RES_PREFSRC(res);
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
......
......@@ -266,7 +266,7 @@ fn_new_zone(struct fn_hash *table, int z)
}
static int
fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
{
int err;
struct fn_zone *fz;
......@@ -275,7 +275,7 @@ fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result
read_lock(&fib_hash_lock);
for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
struct fib_node *f;
fn_key_t k = fz_key(key->dst, fz);
fn_key_t k = fz_key(flp->fl4_dst, fz);
for (f = fz_chain(k, fz); f; f = f->fn_next) {
if (!fn_key_eq(k, f->fn_key)) {
......@@ -285,17 +285,17 @@ fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result
continue;
}
#ifdef CONFIG_IP_ROUTE_TOS
if (f->fn_tos && f->fn_tos != key->tos)
if (f->fn_tos && f->fn_tos != flp->fl4_tos)
continue;
#endif
f->fn_state |= FN_S_ACCESSED;
if (f->fn_state&FN_S_ZOMBIE)
continue;
if (f->fn_scope < key->scope)
if (f->fn_scope < flp->fl4_scope)
continue;
err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res);
err = fib_semantic_match(f->fn_type, FIB_INFO(f), flp, res);
if (err == 0) {
res->type = f->fn_type;
res->scope = f->fn_scope;
......@@ -338,7 +338,7 @@ static int fib_detect_death(struct fib_info *fi, int order,
}
static void
fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
{
int order, last_idx;
struct fib_node *f;
......
......@@ -307,28 +307,28 @@ static void fib_rules_attach(struct net_device *dev)
}
}
int fib_lookup(const struct rt_key *key, struct fib_result *res)
int fib_lookup(const struct flowi *flp, struct fib_result *res)
{
int err;
struct fib_rule *r, *policy;
struct fib_table *tb;
u32 daddr = key->dst;
u32 saddr = key->src;
u32 daddr = flp->fl4_dst;
u32 saddr = flp->fl4_src;
FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
NIPQUAD(key->dst), NIPQUAD(key->src));
NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
read_lock(&fib_rules_lock);
for (r = fib_rules; r; r=r->r_next) {
if (((saddr^r->r_src) & r->r_srcmask) ||
((daddr^r->r_dst) & r->r_dstmask) ||
#ifdef CONFIG_IP_ROUTE_TOS
(r->r_tos && r->r_tos != key->tos) ||
(r->r_tos && r->r_tos != flp->fl4_tos) ||
#endif
#ifdef CONFIG_IP_ROUTE_FWMARK
(r->r_fwmark && r->r_fwmark != key->fwmark) ||
(r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
#endif
(r->r_ifindex && r->r_ifindex != key->iif))
(r->r_ifindex && r->r_ifindex != flp->iif))
continue;
FRprintk("tb %d r %d ", r->r_table, r->r_action);
......@@ -351,7 +351,7 @@ FRprintk("tb %d r %d ", r->r_table, r->r_action);
if ((tb = fib_get_table(r->r_table)) == NULL)
continue;
err = tb->tb_lookup(tb, key, res);
err = tb->tb_lookup(tb, flp, res);
if (err == 0) {
res->r = policy;
if (policy)
......@@ -369,13 +369,13 @@ FRprintk("FAILURE\n");
return -ENETUNREACH;
}
void fib_select_default(const struct rt_key *key, struct fib_result *res)
void fib_select_default(const struct flowi *flp, struct fib_result *res)
{
if (res->r && res->r->r_action == RTN_UNICAST &&
FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
struct fib_table *tb;
if ((tb = fib_get_table(res->r->r_table)) != NULL)
tb->tb_select_default(tb, key, res);
tb->tb_select_default(tb, flp, res);
}
}
......
......@@ -349,7 +349,6 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
int err;
if (nh->nh_gw) {
struct rt_key key;
struct fib_result res;
#ifdef CONFIG_IP_ROUTE_PERVASIVE
......@@ -372,16 +371,18 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n
nh->nh_scope = RT_SCOPE_LINK;
return 0;
}
memset(&key, 0, sizeof(key));
key.dst = nh->nh_gw;
key.oif = nh->nh_oif;
key.scope = r->rtm_scope + 1;
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = nh->nh_gw,
.scope = r->rtm_scope + 1 } },
.oif = nh->nh_oif };
/* It is not necessary, but requires a bit of thinking */
if (key.scope < RT_SCOPE_LINK)
key.scope = RT_SCOPE_LINK;
if ((err = fib_lookup(&key, &res)) != 0)
if (fl.fl4_scope < RT_SCOPE_LINK)
fl.fl4_scope = RT_SCOPE_LINK;
if ((err = fib_lookup(&fl, &res)) != 0)
return err;
}
err = -EINVAL;
if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
goto out;
......@@ -578,7 +579,7 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
}
int
fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
{
int err = fib_props[type].error;
......@@ -603,7 +604,7 @@ fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, stru
for_nexthops(fi) {
if (nh->nh_flags&RTNH_F_DEAD)
continue;
if (!key->oif || key->oif == nh->nh_oif)
if (!flp->oif || flp->oif == nh->nh_oif)
break;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
......@@ -949,7 +950,7 @@ int fib_sync_up(struct net_device *dev)
fair weighted route distribution.
*/
void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
struct fib_info *fi = res->fi;
int w;
......
......@@ -418,9 +418,14 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (ipc.opt->srr)
daddr = icmp_param->replyopts.faddr;
}
if (ip_route_output(&rt, daddr, rt->rt_spec_dst,
RT_TOS(skb->nh.iph->tos), 0))
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rt->rt_spec_dst,
.tos = RT_TOS(skb->nh.iph->tos) } } };
if (ip_route_output_key(&rt, &fl))
goto out_unlock;
}
if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
icmp_param->data.icmph.code)) {
ip_build_xmit(sk, icmp_glue_bits, icmp_param,
......@@ -526,8 +531,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
* Restore original addresses if packet has been translated.
*/
if (rt->rt_flags & RTCF_NAT && IPCB(skb_in)->flags & IPSKB_TRANSLATED) {
iph->daddr = rt->key.dst;
iph->saddr = rt->key.src;
iph->daddr = rt->fl.fl4_dst;
iph->saddr = rt->fl.fl4_src;
}
#endif
......@@ -539,9 +544,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
IPTOS_PREC_INTERNETCONTROL) :
iph->tos;
if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->saddr,
.saddr = saddr,
.tos = RT_TOS(tos) } } };
if (ip_route_output_key(&rt, &fl))
goto out_unlock;
}
if (ip_options_echo(&icmp_param.replyopts, skb_in))
goto ende;
......@@ -563,9 +572,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts;
if (icmp_param.replyopts.srr) {
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = icmp_param.replyopts.faddr,
.saddr = saddr,
.tos = RT_TOS(tos) } } };
ip_rt_put(rt);
if (ip_route_output(&rt, icmp_param.replyopts.faddr,
saddr, RT_TOS(tos), 0))
if (ip_route_output_key(&rt, &fl))
goto out_unlock;
}
......
......@@ -207,8 +207,12 @@ static int igmp_send_report(struct net_device *dev, u32 group, int type)
if (type == IGMP_HOST_LEAVE_MESSAGE)
dst = IGMP_ALL_ROUTER;
if (ip_route_output(&rt, dst, 0, 0, dev->ifindex))
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst } },
.oif = dev->ifindex };
if (ip_route_output_key(&rt, &fl))
return -1;
}
if (rt->rt_src == 0) {
ip_rt_put(rt);
return -1;
......@@ -374,7 +378,7 @@ int igmp_rcv(struct sk_buff *skb)
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMP_HOST_NEW_MEMBERSHIP_REPORT:
/* Is it our report looped back? */
if (((struct rtable*)skb->dst)->key.iif == 0)
if (((struct rtable*)skb->dst)->fl.iif == 0)
break;
igmp_heard_report(in_dev, ih->group);
break;
......@@ -608,6 +612,8 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = imr->imr_address.s_addr } } };
struct rtable *rt;
struct net_device *dev = NULL;
struct in_device *idev = NULL;
......@@ -619,7 +625,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
__dev_put(dev);
}
if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) {
if (!dev && !ip_route_output_key(&rt, &fl)) {
dev = rt->u.dst.dev;
ip_rt_put(rt);
}
......
......@@ -412,6 +412,7 @@ void ipgre_err(struct sk_buff *skb, u32 info)
u16 flags;
int grehlen = (iph->ihl<<2) + 4;
struct sk_buff *skb2;
struct flowi fl;
struct rtable *rt;
if (p[1] != htons(ETH_P_IP))
......@@ -488,7 +489,10 @@ void ipgre_err(struct sk_buff *skb, u32 info)
skb2->nh.raw = skb2->data;
/* Try to guess incoming interface */
if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
memset(&fl, 0, sizeof(fl));
fl.fl4_dst = eiph->saddr;
fl.fl4_tos = RT_TOS(eiph->tos);
if (ip_route_output_key(&rt, &fl)) {
kfree_skb(skb2);
return;
}
......@@ -498,7 +502,10 @@ void ipgre_err(struct sk_buff *skb, u32 info)
if (rt->rt_flags&RTCF_LOCAL) {
ip_rt_put(rt);
rt = NULL;
if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
fl.fl4_dst = eiph->daddr;
fl.fl4_src = eiph->saddr;
fl.fl4_tos = eiph->tos;
if (ip_route_output_key(&rt, &fl) ||
rt->u.dst.dev->type != ARPHRD_IPGRE) {
ip_rt_put(rt);
kfree_skb(skb2);
......@@ -619,7 +626,7 @@ int ipgre_rcv(struct sk_buff *skb)
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (MULTICAST(iph->daddr)) {
/* Looped back packet, drop it! */
if (((struct rtable*)skb->dst)->key.iif == 0)
if (((struct rtable*)skb->dst)->fl.iif == 0)
goto drop;
tunnel->stat.multicast++;
skb->pkt_type = PACKET_BROADCAST;
......@@ -749,10 +756,17 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
tos &= ~1;
}
if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = dst,
.saddr = tiph->saddr,
.tos = RT_TOS(tos) } },
.oif = tunnel->parms.link };
if (ip_route_output_key(&rt, &fl)) {
tunnel->stat.tx_carrier_errors++;
goto tx_error;
}
}
tdev = rt->u.dst.dev;
if (tdev == dev) {
......@@ -1104,10 +1118,13 @@ static int ipgre_open(struct net_device *dev)
MOD_INC_USE_COUNT;
if (MULTICAST(t->parms.iph.daddr)) {
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = t->parms.iph.daddr,
.saddr = t->parms.iph.saddr,
.tos = RT_TOS(t->parms.iph.tos) } },
.oif = t->parms.link };
struct rtable *rt;
if (ip_route_output(&rt, t->parms.iph.daddr,
t->parms.iph.saddr, RT_TOS(t->parms.iph.tos),
t->parms.link)) {
if (ip_route_output_key(&rt, &fl)) {
MOD_DEC_USE_COUNT;
return -EADDRNOTAVAIL;
}
......@@ -1177,8 +1194,13 @@ static int ipgre_tunnel_init(struct net_device *dev)
/* Guess output device to choose reasonable mtu and hard_header_len */
if (iph->daddr) {
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = iph->daddr,
.saddr = iph->saddr,
.tos = RT_TOS(iph->tos) } },
.oif = tunnel->parms.link };
struct rtable *rt;
if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
if (!ip_route_output_key(&rt, &fl)) {
tdev = rt->u.dst.dev;
ip_rt_put(rt);
}
......
......@@ -117,23 +117,21 @@ ip_do_nat(struct sk_buff *skb)
if (rt->rt_flags&RTCF_SNAT) {
if (ciph->daddr != osaddr) {
struct fib_result res;
struct rt_key key;
unsigned flags = 0;
key.src = ciph->daddr;
key.dst = ciph->saddr;
key.iif = skb->dev->ifindex;
key.oif = 0;
struct flowi fl = { .nl_u =
{ .ip4_u =
{ .daddr = ciph->saddr,
.saddr = ciph->daddr,
#ifdef CONFIG_IP_ROUTE_TOS
key.tos = RT_TOS(ciph->tos);
#endif
#ifdef CONFIG_IP_ROUTE_FWMARK
key.fwmark = 0;
.tos = RT_TOS(ciph->tos)
#endif
} },
.iif = skb->dev->ifindex };
/* Use fib_lookup() until we get our own
* hash table of NATed hosts -- Rani
*/
if (fib_lookup(&key, &res) == 0) {
if (fib_lookup(&fl, &res) == 0) {
if (res.r) {
ciph->daddr = fib_rules_policy(ciph->daddr, &res, &flags);
if (ciph->daddr != idaddr)
......
......@@ -372,14 +372,20 @@ int ip_queue_xmit(struct sk_buff *skb)
if(opt && opt->srr)
daddr = opt->faddr;
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = inet->saddr,
.tos = RT_CONN_FLAGS(sk) } },
.oif = sk->bound_dev_if };
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times itself
* out.
*/
if (ip_route_output(&rt, daddr, inet->saddr,
RT_CONN_FLAGS(sk),
sk->bound_dev_if))
if (ip_route_output_key(&rt, &fl))
goto no_route;
}
__sk_dst_set(sk, &rt->u.dst);
tcp_v4_setup_caps(sk, &rt->u.dst);
}
......@@ -991,8 +997,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
daddr = replyopts.opt.faddr;
}
if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rt->rt_spec_dst,
.tos = RT_TOS(skb->nh.iph->tos) } } };
if (ip_route_output_key(&rt, &fl))
return;
}
/* And let IP do all the hard work.
......
......@@ -355,6 +355,7 @@ void ipip_err(struct sk_buff *skb, u32 info)
int rel_code = 0;
int rel_info = 0;
struct sk_buff *skb2;
struct flowi fl;
struct rtable *rt;
if (len < hlen + sizeof(struct iphdr))
......@@ -417,7 +418,10 @@ void ipip_err(struct sk_buff *skb, u32 info)
skb2->nh.raw = skb2->data;
/* Try to guess incoming interface */
if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
memset(&fl, 0, sizeof(fl));
fl.fl4_dst = eiph->saddr;
fl.fl4_tos = RT_TOS(eiph->tos);
if (ip_route_output_key(&rt, &fl)) {
kfree_skb(skb2);
return;
}
......@@ -427,7 +431,10 @@ void ipip_err(struct sk_buff *skb, u32 info)
if (rt->rt_flags&RTCF_LOCAL) {
ip_rt_put(rt);
rt = NULL;
if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
fl.fl4_dst = eiph->daddr;
fl.fl4_src = eiph->saddr;
fl.fl4_tos = eiph->tos;
if (ip_route_output_key(&rt, &fl) ||
rt->u.dst.dev->type != ARPHRD_IPGRE) {
ip_rt_put(rt);
kfree_skb(skb2);
......@@ -560,10 +567,17 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_error_icmp;
}
if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = dst,
.saddr = tiph->saddr,
.tos = RT_TOS(tos) } },
.oif = tunnel->parms.link };
if (ip_route_output_key(&rt, &fl)) {
tunnel->stat.tx_carrier_errors++;
goto tx_error_icmp;
}
}
tdev = rt->u.dst.dev;
if (tdev == dev) {
......@@ -822,8 +836,13 @@ static int ipip_tunnel_init(struct net_device *dev)
ipip_tunnel_init_gen(dev);
if (iph->daddr) {
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = iph->daddr,
.saddr = iph->saddr,
.tos = RT_TOS(iph->tos) } },
.oif = tunnel->parms.link };
struct rtable *rt;
if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
if (!ip_route_output_key(&rt, &fl)) {
tdev = rt->u.dst.dev;
ip_rt_put(rt);
}
......
......@@ -1146,11 +1146,20 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
#endif
if (vif->flags&VIFF_TUNNEL) {
if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = vif->remote,
.saddr = vif->local,
.tos = RT_TOS(iph->tos) } },
.oif = vif->link };
if (ip_route_output_key(&rt, &fl))
return;
encap = sizeof(struct iphdr);
} else {
if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = iph->daddr,
.tos = RT_TOS(iph->tos) } },
.oif = vif->link };
if (ip_route_output_key(&rt, &fl))
return;
}
......@@ -1244,7 +1253,7 @@ int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
if (vif_table[vif].dev != skb->dev) {
int true_vifi;
if (((struct rtable*)skb->dst)->key.iif == 0) {
if (((struct rtable*)skb->dst)->fl.iif == 0) {
/* It is our own packet, looped back.
Very complicated situation...
......
......@@ -68,12 +68,13 @@ do_masquerade(struct sk_buff **pskb, const struct net_device *dev)
/* Setup the masquerade, if not already */
if (!info->initialized) {
u_int32_t newsrc;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr } } };
struct rtable *rt;
struct ip_nat_multi_range range;
/* Pass 0 instead of saddr, since it's going to be changed
anyway. */
if (ip_route_output(&rt, iph->daddr, 0, 0, 0) != 0) {
if (ip_route_output_key(&rt, &fl) != 0) {
DEBUGP("ipnat_rule_masquerade: Can't reroute.\n");
return NF_DROP;
}
......
......@@ -209,10 +209,11 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
static int
do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
{
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = var_ip } } };
struct rtable *rt;
/* FIXME: IPTOS_TOS(iph->tos) --RR */
if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) {
if (ip_route_output_key(&rt, &fl) != 0) {
DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
NIPQUAD(var_ip));
return 0;
......
......@@ -69,7 +69,6 @@ masquerade_target(struct sk_buff **pskb,
struct ip_nat_multi_range newrange;
u_int32_t newsrc;
struct rtable *rt;
struct rt_key key;
IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
......@@ -84,18 +83,22 @@ masquerade_target(struct sk_buff **pskb,
mr = targinfo;
key.dst = (*pskb)->nh.iph->daddr;
key.src = 0; /* Unknown: that's what we're trying to establish */
key.tos = RT_TOS((*pskb)->nh.iph->tos)|RTO_CONN;
key.oif = out->ifindex;
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = (*pskb)->nh.iph->daddr,
.tos = (RT_TOS((*pskb)->nh.iph->tos) |
RTO_CONN),
#ifdef CONFIG_IP_ROUTE_FWMARK
key.fwmark = (*pskb)->nfmark;
.fwmark = (*pskb)->nfmark
#endif
if (ip_route_output_key(&rt, &key) != 0) {
} },
.oif = out->ifindex };
if (ip_route_output_key(&rt, &fl) != 0) {
/* Shouldn't happen */
printk("MASQUERADE: No route: Rusty's brain broke!\n");
return NF_DROP;
}
}
newsrc = rt->rt_src;
DEBUGP("newsrc = %u.%u.%u.%u\n", NIPQUAD(newsrc));
......
......@@ -44,12 +44,13 @@ struct in_device;
static int route_mirror(struct sk_buff *skb)
{
struct iphdr *iph = skb->nh.iph;
struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->saddr,
.saddr = iph->daddr,
.tos = RT_TOS(iph->tos) | RTO_CONN } } };
struct rtable *rt;
/* Backwards */
if (ip_route_output(&rt, iph->saddr, iph->daddr,
RT_TOS(iph->tos) | RTO_CONN,
0)) {
if (ip_route_output_key(&rt, &fl)) {
return 0;
}
......
......@@ -130,12 +130,19 @@ static void send_reset(struct sk_buff *oldskb, int local)
nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
nskb->nh.iph->ihl);
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = nskb->nh.iph->daddr,
.saddr = (local ?
nskb->nh.iph->saddr :
0),
.tos = (RT_TOS(nskb->nh.iph->tos) |
RTO_CONN) } } };
/* Routing: if not headed for us, route won't like source */
if (ip_route_output(&rt, nskb->nh.iph->daddr,
local ? nskb->nh.iph->saddr : 0,
RT_TOS(nskb->nh.iph->tos) | RTO_CONN,
0) != 0)
if (ip_route_output_key(&rt, &fl))
goto free_nskb;
}
dst_release(nskb->dst);
nskb->dst = &rt->u.dst;
......@@ -207,9 +214,14 @@ static void send_unreach(struct sk_buff *skb_in, int code)
tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL;
if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = iph->saddr,
.saddr = saddr,
.tos = RT_TOS(tos) } } };
if (ip_route_output_key(&rt, &fl))
return;
}
/* RFC says return as much as we can without exceeding 576 bytes. */
length = skb_in->len + sizeof(struct iphdr) + sizeof(struct icmphdr);
......
......@@ -402,8 +402,14 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
rfh.saddr = inet->mc_addr;
}
err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif);
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = rfh.saddr,
.tos = tos } },
.oif = ipc.oif };
err = ip_route_output_key(&rt, &fl);
}
if (err)
goto done;
......
......@@ -171,15 +171,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
* hasn't changed since we received the original syn, but I see
* no easy way to do this.
*/
if (ip_route_output(&rt,
opt &&
opt->srr ? opt->faddr : req->af.v4_req.rmt_addr,
req->af.v4_req.loc_addr,
RT_CONN_FLAGS(sk),
0)) {
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = ((opt && opt->srr) ?
opt->faddr :
req->af.v4_req.rmt_addr),
.saddr = req->af.v4_req.loc_addr,
.tos = RT_CONN_FLAGS(sk) } } };
if (ip_route_output_key(&rt, &fl)) {
tcp_openreq_free(req);
goto out;
}
}
/* Try to redo what tcp_v4_send_synack did. */
req->window_clamp = rt->u.dst.window;
......
......@@ -221,6 +221,8 @@ ctl_table ipv4_table[] = {
&sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_TCP_TW_REUSE, "tcp_tw_reuse",
&sysctl_tcp_tw_reuse, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_TCP_FRTO, "tcp_frto",
&sysctl_tcp_frto, sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
......
......@@ -60,6 +60,7 @@
* Pasi Sarolahti,
* Panu Kuhlberg: Experimental audit of TCP (re)transmission
* engine. Lots of bugs are found.
* Pasi Sarolahti: F-RTO for dealing with spurious RTOs
*/
#include <linux/config.h>
......@@ -86,6 +87,7 @@ int sysctl_tcp_adv_win_scale = 2;
int sysctl_tcp_stdurg = 0;
int sysctl_tcp_rfc1337 = 0;
int sysctl_tcp_max_orphans = NR_FILE;
int sysctl_tcp_frto = 0;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
......@@ -968,6 +970,89 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
return flag;
}
/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
* segments to see from the next ACKs whether any data was really missing.
* If the RTO was spurious, new ACKs should arrive.
*/
void tcp_enter_frto(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb;
tp->frto_counter = 1;
if (tp->ca_state <= TCP_CA_Disorder ||
tp->snd_una == tp->high_seq ||
(tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(tp);
tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
}
/* Have to clear retransmission markers here to keep the bookkeeping
* in shape, even though we are not yet in Loss state.
* If something was really lost, it is eventually caught up
* in tcp_enter_frto_loss.
*/
tp->retrans_out = 0;
tp->undo_marker = tp->snd_una;
tp->undo_retrans = 0;
for_retrans_queue(skb, sk, tp) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
}
tcp_sync_left_out(tp);
tp->ca_state = TCP_CA_Open;
tp->frto_highmark = tp->snd_nxt;
}
/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
* which indicates that we should follow the traditional RTO recovery,
* i.e. mark everything lost and do go-back-N retransmission.
*/
void tcp_enter_frto_loss(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb;
int cnt = 0;
tp->sacked_out = 0;
tp->lost_out = 0;
tp->fackets_out = 0;
for_retrans_queue(skb, sk, tp) {
cnt++;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
/* Do not mark those segments lost that were
* forward transmitted after RTO
*/
if(!after(TCP_SKB_CB(skb)->end_seq,
tp->frto_highmark)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out++;
}
} else {
tp->sacked_out++;
tp->fackets_out = cnt;
}
}
tcp_sync_left_out(tp);
tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->undo_marker = 0;
tp->frto_counter = 0;
tp->reordering = min_t(unsigned int, tp->reordering,
sysctl_tcp_reordering);
tp->ca_state = TCP_CA_Loss;
tp->high_seq = tp->frto_highmark;
TCP_ECN_queue_cwr(tp);
}
void tcp_clear_retrans(struct tcp_opt *tp)
{
tp->left_out = 0;
......@@ -1539,6 +1624,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* E. Check state exit conditions. State can be terminated
* when high_seq is ACKed. */
if (tp->ca_state == TCP_CA_Open) {
if (!sysctl_tcp_frto)
BUG_TRAP(tp->retrans_out == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
......@@ -1910,6 +1996,41 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp,
return flag;
}
static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
{
struct tcp_opt *tp = tcp_sk(sk);
tcp_sync_left_out(tp);
if (tp->snd_una == prior_snd_una ||
!before(tp->snd_una, tp->frto_highmark)) {
/* RTO was caused by loss, start retransmitting in
* go-back-N slow start
*/
tcp_enter_frto_loss(sk);
return;
}
if (tp->frto_counter == 1) {
/* First ACK after RTO advances the window: allow two new
* segments out.
*/
tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
} else {
/* Also the second ACK after RTO advances the window.
* The RTO was likely spurious. Reduce cwnd and continue
* in congestion avoidance
*/
tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
tcp_moderate_cwnd(tp);
}
/* F-RTO affects only the two new ACKs following the RTO.
* At latest on the third ACK the TCP behavior is back to normal.
*/
tp->frto_counter = (tp->frto_counter + 1) % 3;
}
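A tiny standalone sketch of the frto_counter walk driven by the modulo update above, assuming tcp_enter_frto() has set the counter to 1: it steps to 2 on the first new ACK and back to 0 (normal behaviour) on the second, matching the comment.

#include <stdio.h>

int main(void)
{
        int frto_counter = 1;   /* as set by tcp_enter_frto() */
        int ack = 0;

        /* tcp_process_frto() runs only while frto_counter is non-zero
         * (see the check in tcp_ack() below) and steps it 1 -> 2 -> 0. */
        while (frto_counter) {
                ack++;
                frto_counter = (frto_counter + 1) % 3;
                printf("new ACK %d after RTO: frto_counter -> %d\n",
                       ack, frto_counter);
        }
        return 0;
}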
/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
{
......@@ -1968,6 +2089,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk);
if (tp->frto_counter)
tcp_process_frto(sk, prior_snd_una);
if (tcp_ack_is_dubious(tp, flag)) {
/* Advance CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) &&
......
......@@ -1266,11 +1266,15 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk,
{
struct rtable *rt;
struct ip_options *opt = req->af.v4_req.opt;
if (ip_route_output(&rt, ((opt && opt->srr) ? opt->faddr :
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = ((opt && opt->srr) ?
opt->faddr :
req->af.v4_req.rmt_addr),
req->af.v4_req.loc_addr,
RT_CONN_FLAGS(sk), sk->bound_dev_if)) {
.saddr = req->af.v4_req.loc_addr,
.tos = RT_CONN_FLAGS(sk) } },
.oif = sk->bound_dev_if };
if (ip_route_output_key(&rt, &fl)) {
IP_INC_STATS_BH(IpOutNoRoutes);
return NULL;
}
......@@ -1909,8 +1913,15 @@ int tcp_v4_rebuild_header(struct sock *sk)
if (inet->opt && inet->opt->srr)
daddr = inet->opt->faddr;
err = ip_route_output(&rt, daddr, inet->saddr,
RT_CONN_FLAGS(sk), sk->bound_dev_if);
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = inet->saddr,
.tos = RT_CONN_FLAGS(sk) } },
.oif = sk->bound_dev_if };
err = ip_route_output_key(&rt, &fl);
}
if (!err) {
__sk_dst_set(sk, &rt->u.dst);
tcp_v4_setup_caps(sk, &rt->u.dst);
......
......@@ -718,6 +718,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->snd_cwnd = 2;
newtp->snd_cwnd_cnt = 0;
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
newtp->ca_state = TCP_CA_Open;
tcp_init_xmit_timers(newsk);
skb_queue_head_init(&newtp->out_of_order_queue);
......
......@@ -374,7 +374,11 @@ static void tcp_retransmit_timer(struct sock *sk)
}
}
if (tcp_use_frto(sk)) {
tcp_enter_frto(sk);
} else {
tcp_enter_loss(sk, 0);
}
if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) {
/* Retransmission failed because of local congestion,
......
......@@ -528,7 +528,12 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
rt = (struct rtable*)sk_dst_check(sk, 0);
if (rt == NULL) {
err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = ufh.saddr,
.tos = tos } },
.oif = ipc.oif };
err = ip_route_output_key(&rt, &fl);
if (err)
goto out;
......
......@@ -2,9 +2,6 @@
# IPv6 configuration
#
#bool ' IPv6: flow policy support' CONFIG_RT6_POLICY
#bool ' IPv6: firewall support' CONFIG_IPV6_FIREWALL
if [ "$CONFIG_NETFILTER" != "n" ]; then
source net/ipv6/netfilter/Config.in
fi
......@@ -12,7 +12,6 @@ ipv6-objs := af_inet6.o ip6_output.o ip6_input.o addrconf.o sit.o \
exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
ip6_flowlabel.o ipv6_syms.o
#obj-$(CONFIG_IPV6_FIREWALL) += ip6_fw.o
obj-$(CONFIG_NETFILTER) += netfilter/
include $(TOPDIR)/Rules.make
......@@ -452,7 +452,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt)
*/
if ((iter->rt6i_dev == rt->rt6i_dev) &&
(iter->rt6i_flowr == rt->rt6i_flowr) &&
(ipv6_addr_cmp(&iter->rt6i_gateway,
&rt->rt6i_gateway) == 0)) {
if (!(iter->rt6i_flags&RTF_EXPIRES))
......
/*
* IPv6 Firewall
* Linux INET6 implementation
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
* $Id: ip6_fw.c,v 1.16 2001/10/31 08:17:58 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/udp.h>
#include <linux/init.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_fw.h>
#include <net/netlink.h>
static unsigned long ip6_fw_rule_cnt;
static struct ip6_fw_rule ip6_fw_rule_list = {
{0},
NULL, NULL,
{0},
IP6_FW_REJECT
};
static int ip6_fw_accept(struct dst_entry *dst, struct fl_acc_args *args);
struct flow_rule_ops ip6_fw_ops = {
ip6_fw_accept
};
static struct rt6_info ip6_fw_null_entry = {
{{NULL, 0, 0, NULL,
0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL,
ip6_pkt_discard, ip6_pkt_discard, NULL}},
NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0UL,
0, &ip6_fw_rule_list, {{{{0}}}, 128}, {{{{0}}}, 128}
};
static struct fib6_node ip6_fw_fib = {
NULL, NULL, NULL, NULL,
&ip6_fw_null_entry,
0, RTN_ROOT|RTN_TL_ROOT, 0
};
rwlock_t ip6_fw_lock = RW_LOCK_UNLOCKED;
static void ip6_rule_add(struct ip6_fw_rule *rl)
{
struct ip6_fw_rule *next;
write_lock_bh(&ip6_fw_lock);
ip6_fw_rule_cnt++;
next = &ip6_fw_rule_list;
rl->next = next;
rl->prev = next->prev;
rl->prev->next = rl;
next->prev = rl;
write_unlock_bh(&ip6_fw_lock);
}
static void ip6_rule_del(struct ip6_fw_rule *rl)
{
struct ip6_fw_rule *next, *prev;
write_lock_bh(&ip6_fw_lock);
ip6_fw_rule_cnt--;
next = rl->next;
prev = rl->prev;
next->prev = prev;
prev->next = next;
write_unlock_bh(&ip6_fw_lock);
}
static __inline__ struct ip6_fw_rule * ip6_fwrule_alloc(void)
{
struct ip6_fw_rule *rl;
rl = kmalloc(sizeof(struct ip6_fw_rule), GFP_ATOMIC);
if (rl)
{
memset(rl, 0, sizeof(struct ip6_fw_rule));
rl->flowr.ops = &ip6_fw_ops;
}
return rl;
}
static __inline__ void ip6_fwrule_free(struct ip6_fw_rule * rl)
{
kfree(rl);
}
static __inline__ int port_match(int rl_port, int fl_port)
{
int res = 0;
if (rl_port == 0 || (rl_port == fl_port))
res = 1;
return res;
}
static int ip6_fw_accept_trans(struct ip6_fw_rule *rl,
struct fl_acc_args *args)
{
int res = FLOWR_NODECISION;
int proto = 0;
int sport = 0;
int dport = 0;
switch (args->type) {
case FL_ARG_FORWARD:
{
struct sk_buff *skb = args->fl_u.skb;
struct ipv6hdr *hdr = skb->nh.ipv6h;
int len;
len = skb->len - sizeof(struct ipv6hdr);
proto = hdr->nexthdr;
switch (proto) {
case IPPROTO_TCP:
{
struct tcphdr *th;
if (len < sizeof(struct tcphdr)) {
res = FLOWR_ERROR;
goto out;
}
th = (struct tcphdr *)(hdr + 1);
sport = th->source;
dport = th->dest;
break;
}
case IPPROTO_UDP:
{
struct udphdr *uh;
if (len < sizeof(struct udphdr)) {
res = FLOWR_ERROR;
goto out;
}
uh = (struct udphdr *)(hdr + 1);
sport = uh->source;
dport = uh->dest;
break;
}
default:
goto out;
};
break;
}
case FL_ARG_ORIGIN:
{
proto = args->fl_u.fl_o.flow->proto;
if (proto == IPPROTO_ICMPV6) {
goto out;
} else {
sport = args->fl_u.fl_o.flow->uli_u.ports.sport;
dport = args->fl_u.fl_o.flow->uli_u.ports.dport;
}
break;
}
if (proto == rl->info.proto &&
port_match(args->fl_u.fl_o.flow->uli_u.ports.sport, sport) &&
port_match(args->fl_u.fl_o.flow->uli_u.ports.dport, dport)) {
if (rl->policy & IP6_FW_REJECT)
res = FLOWR_SELECT;
else
res = FLOWR_CLEAR;
}
default:
#if IP6_FW_DEBUG >= 1
printk(KERN_DEBUG "ip6_fw_accept: unknown arg type\n");
#endif
goto out;
};
out:
return res;
}
static int ip6_fw_accept(struct dst_entry *dst, struct fl_acc_args *args)
{
struct rt6_info *rt;
struct ip6_fw_rule *rl;
int proto;
int res = FLOWR_NODECISION;
rt = (struct rt6_info *) dst;
rl = (struct ip6_fw_rule *) rt->rt6i_flowr;
proto = rl->info.proto;
switch (proto) {
case 0:
if (rl->policy & IP6_FW_REJECT)
res = FLOWR_SELECT;
else
res = FLOWR_CLEAR;
break;
case IPPROTO_TCP:
case IPPROTO_UDP:
res = ip6_fw_accept_trans(rl, args);
break;
case IPPROTO_ICMPV6:
};
return res;
}
static struct dst_entry * ip6_fw_dup(struct dst_entry *frule,
struct dst_entry *rt,
struct fl_acc_args *args)
{
struct ip6_fw_rule *rl;
struct rt6_info *nrt;
struct rt6_info *frt;
frt = (struct rt6_info *) frule;
rl = (struct ip6_fw_rule *) frt->rt6i_flowr;
nrt = ip6_rt_copy((struct rt6_info *) rt);
if (nrt) {
nrt->u.dst.input = frule->input;
nrt->u.dst.output = frule->output;
nrt->rt6i_flowr = flow_clone(frt->rt6i_flowr);
nrt->rt6i_flags |= RTF_CACHE;
nrt->rt6i_tstamp = jiffies;
}
return (struct dst_entry *) nrt;
}
int ip6_fw_reject(struct sk_buff *skb)
{
#if IP6_FW_DEBUG >= 1
printk(KERN_DEBUG "packet rejected: \n");
#endif
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADM_PROHIBITED, 0,
skb->dev);
/*
* send it via netlink, as (rule, skb)
*/
kfree_skb(skb);
return 0;
}
int ip6_fw_discard(struct sk_buff *skb)
{
printk(KERN_DEBUG "ip6_fw: BUG fw_reject called\n");
kfree_skb(skb);
return 0;
}
int ip6_fw_msg_add(struct ip6_fw_msg *msg)
{
struct in6_rtmsg rtmsg;
struct ip6_fw_rule *rl;
struct rt6_info *rt;
int err;
ipv6_addr_copy(&rtmsg.rtmsg_dst, &msg->dst);
ipv6_addr_copy(&rtmsg.rtmsg_src, &msg->src);
rtmsg.rtmsg_dst_len = msg->dst_len;
rtmsg.rtmsg_src_len = msg->src_len;
rtmsg.rtmsg_metric = IP6_RT_PRIO_FW;
rl = ip6_fwrule_alloc();
if (rl == NULL)
return -ENOMEM;
rl->policy = msg->policy;
rl->info.proto = msg->proto;
rl->info.uli_u.data = msg->u.data;
rtmsg.rtmsg_flags = RTF_NONEXTHOP|RTF_POLICY;
err = ip6_route_add(&rtmsg);
if (err) {
ip6_fwrule_free(rl);
return err;
}
/* The rest will not work for now. --ABK (989725) */
#ifndef notdef
ip6_fwrule_free(rl);
return -EPERM;
#else
rt->u.dst.error = -EPERM;
if (msg->policy == IP6_FW_ACCEPT) {
/*
* Accept rules are never selected
* (i.e. packets use normal forwarding)
*/
rt->u.dst.input = ip6_fw_discard;
rt->u.dst.output = ip6_fw_discard;
} else {
rt->u.dst.input = ip6_fw_reject;
rt->u.dst.output = ip6_fw_reject;
}
ip6_rule_add(rl);
rt->rt6i_flowr = flow_clone((struct flow_rule *)rl);
return 0;
#endif
}
static int ip6_fw_msgrcv(int unit, struct sk_buff *skb)
{
int count = 0;
while (skb->len) {
struct ip6_fw_msg *msg;
if (skb->len < sizeof(struct ip6_fw_msg)) {
count = -EINVAL;
break;
}
msg = (struct ip6_fw_msg *) skb->data;
skb_pull(skb, sizeof(struct ip6_fw_msg));
count += sizeof(struct ip6_fw_msg);
switch (msg->action) {
case IP6_FW_MSG_ADD:
ip6_fw_msg_add(msg);
break;
case IP6_FW_MSG_DEL:
break;
default:
return -EINVAL;
};
}
return count;
}
static void ip6_fw_destroy(struct flow_rule *rl)
{
ip6_fwrule_free((struct ip6_fw_rule *)rl);
}
#ifdef MODULE
#define ip6_fw_init module_init
#endif
void __init ip6_fw_init(void)
{
netlink_attach(NETLINK_IP6_FW, ip6_fw_msgrcv);
}
#ifdef MODULE
void cleanup_module(void)
{
netlink_detach(NETLINK_IP6_FW);
}
#endif
......@@ -157,7 +157,7 @@ ip6t_local_hook(unsigned int hook,
hop_limit = (*pskb)->nh.ipv6h->hop_limit;
/* flowlabel and prio (includes version, which shouldn't change either) */
flowlabel = (u_int32_t) (*pskb)->nh.ipv6h;
flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL);
......
......@@ -56,8 +56,6 @@
#include <linux/sysctl.h>
#endif
#undef CONFIG_RT6_POLICY
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2
......@@ -103,16 +101,22 @@ static struct dst_ops ip6_dst_ops = {
};
struct rt6_info ip6_null_entry = {
{{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-ENETUNREACH, NULL, NULL,
ip6_pkt_discard, ip6_pkt_discard,
#ifdef CONFIG_NET_CLS_ROUTE
0,
#endif
&ip6_dst_ops}},
NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
.u = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
.dev = &loopback_dev,
.obsolete = -1,
.error = -ENETUNREACH,
.input = ip6_pkt_discard,
.output = ip6_pkt_discard,
.ops = &ip6_dst_ops
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_metric = ~(u32) 0,
.rt6i_hoplimit = 255,
.rt6i_ref = ATOMIC_INIT(1),
};
struct fib6_node ip6_routing_table = {
......@@ -121,24 +125,6 @@ struct fib6_node ip6_routing_table = {
0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
};
#ifdef CONFIG_RT6_POLICY
int ip6_rt_policy = 0;
struct pol_chain *rt6_pol_list = NULL;
static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
struct in6_addr *daddr,
struct in6_addr *saddr,
struct fl_acc_args *args);
#else
#define ip6_rt_policy (0)
#endif
/* Protects all the ip6 fib */
rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
......@@ -386,38 +372,6 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
return &ip6_null_entry;
}
#ifdef CONFIG_RT6_POLICY
static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
struct sk_buff *skb)
{
struct in6_addr *daddr, *saddr;
struct fl_acc_args arg;
arg.type = FL_ARG_FORWARD;
arg.fl_u.skb = skb;
saddr = &skb->nh.ipv6h->saddr;
daddr = &skb->nh.ipv6h->daddr;
return rt6_flow_lookup(rt, daddr, saddr, &arg);
}
static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
struct sock *sk,
struct flowi *fl)
{
struct fl_acc_args arg;
arg.type = FL_ARG_ORIGIN;
arg.fl_u.fl_o.sk = sk;
arg.fl_u.fl_o.flow = fl;
return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
&arg);
}
#endif
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
while ((fn = fn->parent) != NULL) { \
......@@ -450,32 +404,15 @@ void ip6_route_input(struct sk_buff *skb)
rt = fn->leaf;
if ((rt->rt6i_flags & RTF_CACHE)) {
if (ip6_rt_policy == 0) {
rt = rt6_device_match(rt, skb->dev->ifindex, strict);
BACKTRACK();
dst_clone(&rt->u.dst);
goto out;
}
#ifdef CONFIG_RT6_POLICY
if ((rt->rt6i_flags & RTF_FLOW)) {
struct rt6_info *sprt;
for (sprt = rt; sprt; sprt = sprt->u.next) {
if (rt6_flow_match_in(sprt, skb)) {
rt = sprt;
dst_clone(&rt->u.dst);
goto out;
}
}
}
#endif
}
rt = rt6_device_match(rt, skb->dev->ifindex, 0);
BACKTRACK();
if (ip6_rt_policy == 0) {
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
read_unlock_bh(&rt6_lock);
......@@ -490,13 +427,6 @@ void ip6_route_input(struct sk_buff *skb)
goto relookup;
}
dst_clone(&rt->u.dst);
} else {
#ifdef CONFIG_RT6_POLICY
rt = rt6_flow_lookup_in(rt, skb);
#else
/* NEVER REACHED */
#endif
}
out:
read_unlock_bh(&rt6_lock);
......@@ -525,27 +455,11 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
rt = fn->leaf;
if ((rt->rt6i_flags & RTF_CACHE)) {
if (ip6_rt_policy == 0) {
rt = rt6_device_match(rt, fl->oif, strict);
BACKTRACK();
dst_clone(&rt->u.dst);
goto out;
}
#ifdef CONFIG_RT6_POLICY
if ((rt->rt6i_flags & RTF_FLOW)) {
struct rt6_info *sprt;
for (sprt = rt; sprt; sprt = sprt->u.next) {
if (rt6_flow_match_out(sprt, sk)) {
rt = sprt;
dst_clone(&rt->u.dst);
goto out;
}
}
}
#endif
}
if (rt->rt6i_flags & RTF_DEFAULT) {
if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
rt = rt6_best_dflt(rt, fl->oif);
......@@ -554,7 +468,6 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
BACKTRACK();
}
if (ip6_rt_policy == 0) {
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
read_unlock_bh(&rt6_lock);
......@@ -570,13 +483,6 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
goto relookup;
}
dst_clone(&rt->u.dst);
} else {
#ifdef CONFIG_RT6_POLICY
rt = rt6_flow_lookup_out(rt, sk, fl);
#else
/* NEVER REACHED */
#endif
}
out:
read_unlock_bh(&rt6_lock);
......@@ -1304,121 +1210,6 @@ int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
return err;
}
#ifdef CONFIG_RT6_POLICY
static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
{
struct flow_filter *frule;
struct pkt_filter *filter;
int res = 1;
if ((frule = rt->rt6i_filter) == NULL)
goto out;
if (frule->type != FLR_INPUT) {
res = 0;
goto out;
}
for (filter = frule->u.filter; filter; filter = filter->next) {
__u32 *word;
word = (__u32 *) skb->h.raw;
word += filter->offset;
if ((*word ^ filter->value) & filter->mask) {
res = 0;
break;
}
}
out:
return res;
}
static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
{
struct flow_filter *frule;
int res = 1;
if ((frule = rt->rt6i_filter) == NULL)
goto out;
if (frule->type != FLR_INPUT) {
res = 0;
goto out;
}
if (frule->u.sk != sk)
res = 0;
out:
return res;
}
static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
struct in6_addr *daddr,
struct in6_addr *saddr,
struct fl_acc_args *args)
{
struct flow_rule *frule;
struct rt6_info *nrt = NULL;
struct pol_chain *pol;
for (pol = rt6_pol_list; pol; pol = pol->next) {
struct fib6_node *fn;
struct rt6_info *sprt;
fn = fib6_lookup(pol->rules, daddr, saddr);
do {
for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
int res;
frule = sprt->rt6i_flowr;
#if RT6_DEBUG >= 2
if (frule == NULL) {
printk(KERN_DEBUG "NULL flowr\n");
goto error;
}
#endif
res = frule->ops->accept(rt, sprt, args, &nrt);
switch (res) {
case FLOWR_SELECT:
goto found;
case FLOWR_CLEAR:
goto next_policy;
case FLOWR_NODECISION:
break;
default:
goto error;
};
}
fn = fn->parent;
} while ((fn->fn_flags & RTN_TL_ROOT) == 0);
next_policy:
}
error:
dst_clone(&ip6_null_entry.u.dst);
return &ip6_null_entry;
found:
if (nrt == NULL)
goto error;
nrt->rt6i_flags |= RTF_CACHE;
dst_clone(&nrt->u.dst);
err = rt6_ins(nrt);
if (err)
nrt->u.dst.error = err;
return nrt;
}
#endif
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
......
......@@ -502,10 +502,17 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
dst = addr6->s6_addr32[3];
}
if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
{
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = dst,
.saddr = tiph->saddr,
.tos = RT_TOS(tos) } },
.oif = tunnel->parms.link };
if (ip_route_output_key(&rt, &fl)) {
tunnel->stat.tx_carrier_errors++;
goto tx_error_icmp;
}
}
if (rt->rt_type != RTN_UNICAST) {
tunnel->stat.tx_carrier_errors++;
goto tx_error_icmp;
......@@ -777,8 +784,13 @@ static int ipip6_tunnel_init(struct net_device *dev)
ipip6_tunnel_init_gen(dev);
if (iph->daddr) {
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = iph->daddr,
.saddr = iph->saddr,
.tos = RT_TOS(iph->tos) } },
.oif = tunnel->parms.link };
struct rtable *rt;
if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
if (!ip_route_output_key(&rt, &fl)) {
tdev = rt->u.dst.dev;
ip_rt_put(rt);
}
......
......@@ -154,7 +154,7 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
if (head == NULL)
goto old_method;
iif = ((struct rtable*)dst)->key.iif;
iif = ((struct rtable*)dst)->fl.iif;
h = route4_fastmap_hash(id, iif);
if (id == head->fastmap[h].id &&
......
......@@ -260,16 +260,10 @@ int sctp_v4_get_dst_mtu(const sockaddr_storage_t *address)
{
int dst_mtu = SCTP_DEFAULT_MAXSEGMENT;
struct rtable *rt;
struct rt_key key = {
.dst = address->v4.sin_addr.s_addr,
.src = 0,
.iif = 0,
.oif = 0,
.tos = 0,
.scope = 0
};
if (ip_route_output_key(&rt, &key)) {
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = address->v4.sin_addr.s_addr } } };
if (ip_route_output_key(&rt, &fl)) {
SCTP_DEBUG_PRINTK("sctp_v4_get_dst_mtu:ip_route_output_key"
" failed, returning %d as dst_mtu\n",
dst_mtu);
......