Commit 41384136 authored by David S. Miller

Merge branch 'connection-tracking-support-for-bridge'

Pablo Neira Ayuso says:

====================
connection tracking support for bridge

This patchset adds native connection tracking support for the bridge.

Patch #1 and #2 extract code from IPv4/IPv6 fragmentation core and
introduce the fraglist splitter. That splits a skbuff fraglist into
independent fragments.

Patch #3 and #4 also extract code from IPv4/IPv6 fragmentation core
and introduce the skbuff into fragments transformer. This can be used
by linearized skbuffs (e.g. coming from nfqueue and ct helpers) as well
as cloned skbuffs (that are seen either with taps or with bridge
port flooding).

Patch #5 moves the specific IPCB() code from these new fragment
splitter/transformer APIs into the IPv4 stack. The bridge has a
different control buffer layout and it starts using these new APIs in
this patchset.

Patch #6 adds basic infrastructure that allows registering bridge
conntrack support.

Patch #7 adds bridge conntrack support (only for IPv4 in this patch).

Patch #8 adds IPv6 support for the bridge conntrack support.

Patch #9 registers the IPv4/IPv6 conntrack hooks in case the bridge
conntrack is used to deal with local traffic, i.e. prerouting -> input
bridge hook path. This covers the scenario where the bridge interface
has an IP address.

Before this patchset, the only chance for people to do stateful filtering
is to use the `br_netfilter` emulation layer, which turns bridge frames
into IPv4/IPv6 packets and injects them into the IPv4/IPv6 hooks.
Apparently, this module allows users to use iptables and all of its
feature-set from the bridge, including stateful filtering. However, this
approach is flawed in many aspects that have been discussed many times.
This is a step forward to deprecate `br_netfilter`.

v2: Fix English typo in commit message.
v3: Fix another English typo in commit message.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d48ecb40 af9573be
...@@ -19,6 +19,7 @@ struct ip6_rt_info { ...@@ -19,6 +19,7 @@ struct ip6_rt_info {
}; };
struct nf_queue_entry; struct nf_queue_entry;
struct nf_ct_bridge_frag_data;
/* /*
* Hook functions for ipv6 to allow xt_* modules to be built-in even * Hook functions for ipv6 to allow xt_* modules to be built-in even
...@@ -39,6 +40,15 @@ struct nf_ipv6_ops { ...@@ -39,6 +40,15 @@ struct nf_ipv6_ops {
int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *)); int (*output)(struct net *, struct sock *, struct sk_buff *));
int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
#if IS_MODULE(CONFIG_IPV6)
int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user);
int (*br_fragment)(struct net *net, struct sock *sk,
struct sk_buff *skb,
struct nf_ct_bridge_frag_data *data,
int (*output)(struct net *, struct sock *sk,
const struct nf_ct_bridge_frag_data *data,
struct sk_buff *));
#endif
}; };
#ifdef CONFIG_NETFILTER #ifdef CONFIG_NETFILTER
...@@ -86,6 +96,46 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, ...@@ -86,6 +96,46 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
#endif #endif
} }
/*
 * Reassemble IPv6 fragments on behalf of the bridge conntrack code.
 *
 * - IPv6 built as a module: dispatch through the nf_ipv6_ops table; return 1
 *   when the table is not registered.
 * - IPv6 built in: call nf_ct_frag6_gather() directly.
 * - IPv6 disabled: nf_ct_frag6_gather() does not exist, so return 1, the same
 *   value used for the "no ops registered" case, instead of referencing an
 *   undefined symbol.
 */
static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb,
				    u32 user)
{
#if IS_MODULE(CONFIG_IPV6)
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return 1;

	return v6_ops->br_defrag(net, skb, user);
#elif IS_BUILTIN(CONFIG_IPV6)
	return nf_ct_frag6_gather(net, skb, user);
#else
	return 1;
#endif
}
int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct nf_ct_bridge_frag_data *data,
int (*output)(struct net *, struct sock *sk,
const struct nf_ct_bridge_frag_data *data,
struct sk_buff *));
/*
 * Refragment an IPv6 skb for the bridge, handing each fragment to @output
 * together with the saved link-layer @data.
 *
 * - IPv6 built as a module: dispatch through the nf_ipv6_ops table; return 1
 *   when the table is not registered.
 * - IPv6 built in: call br_ip6_fragment() directly.
 * - IPv6 disabled: br_ip6_fragment() is not built, so return 1, the same
 *   value used for the "no ops registered" case, instead of referencing an
 *   undefined symbol.
 */
static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
			struct sk_buff *skb,
			struct nf_ct_bridge_frag_data *data,
			int (*output)(struct net *, struct sock *sk,
				      const struct nf_ct_bridge_frag_data *data,
				      struct sk_buff *))
{
#if IS_MODULE(CONFIG_IPV6)
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return 1;

	return v6_ops->br_fragment(net, sk, skb, data, output);
#elif IS_BUILTIN(CONFIG_IPV6)
	return br_ip6_fragment(net, sk, skb, data, output);
#else
	return 1;
#endif
}
int ip6_route_me_harder(struct net *net, struct sk_buff *skb); int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
......
...@@ -165,6 +165,45 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); ...@@ -165,6 +165,45 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *)); int (*output)(struct net *, struct sock *, struct sk_buff *));
/* Cursor used while sending an skb's frag list as individual IPv4 fragments. */
struct ip_fraglist_iter {
/* Head of the frag list as found on the skb by ip_fraglist_init(). */
struct sk_buff *frag_list;
/* Next fragment to hand out; ip_fraglist_next() advances it. */
struct sk_buff *frag;
/* Header copied into the next fragment: starts as the header passed to
 * ip_fraglist_init() and is repointed at each fragment's own header by
 * ip_fraglist_prepare().
 */
struct iphdr *iph;
/* Running payload byte offset, accumulated in ip_fraglist_prepare(). */
int offset;
/* IP header length in bytes, as passed to ip_fraglist_init(). */
unsigned int hlen;
};
void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
unsigned int hlen, struct ip_fraglist_iter *iter);
void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter);
/* Hand out the fragment under the iterator's cursor and move the cursor to
 * its successor. iter->frag is dereferenced unconditionally, so callers must
 * only call this while iter->frag is non-NULL.
 */
static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter)
{
	struct sk_buff *cur = iter->frag;
	struct sk_buff *following = cur->next;

	iter->frag = following;
	skb_mark_not_on_list(cur);

	return cur;
}
/* Progress state for slicing one IPv4 skb into newly allocated fragments
 * (see ip_frag_init() / ip_frag_next()).
 */
struct ip_frag_state {
/* NOTE(review): ip_frag_init() does not assign this field; do not rely on
 * it without initializing it first - TODO confirm against all users.
 */
struct iphdr *iph;
/* IP header length in bytes, copied into every fragment. */
unsigned int hlen;
/* Headroom reserved in front of each fragment by ip_frag_next(). */
unsigned int ll_rs;
/* Upper bound on the payload bytes carried by one fragment. */
unsigned int mtu;
/* Payload bytes of the source skb not yet emitted. */
unsigned int left;
/* Byte offset of the next fragment within the original datagram. */
int offset;
/* Read position inside the source skb for the next copy. */
int ptr;
/* IP_MF bit of the source header; when set, every fragment keeps MF. */
__be16 not_last_frag;
};
void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs,
unsigned int mtu, struct ip_frag_state *state);
struct sk_buff *ip_frag_next(struct sk_buff *skb,
struct ip_frag_state *state);
void ip_send_check(struct iphdr *ip); void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
......
...@@ -154,6 +154,50 @@ struct frag_hdr { ...@@ -154,6 +154,50 @@ struct frag_hdr {
#define IP6_MF 0x0001 #define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8 #define IP6_OFFSET 0xFFF8
/* Cursor used while sending an skb's frag list as individual IPv6 fragments. */
struct ip6_fraglist_iter {
/* kmemdup() copy of the first hlen bytes of the network header, made by
 * ip6_fraglist_init() and copied in front of every fragment. Its release is
 * not visible here - presumably the caller frees it, TODO confirm.
 */
struct ipv6hdr *tmp_hdr;
/* Head of the frag list as found on the skb by ip6_fraglist_init(). */
struct sk_buff *frag_list;
/* Next fragment to hand out; ip6_fraglist_next() advances it. */
struct sk_buff *frag;
/* Running payload byte offset, accumulated in ip6_fraglist_prepare(). */
int offset;
/* Length in bytes of the header block copied into each fragment. */
unsigned int hlen;
/* Identification value written into every fragment header. */
__be32 frag_id;
/* Next-header value written into every fragment header. */
u8 nexthdr;
};
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
u8 nexthdr, __be32 frag_id,
struct ip6_fraglist_iter *iter);
void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter);
/* Hand out the fragment under the iterator's cursor and move the cursor to
 * its successor. iter->frag is dereferenced unconditionally, so callers must
 * only call this while iter->frag is non-NULL.
 */
static inline struct sk_buff *ip6_fraglist_next(struct ip6_fraglist_iter *iter)
{
	struct sk_buff *cur = iter->frag;
	struct sk_buff *following = cur->next;

	iter->frag = following;
	skb_mark_not_on_list(cur);

	return cur;
}
/* Progress state for slicing one IPv6 skb into newly allocated fragments
 * (see ip6_frag_init() / ip6_frag_next()).
 */
struct ip6_frag_state {
/* Pointer to a next-header byte in the source packet, as passed to
 * ip6_frag_init().
 */
u8 *prevhdr;
/* Length in bytes of the header block preceding the payload. */
unsigned int hlen;
/* Upper bound on the payload bytes carried by one fragment. */
unsigned int mtu;
/* Payload bytes of the source skb not yet emitted. */
unsigned int left;
/* Byte offset of the next fragment; ip6_frag_init() starts it at 0. */
int offset;
/* Read position inside the source skb; ip6_frag_init() starts it at hlen. */
int ptr;
/* Headroom reserved in each new fragment by ip6_frag_next(). */
int hroom;
/* Extra tailroom included in each fragment's allocation. */
int troom;
/* Identification value for the fragment headers. */
__be32 frag_id;
/* Next-header value for the fragment headers. */
u8 nexthdr;
};
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state);
struct sk_buff *ip6_frag_next(struct sk_buff *skb,
struct ip6_frag_state *state);
#define IP6_REPLY_MARK(net, mark) \ #define IP6_REPLY_MARK(net, mark) \
((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
......
...@@ -49,6 +49,7 @@ union nf_conntrack_expect_proto { ...@@ -49,6 +49,7 @@ union nf_conntrack_expect_proto {
struct nf_conntrack_net { struct nf_conntrack_net {
unsigned int users4; unsigned int users4;
unsigned int users6; unsigned int users6;
unsigned int users_bridge;
}; };
#include <linux/types.h> #include <linux/types.h>
......
#ifndef NF_CONNTRACK_BRIDGE_
#define NF_CONNTRACK_BRIDGE_
/* Descriptor handed to nf_ct_bridge_register()/nf_ct_bridge_unregister(). */
struct nf_ct_bridge_info {
/* Array of netfilter hook operations provided by the bridge conntrack. */
struct nf_hook_ops *ops;
/* Number of entries in @ops. */
unsigned int ops_size;
/* Module that owns the hooks. */
struct module *me;
};
void nf_ct_bridge_register(struct nf_ct_bridge_info *info);
void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info);
/* Link-layer information saved before refragmentation so that it can be put
 * back on every fragment that is sent out.
 */
struct nf_ct_bridge_frag_data {
/* Copy of the ETH_HLEN bytes located immediately before the skb data. */
char mac[ETH_HLEN];
/* True when the skb carried a VLAN tag; the two fields below are only
 * filled in when this is set.
 */
bool vlan_present;
u16 vlan_tci;
__be16 vlan_proto;
};
#endif
...@@ -64,6 +64,9 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) ...@@ -64,6 +64,9 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
return ret; return ret;
} }
unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo);
void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *proto); const struct nf_conntrack_l4proto *proto);
......
...@@ -56,6 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -56,6 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br_switchdev_frame_unmark(skb); br_switchdev_frame_unmark(skb);
BR_INPUT_SKB_CB(skb)->brdev = dev; BR_INPUT_SKB_CB(skb)->brdev = dev;
BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
eth = eth_hdr(skb); eth = eth_hdr(skb);
......
...@@ -425,6 +425,7 @@ struct net_bridge { ...@@ -425,6 +425,7 @@ struct net_bridge {
struct br_input_skb_cb { struct br_input_skb_cb {
struct net_device *brdev; struct net_device *brdev;
u16 frag_max_size;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
u8 igmp; u8 igmp;
u8 mrouters_only:1; u8 mrouters_only:1;
......
...@@ -19,6 +19,20 @@ config NF_LOG_BRIDGE ...@@ -19,6 +19,20 @@ config NF_LOG_BRIDGE
tristate "Bridge packet logging" tristate "Bridge packet logging"
select NF_LOG_COMMON select NF_LOG_COMMON
config NF_CONNTRACK_BRIDGE
tristate "IPv4/IPV6 bridge connection tracking support"
depends on NF_CONNTRACK
default n
help
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
into connections. This is used to enhance packet filtering via
stateful policies. Enable this if you want native tracking from
the bridge. This provides a replacement for the `br_netfilter'
infrastructure.
To compile it as a module, choose M here. If unsure, say N.
endif # NF_TABLES_BRIDGE endif # NF_TABLES_BRIDGE
menuconfig BRIDGE_NF_EBTABLES menuconfig BRIDGE_NF_EBTABLES
......
...@@ -5,6 +5,9 @@ ...@@ -5,6 +5,9 @@
obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
# packet logging # packet logging
obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
......
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/route.h>
#include <net/ip.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_bridge.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_tables.h>
#include "../br_private.h"
/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
 * has been linearized or cloned.
 *
 * @data carries the saved link-layer information and is passed through to
 * @output untouched. @output is called once per fragment and is assumed to
 * consume the skb it is handed (nf_ct_bridge_refrag_post() frees it on
 * failure and otherwise passes it on for transmission).
 *
 * Fast path (skb has an intact frag list): the existing fragments are sent
 * as they are. If @output fails, the fragments that were not sent yet are
 * freed and the error is returned.
 *
 * Slow path (linearized or cloned skb): the payload is copied into freshly
 * allocated fragments of at most frag_max_size bytes including the IP header.
 *
 * Any failure routed through the blackhole label frees @skb and reports 0.
 */
static int nf_br_ip_fragment(struct net *net, struct sock *sk,
			     struct sk_buff *skb,
			     struct nf_ct_bridge_frag_data *data,
			     int (*output)(struct net *, struct sock *sk,
					   const struct nf_ct_bridge_frag_data *data,
					   struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	unsigned int hlen, ll_rs, mtu;
	struct ip_frag_state state;
	struct iphdr *iph;
	/* Must start at 0: the slow path returns it even when the loop below
	 * never runs (no payload left to copy).
	 */
	int err = 0;

	/* for offloaded checksums cleanup checksum before fragmentation */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	iph = ip_hdr(skb);

	/*
	 *	Setup starting values
	 */

	hlen = iph->ihl * 4;
	frag_max_size -= hlen;
	ll_rs = LL_RESERVED_SPACE(skb->dev);
	mtu = skb->dev->mtu;

	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip_fraglist_iter iter;
		struct sk_buff *frag;

		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < ll_rs)
			goto blackhole;

		if (skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			if (frag->len > mtu ||
			    skb_headroom(frag) < hlen + ll_rs)
				goto blackhole;

			if (skb_shared(frag))
				goto slow_path;
		}

		ip_fraglist_init(skb, iph, hlen, &iter);

		for (;;) {
			/* Prepare the next fragment's header before the
			 * current one is handed off.
			 */
			if (iter.frag)
				ip_fraglist_prepare(skb, &iter);

			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			skb = ip_fraglist_next(&iter);
		}

		if (!err)
			return 0;

		/* The failed skb belongs to @output; the fragments still
		 * queued behind it were detached from the head skb by
		 * ip_fraglist_init() and would otherwise be leaked.
		 */
		kfree_skb_list(iter.frag);

		return err;
	}
slow_path:
	/* This is a linearized skbuff, the original geometry is lost for us.
	 * This may also be a clone skbuff, we could preserve the geometry for
	 * the copies but probably not worth the effort.
	 */
	ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);

	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	consume_skb(skb);
	return err;

blackhole:
	kfree_skb(skb);
	return 0;
}
/* ip_defrag() expects IPCB() in place.
 *
 * Stash the bridge control block found in skb->cb into @cb, then zero the
 * first @inet_skb_parm_size bytes of skb->cb.
 */
static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
			   size_t inet_skb_parm_size)
{
	void *area = skb->cb;

	memcpy(cb, area, sizeof(struct br_input_skb_cb));
	memset(area, 0, inet_skb_parm_size);
}
/* Put the bridge control block saved in @cb back into skb->cb and record
 * @fragsz as its frag_max_size.
 */
static void br_skb_cb_restore(struct sk_buff *skb,
			      const struct br_input_skb_cb *cb,
			      u16 fragsz)
{
	struct br_input_skb_cb *br_cb;

	memcpy(skb->cb, cb, sizeof(struct br_input_skb_cb));

	br_cb = BR_INPUT_SKB_CB(skb);
	br_cb->frag_max_size = fragsz;
}
static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
const struct nf_hook_state *state)
{
u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
enum ip_conntrack_info ctinfo;
struct br_input_skb_cb cb;
const struct nf_conn *ct;
int err;
if (!ip_is_fragment(ip_hdr(skb)))
return NF_ACCEPT;
ct = nf_ct_get(skb, &ctinfo);
if (ct)
zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
local_bh_disable();
err = ip_defrag(state->net, skb,
IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
local_bh_enable();
if (!err) {
br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
skb->ignore_df = 1;
return NF_ACCEPT;
}
return NF_STOLEN;
}
static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
const struct nf_hook_state *state)
{
u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
enum ip_conntrack_info ctinfo;
struct br_input_skb_cb cb;
const struct nf_conn *ct;
int err;
ct = nf_ct_get(skb, &ctinfo);
if (ct)
zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
err = nf_ipv6_br_defrag(state->net, skb,
IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
/* queued */
if (err == -EINPROGRESS)
return NF_STOLEN;
br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
return err == 0 ? NF_ACCEPT : NF_DROP;
}
/* Sanity-check the IPv4 header of @skb.
 *
 * Returns 0 when the version is 4, the header length is at least 5 words,
 * the total length covers the header and the skb holds at least the
 * network offset plus the total length; returns -1 otherwise.
 */
static int nf_ct_br_ip_check(const struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	int nhoff = skb_network_offset(skb);
	int len;

	if (iph->version != 4 || iph->ihl < 5)
		return -1;

	len = ntohs(iph->tot_len);
	if (len < (iph->ihl * 4))
		return -1;
	if (skb->len < nhoff + len)
		return -1;

	return 0;
}
/* Sanity-check the IPv6 header of @skb.
 *
 * Returns 0 when the version is 6 and the skb holds at least the network
 * offset, the fixed IPv6 header and the advertised payload length; returns
 * -1 otherwise.
 */
static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
{
	const struct ipv6hdr *hdr = ipv6_hdr(skb);
	int nhoff = skb_network_offset(skb);
	int len;

	if (hdr->version != 6)
		return -1;

	len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;

	return (skb->len < len) ? -1 : 0;
}
/* Bridge PRE_ROUTING hook: validate, defragment and track IPv4/IPv6 frames.
 *
 * Frames that already carry a non-template conntrack entry, or are marked
 * untracked, are accepted as they are. Frames of other protocols are marked
 * untracked and accepted. Frames whose header cannot be pulled, trimmed or
 * validated are accepted without tracking. Otherwise the frame goes through
 * the matching defrag helper and, if that accepts it, through
 * nf_conntrack_in() using a copy of the hook state whose pf is set to the
 * L3 protocol family.
 */
static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_hook_state bridge_state = *state;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u32 len;
	int verdict;

	ct = nf_ct_get(skb, &ctinfo);
	if (ctinfo == IP_CT_UNTRACKED || (ct && !nf_ct_is_template(ct)))
		return NF_ACCEPT;

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NF_ACCEPT;

		/* Drop anything trailing the IP datagram. */
		len = ntohs(ip_hdr(skb)->tot_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ip_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV4;
		verdict = nf_ct_br_defrag4(skb, &bridge_state);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NF_ACCEPT;

		/* Drop anything trailing the IPv6 datagram. */
		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ipv6_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV6;
		verdict = nf_ct_br_defrag6(skb, &bridge_state);
	} else {
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	if (verdict != NF_ACCEPT)
		return verdict;

	return nf_conntrack_in(skb, &bridge_state);
}
/* Record the link-layer information of @skb in @data: the VLAN tag (when one
 * is present) and the ETH_HLEN bytes located right before the skb data.
 * vlan_tci and vlan_proto are left untouched when no tag is present.
 */
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
				   struct nf_ct_bridge_frag_data *data)
{
	data->vlan_present = false;

	if (skb_vlan_tag_present(skb)) {
		data->vlan_present = true;
		data->vlan_tci = skb->vlan_tci;
		data->vlan_proto = skb->vlan_proto;
	}

	skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
}
/* Refragment a previously reassembled frame before it leaves the bridge.
 *
 * A frag_max_size of 0 in the bridge control block means there is nothing to
 * refragment and the frame is accepted. Otherwise the link-layer data is
 * saved and the frame is handed to the IPv4 or IPv6 fragmenter, whose return
 * value is ignored; the hook then reports NF_STOLEN. Any other protocol
 * triggers a one-time warning and NF_DROP.
 */
static unsigned int
nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_ct_bridge_frag_data *data,
				  struct sk_buff *))
{
	struct nf_ct_bridge_frag_data data;

	if (BR_INPUT_SKB_CB(skb)->frag_max_size == 0)
		return NF_ACCEPT;

	nf_ct_bridge_frag_save(skb, &data);

	if (skb->protocol == htons(ETH_P_IP)) {
		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
	} else {
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return NF_STOLEN;
}
/* Actually only slow path refragmentation needs this.
 *
 * Put the saved link-layer information back onto @skb: make sure ETH_HLEN
 * bytes of writable headroom exist, re-apply the VLAN tag if one was saved,
 * copy the saved bytes in front of the data and reset the MAC header.
 *
 * Returns 0 on success. If the headroom cannot be obtained, @skb is freed
 * and -ENOMEM is returned.
 */
static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
				     const struct nf_ct_bridge_frag_data *data)
{
	if (skb_cow_head(skb, ETH_HLEN)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	if (data->vlan_present)
		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);

	skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
	skb_reset_mac_header(skb);

	return 0;
}
/* Per-fragment output callback: restore the link-layer data on @skb and pass
 * it to br_dev_queue_push_xmit(). A negative result from the restore step is
 * returned as is (the skb has already been freed in that case).
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_ct_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int rc = nf_ct_bridge_frag_restore(skb, data);

	if (rc < 0)
		return rc;

	return br_dev_queue_push_xmit(net, sk, skb);
}
/* Confirm the conntrack entry attached to @skb.
 *
 * Without an entry, or for IP_CT_RELATED_REPLY, this goes straight to
 * nf_conntrack_confirm(). Otherwise the transport header offset is computed
 * for IPv4 or IPv6 and nf_confirm() is called with it. For IPv6,
 * nf_conntrack_confirm() is used instead when the extension headers cannot
 * be skipped or the packet is a non-first fragment. Other protocols are
 * accepted untouched.
 */
static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int protoff;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		return nf_conntrack_confirm(skb);

	if (skb->protocol == htons(ETH_P_IP)) {
		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		unsigned char pnum = ipv6_hdr(skb)->nexthdr;
		__be16 frag_off;

		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
					   &frag_off);
		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
			return nf_conntrack_confirm(skb);
	} else {
		return NF_ACCEPT;
	}

	return nf_confirm(skb, protoff, ct, ctinfo);
}
/* Bridge POST_ROUTING hook: confirm the conntrack entry and, if that yields
 * NF_ACCEPT, refragment the frame using nf_ct_bridge_refrag_post() as the
 * per-fragment output callback. Any other confirm verdict is returned as is.
 */
static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	int verdict = nf_ct_bridge_confirm(skb);

	if (verdict == NF_ACCEPT)
		return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);

	return verdict;
}
/* Netfilter hooks installed on the bridge family by this module. */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
/* Incoming frames: defragment and run connection tracking. */
{
.hook = nf_ct_bridge_pre,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
/* Outgoing frames: confirm the conntrack entry and refragment. */
{
.hook = nf_ct_bridge_post,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
};
/* Registration record passed to nf_ct_bridge_register()/_unregister(): the
 * hook array above, its element count and the owning module.
 */
static struct nf_ct_bridge_info bridge_info = {
.ops = nf_ct_bridge_hook_ops,
.ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops),
.me = THIS_MODULE,
};
/* Module entry point: register the bridge conntrack hooks. Always returns 0
 * because nf_ct_bridge_register() reports no status.
 */
static int __init nf_conntrack_l3proto_bridge_init(void)
{
nf_ct_bridge_register(&bridge_info);
return 0;
}
/* Module exit point: unregister the bridge conntrack hooks. */
static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
nf_ct_bridge_unregister(&bridge_info);
}
module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");
...@@ -525,9 +525,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) ...@@ -525,9 +525,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_hash(to, from); skb_copy_hash(to, from);
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
#ifdef CONFIG_NET_SCHED #ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index; to->tc_index = from->tc_index;
#endif #endif
...@@ -561,6 +558,176 @@ static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -561,6 +558,176 @@ static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return ip_do_fragment(net, sk, skb, output); return ip_do_fragment(net, sk, skb, output);
} }
/*
 * Start sending @skb's frag list as individual IPv4 fragments.
 *
 * Records the frag list in @iter and detaches it from @skb, then turns @skb
 * itself into the first fragment: its lengths are reduced to first_len
 * (skb_pagelen() taken before the list is detached) and @iph gets the new
 * total length, a frag_off of exactly IP_MF (offset 0, more fragments) and a
 * recomputed checksum.
 *
 * @iph:  IP header of @skb, modified in place.
 * @hlen: IP header length in bytes, remembered for the later fragments.
 */
void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
unsigned int hlen, struct ip_fraglist_iter *iter)
{
unsigned int first_len = skb_pagelen(skb);
iter->frag_list = skb_shinfo(skb)->frag_list;
iter->frag = iter->frag_list;
skb_frag_list_init(skb);
iter->offset = 0;
iter->iph = iph;
iter->hlen = hlen;
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
iph->tot_len = htons(first_len);
iph->frag_off = htons(IP_MF);
ip_send_check(iph);
}
EXPORT_SYMBOL(ip_fraglist_init);
/* IPCB()-specific part of preparing the next frag-list fragment: propagate
 * the IPCB flags of @skb to iter->frag and, while the running offset is
 * still 0, run ip_options_fragment() on that fragment.
 */
static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
				     struct ip_fraglist_iter *iter)
{
	struct sk_buff *next_frag = iter->frag;

	/* Copy the flags to each fragment. */
	IPCB(next_frag)->flags = IPCB(skb)->flags;

	if (iter->offset != 0)
		return;

	ip_options_fragment(next_frag);
}
/*
 * Prepare iter->frag, the fragment that follows @skb, for transmission.
 *
 * Room for the IP header is pushed in front of the fragment and the header
 * tracked in @iter is copied into it; @iter then tracks the fragment's own
 * header. The running offset is advanced by the payload length of @skb, the
 * fragment's tot_len and frag_off are filled in (IP_MF stays set while more
 * fragments follow) and the header checksum is recomputed.
 */
void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
{
unsigned int hlen = iter->hlen;
struct iphdr *iph = iter->iph;
struct sk_buff *frag;
frag = iter->frag;
frag->ip_summed = CHECKSUM_NONE;
skb_reset_transport_header(frag);
__skb_push(frag, hlen);
skb_reset_network_header(frag);
/* Clone the previous fragment's header as the template for this one. */
memcpy(skb_network_header(frag), iph, hlen);
iter->iph = ip_hdr(frag);
iph = iter->iph;
iph->tot_len = htons(frag->len);
ip_copy_metadata(frag, skb);
/* @skb is the fragment sent just before this one: skip over its payload. */
iter->offset += skb->len - hlen;
iph->frag_off = htons(iter->offset >> 3);
if (frag->next)
iph->frag_off |= htons(IP_MF);
/* Ready, complete checksum */
ip_send_check(iph);
}
EXPORT_SYMBOL(ip_fraglist_prepare);
void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
unsigned int ll_rs, unsigned int mtu,
struct ip_frag_state *state)
{
struct iphdr *iph = ip_hdr(skb);
state->hlen = hlen;
state->ll_rs = ll_rs;
state->mtu = mtu;
state->left = skb->len - hlen; /* Space per frame */
state->ptr = hlen; /* Where to start from */
state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
state->not_last_frag = iph->frag_off & htons(IP_MF);
}
EXPORT_SYMBOL(ip_frag_init);
/*
 * IPCB()-specific fix-ups for a fragment produced by ip_frag_next().
 *
 * @from:       the skb being fragmented.
 * @to:         the fragment just returned by ip_frag_next().
 * @first_frag: true when @to is the first fragment produced for @from.
 * @state:      fragmentation state; kept for interface compatibility, not
 *              used here.
 *
 * The DF bit is set on @to's own header. state->iph must not be used for
 * that: ip_frag_init() never assigns it, so it is an uninitialized pointer.
 * ip_frag_next() has already computed the header checksum by the time this
 * runs, so the checksum is recomputed after frag_off is changed.
 */
static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
			 bool first_frag, struct ip_frag_state *state)
{
	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

	if (IPCB(from)->flags & IPSKB_FRAG_PMTU) {
		struct iphdr *iph = ip_hdr(to);

		iph->frag_off |= htons(IP_DF);
		ip_send_check(iph);
	}

	/* ANK: dirty, but effective trick. Upgrade options only if
	 * the segment to be fragmented was THE FIRST (otherwise,
	 * options are already fixed) and make it ONCE
	 * on the initial skb, so that all the following fragments
	 * will inherit fixed options.
	 */
	if (first_frag)
		ip_options_fragment(from);
}
/*
 * Allocate and fill the next fragment of @skb according to @state.
 *
 * The fragment carries at most state->mtu payload bytes; unless it reaches
 * the end of the packet, the payload length is rounded down to a multiple of
 * eight. The IP header of @skb is copied in front of the payload, frag_off,
 * tot_len and the checksum are filled in, and @state is advanced.
 *
 * Returns the new fragment, or ERR_PTR(-ENOMEM) if allocation fails.
 */
struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
{
	unsigned int chunk = state->left;
	struct sk_buff *frag;
	struct iphdr *hdr;

	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (chunk > state->mtu)
		chunk = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (chunk < state->left)
		chunk &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(chunk + state->hlen + state->ll_rs, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/* Set up data on packet */
	ip_copy_metadata(frag, skb);
	skb_reserve(frag, state->ll_rs);
	skb_put(frag, chunk + state->hlen);
	skb_reset_network_header(frag);
	frag->transport_header = frag->network_header + state->hlen;

	/* Charge the memory for the fragment to any owner it might possess */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/* Copy the packet header into the new buffer. */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Copy a block of the IP datagram. */
	if (skb_copy_bits(skb, state->ptr, skb_transport_header(frag), chunk))
		BUG();
	state->left -= chunk;

	/* Fill in the new header fields. */
	hdr = ip_hdr(frag);
	hdr->frag_off = htons((state->offset >> 3));

	/* Added AC : If we are fragmenting a fragment that's not the
	 * last fragment then keep MF on each bit
	 */
	if (state->left > 0 || state->not_last_frag)
		hdr->frag_off |= htons(IP_MF);

	state->ptr += chunk;
	state->offset += chunk;

	hdr->tot_len = htons(chunk + state->hlen);

	ip_send_check(hdr);

	return frag;
}
EXPORT_SYMBOL(ip_frag_next);
/* /*
* This IP datagram is too large to be sent in one piece. Break it up into * This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus * smaller pieces (each of size equal to IP header plus
...@@ -572,12 +739,11 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -572,12 +739,11 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *)) int (*output)(struct net *, struct sock *, struct sk_buff *))
{ {
struct iphdr *iph; struct iphdr *iph;
int ptr;
struct sk_buff *skb2; struct sk_buff *skb2;
unsigned int mtu, hlen, left, len, ll_rs;
int offset;
__be16 not_last_frag;
struct rtable *rt = skb_rtable(skb); struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
struct ip_frag_state state;
int err = 0; int err = 0;
/* for offloaded checksums cleanup checksum before fragmentation */ /* for offloaded checksums cleanup checksum before fragmentation */
...@@ -642,49 +808,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -642,49 +808,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
} }
/* Everything is OK. Generate! */ /* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
err = 0;
offset = 0;
frag = skb_shinfo(skb)->frag_list;
skb_frag_list_init(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
iph->tot_len = htons(first_len);
iph->frag_off = htons(IP_MF);
ip_send_check(iph);
for (;;) { for (;;) {
/* Prepare header of the next frame, /* Prepare header of the next frame,
* before previous one went down. */ * before previous one went down. */
if (frag) { if (iter.frag) {
frag->ip_summed = CHECKSUM_NONE; ip_fraglist_ipcb_prepare(skb, &iter);
skb_reset_transport_header(frag); ip_fraglist_prepare(skb, &iter);
__skb_push(frag, hlen);
skb_reset_network_header(frag);
memcpy(skb_network_header(frag), iph, hlen);
iph = ip_hdr(frag);
iph->tot_len = htons(frag->len);
ip_copy_metadata(frag, skb);
if (offset == 0)
ip_options_fragment(frag);
offset += skb->len - hlen;
iph->frag_off = htons(offset>>3);
if (frag->next)
iph->frag_off |= htons(IP_MF);
/* Ready, complete checksum */
ip_send_check(iph);
} }
err = output(net, sk, skb); err = output(net, sk, skb);
if (!err) if (!err)
IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
if (err || !frag) if (err || !iter.frag)
break; break;
skb = frag; skb = ip_fraglist_next(&iter);
frag = skb->next;
skb_mark_not_on_list(skb);
} }
if (err == 0) { if (err == 0) {
...@@ -692,7 +833,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -692,7 +833,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return 0; return 0;
} }
kfree_skb_list(frag); kfree_skb_list(iter.frag_list);
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
return err; return err;
...@@ -708,105 +849,29 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -708,105 +849,29 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
} }
slow_path: slow_path:
iph = ip_hdr(skb);
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
/* /*
* Fragment the datagram. * Fragment the datagram.
*/ */
offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3; ip_frag_init(skb, hlen, ll_rs, mtu, &state);
not_last_frag = iph->frag_off & htons(IP_MF);
/* /*
* Keep copying data until we run out. * Keep copying data until we run out.
*/ */
while (left > 0) { while (state.left > 0) {
len = left; bool first_frag = (state.offset == 0);
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
len = mtu;
/* IF: we are not sending up to and including the packet end
then align the next start on an eight byte boundary */
if (len < left) {
len &= ~7;
}
/* Allocate buffer */ skb2 = ip_frag_next(skb, &state);
skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC); if (IS_ERR(skb2)) {
if (!skb2) { err = PTR_ERR(skb2);
err = -ENOMEM;
goto fail; goto fail;
} }
ip_frag_ipcb(skb, skb2, first_frag, &state);
/*
* Set up data on packet
*/
ip_copy_metadata(skb2, skb);
skb_reserve(skb2, ll_rs);
skb_put(skb2, len + hlen);
skb_reset_network_header(skb2);
skb2->transport_header = skb2->network_header + hlen;
/*
* Charge the memory for the fragment to any owner
* it might possess
*/
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
/*
* Copy the packet header into the new buffer.
*/
skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
/*
* Copy a block of the IP datagram.
*/
if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
BUG();
left -= len;
/*
* Fill in the new header fields.
*/
iph = ip_hdr(skb2);
iph->frag_off = htons((offset >> 3));
if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
iph->frag_off |= htons(IP_DF);
/* ANK: dirty, but effective trick. Upgrade options only if
* the segment to be fragmented was THE FIRST (otherwise,
* options are already fixed) and make it ONCE
* on the initial skb, so that all the following fragments
* will inherit fixed options.
*/
if (offset == 0)
ip_options_fragment(skb);
/*
* Added AC : If we are fragmenting a fragment that's not the
* last fragment then keep MF on each bit
*/
if (left > 0 || not_last_frag)
iph->frag_off |= htons(IP_MF);
ptr += len;
offset += len;
/* /*
* Put this fragment into the sending queue. * Put this fragment into the sending queue.
*/ */
iph->tot_len = htons(len + hlen);
ip_send_check(iph);
err = output(net, sk, skb2); err = output(net, sk, skb2);
if (err) if (err)
goto fail; goto fail;
......
...@@ -592,6 +592,170 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) ...@@ -592,6 +592,170 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from); skb_copy_secmark(to, from);
} }
/*
 * Start sending @skb's frag list as individual IPv6 fragments.
 *
 * Keeps a copy of the first @hlen header bytes in iter->tmp_hdr, records the
 * frag list in @iter and detaches it from @skb, then rebuilds @skb as the
 * first fragment: a fragment header (offset 0, IP6_MF set, @frag_id,
 * @nexthdr) is inserted between the @hlen header bytes and the payload, and
 * the skb lengths and the IPv6 payload_len are updated to match.
 *
 * Returns 0 on success or -ENOMEM if the header copy cannot be allocated.
 * Note that *prevhdr has already been overwritten with NEXTHDR_FRAGMENT when
 * -ENOMEM is returned.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
u8 nexthdr, __be32 frag_id,
struct ip6_fraglist_iter *iter)
{
unsigned int first_len;
struct frag_hdr *fh;
/* BUILD HEADER */
*prevhdr = NEXTHDR_FRAGMENT;
iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!iter->tmp_hdr)
return -ENOMEM;
iter->frag_list = skb_shinfo(skb)->frag_list;
iter->frag = iter->frag_list;
skb_frag_list_init(skb);
iter->offset = 0;
iter->hlen = hlen;
iter->frag_id = frag_id;
iter->nexthdr = nexthdr;
/* Open a gap for the fragment header between the headers and the payload,
 * then put the saved headers back in front of it.
 */
__skb_pull(skb, hlen);
fh = __skb_push(skb, sizeof(struct frag_hdr));
__skb_push(skb, hlen);
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
fh->identification = frag_id;
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
void ip6_fraglist_prepare(struct sk_buff *skb,
struct ip6_fraglist_iter *iter)
{
struct sk_buff *frag = iter->frag;
unsigned int hlen = iter->hlen;
struct frag_hdr *fh;
frag->ip_summed = CHECKSUM_NONE;
skb_reset_transport_header(frag);
fh = __skb_push(frag, sizeof(struct frag_hdr));
__skb_push(frag, hlen);
skb_reset_network_header(frag);
memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
fh->nexthdr = iter->nexthdr;
fh->reserved = 0;
fh->frag_off = htons(iter->offset);
if (frag->next)
fh->frag_off |= htons(IP6_MF);
fh->identification = iter->frag_id;
ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
/*
 * Initialise @state for the copying fragmenter driven by ip6_frag_next().
 *
 * @skb:             packet to be fragmented
 * @hlen:            header bytes replicated in front of every fragment
 * @mtu:             maximum payload bytes per fragment
 * @needed_tailroom: extra tailroom to allocate in each fragment
 * @hdr_room:        headroom to reserve in each fragment
 * @prevhdr:         next-header byte to be rewritten in each fragment
 * @nexthdr:         value for the fragment header's nexthdr field
 * @frag_id:         identification shared by all fragments
 * @state:           state block filled in here
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	/* Values stamped into every fragment. */
	state->frag_id = frag_id;
	state->nexthdr = nexthdr;
	state->prevhdr = prevhdr;

	/* Geometry of each fragment buffer. */
	state->hlen = hlen;
	state->mtu = mtu;
	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	/* Progress through the source packet. */
	state->ptr = hlen;		/* read position within skb */
	state->left = skb->len - hlen;	/* payload bytes not yet copied */
	state->offset = 0;		/* fragment offset of the next piece */
}
EXPORT_SYMBOL(ip6_frag_init);
/*
 * Allocate and fill the next fragment of @skb.
 *
 * @skb:   packet being fragmented (it is only read from here)
 * @state: progress state set up by ip6_frag_init(); left, ptr and offset
 *         are advanced by the amount of payload copied
 *
 * Returns the new fragment, or ERR_PTR(-ENOMEM) if allocation fails.
 * Callers loop while state->left > 0.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	struct sk_buff *frag;
	struct frag_hdr *fhdr;
	unsigned int len;
	u8 *nexthdr_byte;

	/* Take whatever is left, capped at the per-fragment payload limit. */
	len = state->left;
	if (len > state->mtu)
		len = state->mtu;

	/* Unless this piece reaches the end of the packet, round it down so
	 * the next piece starts on an eight byte boundary.
	 */
	if (len < state->left)
		len &= ~7;

	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/* Lay out the buffer: headroom, headers, fragment header, payload. */
	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fhdr = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/* Account the fragment's memory to the owning socket, if any. */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/* Replicate the headers, then patch the next-header byte at the same
	 * relative position it has in the original packet.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
	nexthdr_byte = skb_network_header(frag);
	nexthdr_byte += state->prevhdr - skb_network_header(skb);
	*nexthdr_byte = NEXTHDR_FRAGMENT;

	fhdr->nexthdr = state->nexthdr;
	fhdr->reserved = 0;
	fhdr->identification = state->frag_id;

	/* Copy this fragment's slice of the payload. */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	/* More-fragments flag is set while payload remains. */
	fhdr->frag_off = htons(state->offset);
	if (state->left > 0)
		fhdr->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *)) int (*output)(struct net *, struct sock *, struct sk_buff *))
{ {
...@@ -599,12 +763,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -599,12 +763,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL; inet6_sk(skb->sk) : NULL;
struct ipv6hdr *tmp_hdr; struct ip6_frag_state state;
struct frag_hdr *fh; unsigned int mtu, hlen, nexthdr_offset;
unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, err = 0;
int hroom, troom;
__be32 frag_id; __be32 frag_id;
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0; u8 *prevhdr, nexthdr = 0;
err = ip6_find_1stfragopt(skb, &prevhdr); err = ip6_find_1stfragopt(skb, &prevhdr);
...@@ -651,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -651,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
hroom = LL_RESERVED_SPACE(rt->dst.dev); hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) { if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb); unsigned int first_len = skb_pagelen(skb);
struct ip6_fraglist_iter iter;
struct sk_buff *frag2; struct sk_buff *frag2;
if (first_len - hlen > mtu || if (first_len - hlen > mtu ||
...@@ -678,74 +841,29 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -678,74 +841,29 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
skb->truesize -= frag->truesize; skb->truesize -= frag->truesize;
} }
err = 0; err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
offset = 0; &iter);
/* BUILD HEADER */ if (err < 0)
*prevhdr = NEXTHDR_FRAGMENT;
tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!tmp_hdr) {
err = -ENOMEM;
goto fail; goto fail;
}
frag = skb_shinfo(skb)->frag_list;
skb_frag_list_init(skb);
__skb_pull(skb, hlen);
fh = __skb_push(skb, sizeof(struct frag_hdr));
__skb_push(skb, hlen);
skb_reset_network_header(skb);
memcpy(skb_network_header(skb), tmp_hdr, hlen);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(IP6_MF);
fh->identification = frag_id;
first_len = skb_pagelen(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->len = first_len;
ipv6_hdr(skb)->payload_len = htons(first_len -
sizeof(struct ipv6hdr));
for (;;) { for (;;) {
/* Prepare header of the next frame, /* Prepare header of the next frame,
* before previous one went down. */ * before previous one went down. */
if (frag) { if (iter.frag)
frag->ip_summed = CHECKSUM_NONE; ip6_fraglist_prepare(skb, &iter);
skb_reset_transport_header(frag);
fh = __skb_push(frag, sizeof(struct frag_hdr));
__skb_push(frag, hlen);
skb_reset_network_header(frag);
memcpy(skb_network_header(frag), tmp_hdr,
hlen);
offset += skb->len - hlen - sizeof(struct frag_hdr);
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->frag_off = htons(offset);
if (frag->next)
fh->frag_off |= htons(IP6_MF);
fh->identification = frag_id;
ipv6_hdr(frag)->payload_len =
htons(frag->len -
sizeof(struct ipv6hdr));
ip6_copy_metadata(frag, skb);
}
err = output(net, sk, skb); err = output(net, sk, skb);
if (!err) if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES); IPSTATS_MIB_FRAGCREATES);
if (err || !frag) if (err || !iter.frag)
break; break;
skb = frag; skb = ip6_fraglist_next(&iter);
frag = skb->next;
skb_mark_not_on_list(skb);
} }
kfree(tmp_hdr); kfree(iter.tmp_hdr);
if (err == 0) { if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
...@@ -753,7 +871,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -753,7 +871,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return 0; return 0;
} }
kfree_skb_list(frag); kfree_skb_list(iter.frag_list);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS); IPSTATS_MIB_FRAGFAILS);
...@@ -770,90 +888,25 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, ...@@ -770,90 +888,25 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
} }
slow_path: slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
/* /*
* Fragment the datagram. * Fragment the datagram.
*/ */
troom = rt->dst.dev->needed_tailroom; ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
&state);
/* /*
* Keep copying data until we run out. * Keep copying data until we run out.
*/ */
while (left > 0) {
u8 *fragnexthdr_offset;
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
len = mtu;
/* IF: we are not sending up to and including the packet end
then align the next start on an eight byte boundary */
if (len < left) {
len &= ~7;
}
/* Allocate buffer */ while (state.left > 0) {
frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + frag = ip6_frag_next(skb, &state);
hroom + troom, GFP_ATOMIC); if (IS_ERR(frag)) {
if (!frag) { err = PTR_ERR(frag);
err = -ENOMEM;
goto fail; goto fail;
} }
/*
* Set up data on packet
*/
ip6_copy_metadata(frag, skb);
skb_reserve(frag, hroom);
skb_put(frag, len + hlen + sizeof(struct frag_hdr));
skb_reset_network_header(frag);
fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
frag->transport_header = (frag->network_header + hlen +
sizeof(struct frag_hdr));
/*
* Charge the memory for the fragment to any owner
* it might possess
*/
if (skb->sk)
skb_set_owner_w(frag, skb->sk);
/*
* Copy the packet header into the new buffer.
*/
skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
fragnexthdr_offset = skb_network_header(frag);
fragnexthdr_offset += prevhdr - skb_network_header(skb);
*fragnexthdr_offset = NEXTHDR_FRAGMENT;
/*
* Build fragment header.
*/
fh->nexthdr = nexthdr;
fh->reserved = 0;
fh->identification = frag_id;
/*
* Copy a block of the IP datagram.
*/
BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
len));
left -= len;
fh->frag_off = htons(offset);
if (left > 0)
fh->frag_off |= htons(IP6_MF);
ipv6_hdr(frag)->payload_len = htons(frag->len -
sizeof(struct ipv6hdr));
ptr += len;
offset += len;
/* /*
* Put this fragment into the sending queue. * Put this fragment into the sending queue.
*/ */
......
...@@ -16,6 +16,9 @@ ...@@ -16,6 +16,9 @@
#include <net/ip6_route.h> #include <net/ip6_route.h>
#include <net/xfrm.h> #include <net/xfrm.h>
#include <net/netfilter/nf_queue.h> #include <net/netfilter/nf_queue.h>
#include <net/netfilter/nf_conntrack_bridge.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include "../bridge/br_private.h"
int ip6_route_me_harder(struct net *net, struct sk_buff *skb) int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
{ {
...@@ -109,6 +112,122 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst, ...@@ -109,6 +112,122 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
} }
EXPORT_SYMBOL_GPL(__nf_ip6_route); EXPORT_SYMBOL_GPL(__nf_ip6_route);
/*
 * Refragment an IPv6 packet on the bridge path and hand every fragment to
 * @output.
 *
 * @net:    network namespace, passed through to @output
 * @sk:     socket, passed through to @output
 * @skb:    packet to fragment; always consumed by this function
 * @data:   bridge fragmentation data, passed through to @output
 * @output: callback that transmits one fragment and takes ownership of it
 *
 * The fragment size is taken from BR_INPUT_SKB_CB(skb)->frag_max_size and
 * must lie between IPV6_MIN_MTU and the device MTU.
 *
 * If the skb carries a frag_list whose geometry is usable, the list members
 * are sent as-is (fast path); otherwise the payload is copied into freshly
 * allocated fragments (slow path).
 *
 * Returns 0 when all fragments were sent and also whenever the packet is
 * dropped via the blackhole path; the fast path returns the error reported
 * by @output.
 */
int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		    struct nf_ct_bridge_frag_data *data,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_ct_bridge_frag_data *data,
				  struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	u8 *prevhdr, nexthdr = 0;
	int hroom, err = 0;
	__be32 frag_id;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto blackhole;
	hlen = err;
	nexthdr = *prevhdr;
	/* Remember the position, not the pointer: the head may move below. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = skb->dev->mtu;
	if (frag_max_size > mtu ||
	    frag_max_size < IPV6_MIN_MTU)
		goto blackhole;

	mtu = frag_max_size;
	/* Need room for the headers plus at least one 8-byte payload unit. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto blackhole;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	/* skb_checksum_help() may have reallocated the skb head, which would
	 * leave the old prevhdr dangling; recompute it from the saved offset.
	 */
	prevhdr = skb_network_header(skb) + nexthdr_offset;

	hroom = LL_RESERVED_SPACE(skb->dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto blackhole;

		if (skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag2) {
			if (frag2->len > mtu ||
			    skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto blackhole;

			/* Partially cloned skb? */
			if (skb_shared(frag2))
				goto slow_path;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto blackhole;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);
		if (!err)
			return 0;

		/* Free only the fragments that were never handed to output().
		 * iter.frag_list still points at the original list head, whose
		 * leading members have already been consumed; freeing from
		 * there would release them a second time.
		 */
		kfree_skb_list(iter.frag);
		return err;
	}
slow_path:
	/* This is a linearized skbuff, the original geometry is lost for us.
	 * This may also be a clone skbuff, we could preserve the geometry for
	 * the copies but probably not worth the effort.
	 */
	ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom,
		      LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
		      &state);

	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip6_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	consume_skb(skb);
	return err;

blackhole:
	/* Drop silently: the error is deliberately not propagated. */
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(br_ip6_fragment);
static const struct nf_ipv6_ops ipv6ops = { static const struct nf_ipv6_ops ipv6ops = {
#if IS_MODULE(CONFIG_IPV6) #if IS_MODULE(CONFIG_IPV6)
.chk_addr = ipv6_chk_addr, .chk_addr = ipv6_chk_addr,
...@@ -119,6 +238,10 @@ static const struct nf_ipv6_ops ipv6ops = { ...@@ -119,6 +238,10 @@ static const struct nf_ipv6_ops ipv6ops = {
.route_input = ip6_route_input, .route_input = ip6_route_input,
.fragment = ip6_fragment, .fragment = ip6_fragment,
.reroute = nf_ip6_reroute, .reroute = nf_ip6_reroute,
#if IS_MODULE(CONFIG_NF_CONNTRACK_BRIDGE)
.br_defrag = nf_ct_frag6_gather,
.br_fragment = br_ip6_fragment,
#endif
}; };
int __init ipv6_netfilter_init(void) int __init ipv6_netfilter_init(void)
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_bridge.h>
#include <net/netfilter/nf_log.h> #include <net/netfilter/nf_log.h>
#include <linux/ip.h> #include <linux/ip.h>
...@@ -120,10 +121,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) ...@@ -120,10 +121,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
}; };
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find); EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
static unsigned int nf_confirm(struct sk_buff *skb, unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo)
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{ {
const struct nf_conn_help *help; const struct nf_conn_help *help;
...@@ -154,6 +153,7 @@ static unsigned int nf_confirm(struct sk_buff *skb, ...@@ -154,6 +153,7 @@ static unsigned int nf_confirm(struct sk_buff *skb,
/* We've seen it coming out the other side: confirm it */ /* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb); return nf_conntrack_confirm(skb);
} }
EXPORT_SYMBOL_GPL(nf_confirm);
static unsigned int ipv4_confirm(void *priv, static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb, struct sk_buff *skb,
...@@ -442,12 +442,14 @@ static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto) ...@@ -442,12 +442,14 @@ static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
return 0; return 0;
} }
static struct nf_ct_bridge_info *nf_ct_bridge_info;
static int nf_ct_netns_do_get(struct net *net, u8 nfproto) static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{ {
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
bool fixup_needed = false; bool fixup_needed = false, retry = true;
int err = 0; int err = 0;
retry:
mutex_lock(&nf_ct_proto_mutex); mutex_lock(&nf_ct_proto_mutex);
switch (nfproto) { switch (nfproto) {
...@@ -487,6 +489,32 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto) ...@@ -487,6 +489,32 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
fixup_needed = true; fixup_needed = true;
break; break;
#endif #endif
case NFPROTO_BRIDGE:
if (!nf_ct_bridge_info) {
if (!retry) {
err = -EPROTO;
goto out_unlock;
}
mutex_unlock(&nf_ct_proto_mutex);
request_module("nf_conntrack_bridge");
retry = false;
goto retry;
}
if (!try_module_get(nf_ct_bridge_info->me)) {
err = -EPROTO;
goto out_unlock;
}
cnet->users_bridge++;
if (cnet->users_bridge > 1)
goto out_unlock;
err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
nf_ct_bridge_info->ops_size);
if (err)
cnet->users_bridge = 0;
else
fixup_needed = true;
break;
default: default:
err = -EPROTO; err = -EPROTO;
break; break;
...@@ -519,47 +547,99 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto) ...@@ -519,47 +547,99 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
ARRAY_SIZE(ipv6_conntrack_ops)); ARRAY_SIZE(ipv6_conntrack_ops));
break; break;
#endif #endif
} case NFPROTO_BRIDGE:
if (!nf_ct_bridge_info)
break;
if (cnet->users_bridge && (--cnet->users_bridge == 0))
nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
nf_ct_bridge_info->ops_size);
module_put(nf_ct_bridge_info->me);
break;
}
mutex_unlock(&nf_ct_proto_mutex); mutex_unlock(&nf_ct_proto_mutex);
} }
int nf_ct_netns_get(struct net *net, u8 nfproto) static int nf_ct_netns_inet_get(struct net *net)
{ {
int err; int err;
if (nfproto == NFPROTO_INET) {
err = nf_ct_netns_do_get(net, NFPROTO_IPV4); err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
if (err < 0) if (err < 0)
goto err1; goto err1;
err = nf_ct_netns_do_get(net, NFPROTO_IPV6); err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
if (err < 0) if (err < 0)
goto err2; goto err2;
} else {
err = nf_ct_netns_do_get(net, nfproto);
if (err < 0)
goto err1;
}
return 0;
return err;
err2: err2:
nf_ct_netns_put(net, NFPROTO_IPV4); nf_ct_netns_put(net, NFPROTO_IPV4);
err1: err1:
return err; return err;
} }
/*
 * Take a conntrack hook reference for @nfproto in @net.
 *
 * NFPROTO_INET takes references through nf_ct_netns_inet_get().
 * NFPROTO_BRIDGE takes the bridge reference and then the same references
 * as NFPROTO_INET. Any other value is passed straight to
 * nf_ct_netns_do_get().
 *
 * Returns 0 on success or a negative errno; on failure no reference taken
 * by this call is left behind.
 */
int nf_ct_netns_get(struct net *net, u8 nfproto)
{
	int err;

	switch (nfproto) {
	case NFPROTO_INET:
		err = nf_ct_netns_inet_get(net);
		break;
	case NFPROTO_BRIDGE:
		err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
		if (err < 0)
			return err;

		err = nf_ct_netns_inet_get(net);
		if (err < 0) {
			/* Drop only the bridge reference taken above.
			 * nf_ct_netns_put(net, NFPROTO_BRIDGE) would also put
			 * the IPv4 and IPv6 references, which were either
			 * never taken or already rolled back by
			 * nf_ct_netns_inet_get() itself.
			 */
			nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
			return err;
		}
		break;
	default:
		err = nf_ct_netns_do_get(net, nfproto);
		break;
	}
	return err;
}
EXPORT_SYMBOL_GPL(nf_ct_netns_get); EXPORT_SYMBOL_GPL(nf_ct_netns_get);
void nf_ct_netns_put(struct net *net, uint8_t nfproto) void nf_ct_netns_put(struct net *net, uint8_t nfproto)
{ {
if (nfproto == NFPROTO_INET) { switch (nfproto) {
case NFPROTO_BRIDGE:
nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
/* fall through */
case NFPROTO_INET:
nf_ct_netns_do_put(net, NFPROTO_IPV4); nf_ct_netns_do_put(net, NFPROTO_IPV4);
nf_ct_netns_do_put(net, NFPROTO_IPV6); nf_ct_netns_do_put(net, NFPROTO_IPV6);
} else { break;
default:
nf_ct_netns_do_put(net, nfproto); nf_ct_netns_do_put(net, nfproto);
break;
} }
} }
EXPORT_SYMBOL_GPL(nf_ct_netns_put); EXPORT_SYMBOL_GPL(nf_ct_netns_put);
/*
 * Publish the bridge conntrack description used by the NFPROTO_BRIDGE
 * get/put paths.
 *
 * @info: bridge conntrack info to install
 *
 * Warns if an info block is already installed; the new one replaces it
 * regardless. The check is made under nf_ct_proto_mutex so it cannot race
 * with another writer of nf_ct_bridge_info.
 */
void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
{
	mutex_lock(&nf_ct_proto_mutex);
	WARN_ON(nf_ct_bridge_info);
	nf_ct_bridge_info = info;
	mutex_unlock(&nf_ct_proto_mutex);
}
EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
/*
 * Withdraw the bridge conntrack description installed by
 * nf_ct_bridge_register().
 *
 * @info: not examined here; whatever is currently installed is cleared
 *
 * Warns if nothing is installed. The check is made under
 * nf_ct_proto_mutex so it cannot race with another writer of
 * nf_ct_bridge_info.
 */
void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
{
	mutex_lock(&nf_ct_proto_mutex);
	WARN_ON(!nf_ct_bridge_info);
	nf_ct_bridge_info = NULL;
	mutex_unlock(&nf_ct_proto_mutex);
}
EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
int nf_conntrack_proto_init(void) int nf_conntrack_proto_init(void)
{ {
int ret; int ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment