Commit e110861f authored by Lorenzo Colitti, committed by David S. Miller

net: add a sysctl to reflect the fwmark on replies

Kernel-originated IP packets that have no user socket associated
with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.)
are emitted with a mark of zero. Add a sysctl to make them have
the same mark as the packet they are replying to.

This allows an administrator that wishes to do so to use
mark-based routing, firewalling, etc. for these replies by
marking the original packets inbound.

Tested using user-mode linux:
 - ICMP/ICMPv6 echo replies and errors.
 - TCP RST packets (IPv4 and IPv6).
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 87e067cd
...@@ -231,6 +231,9 @@ void ipfrag_init(void); ...@@ -231,6 +231,9 @@ void ipfrag_init(void);
void ip_static_sysctl_init(void); void ip_static_sysctl_init(void);
/*
 * Mark to put on a kernel-generated IPv4 reply: the mark of the packet
 * being answered when the namespace's fwmark_reflect setting is non-zero,
 * otherwise 0.
 */
#define IP4_REPLY_MARK(net, mark) \
	(!(net)->ipv4.sysctl_fwmark_reflect ? 0 : (mark))
static inline bool ip_is_fragment(const struct iphdr *iph) static inline bool ip_is_fragment(const struct iphdr *iph)
{ {
return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
......
...@@ -113,6 +113,9 @@ struct frag_hdr { ...@@ -113,6 +113,9 @@ struct frag_hdr {
#define IP6_MF 0x0001 #define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8 #define IP6_OFFSET 0xFFF8
/*
 * Mark to put on a kernel-generated IPv6 reply: the mark of the packet
 * being answered when the namespace's fwmark_reflect setting is non-zero,
 * otherwise 0.
 */
#define IP6_REPLY_MARK(net, mark) \
	(!(net)->ipv6.sysctl.fwmark_reflect ? 0 : (mark))
#include <net/sock.h> #include <net/sock.h>
/* sysctls */ /* sysctls */
......
...@@ -77,6 +77,8 @@ struct netns_ipv4 { ...@@ -77,6 +77,8 @@ struct netns_ipv4 {
int sysctl_ip_no_pmtu_disc; int sysctl_ip_no_pmtu_disc;
int sysctl_ip_fwd_use_pmtu; int sysctl_ip_fwd_use_pmtu;
int sysctl_fwmark_reflect;
struct ping_group_range ping_group_range; struct ping_group_range ping_group_range;
atomic_t dev_addr_genid; atomic_t dev_addr_genid;
......
...@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 { ...@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
int flowlabel_consistency; int flowlabel_consistency;
int icmpv6_time; int icmpv6_time;
int anycast_src_echo_reply; int anycast_src_echo_reply;
int fwmark_reflect;
}; };
struct netns_ipv6 { struct netns_ipv6 {
......
...@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct sock *sk; struct sock *sk;
struct inet_sock *inet; struct inet_sock *inet;
__be32 daddr, saddr; __be32 daddr, saddr;
u32 mark = IP4_REPLY_MARK(net, skb->mark);
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
return; return;
...@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_param->data.icmph.checksum = 0; icmp_param->data.icmph.checksum = 0;
inet->tos = ip_hdr(skb)->tos; inet->tos = ip_hdr(skb)->tos;
sk->sk_mark = mark;
daddr = ipc.addr = ip_hdr(skb)->saddr; daddr = ipc.addr = ip_hdr(skb)->saddr;
saddr = fib_compute_spec_dst(skb); saddr = fib_compute_spec_dst(skb);
ipc.opt = NULL; ipc.opt = NULL;
...@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) ...@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
memset(&fl4, 0, sizeof(fl4)); memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr; fl4.daddr = daddr;
fl4.saddr = saddr; fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_proto = IPPROTO_ICMP;
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
...@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net, ...@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4, struct flowi4 *fl4,
struct sk_buff *skb_in, struct sk_buff *skb_in,
const struct iphdr *iph, const struct iphdr *iph,
__be32 saddr, u8 tos, __be32 saddr, u8 tos, u32 mark,
int type, int code, int type, int code,
struct icmp_bxm *param) struct icmp_bxm *param)
{ {
...@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net, ...@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->daddr = (param->replyopts.opt.opt.srr ? fl4->daddr = (param->replyopts.opt.opt.srr ?
param->replyopts.opt.opt.faddr : iph->saddr); param->replyopts.opt.opt.faddr : iph->saddr);
fl4->saddr = saddr; fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_tos = RT_TOS(tos);
fl4->flowi4_proto = IPPROTO_ICMP; fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type; fl4->fl4_icmp_type = type;
...@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ...@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
struct flowi4 fl4; struct flowi4 fl4;
__be32 saddr; __be32 saddr;
u8 tos; u8 tos;
u32 mark;
struct net *net; struct net *net;
struct sock *sk; struct sock *sk;
...@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ...@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
IPTOS_PREC_INTERNETCONTROL) : IPTOS_PREC_INTERNETCONTROL) :
iph->tos; iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
goto out_unlock; goto out_unlock;
...@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ...@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param->skb = skb_in; icmp_param->skb = skb_in;
icmp_param->offset = skb_network_offset(skb_in); icmp_param->offset = skb_network_offset(skb_in);
inet_sk(sk)->tos = tos; inet_sk(sk)->tos = tos;
sk->sk_mark = mark;
ipc.addr = iph->saddr; ipc.addr = iph->saddr;
ipc.opt = &icmp_param->replyopts.opt; ipc.opt = &icmp_param->replyopts.opt;
ipc.tx_flags = 0; ipc.tx_flags = 0;
ipc.ttl = 0; ipc.ttl = 0;
ipc.tos = -1; ipc.tos = -1;
rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
type, code, icmp_param); type, code, icmp_param);
if (IS_ERR(rt)) if (IS_ERR(rt))
goto out_unlock; goto out_unlock;
......
...@@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, ...@@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
daddr = replyopts.opt.opt.faddr; daddr = replyopts.opt.opt.faddr;
} }
flowi4_init_output(&fl4, arg->bound_dev_if, 0, flowi4_init_output(&fl4, arg->bound_dev_if,
IP4_REPLY_MARK(net, skb->mark),
RT_TOS(arg->tos), RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg), ip_reply_arg_flowi_flags(arg),
......
...@@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = { ...@@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
{
.procname = "fwmark_reflect",
.data = &init_net.ipv4.sysctl_fwmark_reflect,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ } { }
}; };
......
...@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) ...@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
int len; int len;
int hlimit; int hlimit;
int err = 0; int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head || if ((u8 *)hdr < skb->head ||
(skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
...@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) ...@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
fl6.daddr = hdr->saddr; fl6.daddr = hdr->saddr;
if (saddr) if (saddr)
fl6.saddr = *saddr; fl6.saddr = *saddr;
fl6.flowi6_mark = mark;
fl6.flowi6_oif = iif; fl6.flowi6_oif = iif;
fl6.fl6_icmp_type = type; fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code; fl6.fl6_icmp_code = code;
...@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) ...@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
sk = icmpv6_xmit_lock(net); sk = icmpv6_xmit_lock(net);
if (sk == NULL) if (sk == NULL)
return; return;
sk->sk_mark = mark;
np = inet6_sk(sk); np = inet6_sk(sk);
if (!icmpv6_xrlim_allow(sk, type, &fl6)) if (!icmpv6_xrlim_allow(sk, type, &fl6))
...@@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ...@@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
int err = 0; int err = 0;
int hlimit; int hlimit;
u8 tclass; u8 tclass;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
saddr = &ipv6_hdr(skb)->daddr; saddr = &ipv6_hdr(skb)->daddr;
...@@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ...@@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.saddr = *saddr; fl6.saddr = *saddr;
fl6.flowi6_oif = skb->dev->ifindex; fl6.flowi6_oif = skb->dev->ifindex;
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net); sk = icmpv6_xmit_lock(net);
if (sk == NULL) if (sk == NULL)
return; return;
sk->sk_mark = mark;
np = inet6_sk(sk); np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
......
...@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { ...@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "fwmark_reflect",
.data = &init_net.ipv6.sysctl.fwmark_reflect,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ } { }
}; };
......
...@@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, ...@@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
fl6.flowi6_oif = inet6_iif(skb); fl6.flowi6_oif = inet6_iif(skb);
else else
fl6.flowi6_oif = oif; fl6.flowi6_oif = oif;
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest; fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source; fl6.fl6_sport = t1->source;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment