Commit 5486f5bf authored by Christophe Leroy's avatar Christophe Leroy Committed by David S. Miller

net: Force inlining of checksum functions in net/checksum.h

All functions defined as static inline in net/checksum.h are
meant to be inlined for performance reasons.

But since commit ac7c3e4f ("compiler: enable
CONFIG_OPTIMIZE_INLINING forcibly") the compiler is allowed to
uninline functions when it wants.

Fair enough in the general case, but for tiny performance critical
checksum helpers that's counter-productive.

The problem mainly arises when selecting CONFIG_CC_OPTIMISE_FOR_SIZE.
Those helpers being 'static inline' in header files, you suddenly find
them duplicated many times in the resulting vmlinux.

Here is a typical example when building powerpc pmac32_defconfig
with CONFIG_CC_OPTIMISE_FOR_SIZE. csum_sub() appears 4 times:

	c04a23cc <csum_sub>:
	c04a23cc:	7c 84 20 f8 	not     r4,r4
	c04a23d0:	7c 63 20 14 	addc    r3,r3,r4
	c04a23d4:	7c 63 01 94 	addze   r3,r3
	c04a23d8:	4e 80 00 20 	blr
		...
	c04a2ce8:	4b ff f6 e5 	bl      c04a23cc <csum_sub>
		...
	c04a2d2c:	4b ff f6 a1 	bl      c04a23cc <csum_sub>
		...
	c04a2d54:	4b ff f6 79 	bl      c04a23cc <csum_sub>
		...
	c04a754c <csum_sub>:
	c04a754c:	7c 84 20 f8 	not     r4,r4
	c04a7550:	7c 63 20 14 	addc    r3,r3,r4
	c04a7554:	7c 63 01 94 	addze   r3,r3
	c04a7558:	4e 80 00 20 	blr
		...
	c04ac930:	4b ff ac 1d 	bl      c04a754c <csum_sub>
		...
	c04ad264:	4b ff a2 e9 	bl      c04a754c <csum_sub>
		...
	c04e3b08 <csum_sub>:
	c04e3b08:	7c 84 20 f8 	not     r4,r4
	c04e3b0c:	7c 63 20 14 	addc    r3,r3,r4
	c04e3b10:	7c 63 01 94 	addze   r3,r3
	c04e3b14:	4e 80 00 20 	blr
		...
	c04e5788:	4b ff e3 81 	bl      c04e3b08 <csum_sub>
		...
	c04e65c8:	4b ff d5 41 	bl      c04e3b08 <csum_sub>
		...
	c0512d34 <csum_sub>:
	c0512d34:	7c 84 20 f8 	not     r4,r4
	c0512d38:	7c 63 20 14 	addc    r3,r3,r4
	c0512d3c:	7c 63 01 94 	addze   r3,r3
	c0512d40:	4e 80 00 20 	blr
		...
	c0512dfc:	4b ff ff 39 	bl      c0512d34 <csum_sub>
		...
	c05138bc:	4b ff f4 79 	bl      c0512d34 <csum_sub>
		...

Restore the expected behaviour by using __always_inline for all
functions defined in net/checksum.h

vmlinux size is even reduced by 256 bytes with this patch:

	   text	   data	    bss	    dec	    hex	filename
	6980022	2515362	 194384	9689768	 93daa8	vmlinux.before
	6979862	2515266	 194384	9689512	 93d9a8	vmlinux.now

Fixes: ac7c3e4f ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly")
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0033fced
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include <asm/checksum.h> #include <asm/checksum.h>
#ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
static inline static __always_inline
__wsum csum_and_copy_from_user (const void __user *src, void *dst, __wsum csum_and_copy_from_user (const void __user *src, void *dst,
int len) int len)
{ {
...@@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst, ...@@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst,
#endif #endif
#ifndef HAVE_CSUM_COPY_USER #ifndef HAVE_CSUM_COPY_USER
static __inline__ __wsum csum_and_copy_to_user static __always_inline __wsum csum_and_copy_to_user
(const void *src, void __user *dst, int len) (const void *src, void __user *dst, int len)
{ {
__wsum sum = csum_partial(src, len, ~0U); __wsum sum = csum_partial(src, len, ~0U);
...@@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user ...@@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user
#endif #endif
#ifndef _HAVE_ARCH_CSUM_AND_COPY #ifndef _HAVE_ARCH_CSUM_AND_COPY
static inline __wsum static __always_inline __wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len) csum_partial_copy_nocheck(const void *src, void *dst, int len)
{ {
memcpy(dst, src, len); memcpy(dst, src, len);
...@@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) ...@@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
#endif #endif
#ifndef HAVE_ARCH_CSUM_ADD #ifndef HAVE_ARCH_CSUM_ADD
static inline __wsum csum_add(__wsum csum, __wsum addend) static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
{ {
u32 res = (__force u32)csum; u32 res = (__force u32)csum;
res += (__force u32)addend; res += (__force u32)addend;
...@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) ...@@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
} }
#endif #endif
static inline __wsum csum_sub(__wsum csum, __wsum addend) static __always_inline __wsum csum_sub(__wsum csum, __wsum addend)
{ {
return csum_add(csum, ~addend); return csum_add(csum, ~addend);
} }
static inline __sum16 csum16_add(__sum16 csum, __be16 addend) static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend)
{ {
u16 res = (__force u16)csum; u16 res = (__force u16)csum;
...@@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) ...@@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend)
return (__force __sum16)(res + (res < (__force u16)addend)); return (__force __sum16)(res + (res < (__force u16)addend));
} }
static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
{ {
return csum16_add(csum, ~addend); return csum16_add(csum, ~addend);
} }
static inline __wsum csum_shift(__wsum sum, int offset) static __always_inline __wsum csum_shift(__wsum sum, int offset)
{ {
/* rotate sum to align it with a 16b boundary */ /* rotate sum to align it with a 16b boundary */
if (offset & 1) if (offset & 1)
...@@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset) ...@@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset)
return sum; return sum;
} }
static inline __wsum static __always_inline __wsum
csum_block_add(__wsum csum, __wsum csum2, int offset) csum_block_add(__wsum csum, __wsum csum2, int offset)
{ {
return csum_add(csum, csum_shift(csum2, offset)); return csum_add(csum, csum_shift(csum2, offset));
} }
static inline __wsum static __always_inline __wsum
csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
{ {
return csum_block_add(csum, csum2, offset); return csum_block_add(csum, csum2, offset);
} }
static inline __wsum static __always_inline __wsum
csum_block_sub(__wsum csum, __wsum csum2, int offset) csum_block_sub(__wsum csum, __wsum csum2, int offset)
{ {
return csum_block_add(csum, ~csum2, offset); return csum_block_add(csum, ~csum2, offset);
} }
static inline __wsum csum_unfold(__sum16 n) static __always_inline __wsum csum_unfold(__sum16 n)
{ {
return (__force __wsum)n; return (__force __wsum)n;
} }
static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) static __always_inline
__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
{ {
return csum_partial(buff, len, sum); return csum_partial(buff, len, sum);
} }
#define CSUM_MANGLED_0 ((__force __sum16)0xffff) #define CSUM_MANGLED_0 ((__force __sum16)0xffff)
static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
{ {
*sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
} }
static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{ {
__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
...@@ -136,7 +137,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) ...@@ -136,7 +137,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
* m : old value of a 16bit field * m : old value of a 16bit field
* m' : new value of a 16bit field * m' : new value of a 16bit field
*/ */
static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new)
{ {
*sum = ~csum16_add(csum16_sub(~(*sum), old), new); *sum = ~csum16_add(csum16_sub(~(*sum), old), new);
} }
...@@ -150,15 +151,15 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, ...@@ -150,15 +151,15 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
__wsum diff, bool pseudohdr); __wsum diff, bool pseudohdr);
static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, static __always_inline
__be16 from, __be16 to, void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
bool pseudohdr) __be16 from, __be16 to, bool pseudohdr)
{ {
inet_proto_csum_replace4(sum, skb, (__force __be32)from, inet_proto_csum_replace4(sum, skb, (__force __be32)from,
(__force __be32)to, pseudohdr); (__force __be32)to, pseudohdr);
} }
static inline __wsum remcsum_adjust(void *ptr, __wsum csum, static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
int start, int offset) int start, int offset)
{ {
__sum16 *psum = (__sum16 *)(ptr + offset); __sum16 *psum = (__sum16 *)(ptr + offset);
...@@ -175,12 +176,12 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, ...@@ -175,12 +176,12 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
return delta; return delta;
} }
static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta)
{ {
*psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum));
} }
static inline __wsum wsum_negate(__wsum val) static __always_inline __wsum wsum_negate(__wsum val)
{ {
return (__force __wsum)-((__force u32)val); return (__force __wsum)-((__force u32)val);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment