Commit e182a345 authored Oct 26, 2011 by Pekka Enberg
Merge branches 'slab/next' and 'slub/partial' into slab/for-linus
Parents: 3cfef952, fe353178, dcc3be6a
Showing 6 changed files with 425 additions and 182 deletions (+425, -182)
Documentation/vm/00-INDEX    +0   -2
include/linux/mm_types.h     +13  -1
include/linux/slub_def.h     +4   -0
mm/slab.c                    +7   -12
mm/slub.c                    +392 -166
tools/slub/slabinfo.c        +9   -1
Documentation/vm/00-INDEX

@@ -30,8 +30,6 @@ page_migration
         - description of page migration in NUMA systems.
 pagemap.txt
         - pagemap, from the userspace perspective
-slabinfo.c
-        - source code for a tool to get reports about slabs.
 slub.txt
         - a short users guide for SLUB.
 unevictable-lru.txt
include/linux/mm_types.h

@@ -79,9 +79,21 @@ struct page {
     };
 
     /* Third double word block */
-    struct list_head lru;       /* Pageout list, eg. active_list
-                                 * protected by zone->lru_lock !
-                                 */
+    union {
+        struct list_head lru;   /* Pageout list, eg. active_list
+                                 * protected by zone->lru_lock !
+                                 */
+        struct {                /* slub per cpu partial pages */
+            struct page *next;  /* Next partial slab */
+#ifdef CONFIG_64BIT
+            int pages;          /* Nr of partial slabs left */
+            int pobjects;       /* Approximate # of objects */
+#else
+            short int pages;
+            short int pobjects;
+#endif
+        };
+    };
 
     /* Remainder is not double word aligned */
     union {
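The union above overlays the per-cpu partial chain (next/pages/pobjects) on the space already reserved for lru; on 32-bit builds the counters shrink to short int so the overlay stays within the two-pointer footprint of struct list_head. A minimal user-space sketch of that layout assumption (model types only, not the kernel structures):

/* Sketch only: models the overlay of the slub per-cpu partial fields
 * on the space of a struct list_head, as in the mm_types.h hunk above. */
#include <stdio.h>

struct list_head_model { void *next, *prev; };      /* two pointers */

struct page_model {
    union {
        struct list_head_model lru;
        struct {                         /* slub per cpu partial pages */
            struct page_model *next;     /* next partial slab */
#if defined(__LP64__)
            int pages;                   /* nr of partial slabs left */
            int pobjects;                /* approximate # of objects */
#else
            short int pages;
            short int pobjects;
#endif
        };
    };
};

int main(void)
{
    /* Both union members should occupy the same double word. */
    printf("sizeof(list_head) = %zu, sizeof(page_model) = %zu\n",
           sizeof(struct list_head_model), sizeof(struct page_model));
    return 0;
}

On a 64-bit build both members come out at 16 bytes; on 32-bit the short counters keep the overlay at 8 bytes, matching the two pointers of list_head.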
include/linux/slub_def.h

@@ -36,12 +36,15 @@ enum stat_item {
     ORDER_FALLBACK,          /* Number of times fallback was necessary */
     CMPXCHG_DOUBLE_CPU_FAIL, /* Failure of this_cpu_cmpxchg_double */
     CMPXCHG_DOUBLE_FAIL,     /* Number of times that cmpxchg double did not match */
+    CPU_PARTIAL_ALLOC,       /* Used cpu partial on alloc */
+    CPU_PARTIAL_FREE,        /* Used cpu partial on free */
     NR_SLUB_STAT_ITEMS };
 
 struct kmem_cache_cpu {
     void **freelist;         /* Pointer to next available object */
     unsigned long tid;       /* Globally unique transaction id */
     struct page *page;       /* The slab from which we are allocating */
+    struct page *partial;    /* Partially allocated frozen slabs */
     int node;                /* The node of the page (or -1 for debug) */
 #ifdef CONFIG_SLUB_STATS
     unsigned stat[NR_SLUB_STAT_ITEMS];

@@ -79,6 +82,7 @@ struct kmem_cache {
     int size;                /* The size of an object including meta data */
     int objsize;             /* The size of an object without meta data */
     int offset;              /* Free pointer offset. */
+    int cpu_partial;         /* Number of per cpu partial objects to keep around */
     struct kmem_cache_order_objects oo;
 
     /* Allocation and freeing of slabs */
mm/slab.c

@@ -1851,15 +1851,15 @@ static void dump_line(char *data, int offset, int limit)
     unsigned char error = 0;
     int bad_count = 0;
 
-    printk(KERN_ERR "%03x:", offset);
+    printk(KERN_ERR "%03x: ", offset);
     for (i = 0; i < limit; i++) {
         if (data[offset + i] != POISON_FREE) {
             error = data[offset + i];
             bad_count++;
         }
-        printk(" %02x", (unsigned char)data[offset + i]);
     }
-    printk("\n");
+    print_hex_dump(KERN_CONT, "", 0, 16, 1,
+            &data[offset], limit, 1);
 
     if (bad_count == 1) {
         error ^= POISON_FREE;

@@ -3039,14 +3039,9 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
         printk(KERN_ERR "slab: Internal list corruption detected in "
                 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
             cachep->name, cachep->num, slabp, slabp->inuse);
-        for (i = 0;
-             i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
-             i++) {
-            if (i % 16 == 0)
-                printk("\n%03x:", i);
-            printk(" %02x", ((unsigned char *)slabp)[i]);
-        }
-        printk("\n");
+        print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1,
+            slabp, sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
+            1);
         BUG();
     }
 }

@@ -4584,7 +4579,7 @@ static const struct file_operations proc_slabstats_operations = {
 static int __init slab_proc_init(void)
 {
-    proc_create("slabinfo", S_IWUSR|S_IRUGO, NULL, &proc_slabinfo_operations);
+    proc_create("slabinfo", S_IWUSR|S_IRUSR, NULL, &proc_slabinfo_operations);
 #ifdef CONFIG_DEBUG_SLAB_LEAK
     proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
 #endif
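Both mm/slab.c hunks above (and the print_section() rewrite in mm/slub.c below) replace open-coded hexdump loops with the kernel's print_hex_dump() helper. A rough user-space stand-in for the resulting output format (offset prefix, 16 bytes per row, one byte per group, ASCII column) looks like the sketch below; the function name and exact formatting are illustrative, not the kernel implementation:

/* Sketch only: approximates what print_hex_dump(KERN_ERR, "",
 * DUMP_PREFIX_OFFSET, 16, 1, buf, len, 1) prints, to show what the
 * removed printk loops were doing by hand. */
#include <ctype.h>
#include <stdio.h>

static void hex_dump_offset(const unsigned char *buf, size_t len)
{
    for (size_t row = 0; row < len; row += 16) {
        printf("%08zx: ", row);                       /* offset prefix */
        for (size_t i = row; i < row + 16; i++)       /* hex column */
            if (i < len)
                printf("%02x ", buf[i]);
            else
                printf("   ");
        for (size_t i = row; i < row + 16 && i < len; i++)  /* ascii column */
            putchar(isgraph(buf[i]) ? buf[i] : '.');
        putchar('\n');
    }
}

int main(void)
{
    unsigned char bytes[40];
    for (size_t i = 0; i < sizeof(bytes); i++)
        bytes[i] = (unsigned char)(0x6b + (i % 3));   /* arbitrary test pattern */
    hex_dump_offset(bytes, sizeof(bytes));
    return 0;
}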
mm/slub.c

@@ -467,34 +467,8 @@ static int disable_higher_order_debug;
  */
 static void print_section(char *text, u8 *addr, unsigned int length)
 {
-    int i, offset;
-    int newline = 1;
-    char ascii[17];
-
-    ascii[16] = 0;
-
-    for (i = 0; i < length; i++) {
-        if (newline) {
-            printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
-            newline = 0;
-        }
-        printk(KERN_CONT " %02x", addr[i]);
-        offset = i % 16;
-        ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
-        if (offset == 15) {
-            printk(KERN_CONT " %s\n", ascii);
-            newline = 1;
-        }
-    }
-    if (!newline) {
-        i %= 16;
-        while (i < 16) {
-            printk(KERN_CONT "   ");
-            ascii[i] = ' ';
-            i++;
-        }
-        printk(KERN_CONT " %s\n", ascii);
-    }
+    print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
+            length, 1);
 }
 
 static struct track *get_track(struct kmem_cache *s, void *object,
@@ -625,12 +599,12 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
         p, p - addr, get_freepointer(s, p));
 
     if (p > addr + 16)
-        print_section("Bytes b4", p - 16, 16);
-
-    print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
+        print_section("Bytes b4 ", p - 16, 16);
 
+    print_section("Object ", p, min_t(unsigned long, s->objsize,
+                PAGE_SIZE));
     if (s->flags & SLAB_RED_ZONE)
-        print_section("Redzone", p + s->objsize,
+        print_section("Redzone ", p + s->objsize,
             s->inuse - s->objsize);
 
     if (s->offset)

@@ -643,7 +617,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
     if (off != s->size)
         /* Beginning of the filler is the free pointer */
-        print_section("Padding", p + off, s->size - off);
+        print_section("Padding ", p + off, s->size - off);
 
     dump_stack();
 }

@@ -838,7 +812,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
         end--;
 
     slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
-    print_section("Padding", end - remainder, remainder);
+    print_section("Padding ", end - remainder, remainder);
 
     restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
     return 0;

@@ -987,7 +961,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
             page->freelist);
 
         if (!alloc)
-            print_section("Object", (void *)object, s->objsize);
+            print_section("Object ", (void *)object, s->objsize);
 
         dump_stack();
     }

@@ -1447,7 +1421,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
     set_freepointer(s, last, NULL);
 
     page->freelist = start;
-    page->inuse = 0;
+    page->inuse = page->objects;
     page->frozen = 1;
 out:
     return page;
@@ -1534,7 +1508,7 @@ static inline void add_partial(struct kmem_cache_node *n,
                 struct page *page, int tail)
 {
     n->nr_partial++;
-    if (tail)
+    if (tail == DEACTIVATE_TO_TAIL)
         list_add_tail(&page->lru, &n->partial);
     else
         list_add(&page->lru, &n->partial);

@@ -1554,10 +1528,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
  * Lock slab, remove from the partial list and put the object into the
  * per cpu freelist.
  *
+ * Returns a list of objects or NULL if it fails.
+ *
  * Must hold list_lock.
  */
-static inline int acquire_slab(struct kmem_cache *s,
-        struct kmem_cache_node *n, struct page *page)
+static inline void *acquire_slab(struct kmem_cache *s,
+        struct kmem_cache_node *n, struct page *page,
+        int mode)
 {
     void *freelist;
     unsigned long counters;

@@ -1572,7 +1549,8 @@ static inline int acquire_slab(struct kmem_cache *s,
         freelist = page->freelist;
         counters = page->counters;
         new.counters = counters;
-        new.inuse = page->objects;
+        if (mode)
+            new.inuse = page->objects;
 
         VM_BUG_ON(new.frozen);
         new.frozen = 1;

@@ -1583,32 +1561,19 @@ static inline int acquire_slab(struct kmem_cache *s,
         "lock and freeze"));
 
     remove_partial(n, page);
-
-    if (freelist) {
-        /* Populate the per cpu freelist */
-        this_cpu_write(s->cpu_slab->freelist, freelist);
-        this_cpu_write(s->cpu_slab->page, page);
-        this_cpu_write(s->cpu_slab->node, page_to_nid(page));
-        return 1;
-    } else {
-        /*
-         * Slab page came from the wrong list. No object to allocate
-         * from. Put it onto the correct list and continue partial
-         * scan.
-         */
-        printk(KERN_ERR "SLUB: %s : Page without available objects on"
-            " partial list\n", s->name);
-        return 0;
-    }
+    return freelist;
 }
 
+static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+
 /*
  * Try to allocate a partial slab from a specific node.
  */
-static struct page *get_partial_node(struct kmem_cache *s,
-                    struct kmem_cache_node *n)
+static void *get_partial_node(struct kmem_cache *s,
+        struct kmem_cache_node *n, struct kmem_cache_cpu *c)
 {
-    struct page *page;
+    struct page *page, *page2;
+    void *object = NULL;
 
     /*
      * Racy check. If we mistakenly see no partial slabs then we
@@ -1620,26 +1585,43 @@ static struct page *get_partial_node(struct kmem_cache *s,
         return NULL;
 
     spin_lock(&n->list_lock);
-    list_for_each_entry(page, &n->partial, lru)
-        if (acquire_slab(s, n, page))
-            goto out;
-    page = NULL;
-out:
+    list_for_each_entry_safe(page, page2, &n->partial, lru) {
+        void *t = acquire_slab(s, n, page, object == NULL);
+        int available;
+
+        if (!t)
+            break;
+
+        if (!object) {
+            c->page = page;
+            c->node = page_to_nid(page);
+            stat(s, ALLOC_FROM_PARTIAL);
+            object = t;
+            available = page->objects - page->inuse;
+        } else {
+            page->freelist = t;
+            available = put_cpu_partial(s, page, 0);
+        }
+        if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+            break;
+
+    }
     spin_unlock(&n->list_lock);
-    return page;
+    return object;
 }
 
 /*
  * Get a page from somewhere. Search in increasing NUMA distances.
  */
-static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
+static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
+        struct kmem_cache_cpu *c)
 {
 #ifdef CONFIG_NUMA
     struct zonelist *zonelist;
     struct zoneref *z;
     struct zone *zone;
     enum zone_type high_zoneidx = gfp_zone(flags);
-    struct page *page;
+    void *object;
 
     /*
      * The defrag ratio allows a configuration of the tradeoffs between
@@ -1672,10 +1654,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
             if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
                     n->nr_partial > s->min_partial) {
-                page = get_partial_node(s, n);
-                if (page) {
+                object = get_partial_node(s, n, c);
+                if (object) {
                     put_mems_allowed();
-                    return page;
+                    return object;
                 }
             }
         }

@@ -1687,16 +1669,17 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 /*
  * Get a partial page, lock it and return it.
  */
-static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
+static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
+        struct kmem_cache_cpu *c)
 {
-    struct page *page;
+    void *object;
     int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
 
-    page = get_partial_node(s, get_node(s, searchnode));
-    if (page || node != NUMA_NO_NODE)
-        return page;
+    object = get_partial_node(s, get_node(s, searchnode), c);
+    if (object || node != NUMA_NO_NODE)
+        return object;
 
-    return get_any_partial(s, flags);
+    return get_any_partial(s, flags, c);
 }
 
 #ifdef CONFIG_PREEMPT

@@ -1765,9 +1748,6 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
     for_each_possible_cpu(cpu)
         per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
 }
-/*
- * Remove the cpu slab
- */
 
 /*
  * Remove the cpu slab
@@ -1781,13 +1761,13 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
     enum slab_modes l = M_NONE, m = M_NONE;
     void *freelist;
     void *nextfree;
-    int tail = 0;
+    int tail = DEACTIVATE_TO_HEAD;
     struct page new;
     struct page old;
 
     if (page->freelist) {
         stat(s, DEACTIVATE_REMOTE_FREES);
-        tail = 1;
+        tail = DEACTIVATE_TO_TAIL;
     }
 
     c->tid = next_tid(c->tid);

@@ -1893,7 +1873,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
     if (m == M_PARTIAL) {
 
         add_partial(n, page, tail);
-        stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+        stat(s, tail);
 
     } else if (m == M_FULL) {
@@ -1920,6 +1900,123 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
     }
 }
 
+/* Unfreeze all the cpu partial slabs */
+static void unfreeze_partials(struct kmem_cache *s)
+{
+    struct kmem_cache_node *n = NULL;
+    struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
+    struct page *page;
+
+    while ((page = c->partial)) {
+        enum slab_modes { M_PARTIAL, M_FREE };
+        enum slab_modes l, m;
+        struct page new;
+        struct page old;
+
+        c->partial = page->next;
+        l = M_FREE;
+
+        do {
+
+            old.freelist = page->freelist;
+            old.counters = page->counters;
+            VM_BUG_ON(!old.frozen);
+
+            new.counters = old.counters;
+            new.freelist = old.freelist;
+
+            new.frozen = 0;
+
+            if (!new.inuse && (!n || n->nr_partial > s->min_partial))
+                m = M_FREE;
+            else {
+                struct kmem_cache_node *n2 = get_node(s,
+                    page_to_nid(page));
+
+                m = M_PARTIAL;
+                if (n != n2) {
+                    if (n)
+                        spin_unlock(&n->list_lock);
+
+                    n = n2;
+                    spin_lock(&n->list_lock);
+                }
+            }
+
+            if (l != m) {
+                if (l == M_PARTIAL)
+                    remove_partial(n, page);
+                else
+                    add_partial(n, page, 1);
+
+                l = m;
+            }
+
+        } while (!cmpxchg_double_slab(s, page,
+                old.freelist, old.counters,
+                new.freelist, new.counters,
+                "unfreezing slab"));
+
+        if (m == M_FREE) {
+            stat(s, DEACTIVATE_EMPTY);
+            discard_slab(s, page);
+            stat(s, FREE_SLAB);
+        }
+    }
+
+    if (n)
+        spin_unlock(&n->list_lock);
+}
+
+/*
+ * Put a page that was just frozen (in __slab_free) into a partial page
+ * slot if available. This is done without interrupts disabled and without
+ * preemption disabled. The cmpxchg is racy and may put the partial page
+ * onto a random cpus partial slot.
+ *
+ * If we did not find a slot then simply move all the partials to the
+ * per node partial list.
+ */
+int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+{
+    struct page *oldpage;
+    int pages;
+    int pobjects;
+
+    do {
+        pages = 0;
+        pobjects = 0;
+        oldpage = this_cpu_read(s->cpu_slab->partial);
+
+        if (oldpage) {
+            pobjects = oldpage->pobjects;
+            pages = oldpage->pages;
+            if (drain && pobjects > s->cpu_partial) {
+                unsigned long flags;
+                /*
+                 * partial array is full. Move the existing
+                 * set to the per node partial list.
+                 */
+                local_irq_save(flags);
+                unfreeze_partials(s);
+                local_irq_restore(flags);
+                pobjects = 0;
+                pages = 0;
+            }
+        }
+
+        pages++;
+        pobjects += page->objects - page->inuse;
+
+        page->pages = pages;
+        page->pobjects = pobjects;
+        page->next = oldpage;
+
+    } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+    stat(s, CPU_PARTIAL_FREE);
+    return pobjects;
+}
+
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
     stat(s, CPUSLAB_FLUSH);
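put_cpu_partial() pushes a freshly frozen slab onto the per-cpu partial list with a this_cpu_cmpxchg() retry loop instead of taking a lock, and the running pages/pobjects totals are recomputed on every retry because another context may have swapped the list head in the meantime. A stand-alone sketch of the same push pattern using GCC __atomic builtins on a plain global pointer (user-space model with invented types; the kernel's per-cpu access and the drain/unfreeze_partials branch are omitted):

/* Sketch only: lock-free push onto a singly linked "partial" list while
 * carrying running totals, modeled on the put_cpu_partial() loop above. */
#include <stdio.h>

struct slab_model {
    struct slab_model *next;
    int objects;      /* capacity of this slab */
    int inuse;        /* objects currently allocated */
    int pages;        /* running total: slabs from here down the list */
    int pobjects;     /* running total: free objects on the list */
};

static struct slab_model *partial_head;   /* a per-cpu pointer in the kernel */

static int push_partial(struct slab_model *page)
{
    struct slab_model *oldpage;
    int pages, pobjects;

    do {
        /* Re-read the head and recompute the totals on every retry. */
        oldpage = __atomic_load_n(&partial_head, __ATOMIC_RELAXED);
        pages = oldpage ? oldpage->pages : 0;
        pobjects = oldpage ? oldpage->pobjects : 0;

        pages++;
        pobjects += page->objects - page->inuse;

        page->pages = pages;
        page->pobjects = pobjects;
        page->next = oldpage;
    } while (!__atomic_compare_exchange_n(&partial_head, &oldpage, page,
                          0, __ATOMIC_RELEASE, __ATOMIC_RELAXED));
    return pobjects;
}

int main(void)
{
    /* Single-threaded demo: the compare-exchange succeeds first try. */
    struct slab_model a = { .objects = 32, .inuse = 31 };
    struct slab_model b = { .objects = 32, .inuse = 16 };

    push_partial(&a);
    printf("cached free objects: %d\n", push_partial(&b));  /* 1 + 16 = 17 */
    return 0;
}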
@@ -1935,8 +2032,12 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
     struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
-    if (likely(c && c->page))
-        flush_slab(s, c);
+    if (likely(c)) {
+        if (c->page)
+            flush_slab(s, c);
+
+        unfreeze_partials(s);
+    }
 }
 
 static void flush_cpu_slab(void *d)

@@ -2027,12 +2128,39 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
     }
 }
 
+static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
+            int node, struct kmem_cache_cpu **pc)
+{
+    void *object;
+    struct kmem_cache_cpu *c;
+    struct page *page = new_slab(s, flags, node);
+
+    if (page) {
+        c = __this_cpu_ptr(s->cpu_slab);
+        if (c->page)
+            flush_slab(s, c);
+
+        /*
+         * No other reference to the page yet so we can
+         * muck around with it freely without cmpxchg
+         */
+        object = page->freelist;
+        page->freelist = NULL;
+
+        stat(s, ALLOC_SLAB);
+        c->node = page_to_nid(page);
+        c->page = page;
+        *pc = c;
+    } else
+        object = NULL;
+
+    return object;
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
  * Interrupts are disabled.
 *
  * Processing is still very fast if new objects have been freed to the
  * regular freelist. In that case we simply take over the regular freelist
  * as the lockless freelist and zap the regular freelist.
@@ -2049,7 +2177,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
               unsigned long addr, struct kmem_cache_cpu *c)
 {
     void **object;
-    struct page *page;
     unsigned long flags;
     struct page new;
     unsigned long counters;

@@ -2064,13 +2191,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
     c = this_cpu_ptr(s->cpu_slab);
 #endif
 
-    /* We handle __GFP_ZERO in the caller */
-    gfpflags &= ~__GFP_ZERO;
-
-    page = c->page;
-    if (!page)
+    if (!c->page)
         goto new_slab;
 
+redo:
     if (unlikely(!node_match(c, node))) {
         stat(s, ALLOC_NODE_MISMATCH);
         deactivate_slab(s, c);

@@ -2080,8 +2203,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
     stat(s, ALLOC_SLOWPATH);
 
     do {
-        object = page->freelist;
-        counters = page->counters;
+        object = c->page->freelist;
+        counters = c->page->counters;
         new.counters = counters;
         VM_BUG_ON(!new.frozen);

@@ -2093,17 +2216,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
          *
          * If there are objects left then we retrieve them
          * and use them to refill the per cpu queue.
          */
 
-        new.inuse = page->objects;
+        new.inuse = c->page->objects;
         new.frozen = object != NULL;
 
-    } while (!__cmpxchg_double_slab(s, page,
+    } while (!__cmpxchg_double_slab(s, c->page,
             object, counters,
             NULL, new.counters,
             "__slab_alloc"));
 
-    if (unlikely(!object)) {
+    if (!object) {
         c->page = NULL;
         stat(s, DEACTIVATE_BYPASS);
         goto new_slab;

@@ -2112,58 +2235,47 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
     stat(s, ALLOC_REFILL);
 
 load_freelist:
-    VM_BUG_ON(!page->frozen);
     c->freelist = get_freepointer(s, object);
     c->tid = next_tid(c->tid);
     local_irq_restore(flags);
     return object;
 
 new_slab:
-    page = get_partial(s, gfpflags, node);
-    if (page) {
-        stat(s, ALLOC_FROM_PARTIAL);
-        object = c->freelist;
 
-        if (kmem_cache_debug(s))
-            goto debug;
-        goto load_freelist;
+    if (c->partial) {
+        c->page = c->partial;
+        c->partial = c->page->next;
+        c->node = page_to_nid(c->page);
+        stat(s, CPU_PARTIAL_ALLOC);
+        c->freelist = NULL;
+        goto redo;
     }
 
-    page = new_slab(s, gfpflags, node);
+    /* Then do expensive stuff like retrieving pages from the partial lists */
+    object = get_partial(s, gfpflags, node, c);
 
-    if (page) {
-        c = __this_cpu_ptr(s->cpu_slab);
-        if (c->page)
-            flush_slab(s, c);
+    if (unlikely(!object)) {
 
-        /*
-         * No other reference to the page yet so we can
-         * muck around with it freely without cmpxchg
-         */
-        object = page->freelist;
-        page->freelist = NULL;
-        page->inuse = page->objects;
+        object = new_slab_objects(s, gfpflags, node, &c);
 
-        stat(s, ALLOC_SLAB);
-        c->node = page_to_nid(page);
-        c->page = page;
+        if (unlikely(!object)) {
+            if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+                slab_out_of_memory(s, gfpflags, node);
 
-        if (kmem_cache_debug(s))
-            goto debug;
-        goto load_freelist;
+            local_irq_restore(flags);
+            return NULL;
+        }
     }
-    if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
-        slab_out_of_memory(s, gfpflags, node);
-    local_irq_restore(flags);
-    return NULL;
 
-debug:
-    if (!object || !alloc_debug_processing(s, page, object, addr))
-        goto new_slab;
+    if (likely(!kmem_cache_debug(s)))
+        goto load_freelist;
 
+    /* Only entered in the debug case */
+    if (!alloc_debug_processing(s, c->page, object, addr))
+        goto new_slab;    /* Slab failed checks. Next slab needed */
+
     c->freelist = get_freepointer(s, object);
     deactivate_slab(s, c);
     c->page = NULL;
     c->node = NUMA_NO_NODE;
     local_irq_restore(flags);
     return object;
@@ -2333,16 +2445,29 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
         was_frozen = new.frozen;
         new.inuse--;
         if ((!new.inuse || !prior) && !was_frozen && !n) {
-            n = get_node(s, page_to_nid(page));
-            /*
-             * Speculatively acquire the list_lock.
-             * If the cmpxchg does not succeed then we may
-             * drop the list_lock without any processing.
-             *
-             * Otherwise the list_lock will synchronize with
-             * other processors updating the list of slabs.
-             */
-            spin_lock_irqsave(&n->list_lock, flags);
+
+            if (!kmem_cache_debug(s) && !prior)
+
+                /*
+                 * Slab was on no list before and will be partially empty
+                 * We can defer the list move and instead freeze it.
+                 */
+                new.frozen = 1;
+
+            else { /* Needs to be taken off a list */
+
+                n = get_node(s, page_to_nid(page));
+                /*
+                 * Speculatively acquire the list_lock.
+                 * If the cmpxchg does not succeed then we may
+                 * drop the list_lock without any processing.
+                 *
+                 * Otherwise the list_lock will synchronize with
+                 * other processors updating the list of slabs.
+                 */
+                spin_lock_irqsave(&n->list_lock, flags);
+
+            }
         }
         inuse = new.inuse;

@@ -2352,7 +2477,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
         "__slab_free"));
 
     if (likely(!n)) {
+
+        /*
+         * If we just froze the page then put it onto the
+         * per cpu partial list.
+         */
+        if (new.frozen && !was_frozen)
+            put_cpu_partial(s, page, 1);
+
         /*
          * The list lock was not taken therefore no list
          * activity can be necessary.
          */

@@ -2377,7 +2510,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
      */
     if (unlikely(!prior)) {
         remove_full(s, page);
-        add_partial(n, page, 1);
+        add_partial(n, page, DEACTIVATE_TO_TAIL);
         stat(s, FREE_ADD_PARTIAL);
     }
 }

@@ -2421,7 +2554,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
     slab_free_hook(s, x);
 
 redo:
-
     /*
      * Determine the currently cpus per cpu slab.
      * The cpu may change afterward. However that does not matter since
@@ -2685,7 +2817,7 @@ static void early_kmem_cache_node_alloc(int node)
     n = page->freelist;
     BUG_ON(!n);
     page->freelist = get_freepointer(kmem_cache_node, n);
-    page->inuse++;
+    page->inuse = 1;
     page->frozen = 0;
     kmem_cache_node->node[node] = n;
 #ifdef CONFIG_SLUB_DEBUG

@@ -2695,7 +2827,7 @@ static void early_kmem_cache_node_alloc(int node)
     init_kmem_cache_node(n, kmem_cache_node);
     inc_slabs_node(kmem_cache_node, node, page->objects);
 
-    add_partial(n, page, 0);
+    add_partial(n, page, DEACTIVATE_TO_HEAD);
 }
 
 static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -2911,7 +3043,34 @@ static int kmem_cache_open(struct kmem_cache *s,
      * The larger the object size is, the more pages we want on the partial
      * list to avoid pounding the page allocator excessively.
      */
-    set_min_partial(s, ilog2(s->size));
+    set_min_partial(s, ilog2(s->size) / 2);
+
+    /*
+     * cpu_partial determined the maximum number of objects kept in the
+     * per cpu partial lists of a processor.
+     *
+     * Per cpu partial lists mainly contain slabs that just have one
+     * object freed. If they are used for allocation then they can be
+     * filled up again with minimal effort. The slab will never hit the
+     * per node partial lists and therefore no locking will be required.
+     *
+     * This setting also determines
+     *
+     * A) The number of objects from per cpu partial slabs dumped to the
+     *    per node list when we reach the limit.
+     * B) The number of objects in cpu partial slabs to extract from the
+     *    per node list when we run out of per cpu objects. We only fetch 50%
+     *    to keep some capacity around for frees.
+     */
+    if (s->size >= PAGE_SIZE)
+        s->cpu_partial = 2;
+    else if (s->size >= 1024)
+        s->cpu_partial = 6;
+    else if (s->size >= 256)
+        s->cpu_partial = 13;
+    else
+        s->cpu_partial = 30;
+
     s->refcount = 1;
 #ifdef CONFIG_NUMA
     s->remote_node_defrag_ratio = 1000;
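The size thresholds above trade per-cpu caching against memory held in frozen slabs: small objects get a deep per-cpu partial pool, page-sized objects almost none. A trivial worked example of the mapping (user-space sketch, PAGE_SIZE assumed to be 4096 here):

/* Sketch only: the cpu_partial sizing rule from kmem_cache_open(),
 * evaluated for a few object sizes with an assumed 4 KiB page size. */
#include <stdio.h>

#define PAGE_SIZE_ASSUMED 4096

static int cpu_partial_for(int size)
{
    if (size >= PAGE_SIZE_ASSUMED)
        return 2;
    else if (size >= 1024)
        return 6;
    else if (size >= 256)
        return 13;
    else
        return 30;
}

int main(void)
{
    int sizes[] = { 64, 256, 1024, 4096, 8192 };

    for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
        printf("object size %5d -> cpu_partial = %d\n",
               sizes[i], cpu_partial_for(sizes[i]));
    return 0;
}

Note that get_partial_node() only refills until more than cpu_partial / 2 objects are cached, keeping roughly half the budget free for frees, as point B) of the comment in the hunk spells out.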
@@ -2970,13 +3129,13 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
 /*
  * Attempt to free all partial slabs on a node.
+ * This is called from kmem_cache_close(). We must be the last thread
+ * using the cache and therefore we do not need to lock anymore.
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
-    unsigned long flags;
     struct page *page, *h;
 
-    spin_lock_irqsave(&n->list_lock, flags);
     list_for_each_entry_safe(page, h, &n->partial, lru) {
         if (!page->inuse) {
             remove_partial(n, page);

@@ -2986,7 +3145,6 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
                 "Objects remaining on kmem_cache_close()");
         }
     }
-    spin_unlock_irqrestore(&n->list_lock, flags);
 }
 
 /*

@@ -3020,6 +3178,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
     s->refcount--;
     if (!s->refcount) {
         list_del(&s->list);
+        up_write(&slub_lock);
         if (kmem_cache_close(s)) {
             printk(KERN_ERR "SLUB %s: %s called for cache that "
                 "still has objects.\n", s->name, __func__);

@@ -3028,8 +3187,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
         if (s->flags & SLAB_DESTROY_BY_RCU)
             rcu_barrier();
         sysfs_slab_remove(s);
-    }
-    up_write(&slub_lock);
+    } else
+        up_write(&slub_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
@@ -3347,23 +3506,23 @@ int kmem_cache_shrink(struct kmem_cache *s)
          * list_lock. page->inuse here is the upper limit.
          */
         list_for_each_entry_safe(page, t, &n->partial, lru) {
-            if (!page->inuse) {
-                remove_partial(n, page);
-                discard_slab(s, page);
-            } else {
-                list_move(&page->lru,
-                slabs_by_inuse + page->inuse);
-            }
+            list_move(&page->lru, slabs_by_inuse + page->inuse);
+            if (!page->inuse)
+                n->nr_partial--;
         }
 
         /*
          * Rebuild the partial list with the slabs filled up most
          * first and the least used slabs at the end.
          */
-        for (i = objects - 1; i >= 0; i--)
+        for (i = objects - 1; i > 0; i--)
             list_splice(slabs_by_inuse + i, n->partial.prev);
 
         spin_unlock_irqrestore(&n->list_lock, flags);
+
+        /* Release empty slabs */
+        list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
+            discard_slab(s, page);
     }
 
     kfree(slabs_by_inuse);

@@ -4319,6 +4478,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
         for_each_possible_cpu(cpu) {
             struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+            struct page *page;
 
             if (!c || c->node < 0)
                 continue;

@@ -4334,6 +4494,13 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                 total += x;
                 nodes[c->node] += x;
             }
+            page = c->partial;
+
+            if (page) {
+                x = page->pobjects;
+                total += x;
+                nodes[c->node] += x;
+            }
             per_cpu[c->node]++;
         }
     }
@@ -4412,11 +4579,12 @@ struct slab_attribute {
 };
 
 #define SLAB_ATTR_RO(_name) \
-    static struct slab_attribute _name##_attr = __ATTR_RO(_name)
+    static struct slab_attribute _name##_attr = \
+    __ATTR(_name, 0400, _name##_show, NULL)
 
 #define SLAB_ATTR(_name) \
     static struct slab_attribute _name##_attr =  \
-    __ATTR(_name, 0644, _name##_show, _name##_store)
+    __ATTR(_name, 0600, _name##_show, _name##_store)
 
 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
 {

@@ -4485,6 +4653,27 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
 }
 SLAB_ATTR(min_partial);
 
+static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
+{
+    return sprintf(buf, "%u\n", s->cpu_partial);
+}
+
+static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
+                 size_t length)
+{
+    unsigned long objects;
+    int err;
+
+    err = strict_strtoul(buf, 10, &objects);
+    if (err)
+        return err;
+
+    s->cpu_partial = objects;
+    flush_all(s);
+    return length;
+}
+SLAB_ATTR(cpu_partial);
+
 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
 {
     if (!s->ctor)

@@ -4523,6 +4712,37 @@ static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
 }
 SLAB_ATTR_RO(objects_partial);
 
+static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
+{
+    int objects = 0;
+    int pages = 0;
+    int cpu;
+    int len;
+
+    for_each_online_cpu(cpu) {
+        struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
+
+        if (page) {
+            pages += page->pages;
+            objects += page->pobjects;
+        }
+    }
+
+    len = sprintf(buf, "%d(%d)", objects, pages);
+
+#ifdef CONFIG_SMP
+    for_each_online_cpu(cpu) {
+        struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
+
+        if (page && len < PAGE_SIZE - 20)
+            len += sprintf(buf + len, " C%d=%d(%d)", cpu,
+                page->pobjects, page->pages);
+    }
+#endif
+    return len + sprintf(buf + len, "\n");
+}
+SLAB_ATTR_RO(slabs_cpu_partial);
+
 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
 {
     return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
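The new cpu_partial and slabs_cpu_partial attributes are exposed per cache through the SLUB sysfs directory (/sys/kernel/slab/<cache>/), and after the 0400/0600 permission change above they are readable by root only. A small reader as a user-space sketch; the cache name used below is just an example, and the files only exist on SLUB kernels that include this series:

/* Sketch only: dump the per-cpu partial tunable and counters this series
 * adds under /sys/kernel/slab/<cache>/. */
#include <stdio.h>

static void show(const char *cache, const char *attr)
{
    char path[256], line[256];
    FILE *f;

    snprintf(path, sizeof(path), "/sys/kernel/slab/%s/%s", cache, attr);
    f = fopen(path, "r");
    if (!f) {
        perror(path);
        return;
    }
    if (fgets(line, sizeof(line), f))
        printf("%-20s %s", attr, line);
    fclose(f);
}

int main(void)
{
    const char *cache = "kmalloc-64";     /* example cache name */

    show(cache, "cpu_partial");           /* writable limit (mode 0600) */
    show(cache, "slabs_cpu_partial");     /* "objects(pages) C0=... C1=..." */
    return 0;
}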
@@ -4845,6 +5065,8 @@ STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
 STAT_ATTR(ORDER_FALLBACK, order_fallback);
 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
+STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
+STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
 #endif
 
 static struct attribute *slab_attrs[] = {

@@ -4853,6 +5075,7 @@ static struct attribute *slab_attrs[] = {
     &objs_per_slab_attr.attr,
     &order_attr.attr,
     &min_partial_attr.attr,
+    &cpu_partial_attr.attr,
     &objects_attr.attr,
     &objects_partial_attr.attr,
     &partial_attr.attr,

@@ -4865,6 +5088,7 @@ static struct attribute *slab_attrs[] = {
     &destroy_by_rcu_attr.attr,
     &shrink_attr.attr,
     &reserved_attr.attr,
+    &slabs_cpu_partial_attr.attr,
 #ifdef CONFIG_SLUB_DEBUG
     &total_objects_attr.attr,
     &slabs_attr.attr,

@@ -4906,6 +5130,8 @@ static struct attribute *slab_attrs[] = {
     &order_fallback_attr.attr,
     &cmpxchg_double_fail_attr.attr,
     &cmpxchg_double_cpu_fail_attr.attr,
+    &cpu_partial_alloc_attr.attr,
+    &cpu_partial_free_attr.attr,
 #endif
 #ifdef CONFIG_FAILSLAB
     &failslab_attr.attr,

@@ -5257,7 +5483,7 @@ static const struct file_operations proc_slabinfo_operations = {
 static int __init slab_proc_init(void)
 {
-    proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
+    proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
     return 0;
 }
 module_init(slab_proc_init);
tools/slub/slabinfo.c

@@ -42,6 +42,7 @@ struct slabinfo {
     unsigned long deactivate_remote_frees, order_fallback;
     unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail;
     unsigned long alloc_node_mismatch, deactivate_bypass;
+    unsigned long cpu_partial_alloc, cpu_partial_free;
     int numa[MAX_NODES];
     int numa_partial[MAX_NODES];
 } slabinfo[MAX_SLABS];

@@ -455,6 +456,11 @@ static void slab_stats(struct slabinfo *s)
         s->alloc_from_partial * 100 / total_alloc,
         s->free_remove_partial * 100 / total_free);
 
+    printf("Cpu partial list     %8lu %8lu %3lu %3lu\n",
+        s->cpu_partial_alloc, s->cpu_partial_free,
+        s->cpu_partial_alloc * 100 / total_alloc,
+        s->cpu_partial_free * 100 / total_free);
+
     printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
         s->deactivate_remote_frees, s->free_frozen,
         s->deactivate_remote_frees * 100 / total_alloc,

@@ -1145,7 +1151,7 @@ static void read_slab_dir(void)
         switch (de->d_type) {
            case DT_LNK:
             alias->name = strdup(de->d_name);
-            count = readlink(de->d_name, buffer, sizeof(buffer));
+            count = readlink(de->d_name, buffer, sizeof(buffer) - 1);
 
             if (count < 0)
                 fatal("Cannot read symlink %s\n", de->d_name);

@@ -1209,6 +1215,8 @@ static void read_slab_dir(void)
             slab->order_fallback = get_obj("order_fallback");
             slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail");
             slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail");
+            slab->cpu_partial_alloc = get_obj("cpu_partial_alloc");
+            slab->cpu_partial_free = get_obj("cpu_partial_free");
             slab->alloc_node_mismatch = get_obj("alloc_node_mismatch");
             slab->deactivate_bypass = get_obj("deactivate_bypass");
             chdir("..");
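The readlink() change in the hunk above is the classic defensive fix: readlink() reports how many bytes it wrote and never appends a terminating '\0', so reserving one byte of the buffer and terminating manually is the usual pattern. A minimal illustration (user-space sketch; the link target used is only an example):

/* Sketch only: readlink() returns the number of bytes placed in the buffer
 * and does not NUL-terminate, so leave room for one byte and add it. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char buffer[4096];
    ssize_t count = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1);

    if (count < 0) {
        perror("readlink");
        return 1;
    }
    buffer[count] = '\0';   /* safe: at most sizeof(buffer) - 1 bytes written */
    printf("-> %s\n", buffer);
    return 0;
}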