Commit 1db98bcf authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge still more updates from Andrew Morton:
 "18 patches.

  Subsystems affected by this patch series: mm (memcg and cleanups) and
  epoll"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm/Kconfig: fix spelling mistake "whats" -> "what's"
  selftests/filesystems: expand epoll with epoll_pwait2
  epoll: wire up syscall epoll_pwait2
  epoll: add syscall epoll_pwait2
  epoll: convert internal api to timespec64
  epoll: eliminate unnecessary lock for zero timeout
  epoll: replace gotos with a proper loop
  epoll: pull all code between fetch_events and send_event into the loop
  epoll: simplify and optimize busy loop logic
  epoll: move eavail next to the list_empty_careful check
  epoll: pull fatal signal checks into ep_send_events()
  epoll: simplify signal handling
  epoll: check for events when removing a timed out thread from the wait queue
  mm/memcontrol:rewrite mem_cgroup_page_lruvec()
  mm, kvm: account kvm_vcpu_mmap to kmemcg
  mm/memcg: remove unused definitions
  mm/memcg: warning on !memcg after readahead page charged
  mm/memcg: bail early from swap accounting if memcg disabled
parents 3644e2d2 01ab1ede
......@@ -480,3 +480,4 @@
548 common pidfd_getfd sys_pidfd_getfd
549 common faccessat2 sys_faccessat2
550 common process_madvise sys_process_madvise
551 common epoll_pwait2 sys_epoll_pwait2
......@@ -454,3 +454,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
......@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
#define __NR_compat_syscalls 441
#define __NR_compat_syscalls 442
#endif
#define __ARCH_WANT_SYS_CLONE
......
......@@ -889,6 +889,8 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
__SYSCALL(__NR_faccessat2, sys_faccessat2)
#define __NR_process_madvise 440
__SYSCALL(__NR_process_madvise, sys_process_madvise)
#define __NR_epoll_pwait2 441
__SYSCALL(__NR_epoll_pwait2, sys_epoll_pwait2)
/*
* Please add new compat syscalls above this comment and update
......
......@@ -361,3 +361,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
......@@ -440,3 +440,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
......@@ -446,3 +446,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
......@@ -379,3 +379,4 @@
438 n32 pidfd_getfd sys_pidfd_getfd
439 n32 faccessat2 sys_faccessat2
440 n32 process_madvise sys_process_madvise
441 n32 epoll_pwait2 sys_epoll_pwait2
......@@ -355,3 +355,4 @@
438 n64 pidfd_getfd sys_pidfd_getfd
439 n64 faccessat2 sys_faccessat2
440 n64 process_madvise sys_process_madvise
441 n64 epoll_pwait2 sys_epoll_pwait2
......@@ -428,3 +428,4 @@
438 o32 pidfd_getfd sys_pidfd_getfd
439 o32 faccessat2 sys_faccessat2
440 o32 process_madvise sys_process_madvise
441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
......@@ -438,3 +438,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
......@@ -530,3 +530,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
......@@ -443,3 +443,4 @@
438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 sys_epoll_pwait2
......@@ -443,3 +443,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
......@@ -486,3 +486,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
......@@ -445,3 +445,4 @@
438 i386 pidfd_getfd sys_pidfd_getfd
439 i386 faccessat2 sys_faccessat2
440 i386 process_madvise sys_process_madvise
441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
......@@ -362,6 +362,7 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
#
# Due to a historical design error, certain syscalls are numbered differently
......
......@@ -9869,7 +9869,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
r = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!page)
goto fail_free_lapic;
vcpu->arch.pio_data = page_address(page);
......
......@@ -411,3 +411,4 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
This diff is collapsed.
......@@ -537,6 +537,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
int maxevents, int timeout,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize);
asmlinkage long compat_sys_epoll_pwait2(int epfd,
struct epoll_event __user *events,
int maxevents,
const struct __kernel_timespec __user *timeout,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize);
/* fs/fcntl.c */
asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
......
......@@ -620,9 +620,10 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
/**
* mem_cgroup_lruvec - get the lru list vector for a memcg & node
* @memcg: memcg of the wanted lruvec
* @pgdat: pglist_data
*
* Returns the lru list vector holding pages for a given @memcg &
* @node combination. This can be the node lruvec, if the memory
* @pgdat combination. This can be the node lruvec, if the memory
* controller is disabled.
*/
static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
......@@ -652,7 +653,21 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
return lruvec;
}
struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
/**
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
* @page: the page
* @pgdat: pgdat of the page
*
* This function relies on page->mem_cgroup being stable.
*/
static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
struct pglist_data *pgdat)
{
struct mem_cgroup *memcg = page_memcg(page);
VM_WARN_ON_ONCE_PAGE(!memcg, page);
return mem_cgroup_lruvec(memcg, pgdat);
}
static inline bool lruvec_holds_page_lru_lock(struct page *page,
struct lruvec *lruvec)
......@@ -913,41 +928,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
local_irq_restore(flags);
}
/**
* mod_memcg_page_state - update page state statistics
* @page: the page
* @idx: page state item to account
* @val: number of pages (positive or negative)
*
* The @page must be locked or the caller must use lock_page_memcg()
* to prevent double accounting when the page is concurrently being
* moved to another memcg:
*
* lock_page(page) or lock_page_memcg(page)
* if (TestClearPageState(page))
* mod_memcg_page_state(page, state, -1);
* unlock_page(page) or unlock_page_memcg(page)
*
* Kernel pages are an exception to this, since they'll never move.
*/
static inline void __mod_memcg_page_state(struct page *page,
int idx, int val)
{
struct mem_cgroup *memcg = page_memcg(page);
if (memcg)
__mod_memcg_state(memcg, idx, val);
}
static inline void mod_memcg_page_state(struct page *page,
int idx, int val)
{
struct mem_cgroup *memcg = page_memcg(page);
if (memcg)
mod_memcg_state(memcg, idx, val);
}
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
......@@ -1395,18 +1375,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
{
}
static inline void __mod_memcg_page_state(struct page *page,
int idx,
int nr)
{
}
static inline void mod_memcg_page_state(struct page *page,
int idx,
int nr)
{
}
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
......@@ -1479,34 +1447,6 @@ static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
}
#endif /* CONFIG_MEMCG */
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __inc_memcg_state(struct mem_cgroup *memcg,
int idx)
{
__mod_memcg_state(memcg, idx, 1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __dec_memcg_state(struct mem_cgroup *memcg,
int idx)
{
__mod_memcg_state(memcg, idx, -1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __inc_memcg_page_state(struct page *page,
int idx)
{
__mod_memcg_page_state(page, idx, 1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __dec_memcg_page_state(struct page *page,
int idx)
{
__mod_memcg_page_state(page, idx, -1);
}
static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
__mod_lruvec_kmem_state(p, idx, 1);
......@@ -1517,34 +1457,6 @@ static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx)
__mod_lruvec_kmem_state(p, idx, -1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void inc_memcg_state(struct mem_cgroup *memcg,
int idx)
{
mod_memcg_state(memcg, idx, 1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void dec_memcg_state(struct mem_cgroup *memcg,
int idx)
{
mod_memcg_state(memcg, idx, -1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void inc_memcg_page_state(struct page *page,
int idx)
{
mod_memcg_page_state(page, idx, 1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void dec_memcg_page_state(struct page *page,
int idx)
{
mod_memcg_page_state(page, idx, -1);
}
static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
{
struct mem_cgroup *memcg;
......@@ -1733,21 +1645,6 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order)
__memcg_kmem_uncharge_page(page, order);
}
static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
unsigned int nr_pages)
{
if (memcg_kmem_enabled())
return __memcg_kmem_charge(memcg, gfp, nr_pages);
return 0;
}
static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg,
unsigned int nr_pages)
{
if (memcg_kmem_enabled())
__memcg_kmem_uncharge(memcg, nr_pages);
}
/*
* A helper for accessing memcg's kmem_id, used for getting
* corresponding LRU lists.
......
......@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm);
BUG(); \
} \
} while (0)
#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \
static bool __section(".data.once") __warned; \
int __ret_warn_once = !!(cond); \
\
if (unlikely(__ret_warn_once && !__warned)) { \
dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
__warned = true; \
WARN_ON(1); \
} \
unlikely(__ret_warn_once); \
})
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
......@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
......
......@@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
int maxevents, int timeout,
const sigset_t __user *sigmask,
size_t sigsetsize);
asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events,
int maxevents,
const struct __kernel_timespec __user *timeout,
const sigset_t __user *sigmask,
size_t sigsetsize);
/* fs/fcntl.c */
asmlinkage long sys_dup(unsigned int fildes);
......
......@@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
__SYSCALL(__NR_faccessat2, sys_faccessat2)
#define __NR_process_madvise 440
__SYSCALL(__NR_process_madvise, sys_process_madvise)
#define __NR_epoll_pwait2 441
__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
#undef __NR_syscalls
#define __NR_syscalls 441
#define __NR_syscalls 442
/*
* 32 bit systems traditionally used different
......
......@@ -68,6 +68,8 @@ COND_SYSCALL(epoll_create1);
COND_SYSCALL(epoll_ctl);
COND_SYSCALL(epoll_pwait);
COND_SYSCALL_COMPAT(epoll_pwait);
COND_SYSCALL(epoll_pwait2);
COND_SYSCALL_COMPAT(epoll_pwait2);
/* fs/fcntl.c */
......
......@@ -713,7 +713,7 @@ config ZSMALLOC_STAT
select DEBUG_FS
help
This option enables code in the zsmalloc to collect various
statistics about whats happening in zsmalloc and exports that
statistics about what's happening in zsmalloc and exports that
information to userspace via debugfs.
If unsure, say N.
......
......@@ -1342,46 +1342,6 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
}
#endif
/**
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
* @page: the page
* @pgdat: pgdat of the page
*
* This function relies on page's memcg being stable - see the
* access rules in commit_charge().
*/
struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
{
struct mem_cgroup_per_node *mz;
struct mem_cgroup *memcg;
struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
lruvec = &pgdat->__lruvec;
goto out;
}
memcg = page_memcg(page);
/*
* Swapcache readahead pages are added to the LRU - and
* possibly migrated - before they are charged.
*/
if (!memcg)
memcg = root_mem_cgroup;
mz = mem_cgroup_page_nodeinfo(memcg, page);
lruvec = &mz->lruvec;
out:
/*
* Since a node can be onlined after the mem_cgroup was created,
* we have to be prepared to initialize lruvec->zone here;
* and if offlined then reonlined, we need to reinitialize it.
*/
if (unlikely(lruvec->pgdat != pgdat))
lruvec->pgdat = pgdat;
return lruvec;
}
/**
* lock_page_lruvec - lock and return lruvec for a given page.
* @page: the page
......@@ -6987,6 +6947,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
return;
memcg = page_memcg(oldpage);
VM_WARN_ON_ONCE_PAGE(!memcg, oldpage);
if (!memcg)
return;
......@@ -7178,12 +7139,15 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);
if (mem_cgroup_disabled())
return;
if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
memcg = page_memcg(page);
/* Readahead page, never charged */
VM_WARN_ON_ONCE_PAGE(!memcg, page);
if (!memcg)
return;
......@@ -7242,12 +7206,15 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
struct mem_cgroup *memcg;
unsigned short oldid;
if (mem_cgroup_disabled())
return 0;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return 0;
memcg = page_memcg(page);
/* Readahead page, never charged */
VM_WARN_ON_ONCE_PAGE(!memcg, page);
if (!memcg)
return 0;
......
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <asm/unistd.h>
#include <linux/time_types.h>
#include <poll.h>
#include <unistd.h>
#include <assert.h>
......@@ -21,6 +23,19 @@ struct epoll_mtcontext
pthread_t waiter;
};
#ifndef __NR_epoll_pwait2
#define __NR_epoll_pwait2 -1
#endif
static inline int sys_epoll_pwait2(int fd, struct epoll_event *events,
int maxevents,
const struct __kernel_timespec *timeout,
const sigset_t *sigset, size_t sigsetsize)
{
return syscall(__NR_epoll_pwait2, fd, events, maxevents, timeout,
sigset, sigsetsize);
}
static void signal_handler(int signum)
{
}
......@@ -3377,4 +3392,61 @@ TEST(epoll61)
close(ctx.evfd);
}
/* Equivalent to basic test epoll1, but exercising epoll_pwait2. */
TEST(epoll62)
{
int efd;
int sfd[2];
struct epoll_event e;
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
efd = epoll_create(1);
ASSERT_GE(efd, 0);
e.events = EPOLLIN;
ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
ASSERT_EQ(write(sfd[1], "w", 1), 1);
EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
close(efd);
close(sfd[0]);
close(sfd[1]);
}
/* Epoll_pwait2 basic timeout test. */
TEST(epoll63)
{
const int cfg_delay_ms = 10;
unsigned long long tdiff;
struct __kernel_timespec ts;
int efd;
int sfd[2];
struct epoll_event e;
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
efd = epoll_create(1);
ASSERT_GE(efd, 0);
e.events = EPOLLIN;
ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
ts.tv_sec = 0;
ts.tv_nsec = cfg_delay_ms * 1000 * 1000;
tdiff = msecs();
EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, &ts, NULL, 0), 0);
tdiff = msecs() - tdiff;
EXPECT_GE(tdiff, cfg_delay_ms);
close(efd);
close(sfd[0]);
close(sfd[1]);
}
TEST_HARNESS_MAIN
......@@ -111,7 +111,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
{
struct page *page;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!page)
return -ENOMEM;
......
......@@ -3116,7 +3116,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
}
BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
goto vcpu_free;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment