Commit 4962a856 authored by Linus Torvalds

Merge tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "A mix of fixes and a few stragglers. In detail:

   - Revert the bogus __read_mostly that we discussed for the initial
     pull request.

   - Fix a merge window regression with fixed file registration error
     path handling.

   - Fix io-wq numa node affinities.

   - Series abstracting out an io_identity struct, making it both easier
     to see what the personality items are, and also easier to adopt
     more. Use this to cover audit logging.

   - Fix for read-ahead disabled block condition in async buffered
     reads, and using single page read-ahead to unify which
     generic_file_buffered_read() path is used.

   - Series for REQ_F_COMP_LOCKED fix and removal of it (Pavel)

   - Poll fix (Pavel)"

* tag 'io_uring-5.10-2020-10-20' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: use blk_queue_nowait() to check if NOWAIT supported
  mm: use limited read-ahead to satisfy read
  mm: mark async iocb read as NOWAIT once some data has been copied
  io_uring: fix double poll mask init
  io-wq: inherit audit loginuid and sessionid
  io_uring: use percpu counters to track inflight requests
  io_uring: assign new io_identity for task if members have changed
  io_uring: store io_identity in io_uring_task
  io_uring: COW io_identity on mismatch
  io_uring: move io identity items into separate struct
  io_uring: rely solely on work flags to determine personality.
  io_uring: pass required context in as flags
  io-wq: assign NUMA node locality if appropriate
  io_uring: fix error path cleanup in io_sqe_files_register()
  Revert "io_uring: mark io_uring_fops/io_op_defs as __read_mostly"
  io_uring: fix REQ_F_COMP_LOCKED by killing it
  io_uring: dig out COMP_LOCK from deep call chain
  io_uring: don't put a poll req under spinlock
  io_uring: don't unnecessarily clear F_LINK_TIMEOUT
  io_uring: don't set COMP_LOCKED if won't put
  ...
parents 38525c69 9ba0d0c8
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -18,6 +18,7 @@
 #include <linux/fs_struct.h>
 #include <linux/task_work.h>
 #include <linux/blk-cgroup.h>
+#include <linux/audit.h>
 
 #include "io-wq.h"
@@ -429,14 +430,10 @@ static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
         mmput(worker->mm);
         worker->mm = NULL;
     }
-    if (!work->mm)
-        return;
 
-    if (mmget_not_zero(work->mm)) {
-        kthread_use_mm(work->mm);
-        worker->mm = work->mm;
-        /* hang on to this mm */
-        work->mm = NULL;
+    if (mmget_not_zero(work->identity->mm)) {
+        kthread_use_mm(work->identity->mm);
+        worker->mm = work->identity->mm;
         return;
     }
@@ -448,9 +445,11 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
                       struct io_wq_work *work)
 {
 #ifdef CONFIG_BLK_CGROUP
-    if (work->blkcg_css != worker->blkcg_css) {
-        kthread_associate_blkcg(work->blkcg_css);
-        worker->blkcg_css = work->blkcg_css;
+    if (!(work->flags & IO_WQ_WORK_BLKCG))
+        return;
+    if (work->identity->blkcg_css != worker->blkcg_css) {
+        kthread_associate_blkcg(work->identity->blkcg_css);
+        worker->blkcg_css = work->identity->blkcg_css;
     }
 #endif
 }
@@ -458,9 +457,9 @@ static inline void io_wq_switch_blkcg(struct io_worker *worker,
 static void io_wq_switch_creds(struct io_worker *worker,
                    struct io_wq_work *work)
 {
-    const struct cred *old_creds = override_creds(work->creds);
+    const struct cred *old_creds = override_creds(work->identity->creds);
 
-    worker->cur_creds = work->creds;
+    worker->cur_creds = work->identity->creds;
     if (worker->saved_creds)
         put_cred(old_creds); /* creds set by previous switch */
     else
@@ -470,20 +469,26 @@ static void io_wq_switch_creds(struct io_worker *worker,
 static void io_impersonate_work(struct io_worker *worker,
                 struct io_wq_work *work)
 {
-    if (work->files && current->files != work->files) {
+    if ((work->flags & IO_WQ_WORK_FILES) &&
+        current->files != work->identity->files) {
         task_lock(current);
-        current->files = work->files;
-        current->nsproxy = work->nsproxy;
+        current->files = work->identity->files;
+        current->nsproxy = work->identity->nsproxy;
         task_unlock(current);
     }
-    if (work->fs && current->fs != work->fs)
-        current->fs = work->fs;
-    if (work->mm != worker->mm)
+    if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
+        current->fs = work->identity->fs;
+    if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
         io_wq_switch_mm(worker, work);
-    if (worker->cur_creds != work->creds)
+    if ((work->flags & IO_WQ_WORK_CREDS) &&
+        worker->cur_creds != work->identity->creds)
         io_wq_switch_creds(worker, work);
-    current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
+    current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
     io_wq_switch_blkcg(worker, work);
+#ifdef CONFIG_AUDIT
+    current->loginuid = work->identity->loginuid;
+    current->sessionid = work->identity->sessionid;
+#endif
 }
 
 static void io_assign_current_work(struct io_worker *worker,
@@ -496,6 +501,11 @@ static void io_assign_current_work(struct io_worker *worker,
         cond_resched();
     }
 
+#ifdef CONFIG_AUDIT
+    current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
+    current->sessionid = AUDIT_SID_UNSET;
+#endif
+
     spin_lock_irq(&worker->lock);
     worker->cur_work = work;
     spin_unlock_irq(&worker->lock);
@@ -676,6 +686,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
         kfree(worker);
         return false;
     }
+    kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));
 
     raw_spin_lock_irq(&wqe->lock);
     hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
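Note: the io-wq.c hunks above all follow one pattern: the per-work context fields are gone, and a worker only dereferences work->identity when the matching IO_WQ_WORK_* capability flag is set. A minimal userspace sketch of that flag-guarded pattern (plain C, illustrative names only, not the kernel code):

#include <stdio.h>

/* Mirror of the idea behind IO_WQ_WORK_FILES / IO_WQ_WORK_CREDS: each bit
 * says "this identity field is valid for this work item". */
enum {
    WORK_FILES = 1 << 0,
    WORK_CREDS = 1 << 1,
};

struct identity { int files_id; int creds_id; };
struct work { unsigned flags; struct identity *identity; };
struct worker { int files_id; int creds_id; };

static void impersonate(struct worker *w, const struct work *work)
{
    /* Only dereference an identity field when its flag is set. */
    if ((work->flags & WORK_FILES) && w->files_id != work->identity->files_id)
        w->files_id = work->identity->files_id;
    if ((work->flags & WORK_CREDS) && w->creds_id != work->identity->creds_id)
        w->creds_id = work->identity->creds_id;
}

int main(void)
{
    struct identity id = { .files_id = 42, .creds_id = 7 };
    struct work work = { .flags = WORK_CREDS, .identity = &id };
    struct worker w = { 0, 0 };

    impersonate(&w, &work);
    /* Prints "files=0 creds=7": only the flagged field was adopted. */
    printf("files=%d creds=%d\n", w.files_id, w.creds_id);
    return 0;
}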
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -1,6 +1,8 @@
 #ifndef INTERNAL_IO_WQ_H
 #define INTERNAL_IO_WQ_H
 
+#include <linux/io_uring.h>
+
 struct io_wq;
 
 enum {
@@ -10,6 +12,12 @@ enum {
     IO_WQ_WORK_NO_CANCEL = 8,
     IO_WQ_WORK_CONCURRENT = 16,
 
+    IO_WQ_WORK_FILES = 32,
+    IO_WQ_WORK_FS = 64,
+    IO_WQ_WORK_MM = 128,
+    IO_WQ_WORK_CREDS = 256,
+    IO_WQ_WORK_BLKCG = 512,
+
     IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
 };
@@ -85,15 +93,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
     struct io_wq_work_node list;
-    struct files_struct *files;
-    struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
-    struct cgroup_subsys_state *blkcg_css;
-#endif
-    const struct cred *creds;
-    struct nsproxy *nsproxy;
-    struct fs_struct *fs;
-    unsigned long fsize;
+    struct io_identity *identity;
     unsigned flags;
 };
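Note: collapsing those eight context fields into one pointer also shrinks every queued work item. A toy measurement, with void pointers standing in for the original field types:

#include <stdio.h>

struct old_work {            /* shape of the old struct io_wq_work */
    void *list, *files, *mm, *blkcg_css, *creds, *nsproxy, *fs;
    unsigned long fsize;
    unsigned flags;
};

struct new_work {            /* shape of the new struct io_wq_work */
    void *list;
    void *identity;          /* one pointer to shared, refcounted state */
    unsigned flags;
};

int main(void)
{
    /* On LP64 this prints roughly "old: 72, new: 24". */
    printf("old: %zu, new: %zu\n",
           sizeof(struct old_work), sizeof(struct new_work));
    return 0;
}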
fs/io_uring.c: large diff collapsed by the viewer (not shown).
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1268,6 +1268,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
     kuid_t kloginuid;
     int rv;
 
+    /* Don't let kthreads write their own loginuid */
+    if (current->flags & PF_KTHREAD)
+        return -EPERM;
+
     rcu_read_lock();
     if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
         rcu_read_unlock();
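Note: this guard exists because io-wq workers are kernel threads that now inherit a loginuid from the submitting task; without it, a worker could overwrite its own audit identity through /proc. A self-contained sketch of the early-reject pattern (PF_KTHREAD's bit value matches the kernel's; everything else is illustrative):

#include <errno.h>
#include <stdio.h>

#define PF_KTHREAD 0x00200000   /* same bit value as the kernel flag */

static int loginuid_write(unsigned int task_flags, unsigned int uid)
{
    /* Don't let kthreads write their own loginuid. */
    if (task_flags & PF_KTHREAD)
        return -EPERM;
    (void)uid;                  /* a real handler would validate and store */
    return 0;
}

int main(void)
{
    printf("user task: %d\n", loginuid_write(0, 1000));          /* 0 */
    printf("kthread:   %d\n", loginuid_write(PF_KTHREAD, 1000)); /* -EPERM */
    return 0;
}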
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -4,18 +4,33 @@
 
 #include <linux/sched.h>
 #include <linux/xarray.h>
-#include <linux/percpu-refcount.h>
+
+struct io_identity {
+    struct files_struct *files;
+    struct mm_struct *mm;
+#ifdef CONFIG_BLK_CGROUP
+    struct cgroup_subsys_state *blkcg_css;
+#endif
+    const struct cred *creds;
+    struct nsproxy *nsproxy;
+    struct fs_struct *fs;
+    unsigned long fsize;
+#ifdef CONFIG_AUDIT
+    kuid_t loginuid;
+    unsigned int sessionid;
+#endif
+    refcount_t count;
+};
 
 struct io_uring_task {
     /* submission side */
     struct xarray xa;
     struct wait_queue_head wait;
     struct file *last;
-    atomic_long_t req_issue;
-
-    /* completion side */
-    bool in_idle ____cacheline_aligned_in_smp;
-    atomic_long_t req_complete;
+    struct percpu_counter inflight;
+    struct io_identity __identity;
+    struct io_identity *identity;
+    bool in_idle;
 };
 
 #if defined(CONFIG_IO_URING)
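Note: the refcount_t count field is what lets tasks share a single io_identity until a member changes; the "io_uring: COW io_identity on mismatch" commit then clones the struct rather than mutating the shared copy. A minimal single-threaded userspace sketch of that get/put/clone lifecycle (plain int refcount, one stand-in member; not the kernel implementation):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct identity {
    int creds;          /* stand-in for the real members */
    int refcount;
};

static struct identity *identity_get(struct identity *id)
{
    id->refcount++;
    return id;
}

static void identity_put(struct identity *id)
{
    if (--id->refcount == 0)
        free(id);
}

/* COW: if the member differs, clone instead of mutating the shared copy. */
static struct identity *identity_cow(struct identity *old, int new_creds)
{
    struct identity *id;

    if (old->creds == new_creds)
        return identity_get(old);
    id = malloc(sizeof(*id));
    memcpy(id, old, sizeof(*id));
    id->creds = new_creds;
    id->refcount = 1;
    identity_put(old);          /* drop the reference to the shared copy */
    return id;
}

int main(void)
{
    struct identity *a = malloc(sizeof(*a));
    *a = (struct identity){ .creds = 1000, .refcount = 1 };

    /* creds differ, so the sharer gets a private clone; 'a' is untouched. */
    struct identity *b = identity_cow(identity_get(a), 0);
    printf("a->creds=%d b->creds=%d\n", a->creds, b->creds);

    identity_put(a);
    identity_put(b);
    return 0;
}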
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2199,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
     last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
     offset = *ppos & ~PAGE_MASK;
 
+    /*
+     * If we've already successfully copied some data, then we
+     * can no longer safely return -EIOCBQUEUED. Hence mark
+     * an async read NOWAIT at that point.
+     */
+    if (written && (iocb->ki_flags & IOCB_WAITQ))
+        iocb->ki_flags |= IOCB_NOWAIT;
+
     for (;;) {
         struct page *page;
         pgoff_t end_index;
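Note: the comment in the hunk states the invariant: an async buffered read may report -EIOCBQUEUED only while zero bytes have been copied; after any progress it must behave as NOWAIT and return the short count, otherwise the copied bytes would be lost when the request is retried. A toy model of that discipline (illustrative names, not the kernel API):

#include <stdio.h>

#define RES_QUEUED (-1)   /* stands in for -EIOCBQUEUED */

static int async_read(int pages_ready, int pages_wanted)
{
    int copied = 0;

    while (copied < pages_wanted) {
        if (copied >= pages_ready) {
            /* Nothing copied yet: safe to go async and retry later. */
            if (copied == 0)
                return RES_QUEUED;
            /* Partial progress: behave as NOWAIT and return the short
             * count, since "queued" would discard the copied bytes. */
            return copied;
        }
        copied++;
    }
    return copied;
}

int main(void)
{
    printf("no data ready: %d\n", async_read(0, 4));  /* queued */
    printf("partial data:  %d\n", async_read(2, 4));  /* short read of 2 */
    printf("all data:      %d\n", async_read(4, 4));  /* full read of 4 */
    return 0;
}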
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -552,15 +552,23 @@ static void ondemand_readahead(struct readahead_control *ractl,
 void page_cache_sync_ra(struct readahead_control *ractl,
         struct file_ra_state *ra, unsigned long req_count)
 {
-    /* no read-ahead */
-    if (!ra->ra_pages)
-        return;
+    bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
 
-    if (blk_cgroup_congested())
-        return;
+    /*
+     * Even if read-ahead is disabled, issue this request as read-ahead
+     * as we'll need it to satisfy the requested range. The forced
+     * read-ahead will do the right thing and limit the read to just the
+     * requested range, which we'll set to 1 page for this case.
+     */
+    if (!ra->ra_pages || blk_cgroup_congested()) {
+        if (!ractl->file)
+            return;
+        req_count = 1;
+        do_forced_ra = true;
+    }
 
     /* be dumb */
-    if (ractl->file && (ractl->file->f_mode & FMODE_RANDOM)) {
+    if (do_forced_ra) {
         force_page_cache_ra(ractl, ra, req_count);
         return;
     }
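Note: the rewritten page_cache_sync_ra() boils down to a three-way decision: no file at all means skip; disabled read-ahead or a congested blk cgroup demotes the request to a forced one-page read-ahead (enough to satisfy the read, nothing speculative); FMODE_RANDOM keeps forcing read-ahead at the requested size. A pure-function sketch of that decision table (illustrative only, mirroring the logic above):

#include <stdbool.h>
#include <stdio.h>

struct ra_decision { bool forced; unsigned long count; bool skip; };

static struct ra_decision sync_ra(bool has_file, bool random_mode,
                                  unsigned long ra_pages, bool congested,
                                  unsigned long req_count)
{
    bool do_forced_ra = has_file && random_mode;

    if (ra_pages == 0 || congested) {
        if (!has_file)
            return (struct ra_decision){ .skip = true };
        req_count = 1;            /* limit to exactly what the read needs */
        do_forced_ra = true;
    }
    return (struct ra_decision){ .forced = do_forced_ra, .count = req_count };
}

int main(void)
{
    /* Read-ahead disabled: previously skipped, now a forced 1-page read. */
    struct ra_decision d = sync_ra(true, false, 0, false, 8);
    printf("forced=%d count=%lu skip=%d\n", d.forced, d.count, d.skip);
    return 0;
}

This is the behavior the pull text calls "using single page read-ahead to unify which generic_file_buffered_read() path is used".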