Commit c74d40e8 authored by Dan Schatzberg, committed by Linus Torvalds

loop: charge i/o to mem and blk cg

The current code only associates with the existing blkcg when aio is used
to access the backing file.  This patch covers all types of i/o to the
backing file and also associates the memcg so if the backing file is on
tmpfs, memory is charged appropriately.

This patch also exports cgroup_get_e_css and int_active_memcg so it can be
used by the loop module.

Link: https://lkml.kernel.org/r/20210610173944.1203706-4-schatzberg.dan@gmail.com
Signed-off-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: Chris Down <chris@chrisdown.name>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 04f94e3f
...@@ -78,6 +78,7 @@ ...@@ -78,6 +78,7 @@
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/ioprio.h> #include <linux/ioprio.h>
#include <linux/blk-cgroup.h> #include <linux/blk-cgroup.h>
#include <linux/sched/mm.h>
#include "loop.h" #include "loop.h"
...@@ -516,8 +517,6 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) ...@@ -516,8 +517,6 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{ {
struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
if (cmd->css)
css_put(cmd->css);
cmd->ret = ret; cmd->ret = ret;
lo_rw_aio_do_completion(cmd); lo_rw_aio_do_completion(cmd);
} }
...@@ -578,8 +577,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, ...@@ -578,8 +577,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
cmd->iocb.ki_complete = lo_rw_aio_complete; cmd->iocb.ki_complete = lo_rw_aio_complete;
cmd->iocb.ki_flags = IOCB_DIRECT; cmd->iocb.ki_flags = IOCB_DIRECT;
cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
if (cmd->css)
kthread_associate_blkcg(cmd->css);
if (rw == WRITE) if (rw == WRITE)
ret = call_write_iter(file, &cmd->iocb, &iter); ret = call_write_iter(file, &cmd->iocb, &iter);
...@@ -587,7 +584,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, ...@@ -587,7 +584,6 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
ret = call_read_iter(file, &cmd->iocb, &iter); ret = call_read_iter(file, &cmd->iocb, &iter);
lo_rw_aio_do_completion(cmd); lo_rw_aio_do_completion(cmd);
kthread_associate_blkcg(NULL);
if (ret != -EIOCBQUEUED) if (ret != -EIOCBQUEUED)
cmd->iocb.ki_complete(&cmd->iocb, ret, 0); cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
...@@ -928,7 +924,7 @@ struct loop_worker { ...@@ -928,7 +924,7 @@ struct loop_worker {
struct list_head cmd_list; struct list_head cmd_list;
struct list_head idle_list; struct list_head idle_list;
struct loop_device *lo; struct loop_device *lo;
struct cgroup_subsys_state *css; struct cgroup_subsys_state *blkcg_css;
unsigned long last_ran_at; unsigned long last_ran_at;
}; };
...@@ -957,7 +953,7 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) ...@@ -957,7 +953,7 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
spin_lock_irq(&lo->lo_work_lock); spin_lock_irq(&lo->lo_work_lock);
if (queue_on_root_worker(cmd->css)) if (queue_on_root_worker(cmd->blkcg_css))
goto queue_work; goto queue_work;
node = &lo->worker_tree.rb_node; node = &lo->worker_tree.rb_node;
...@@ -965,10 +961,10 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) ...@@ -965,10 +961,10 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
while (*node) { while (*node) {
parent = *node; parent = *node;
cur_worker = container_of(*node, struct loop_worker, rb_node); cur_worker = container_of(*node, struct loop_worker, rb_node);
if (cur_worker->css == cmd->css) { if (cur_worker->blkcg_css == cmd->blkcg_css) {
worker = cur_worker; worker = cur_worker;
break; break;
} else if ((long)cur_worker->css < (long)cmd->css) { } else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) {
node = &(*node)->rb_left; node = &(*node)->rb_left;
} else { } else {
node = &(*node)->rb_right; node = &(*node)->rb_right;
...@@ -980,13 +976,18 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) ...@@ -980,13 +976,18 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN); worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN);
/* /*
* In the event we cannot allocate a worker, just queue on the * In the event we cannot allocate a worker, just queue on the
* rootcg worker * rootcg worker and issue the I/O as the rootcg
*/ */
if (!worker) if (!worker) {
cmd->blkcg_css = NULL;
if (cmd->memcg_css)
css_put(cmd->memcg_css);
cmd->memcg_css = NULL;
goto queue_work; goto queue_work;
}
worker->css = cmd->css; worker->blkcg_css = cmd->blkcg_css;
css_get(worker->css); css_get(worker->blkcg_css);
INIT_WORK(&worker->work, loop_workfn); INIT_WORK(&worker->work, loop_workfn);
INIT_LIST_HEAD(&worker->cmd_list); INIT_LIST_HEAD(&worker->cmd_list);
INIT_LIST_HEAD(&worker->idle_list); INIT_LIST_HEAD(&worker->idle_list);
...@@ -1306,7 +1307,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) ...@@ -1306,7 +1307,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
idle_list) { idle_list) {
list_del(&worker->idle_list); list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree); rb_erase(&worker->rb_node, &lo->worker_tree);
css_put(worker->css); css_put(worker->blkcg_css);
kfree(worker); kfree(worker);
} }
spin_unlock_irq(&lo->lo_work_lock); spin_unlock_irq(&lo->lo_work_lock);
...@@ -2100,13 +2101,18 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -2100,13 +2101,18 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
} }
/* always use the first bio's css */ /* always use the first bio's css */
cmd->blkcg_css = NULL;
cmd->memcg_css = NULL;
#ifdef CONFIG_BLK_CGROUP #ifdef CONFIG_BLK_CGROUP
if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { if (rq->bio && rq->bio->bi_blkg) {
cmd->css = &bio_blkcg(rq->bio)->css; cmd->blkcg_css = &bio_blkcg(rq->bio)->css;
css_get(cmd->css); #ifdef CONFIG_MEMCG
} else cmd->memcg_css =
cgroup_get_e_css(cmd->blkcg_css->cgroup,
&memory_cgrp_subsys);
#endif
}
#endif #endif
cmd->css = NULL;
loop_queue_work(lo, cmd); loop_queue_work(lo, cmd);
return BLK_STS_OK; return BLK_STS_OK;
...@@ -2118,13 +2124,28 @@ static void loop_handle_cmd(struct loop_cmd *cmd) ...@@ -2118,13 +2124,28 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
const bool write = op_is_write(req_op(rq)); const bool write = op_is_write(req_op(rq));
struct loop_device *lo = rq->q->queuedata; struct loop_device *lo = rq->q->queuedata;
int ret = 0; int ret = 0;
struct mem_cgroup *old_memcg = NULL;
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
ret = -EIO; ret = -EIO;
goto failed; goto failed;
} }
if (cmd->blkcg_css)
kthread_associate_blkcg(cmd->blkcg_css);
if (cmd->memcg_css)
old_memcg = set_active_memcg(
mem_cgroup_from_css(cmd->memcg_css));
ret = do_req_filebacked(lo, rq); ret = do_req_filebacked(lo, rq);
if (cmd->blkcg_css)
kthread_associate_blkcg(NULL);
if (cmd->memcg_css) {
set_active_memcg(old_memcg);
css_put(cmd->memcg_css);
}
failed: failed:
/* complete non-aio request */ /* complete non-aio request */
if (!cmd->use_aio || ret) { if (!cmd->use_aio || ret) {
...@@ -2203,7 +2224,7 @@ static void loop_free_idle_workers(struct timer_list *timer) ...@@ -2203,7 +2224,7 @@ static void loop_free_idle_workers(struct timer_list *timer)
break; break;
list_del(&worker->idle_list); list_del(&worker->idle_list);
rb_erase(&worker->rb_node, &lo->worker_tree); rb_erase(&worker->rb_node, &lo->worker_tree);
css_put(worker->css); css_put(worker->blkcg_css);
kfree(worker); kfree(worker);
} }
if (!list_empty(&lo->idle_worker_list)) if (!list_empty(&lo->idle_worker_list))
......
...@@ -77,7 +77,8 @@ struct loop_cmd { ...@@ -77,7 +77,8 @@ struct loop_cmd {
long ret; long ret;
struct kiocb iocb; struct kiocb iocb;
struct bio_vec *bvec; struct bio_vec *bvec;
struct cgroup_subsys_state *css; struct cgroup_subsys_state *blkcg_css;
struct cgroup_subsys_state *memcg_css;
}; };
/* Support for loadable transfer modules */ /* Support for loadable transfer modules */
......
...@@ -1230,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) ...@@ -1230,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL; return NULL;
} }
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
{
return NULL;
}
static inline void mem_cgroup_put(struct mem_cgroup *memcg) static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{ {
} }
......
...@@ -577,6 +577,7 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp, ...@@ -577,6 +577,7 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
rcu_read_unlock(); rcu_read_unlock();
return css; return css;
} }
EXPORT_SYMBOL_GPL(cgroup_get_e_css);
static void cgroup_get_live(struct cgroup *cgrp) static void cgroup_get_live(struct cgroup *cgrp)
{ {
......
...@@ -78,6 +78,7 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; ...@@ -78,6 +78,7 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
/* Active memory cgroup to use from an interrupt context */ /* Active memory cgroup to use from an interrupt context */
DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg); DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
/* Socket memory accounting disabled? */ /* Socket memory accounting disabled? */
static bool cgroup_memory_nosocket; static bool cgroup_memory_nosocket;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment