Commit 9d235ac0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'ucount-fixes-for-v5.15' of...

Merge branch 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull ucounts fixes from Eric Biederman:
 "There has been one very hard to track down bug in the ucount code that
  we have been tracking since roughly v5.14 was released. Alex managed
  to find a reliable reproducer a few days ago and then I was able to
  instrument the code and figure out what the issue was.

  It turns out the sigqueue_alloc single atomic operation optimization
  did not play nicely with ucounts multiple level rlimits. It turned out
  that either sigqueue_alloc or sigqueue_free could be operating on
  multiple levels and trigger the conditions for the optimization on
  more than one level at the same time.

  To deal with that situation I have introduced inc_rlimit_get_ucounts
  and dec_rlimit_put_ucounts that just focuses on the optimization and
  the rlimit and ucount changes.

  While looking into the big bug I found I couple of other little issues
  so I am including those fixes here as well.

  When I have time I would very much like to dig into process ownership
  of the shared signal queue and see if we could pick a single owner for
  the entire queue so that all of the rlimits can count to that owner.
  That should entirely remove the need to call get_ucounts and
  put_ucounts in sigqueue_alloc and sigqueue_free. It is difficult
  because Linux unlike POSIX supports setuid that works on a single
  thread"

* 'ucount-fixes-for-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  ucounts: Move get_ucounts from cred_alloc_blank to key_change_session_keyring
  ucounts: Proper error handling in set_cred_ucounts
  ucounts: Pair inc_rlimit_ucounts with dec_rlimit_ucoutns in commit_creds
  ucounts: Fix signal ucount refcounting
parents 6c2c7127 5ebcbe34
......@@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
static inline void set_rlimit_ucount_max(struct user_namespace *ns,
......
......@@ -225,8 +225,6 @@ struct cred *cred_alloc_blank(void)
#ifdef CONFIG_DEBUG_CREDENTIALS
new->magic = CRED_MAGIC;
#endif
new->ucounts = get_ucounts(&init_ucounts);
if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
goto error;
......@@ -501,7 +499,7 @@ int commit_creds(struct cred *new)
inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
if (new->user != old->user)
if (new->user != old->user || new->user_ns != old->user_ns)
dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
alter_cred_subscribers(old, -2);
......@@ -669,7 +667,7 @@ int set_cred_ucounts(struct cred *new)
{
struct task_struct *task = current;
const struct cred *old = task->real_cred;
struct ucounts *old_ucounts = new->ucounts;
struct ucounts *new_ucounts, *old_ucounts = new->ucounts;
if (new->user == old->user && new->user_ns == old->user_ns)
return 0;
......@@ -681,9 +679,10 @@ int set_cred_ucounts(struct cred *new)
if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
return 0;
if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid)))
if (!(new_ucounts = alloc_ucounts(new->user_ns, new->euid)))
return -EAGAIN;
new->ucounts = new_ucounts;
if (old_ucounts)
put_ucounts(old_ucounts);
......
......@@ -426,22 +426,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
*/
rcu_read_lock();
ucounts = task_ucounts(t);
sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
switch (sigpending) {
case 1:
if (likely(get_ucounts(ucounts)))
break;
fallthrough;
case LONG_MAX:
/*
* we need to decrease the ucount in the userns tree on any
* failure to avoid counts leaking.
*/
dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
rcu_read_unlock();
return NULL;
}
sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
rcu_read_unlock();
if (!sigpending)
return NULL;
if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
......@@ -450,8 +438,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
}
if (unlikely(q == NULL)) {
if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
put_ucounts(ucounts);
dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
} else {
INIT_LIST_HEAD(&q->list);
q->flags = sigqueue_flags;
......@@ -464,8 +451,8 @@ static void __sigqueue_free(struct sigqueue *q)
{
if (q->flags & SIGQUEUE_PREALLOC)
return;
if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
put_ucounts(q->ucounts);
if (q->ucounts) {
dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
q->ucounts = NULL;
}
kmem_cache_free(sigqueue_cachep, q);
......
......@@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
return (new == 0);
}
static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
struct ucounts *last, enum ucount_type type)
{
struct ucounts *iter, *next;
for (iter = ucounts; iter != last; iter = next) {
long dec = atomic_long_add_return(-1, &iter->ucount[type]);
WARN_ON_ONCE(dec < 0);
next = iter->ns->ucounts;
if (dec == 0)
put_ucounts(iter);
}
}
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
{
do_dec_rlimit_put_ucounts(ucounts, NULL, type);
}
long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
{
/* Caller must hold a reference to ucounts */
struct ucounts *iter;
long dec, ret = 0;
for (iter = ucounts; iter; iter = iter->ns->ucounts) {
long max = READ_ONCE(iter->ns->ucount_max[type]);
long new = atomic_long_add_return(1, &iter->ucount[type]);
if (new < 0 || new > max)
goto unwind;
if (iter == ucounts)
ret = new;
/*
* Grab an extra ucount reference for the caller when
* the rlimit count was previously 0.
*/
if (new != 1)
continue;
if (!get_ucounts(iter))
goto dec_unwind;
}
return ret;
dec_unwind:
dec = atomic_long_add_return(-1, &iter->ucount[type]);
WARN_ON_ONCE(dec < 0);
unwind:
do_dec_rlimit_put_ucounts(ucounts, iter, type);
return 0;
}
bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
{
struct ucounts *iter;
......
......@@ -918,6 +918,13 @@ void key_change_session_keyring(struct callback_head *twork)
return;
}
/* If get_ucounts fails more bits are needed in the refcount */
if (unlikely(!get_ucounts(old->ucounts))) {
WARN_ONCE(1, "In %s get_ucounts failed\n", __func__);
put_cred(new);
return;
}
new-> uid = old-> uid;
new-> euid = old-> euid;
new-> suid = old-> suid;
......@@ -927,6 +934,7 @@ void key_change_session_keyring(struct callback_head *twork)
new-> sgid = old-> sgid;
new->fsgid = old->fsgid;
new->user = get_uid(old->user);
new->ucounts = old->ucounts;
new->user_ns = get_user_ns(old->user_ns);
new->group_info = get_group_info(old->group_info);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment