BACKPORT: psi: Fix uaf issue when psi trigger is destroyed while being polled

commit a06247c6804f1a7c86a2e5398a4c1f1db1471848 upstream.

With a write operation on a psi file replacing the old trigger with a new
one, the lifetime of the trigger's waitqueue is totally arbitrary.
Overwriting an existing trigger causes its waitqueue to be freed, and a
pending poll() will stumble on trigger->event_wait, which was destroyed.
Fix this by disallowing redefinition of an existing psi trigger. If a write
operation is used on a file descriptor with an already existing psi
trigger, the operation will fail with EBUSY error.
Also remove the psi_disabled check from psi_trigger_destroy(): the flag
can be flipped after the trigger is created, in which case the early
return would skip destruction and leak the trigger.

Fixes: 0e94682b73bf ("psi: introduce psi monitor")
Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Analyzed-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com
[surenb: backported to 5.10 kernel]
CC: stable@vger.kernel.org # 5.10
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Conflicts:
        include/linux/psi.h
        kernel/cgroup/cgroup.c
        kernel/sched/psi.c

1. Resolved trivial merge conflicts.

Bug: 233410456
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: I7143fef51b874c2df8d792808b6a9b666eec2c7b
This commit is contained in:
Suren Baghdasaryan 2022-01-11 15:23:09 -08:00
parent 37bc600650
commit 768802453f
4 changed files with 38 additions and 44 deletions

View File

@ -30,7 +30,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);
struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res);
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
void psi_trigger_destroy(struct psi_trigger *t);
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);

View File

@ -120,9 +120,6 @@ struct psi_trigger {
* events to one per window
*/
u64 last_event_time;
/* Refcounting to prevent premature destruction */
struct kref refcount;
};
struct psi_group {

View File

@ -3530,14 +3530,19 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
cgroup_get(cgrp);
cgroup_kn_unlock(of->kn);
/* Allow only one trigger per file descriptor */
if (of->priv) {
cgroup_put(cgrp);
return -EBUSY;
}
new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
}
psi_trigger_replace(&of->priv, new);
smp_store_release(&of->priv, new);
cgroup_put(cgrp);
return nbytes;
@ -3572,7 +3577,7 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
static void cgroup_pressure_release(struct kernfs_open_file *of)
{
psi_trigger_replace(&of->priv, NULL);
psi_trigger_destroy(of->priv);
}
bool cgroup_psi_enabled(void)

View File

@ -1050,7 +1050,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
t->event = 0;
t->last_event_time = 0;
init_waitqueue_head(&t->event_wait);
kref_init(&t->refcount);
mutex_lock(&group->trigger_lock);
@ -1083,15 +1082,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
return t;
}
static void psi_trigger_destroy(struct kref *ref)
void psi_trigger_destroy(struct psi_trigger *t)
{
struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
struct psi_group *group = t->group;
struct psi_group *group;
struct kthread_worker *kworker_to_destroy = NULL;
if (static_branch_likely(&psi_disabled))
/*
* We do not check psi_disabled since it might have been disabled after
* the trigger got created.
*/
if (!t)
return;
group = t->group;
/*
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
* from under a polling process.
@ -1126,9 +1129,9 @@ static void psi_trigger_destroy(struct kref *ref)
mutex_unlock(&group->trigger_lock);
/*
* Wait for both *trigger_ptr from psi_trigger_replace and
* poll_kworker RCUs to complete their read-side critical sections
* before destroying the trigger and optionally the poll_kworker
* Wait for psi_schedule_poll_work RCU to complete its read-side
* critical section before destroying the trigger and optionally the
* poll_task.
*/
synchronize_rcu();
/*
@ -1150,18 +1153,6 @@ static void psi_trigger_destroy(struct kref *ref)
kfree(t);
}
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
{
struct psi_trigger *old = *trigger_ptr;
if (static_branch_likely(&psi_disabled))
return;
rcu_assign_pointer(*trigger_ptr, new);
if (old)
kref_put(&old->refcount, psi_trigger_destroy);
}
__poll_t psi_trigger_poll(void **trigger_ptr,
struct file *file, poll_table *wait)
{
@ -1171,24 +1162,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (static_branch_likely(&psi_disabled))
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
rcu_read_lock();
t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
if (!t) {
rcu_read_unlock();
t = smp_load_acquire(trigger_ptr);
if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
}
kref_get(&t->refcount);
rcu_read_unlock();
poll_wait(file, &t->event_wait, wait);
if (cmpxchg(&t->event, 1, 0) == 1)
ret |= EPOLLPRI;
kref_put(&t->refcount, psi_trigger_destroy);
return ret;
}
@ -1212,14 +1194,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
buf[buf_size - 1] = '\0';
new = psi_trigger_create(&psi_system, buf, nbytes, res);
if (IS_ERR(new))
return PTR_ERR(new);
seq = file->private_data;
/* Take seq->lock to protect seq->private from concurrent writes */
mutex_lock(&seq->lock);
psi_trigger_replace(&seq->private, new);
/* Allow only one trigger per file descriptor */
if (seq->private) {
mutex_unlock(&seq->lock);
return -EBUSY;
}
new = psi_trigger_create(&psi_system, buf, nbytes, res);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
}
smp_store_release(&seq->private, new);
mutex_unlock(&seq->lock);
return nbytes;
@ -1254,7 +1246,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
psi_trigger_replace(&seq->private, NULL);
psi_trigger_destroy(seq->private);
return single_release(inode, file);
}