BACKPORT: cgroup: make per-cgroup pressure stall tracking configurable

PSI accounts stalls for each cgroup separately and aggregates them at each
level of the hierarchy. This causes additional overhead, with psi_avgs_work
being called for each cgroup in the hierarchy. psi_avgs_work has been
highly optimized; however, on systems with a large number of cgroups the
overhead becomes noticeable.
Systems which use PSI only at the system level could avoid this overhead
if PSI can be configured to skip per-cgroup stall accounting.
Add a "cgroup_disable=pressure" kernel command-line option to allow
requesting system-wide-only pressure stall accounting. When set, it
keeps system-wide accounting under /proc/pressure/ but skips accounting
for individual cgroups and does not expose PSI nodes in the cgroup hierarchy.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Link:  https://lore.kernel.org/patchwork/patch/1435705
(cherry picked from commit 3958e2d0c34e18c41b60dc01832bd670a59ef70f
 https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git tj)

Conflicts:
        include/linux/cgroup-defs.h
        kernel/cgroup/cgroup.c

1. Trivial merge conflict in cgroup-defs.h due to missing CFTYPE_DEBUG
2. Changed flags to (CFTYPE_NOT_ON_ROOT | CFTYPE_PRESSURE) in cgroup.c
because in 4.19 psi files were allowed only in non-root cgroups.

Bug: 178872719
Bug: 191734423
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: Ifc8fbc52f9a1131d7c2668edbb44c525c76c3360
Git-commit: 92c6dd6a65
Git-repo: https://android.googlesource.com/kernel/common/
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
This commit is contained in:
Suren Baghdasaryan 2021-05-24 12:53:39 -07:00 committed by Gerrit - the friendly Code Review server
parent 3dc0765c59
commit f7c2472acb
5 changed files with 80 additions and 18 deletions

View File

@ -483,16 +483,21 @@
ccw_timeout_log [S390]
See Documentation/s390/CommonIO for details.
cgroup_disable= [KNL] Disable a particular controller
Format: {name of the controller(s) to disable}
cgroup_disable= [KNL] Disable a particular controller or optional feature
Format: {name of the controller(s) or feature(s) to disable}
The effects of cgroup_disable=foo are:
- foo isn't auto-mounted if you mount all cgroups in
a single hierarchy
- foo isn't visible as an individually mountable
subsystem
- if foo is an optional feature then the feature is
disabled and corresponding cgroup files are not
created
{Currently only "memory" controller deal with this and
cut the overhead, others just disable the usage. So
only cgroup_disable=memory is actually worthy}
Specifying "pressure" disables per-cgroup pressure
stall information accounting feature
cgroup_no_v1= [KNL] Disable cgroup controllers and named hierarchies in v1
Format: { { controller | "all" | "named" }

View File

@ -99,6 +99,7 @@ enum {
CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */
CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */
/* internal flags, do not use outside cgroup core proper */
__CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */

View File

@ -667,6 +667,8 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
return &cgrp->psi;
}
bool cgroup_psi_enabled(void);
static inline void cgroup_init_kthreadd(void)
{
/*
@ -731,6 +733,11 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
return NULL;
}
/*
 * Stub variant of cgroup_psi_enabled(): always reports that per-cgroup
 * pressure stall accounting is not enabled.
 * NOTE(review): presumably this lives in the section of the header built
 * without cgroup support (the neighbouring cgroup_psi() stub returns
 * NULL) -- confirm against the full header.
 */
static inline bool cgroup_psi_enabled(void)
{
return false;
}
static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
struct cgroup *ancestor)
{

View File

@ -211,6 +211,22 @@ struct cgroup_namespace init_cgroup_ns = {
static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];
/*
 * cgroup optional features
 *
 * Each enumerator is used both as an index into cgroup_opt_feature_names[]
 * and as a bit position in cgroup_feature_disable_mask.
 */
enum cgroup_opt_features {
#ifdef CONFIG_PSI
/* per-cgroup pressure stall information accounting */
OPT_FEATURE_PRESSURE,
#endif
/* number of optional features; must stay last */
OPT_FEATURE_COUNT
};
/*
 * Names of the optional features, indexed by enum cgroup_opt_features.
 * These are the strings matched against the tokens passed to the
 * "cgroup_disable=" command-line option.
 *
 * The table is never modified, so the pointer slots are const as well as
 * the strings they point to.
 */
static const char * const cgroup_opt_feature_names[OPT_FEATURE_COUNT] = {
#ifdef CONFIG_PSI
	"pressure",
#endif
};
/*
 * Bitmask of disabled optional features: bit N is set when the feature
 * with enum cgroup_opt_features value N was named in "cgroup_disable=".
 */
static u16 cgroup_feature_disable_mask __read_mostly;
static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_skip(struct css_task_iter *it,
@ -3535,6 +3551,18 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
{
psi_trigger_replace(&of->priv, NULL);
}
/*
 * cgroup_psi_enabled - is per-cgroup pressure stall accounting enabled?
 *
 * Returns true unless the OPT_FEATURE_PRESSURE bit is set in
 * cgroup_feature_disable_mask.
 */
bool cgroup_psi_enabled(void)
{
	const unsigned int pressure_bit = 1 << OPT_FEATURE_PRESSURE;

	return !(cgroup_feature_disable_mask & pressure_bit);
}
#else /* CONFIG_PSI */
/*
 * Variant built when CONFIG_PSI is not set: per-cgroup pressure stall
 * accounting is never reported as enabled.
 */
bool cgroup_psi_enabled(void)
{
return false;
}
#endif /* CONFIG_PSI */
static int cgroup_freeze_show(struct seq_file *seq, void *v)
@ -3782,6 +3810,8 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
restart:
for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
continue;
if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
continue;
if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
@ -3858,6 +3888,9 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
WARN_ON(cft->ss || cft->kf_ops);
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
continue;
if (cft->seq_start)
kf_ops = &cgroup_kf_ops;
else
@ -4773,7 +4806,7 @@ static struct cftype cgroup_base_files[] = {
#ifdef CONFIG_PSI
{
.name = "io.pressure",
.flags = CFTYPE_NOT_ON_ROOT,
.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_PRESSURE,
.seq_show = cgroup_io_pressure_show,
.write = cgroup_io_pressure_write,
.poll = cgroup_pressure_poll,
@ -4781,7 +4814,7 @@ static struct cftype cgroup_base_files[] = {
},
{
.name = "memory.pressure",
.flags = CFTYPE_NOT_ON_ROOT,
.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_PRESSURE,
.seq_show = cgroup_memory_pressure_show,
.write = cgroup_memory_pressure_write,
.poll = cgroup_pressure_poll,
@ -4789,7 +4822,7 @@ static struct cftype cgroup_base_files[] = {
},
{
.name = "cpu.pressure",
.flags = CFTYPE_NOT_ON_ROOT,
.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_PRESSURE,
.seq_show = cgroup_cpu_pressure_show,
.write = cgroup_cpu_pressure_write,
.poll = cgroup_pressure_poll,
@ -6029,6 +6062,15 @@ static int __init cgroup_disable(char *str)
continue;
cgroup_disable_mask |= 1 << i;
}
for (i = 0; i < OPT_FEATURE_COUNT; i++) {
if (strcmp(token, cgroup_opt_feature_names[i]))
continue;
cgroup_feature_disable_mask |= 1 << i;
pr_info("Disabling %s control group feature\n",
cgroup_opt_feature_names[i]);
break;
}
}
return 1;
}
@ -6277,6 +6319,9 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf,
if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
continue;
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
continue;
if (prefix)
ret += snprintf(buf + ret, size - ret, "%s.", prefix);

View File

@ -149,6 +149,7 @@
static int psi_bug __read_mostly;
DEFINE_STATIC_KEY_FALSE(psi_disabled);
DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
#ifdef CONFIG_PSI_DEFAULT_DISABLED
static bool psi_enable;
@ -213,6 +214,9 @@ void __init psi_init(void)
return;
}
if (!cgroup_psi_enabled())
static_branch_disable(&psi_cgroups_enabled);
psi_period = jiffies_to_nsecs(PSI_FREQ);
group_init(&psi_system);
}
@ -836,23 +840,23 @@ static u32 psi_group_change(struct psi_group *group, int cpu,
static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
{
if (*iter == &psi_system)
return NULL;
#ifdef CONFIG_CGROUPS
struct cgroup *cgroup = NULL;
if (static_branch_likely(&psi_cgroups_enabled)) {
struct cgroup *cgroup = NULL;
if (!*iter)
cgroup = task->cgroups->dfl_cgrp;
else if (*iter == &psi_system)
return NULL;
else
cgroup = cgroup_parent(*iter);
if (!*iter)
cgroup = task->cgroups->dfl_cgrp;
else
cgroup = cgroup_parent(*iter);
if (cgroup && cgroup_parent(cgroup)) {
*iter = cgroup;
return cgroup_psi(cgroup);
if (cgroup && cgroup_parent(cgroup)) {
*iter = cgroup;
return cgroup_psi(cgroup);
}
}
#else
if (*iter)
return NULL;
#endif
*iter = &psi_system;
return &psi_system;