FROMLIST: sched/fair: add support to tune PELT ramp/decay timings

The PELT half-life is the time [ms] required by the PELT signal to build
up a 50% load/utilization, starting from zero. This time is currently
hardcoded to be 32ms, a value which seems to make sense for most of the
workloads.

However, 32ms has been verified to be too long for certain classes of
workloads. For example, in the mobile space many tasks affecting the
user-experience run with a 16ms or 8ms cadence, since they need to match
the common 60Hz or 120Hz refresh rate of the graphics pipeline.
This has contributed so far to the idea that "PELT is too slow" to
properly track the utilization of interactive mobile workloads,
especially compared to alternative load tracking solutions which provide
a better representation of task demand in the range of 10-20ms.

A faster PELT ramp-up could shorten the time required for the signal to
stabilize, and thus better represent task demands in the mobile space.
As a downside, it also reduces the decay time, so the load/utilization
of sleeping tasks (or idle CPUs) is forgotten faster.

Fortunately, since the integration of the utilization estimation
support in mainline kernel:

   commit 7f65ea42eb ("sched/fair: Add util_est on top of PELT")

a fast decay time is no longer an issue for task utilization estimation.
Although estimated utilization does not slow down the decay of blocked
utilization on idle CPUs, for mobile workloads this seems not to be a
major concern compared to the benefits in interactive responsiveness.

Let's add a compile-time option to choose the PELT speed that best fits
a specific system. By default the current 32ms half-life is used, but
we can also compile a kernel to use a faster ramp-up time of either 16ms
or 8ms. These two configurations have been verified to give PELT a
further improvement in performance, compared to other out-of-tree load
tracking solutions, when it comes to tracking interactive workloads,
thus better supporting both task placement and frequency selection.

Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Paul Turner <pjt@google.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

[
 backport from LKML:
 Message-ID: <20180409165134.707-1-patrick.bellasi@arm.com>
]
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Change-Id: I50569748918b799ac4bf4e7d2b387253080a0fd2
[kdrag0n: Forward-ported from kernel/common android-4.14 to
          android-4.19]
Signed-off-by: Danny Lin <danny@kdrag0n.dev>
This commit is contained in:
Patrick Bellasi 2017-11-21 16:31:58 +00:00 committed by spakkkk
parent 7b8fa2379f
commit 9bbe3c433e
3 changed files with 145 additions and 23 deletions

View File

@ -10,22 +10,22 @@
#include <math.h>
#include <stdio.h>
#define HALFLIFE 32
#define HALFLIFE { 32, 16, 8 }
#define SHIFT 32
double y;
void calc_runnable_avg_yN_inv(void)
void calc_runnable_avg_yN_inv(const int halflife)
{
int i;
unsigned int x;
/* To silence -Wunused-but-set-variable warnings. */
printf("static const u32 runnable_avg_yN_inv[] __maybe_unused = {");
for (i = 0; i < HALFLIFE; i++) {
for (i = 0; i < halflife; i++) {
x = ((1UL<<32)-1)*pow(y, i);
if (i % 6 == 0) printf("\n\t");
if (i % 4 == 0) printf("\n\t");
printf("0x%8x, ", x);
}
printf("\n};\n\n");
@ -33,12 +33,12 @@ void calc_runnable_avg_yN_inv(void)
int sum = 1024;
void calc_runnable_avg_yN_sum(void)
void calc_runnable_avg_yN_sum(const int halflife)
{
int i;
printf("static const u32 runnable_avg_yN_sum[] = {\n\t 0,");
for (i = 1; i <= HALFLIFE; i++) {
for (i = 1; i <= halflife; i++) {
if (i == 1)
sum *= y;
else
@ -56,7 +56,7 @@ int n = -1;
/* first period */
long max = 1024;
void calc_converged_max(void)
void calc_converged_max(const int halflife)
{
long last = 0, y_inv = ((1UL<<32)-1)*y;
@ -74,17 +74,17 @@ void calc_converged_max(void)
last = max;
}
n--;
printf("#define LOAD_AVG_PERIOD %d\n", HALFLIFE);
printf("#define LOAD_AVG_PERIOD %d\n", halflife);
printf("#define LOAD_AVG_MAX %ld\n", max);
// printf("#define LOAD_AVG_MAX_N %d\n\n", n);
printf("#define LOAD_AVG_MAX_N %d\n\n", n);
}
void calc_accumulated_sum_32(void)
void calc_accumulated_sum_32(const int halflife)
{
int i, x = sum;
printf("static const u32 __accumulated_sum_N32[] = {\n\t 0,");
for (i = 1; i <= n/HALFLIFE+1; i++) {
for (i = 1; i <= n/halflife+1; i++) {
if (i > 1)
x = x/2 + sum;
@ -98,12 +98,22 @@ void calc_accumulated_sum_32(void)
void main(void)
{
int hl_value[] = HALFLIFE;
int hl_count = sizeof(hl_value) / sizeof(int);
int hl_idx, halflife;
printf("/* Generated by Documentation/scheduler/sched-pelt; do not modify. */\n\n");
y = pow(0.5, 1/(double)HALFLIFE);
for (hl_idx = 0; hl_idx < hl_count; ++hl_idx) {
halflife = hl_value[hl_idx];
calc_runnable_avg_yN_inv();
// calc_runnable_avg_yN_sum();
calc_converged_max();
// calc_accumulated_sum_32();
y = pow(0.5, 1/(double)halflife);
printf("#if CONFIG_PELT_UTIL_HALFLIFE_%d\n", halflife);
calc_runnable_avg_yN_inv(halflife);
calc_runnable_avg_yN_sum(halflife);
calc_converged_max(halflife);
calc_accumulated_sum_32(halflife);
printf("#endif\n\n");
}
}

View File

@ -767,6 +767,41 @@ config UCLAMP_BUCKETS_COUNT
endmenu
menu "FAIR Scheuler tunables"
choice
prompt "Utilization's PELT half-Life"
default PELT_UTIL_HALFLIFE_32
help
Allows choosing one of the possible values for the PELT half-life to
be used for the update of the utilization of tasks and CPUs.
The half-life is the amount of [ms] required by the PELT signal to
build up to 50% utilization. The higher the half-life the longer it
takes for a task to be represented as a big one.
If not sure, use the default of 32 ms.
config PELT_UTIL_HALFLIFE_32
bool "32 ms, default for server"
config PELT_UTIL_HALFLIFE_16
bool "16 ms, suggested for interactive workloads"
help
Use 16ms as PELT half-life value. This makes utilization and load
ramp up and decay twice as fast as with the default configuration
using 32ms.
config PELT_UTIL_HALFLIFE_8
bool "8 ms, very fast"
help
Use 8ms as PELT half-life value. This makes utilization and load
ramp up and decay four times as fast as with the default configuration
using 32ms.
endchoice
endmenu # FAIR Scheduler tunables
#
# For architectures that want to enable the support for NUMA-affine scheduler
# balancing logic:

View File

@ -1,14 +1,91 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Generated by Documentation/scheduler/sched-pelt; do not modify. */
#if CONFIG_PELT_UTIL_HALFLIFE_32
static const u32 runnable_avg_yN_inv[] __maybe_unused = {
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
0x85aac367, 0x82cd8698,
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a,
0xeac0c6e6, 0xe5b906e6, 0xe0ccdeeb, 0xdbfbb796,
0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46,
0xb504f333, 0xb123f581, 0xad583ee9, 0xa9a15ab4,
0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a,
0x8b95c1e3, 0x88980e80, 0x85aac367, 0x82cd8698,
};
static const u32 runnable_avg_yN_sum[] = {
0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
};
#define LOAD_AVG_PERIOD 32
#define LOAD_AVG_MAX 47742
#define LOAD_AVG_MAX_N 345
static const u32 __accumulated_sum_N32[] = {
0, 23371, 35056, 40899, 43820, 45281,
46011, 46376, 46559, 46650, 46696, 46719,
};
#endif
#if CONFIG_PELT_UTIL_HALFLIFE_16
static const u32 runnable_avg_yN_inv[] __maybe_unused = {
0xffffffff, 0xf5257d14, 0xeac0c6e6, 0xe0ccdeeb,
0xd744fcc9, 0xce248c14, 0xc5672a10, 0xbd08a39e,
0xb504f333, 0xad583ee9, 0xa5fed6a9, 0x9ef5325f,
0x9837f050, 0x91c3d373, 0x8b95c1e3, 0x85aac367,
};
static const u32 runnable_avg_yN_sum[] = {
0,22380,22411,22441,22470,22497,22523,22548,22572,22595,22617,
22638,22658,22677,22696,22714,22731,
};
#define LOAD_AVG_PERIOD 16
#define LOAD_AVG_MAX 24152
#define LOAD_AVG_MAX_N 517
static const u32 __accumulated_sum_N32[] = {
0, 22731, 34096, 39779, 42620, 44041,
44751, 45106, 45284, 45373, 45417, 45439,
45450, 45456, 45459, 45460, 45461, 45461,
45461, 45461, 45461, 45461, 45461, 45461,
45461, 45461, 45461, 45461, 45461, 45461,
45461, 45461, 45461, 45461,
};
#endif
#if CONFIG_PELT_UTIL_HALFLIFE_8
static const u32 runnable_avg_yN_inv[] __maybe_unused = {
0xffffffff, 0xeac0c6e6, 0xd744fcc9, 0xc5672a10,
0xb504f333, 0xa5fed6a9, 0x9837f050, 0x8b95c1e3,
};
static const u32 runnable_avg_yN_sum[] = {
0,20844,20053,19327,18661,18051,17491,16978,16507,
};
#define LOAD_AVG_PERIOD 8
#define LOAD_AVG_MAX 12337
#define LOAD_AVG_MAX_N 603
static const u32 __accumulated_sum_N32[] = {
0, 16507, 24760, 28887, 30950, 31982,
32498, 32756, 32885, 32949, 32981, 32997,
33005, 33009, 33011, 33012, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013, 33013,
33013, 33013, 33013, 33013, 33013,
};
#endif