sched/core: fix userspace affining threads incorrectly

Certain userspace applications, to achieve maximum performance, affine their threads to the cpus that run the fastest. This is not always the correct strategy. For example, in certain architectures all the cores have the same max freq but a few of them have a bigger cache. Affining to the cpus that have the bigger cache is advantageous, but such an application would end up affining its threads to all the cores. Similarly, if an architecture has just one cpu that runs at max freq, the application ends up crowding all its threads on that single core, which is detrimental to performance. To address this issue, we need to detect a suspicious-looking affinity request from userspace and check whether the requesting application links in a particular library. The latter can easily be detected by traversing the executable vm areas that map a file and checking for that library name. When such an affinity request is found, change it to use a proper affinity. The suspicious affinity request, the proper affinity request and the library name can be configured by userspace. Change-Id: I6bb8c310ca54c03261cc721f28dfd6023ab5591a Signed-off-by: Abhijeet Dharmapurikar <adharmap@codeaurora.org>
2018-08-09 17:19:54 -07:00 · 2018-08-09 17:19:54 -07:00 · b6f796997f
commit b6f796997f
parent 71b7e54063
5 changed files with 99 additions and 2 deletions
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@ -2094,6 +2094,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 # define vcpu_is_preempted(cpu)	false
 #endif

+extern long msm_sched_setaffinity(pid_t pid, struct cpumask *new_mask);
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@ -163,4 +163,9 @@ extern int sched_energy_aware_handler(struct ctl_table *table, int write,
 				 loff_t *ppos);
 #endif

+#define LIB_PATH_LENGTH 512
+extern char sched_lib_name[LIB_PATH_LENGTH];
+extern unsigned int sched_lib_mask_check;
+extern unsigned int sched_lib_mask_force;
+
 #endif /* _LINUX_SCHED_SYSCTL_H */
--- a/kernel/compat.c
+++ b/kernel/compat.c
@ -290,7 +290,7 @@ COMPAT_SYSCALL_DEFINE3(sched_setaffinity, compat_pid_t, pid,
 	if (retval)
 		goto out;

-	retval = sched_setaffinity(pid, new_mask);
+	retval = msm_sched_setaffinity(pid, new_mask);
 out:
 	free_cpumask_var(new_mask);
 	return retval;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@ -5813,6 +5813,71 @@ out_put_task:
 }
 EXPORT_SYMBOL_GPL(sched_setaffinity);

+char sched_lib_name[LIB_PATH_LENGTH];	/* substring searched for in mapped file paths; empty disables the check */
+unsigned int sched_lib_mask_check;	/* requested mask (first word) that triggers the override; 0 disables */
+unsigned int sched_lib_mask_force;	/* mask value substituted when the override triggers; 0 disables */
+/*
+ * is_sched_lib_based_app - check whether a task maps the configured library.
+ * @pid: pid of the task to inspect, resolved through find_process_by_pid().
+ *
+ * Walks the task's VMAs and, for every file-backed executable mapping,
+ * resolves the backing file's path and looks for sched_lib_name as a
+ * substring of that path.
+ *
+ * Return: true if a matching mapping is found; false if sched_lib_name is
+ * empty, the task cannot be found, the task has no usable mm, a path cannot
+ * be resolved, or no mapping matches.
+ */
+static inline bool is_sched_lib_based_app(pid_t pid)
+{
+	const char *name = NULL;
+	struct vm_area_struct *vma;
+	char path_buf[LIB_PATH_LENGTH];
+	bool found = false;
+	struct task_struct *p;
+	struct mm_struct *mm;
+
+	if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0)
+		return false;
+
+	rcu_read_lock();
+
+	p = find_process_by_pid(pid);
+	if (!p) {
+		rcu_read_unlock();
+		return false;
+	}
+
+	/* Prevent p going away */
+	get_task_struct(p);
+	rcu_read_unlock();
+
+	/*
+	 * Holding a reference on the task does not keep its mm alive: the
+	 * task may exit concurrently and drop p->mm. Take our own reference
+	 * on the mm and only ever use that pointer below.
+	 */
+	mm = get_task_mm(p);
+	if (!mm)
+		goto put_task_struct;
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		/* Only file-backed, executable mappings can be the library. */
+		if (!vma->vm_file || !(vma->vm_flags & VM_EXEC))
+			continue;
+
+		name = d_path(&vma->vm_file->f_path, path_buf, LIB_PATH_LENGTH);
+		/* A path that cannot be resolved ends the scan as "not found". */
+		if (IS_ERR(name))
+			break;
+
+		if (strnstr(name, sched_lib_name,
+			    strnlen(name, LIB_PATH_LENGTH))) {
+			found = true;
+			break;
+		}
+	}
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+
+put_task_struct:
+	put_task_struct(p);
+	return found;
+}
+
+/*
+ * msm_sched_setaffinity - sched_setaffinity() with an optional mask override.
+ * @pid: pid of the task whose affinity is being set.
+ * @new_mask: requested affinity mask; overwritten in place when the override
+ *            applies.
+ *
+ * The override applies only when both sched_lib_mask_check and
+ * sched_lib_mask_force are non-zero, the first word of @new_mask equals
+ * sched_lib_mask_check, and is_sched_lib_based_app() reports a match for
+ * @pid. In that case @new_mask is replaced by a mask whose first word is
+ * sched_lib_mask_force.
+ *
+ * Return: the result of sched_setaffinity() on the (possibly rewritten) mask.
+ */
+long msm_sched_setaffinity(pid_t pid, struct cpumask *new_mask)
+{
+	bool override;
+
+	/* Cheap checks first; the VMA walk runs only if they all pass. */
+	override = sched_lib_mask_check != 0 && sched_lib_mask_force != 0;
+	if (override)
+		override = cpumask_bits(new_mask)[0] == sched_lib_mask_check;
+	if (override)
+		override = is_sched_lib_based_app(pid);
+
+	if (override) {
+		cpumask_t forced_mask = { {sched_lib_mask_force} };
+
+		cpumask_copy(new_mask, &forced_mask);
+	}
+
+	return sched_setaffinity(pid, new_mask);
+}
+
 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
 			     struct cpumask *new_mask)
 {
@ -5843,7 +5908,7 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,

 	retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
 	if (retval == 0)
-		retval = sched_setaffinity(pid, new_mask);
+		retval = msm_sched_setaffinity(pid, new_mask);
 	free_cpumask_var(new_mask);
 	return retval;
 }
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@ -144,6 +144,7 @@ static int ten_thousand = 10000;
 static int six_hundred_forty_kb = 640 * 1024;
 #endif
 static int __maybe_unused two_hundred_million = 200000000;
+static int two_hundred_fifty_five = 255;

 #ifdef CONFIG_SCHED_WALT
 const int sched_user_hint_max = 1000;
@ -815,6 +816,31 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 #endif
+	{
+		.procname	= "sched_lib_name",
+		.data		= sched_lib_name,
+		.maxlen		= LIB_PATH_LENGTH,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+	{
+		.procname	= "sched_lib_mask_check",
+		.data		= &sched_lib_mask_check,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two_hundred_fifty_five,
+	},
+	{
+		.procname	= "sched_lib_mask_force",
+		.data		= &sched_lib_mask_force,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two_hundred_fifty_five,
+	},
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",