From 344e3f9eb57d4368dc93222d0a6dc8686f179413 Mon Sep 17 00:00:00 2001
From: Patrick Daly
Date: Thu, 4 Apr 2019 18:55:51 -0700
Subject: [PATCH] psi: Introduce ftrace events

Add ftrace events to capture per-zone memory stats when a trigger
threshold is hit.

Change-Id: Ic52a27713acd34090c8f99543408b277877015c7
Signed-off-by: Patrick Daly
---
Review revision notes (not part of the commit message):
 - trace_event_helper() returns early unless the psi_window_vmstat
   tracepoint is enabled, and reads the watermark through
   high_wmark_pages().
 - Added comments on the helper and on both trace events.
 - The blob ids on the "index" lines predate this revision.

 include/trace/events/psi.h | 79 ++++++++++++++++++++++++++++++++++++++
 init/Kconfig               | 10 +++++
 kernel/sched/psi.c         | 41 ++++++++++++++++++++
 3 files changed, 130 insertions(+)
 create mode 100644 include/trace/events/psi.h

diff --git a/include/trace/events/psi.h b/include/trace/events/psi.h
new file mode 100644
index 000000000000..00659776cea3
--- /dev/null
+++ b/include/trace/events/psi.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM psi
+
+#if !defined(_TRACE_PSI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PSI_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/psi_types.h>
+
+/*
+ * psi_window_vmstat - per-zone counter snapshot.
+ * Stores the memstall total, the zone name and the high/free/cma/file
+ * values exactly as passed by the caller; numeric fields print in hex.
+ */
+TRACE_EVENT(psi_window_vmstat,
+
+	TP_PROTO(u64 memstall, const char *zone_name, u64 high,
+		u64 free, u64 cma, u64 file),
+
+	TP_ARGS(memstall, zone_name, high, free, cma, file),
+
+	TP_STRUCT__entry(
+		__field(u64, memstall)
+		__string(name, zone_name)
+		__field(u64, high)
+		__field(u64, free)
+		__field(u64, cma)
+		__field(u64, file)
+	),
+
+	TP_fast_assign(
+		__entry->memstall = memstall;
+		__assign_str(name, zone_name);
+		__entry->high = high;
+		__entry->free = free;
+		__entry->cma = cma;
+		__entry->file = file;
+	),
+
+	TP_printk("%16s: Memstall: %#16llx High: %#8llx Free: %#8llx CMA: %#8llx File: %#8llx",
+		__get_str(name), __entry->memstall, __entry->high,
+		__entry->free, __entry->cma, __entry->file
+	)
+);
+
+/*
+ * psi_event - records a trigger's state (printed as the decimal value of
+ * enum psi_states) and its threshold (printed in hex).
+ */
+TRACE_EVENT(psi_event,
+
+	TP_PROTO(enum psi_states state, u64 threshold),
+
+	TP_ARGS(state, threshold),
+
+	TP_STRUCT__entry(
+		__field(enum psi_states, state)
+		__field(u64, threshold)
+	),
+
+	TP_fast_assign(
+		__entry->state = state;
+		__entry->threshold = threshold;
+	),
+
+	TP_printk("State: %d Threshold: %#llx",
+		__entry->state, __entry->threshold
+	)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/Kconfig b/init/Kconfig
index 0729071f6907..bc1e17ec279e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -532,6 +532,16 @@ config PSI_DEFAULT_DISABLED
 	  per default but can be enabled through passing psi_enable=1
 	  on the kernel commandline during boot.
 
+config PSI_FTRACE
+	bool "Insert debug hooks for PSI ftrace events"
+	default n
+	depends on PSI
+	help
+	  If not set, PSI ftrace events will be disabled and
+	  will not incur any performance penalty.
+	  If set, PSI ftrace events may be
+	  enabled.
+
 endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index e88918e0bb6d..e3e3db917d89 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -142,6 +142,9 @@
 #include <linux/psi.h>
 #include "sched.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/psi.h>
+
 static int psi_bug __read_mostly;
 
 DEFINE_STATIC_KEY_FALSE(psi_disabled);
@@ -441,6 +444,41 @@ static void psi_avgs_work(struct work_struct *work)
 	mutex_unlock(&group->avgs_lock);
 }
 
+#ifdef CONFIG_PSI_FTRACE
+/*
+ * Emit one psi_window_vmstat event per populated zone, each carrying the
+ * group's total[PSI_POLL][PSI_MEM_SOME] value alongside the zone counters.
+ */
+static void trace_event_helper(struct psi_group *group)
+{
+	struct zone *zone;
+	unsigned long wmark;
+	unsigned long free;
+	unsigned long cma;
+	unsigned long file;
+	u64 memstall = group->total[PSI_POLL][PSI_MEM_SOME];
+
+	/* Gathering the counters is wasted work while the tracepoint is off. */
+	if (!trace_psi_window_vmstat_enabled())
+		return;
+
+	for_each_populated_zone(zone) {
+		wmark = high_wmark_pages(zone);
+		free = zone_page_state(zone, NR_FREE_PAGES);
+		cma = zone_page_state(zone, NR_FREE_CMA_PAGES);
+		file = zone_page_state(zone, NR_ZONE_ACTIVE_FILE) +
+			zone_page_state(zone, NR_ZONE_INACTIVE_FILE);
+
+		trace_psi_window_vmstat(
+			memstall, zone->name, wmark, free, cma, file);
+	}
+}
+#else
+static void trace_event_helper(struct psi_group *group)
+{
+}
+#endif /* CONFIG_PSI_FTRACE */
+
 /* Trigger tracking window manupulations */
 static void window_reset(struct psi_window *win, u64 now, u64 value,
 			 u64 prev_growth)
@@ -533,6 +571,8 @@ static u64 update_triggers(struct psi_group *group, u64 now)
 		if (now < t->last_event_time + t->win.size)
 			continue;
 
+		trace_psi_event(t->state, t->threshold);
+
 		/* Generate an event */
 		if (cmpxchg(&t->event, 0, 1) == 0)
 			wake_up_interruptible(&t->event_wait);
@@ -605,6 +645,7 @@ static void psi_poll_work(struct kthread_work *work)
 		 */
 		group->polling_until = now +
 			group->poll_min_period * UPDATES_PER_WINDOW;
+		trace_event_helper(group);
 	}
 
 	if (now > group->polling_until) {