From 960434acef375b2a73168b467fdfc7fa0a11a68e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 28 Jan 2021 00:37:51 +0900 Subject: [PATCH 01/28] tracing/kprobe: Fix to support kretprobe events on unloaded modules commit 97c753e62e6c31a404183898d950d8c08d752dbd upstream. Fix kprobe_on_func_entry() returns error code instead of false so that register_kretprobe() can return an appropriate error code. append_trace_kprobe() expects the kprobe registration returns -ENOENT when the target symbol is not found, and it checks whether the target module is unloaded or not. If the target module doesn't exist, it defers to probe the target symbol until the module is loaded. However, since register_kretprobe() returns -EINVAL instead of -ENOENT in that case, it always fail on putting the kretprobe event on unloaded modules. e.g. Kprobe event: /sys/kernel/debug/tracing # echo p xfs:xfs_end_io >> kprobe_events [ 16.515574] trace_kprobe: This probe might be able to register after target module is loaded. Continue. Kretprobe event: (p -> r) /sys/kernel/debug/tracing # echo r xfs:xfs_end_io >> kprobe_events sh: write error: Invalid argument /sys/kernel/debug/tracing # cat error_log [ 41.122514] trace_kprobe: error: Failed to register probe event Command: r xfs:xfs_end_io ^ To fix this bug, change kprobe_on_func_entry() to detect symbol lookup failure and return -ENOENT in that case. Otherwise it returns -EINVAL or 0 (succeeded, given address is on the entry). Link: https://lkml.kernel.org/r/161176187132.1067016.8118042342894378981.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 59158ec4aef7 ("tracing/kprobes: Check the probe on unloaded module correctly") Reported-by: Jianlin Lv Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- include/linux/kprobes.h | 2 +- kernel/kprobes.c | 34 +++++++++++++++++++++++++--------- kernel/trace/trace_kprobe.c | 4 ++-- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 9f22652d69bb..c28204e22b54 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -245,7 +245,7 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); extern bool arch_kprobe_on_func_entry(unsigned long offset); -extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); +extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); extern int kprobe_add_ksym_blacklist(unsigned long entry); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6d02b78fde01..d4435fd6fc8b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1921,29 +1921,45 @@ bool __weak arch_kprobe_on_func_entry(unsigned long offset) return !offset; } -bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) +/** + * kprobe_on_func_entry() -- check whether given address is function entry + * @addr: Target address + * @sym: Target symbol name + * @offset: The offset from the symbol or the address + * + * This checks whether the given @addr+@offset or @sym+@offset is on the + * function entry address or not. + * This returns 0 if it is the function entry, or -EINVAL if it is not. + * And also it returns -ENOENT if it fails the symbol or address lookup. 
+ * Caller must pass @addr or @sym (either one must be NULL), or this + * returns -EINVAL. + */ +int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) { kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); if (IS_ERR(kp_addr)) - return false; + return PTR_ERR(kp_addr); - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) || - !arch_kprobe_on_func_entry(offset)) - return false; + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset)) + return -ENOENT; - return true; + if (!arch_kprobe_on_func_entry(offset)) + return -EINVAL; + + return 0; } int register_kretprobe(struct kretprobe *rp) { - int ret = 0; + int ret; struct kretprobe_instance *inst; int i; void *addr; - if (!kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset)) - return -EINVAL; + ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset); + if (ret) + return ret; /* If only rp->kp.addr is specified, check reregistering kprobes */ if (rp->kp.addr && check_kprobe_rereg(&rp->kp)) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5c17f70c7f2d..61eff45653f5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -112,9 +112,9 @@ bool trace_kprobe_on_func_entry(struct trace_event_call *call) { struct trace_kprobe *tk = (struct trace_kprobe *)call->data; - return kprobe_on_func_entry(tk->rp.kp.addr, + return (kprobe_on_func_entry(tk->rp.kp.addr, tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name, - tk->rp.kp.addr ? 0 : tk->rp.kp.offset); + tk->rp.kp.addr ? 0 : tk->rp.kp.offset) == 0); } bool trace_kprobe_error_injectable(struct trace_event_call *call) From b0393aadc2d2294ba259fe010291f72a130d2d60 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Wed, 20 Feb 2019 21:27:05 +0800 Subject: [PATCH 02/28] block: fix NULL pointer dereference in register_disk commit 4d7c1d3fd7c7eda7dea351f071945e843a46c145 upstream. If __device_add_disk-->bdi_register_owner-->bdi_register--> bdi_register_va-->device_create_vargs fails, bdi->dev is still NULL, __device_add_disk-->register_disk will visit bdi->dev->kobj. This patch fixes that. Signed-off-by: zhengbin Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jack Wang --- block/genhd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 2b536ab570ac..6965dde96373 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -652,10 +652,12 @@ exit: kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); disk_part_iter_exit(&piter); - err = sysfs_create_link(&ddev->kobj, - &disk->queue->backing_dev_info->dev->kobj, - "bdi"); - WARN_ON(err); + if (disk->queue->backing_dev_info->dev) { + err = sysfs_create_link(&ddev->kobj, + &disk->queue->backing_dev_info->dev->kobj, + "bdi"); + WARN_ON(err); + } } /** From a19749a5fbd97f2147a8768813d084d584d9e045 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 29 Jan 2021 10:13:53 -0500 Subject: [PATCH 03/28] fgraph: Initialize tracing_graph_pause at task creation commit 7e0a9220467dbcfdc5bc62825724f3e52e50ab31 upstream. On some archs, the idle task can call into cpu_suspend(). The cpu_suspend() will disable or pause function graph tracing, as there's some paths in bringing down the CPU that can have issues with its return address being modified. The task_struct structure has a "tracing_graph_pause" atomic counter, that when set to something other than zero, the function graph tracer will not modify the return address. 
The problem is that the tracing_graph_pause counter is initialized when the function graph tracer is enabled. This can corrupt the counter for the idle task if it is suspended in these architectures. CPU 1 CPU 2 ----- ----- do_idle() cpu_suspend() pause_graph_tracing() task_struct->tracing_graph_pause++ (0 -> 1) start_graph_tracing() for_each_online_cpu(cpu) { ftrace_graph_init_idle_task(cpu) task-struct->tracing_graph_pause = 0 (1 -> 0) unpause_graph_tracing() task_struct->tracing_graph_pause-- (0 -> -1) The above should have gone from 1 to zero, and enabled function graph tracing again. But instead, it is set to -1, which keeps it disabled. There's no reason that the field tracing_graph_pause on the task_struct can not be initialized at boot up. Cc: stable@vger.kernel.org Fixes: 380c4b1411ccd ("tracing/function-graph-tracer: append the tracing_graph_flag") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=211339 Reported-by: pierre.gondois@arm.com Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- init/init_task.c | 3 ++- kernel/trace/ftrace.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/init/init_task.c b/init/init_task.c index 5aebe3be4d7c..994ffe018120 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -168,7 +168,8 @@ struct task_struct init_task .lockdep_recursion = 0, #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER - .ret_stack = NULL, + .ret_stack = NULL, + .tracing_graph_pause = ATOMIC_INIT(0), #endif #if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT) .trace_recursion = 0, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cce526755471..3492202762c7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6875,7 +6875,6 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) } if (t->ret_stack == NULL) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->curr_ret_stack = -1; t->curr_ret_depth = -1; @@ -7088,7 +7087,6 @@ static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); static void graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) { - atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; /* make curr_ret_stack visible before we add the ret_stack */ From 96bc573dc8894fffd8859b6796466164053c06c7 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Thu, 23 Jul 2020 01:40:46 +0530 Subject: [PATCH 04/28] remoteproc: qcom_q6v5_mss: Validate modem blob firmware size before load commit 135b9e8d1cd8ba5ac9ad9bcf24b464b7b052e5b8 upstream The following mem abort is observed when one of the modem blob firmware size exceeds the allocated mpss region. Fix this by restricting the copy size to segment size using request_firmware_into_buf before load. Err Logs: Unable to handle kernel paging request at virtual address Mem abort info: ... Call trace: __memcpy+0x110/0x180 rproc_start+0xd0/0x190 rproc_boot+0x404/0x550 state_store+0x54/0xf8 dev_attr_store+0x44/0x60 sysfs_kf_write+0x58/0x80 kernfs_fop_write+0x140/0x230 vfs_write+0xc4/0x208 ksys_write+0x74/0xf8 ... 
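In sketch form (simplified from the hunk that follows, error handling trimmed), the change is to let the firmware core bound the read by the space reserved for the segment instead of copying whatever the blob declares:

    /* before: the blob's own size drives the copy */
    ret = request_firmware(&seg_fw, seg_name, qproc->dev);
    memcpy(ptr, seg_fw->data, seg_fw->size);

    /* after: the preallocated segment size bounds the load */
    ret = request_firmware_into_buf(&seg_fw, seg_name, qproc->dev,
                                    ptr, phdr->p_filesz);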
Reviewed-by: Bjorn Andersson Fixes: 051fb70fd4ea4 ("remoteproc: qcom: Driver for the self-authenticating Hexagon v5") Cc: stable@vger.kernel.org Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200722201047.12975-3-sibis@codeaurora.org Signed-off-by: Bjorn Andersson [sudip: manual backport to old file path] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/qcom_q6v5_pil.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index cc475dcbf27f..289cba29e240 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -739,14 +739,13 @@ static int q6v5_mpss_load(struct q6v5 *qproc) if (phdr->p_filesz) { snprintf(seg_name, sizeof(seg_name), "modem.b%02d", i); - ret = request_firmware(&seg_fw, seg_name, qproc->dev); + ret = request_firmware_into_buf(&seg_fw, seg_name, qproc->dev, + ptr, phdr->p_filesz); if (ret) { dev_err(qproc->dev, "failed to load %s\n", seg_name); goto release_firmware; } - memcpy(ptr, seg_fw->data, seg_fw->size); - release_firmware(seg_fw); } From 4f12385298d474541a794f4c47980861299f20e7 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Thu, 23 Jul 2020 01:40:45 +0530 Subject: [PATCH 05/28] remoteproc: qcom_q6v5_mss: Validate MBA firmware size before load commit e013f455d95add874f310dc47c608e8c70692ae5 upstream The following mem abort is observed when the mba firmware size exceeds the allocated mba region. MBA firmware size is restricted to a maximum size of 1M and remaining memory region is used by modem debug policy firmware when available. Hence verify whether the MBA firmware size lies within the allocated memory region and is not greater than 1M before loading. Err Logs: Unable to handle kernel paging request at virtual address Mem abort info: ... Call trace: __memcpy+0x110/0x180 rproc_start+0x40/0x218 rproc_boot+0x5b4/0x608 state_store+0x54/0xf8 dev_attr_store+0x44/0x60 sysfs_kf_write+0x58/0x80 kernfs_fop_write+0x140/0x230 vfs_write+0xc4/0x208 ksys_write+0x74/0xf8 __arm64_sys_write+0x24/0x30 ... Reviewed-by: Bjorn Andersson Fixes: 051fb70fd4ea4 ("remoteproc: qcom: Driver for the self-authenticating Hexagon v5") Cc: stable@vger.kernel.org Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200722201047.12975-2-sibis@codeaurora.org Signed-off-by: Bjorn Andersson [sudip: manual backport to old file path] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/remoteproc/qcom_q6v5_pil.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index 289cba29e240..604828c04ddf 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -340,6 +340,12 @@ static int q6v5_load(struct rproc *rproc, const struct firmware *fw) { struct q6v5 *qproc = rproc->priv; + /* MBA is restricted to a maximum size of 1M */ + if (fw->size > qproc->mba_size || fw->size > SZ_1M) { + dev_err(qproc->dev, "MBA firmware load failed\n"); + return -EINVAL; + } + memcpy(qproc->mba_region, fw->data, fw->size); return 0; From fac13c1dd417f7d117c28d4929175c3b05820234 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 26 Dec 2020 16:50:20 -0800 Subject: [PATCH 06/28] af_key: relax availability checks for skb size calculation [ Upstream commit afbc293add6466f8f3f0c3d944d85f53709c170f ] xfrm_probe_algs() probes kernel crypto modules and changes the availability of struct xfrm_algo_desc. 
But there is a small window where ealg->available and aalg->available get changed between count_ah_combs()/count_esp_combs() and dump_ah_combs()/dump_esp_combs(), in this case we may allocate a smaller skb but later put a larger amount of data and trigger the panic in skb_put(). Fix this by relaxing the checks when counting the size, that is, skipping the test of ->available. We may waste some memory for a few of sizeof(struct sadb_comb), but it is still much better than a panic. Reported-by: syzbot+b2bf2652983d23734c5c@syzkaller.appspotmail.com Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/key/af_key.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index e340e97224c3..c7d5a6015389 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2908,7 +2908,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2926,7 +2926,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { @@ -2937,7 +2937,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } From 1d915c56268fa214c94ef764df8bf31d1bd5f25b Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 7 Jan 2021 17:16:02 -0800 Subject: [PATCH 07/28] regulator: core: avoid regulator_resolve_supply() race condition [ Upstream commit eaa7995c529b54d68d97a30f6344cc6ca2f214a7 ] The final step in regulator_register() is to call regulator_resolve_supply() for each registered regulator (including the one in the process of being registered). The regulator_resolve_supply() function first checks if rdev->supply is NULL, then it performs various steps to try to find the supply. If successful, rdev->supply is set inside of set_supply(). This procedure can encounter a race condition if two concurrent tasks call regulator_register() near to each other on separate CPUs and one of the regulators has rdev->supply_name specified. There is currently nothing guaranteeing atomicity between the rdev->supply check and set steps. Thus, both tasks can observe rdev->supply==NULL in their regulator_resolve_supply() calls. This then results in both creating a struct regulator for the supply. One ends up actually stored in rdev->supply and the other is lost (though still present in the supply's consumer_list). Here is a kernel log snippet showing the issue: [ 12.421768] gpu_cc_gx_gdsc: supplied by pm8350_s5_level [ 12.425854] gpu_cc_gx_gdsc: supplied by pm8350_s5_level [ 12.429064] debugfs: Directory 'regulator.4-SUPPLY' with parent '17a00000.rsc:rpmh-regulator-gfxlvl-pm8350_s5_level' already present! Avoid this race condition by holding the rdev->mutex lock inside of regulator_resolve_supply() while checking and setting rdev->supply. 
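Reduced to a sketch (condensed from the diff below, not the full function), the fix has the classic check / lock / re-check shape:

    /* fast path: no lock needed once the supply is resolved */
    if (rdev->supply)
        return 0;

    regulator_lock(rdev);
    /* re-check under the lock: a concurrent task may have won the race */
    if (rdev->supply)
        goto out;

    /* ... lookup and set_supply() run here, still under the lock ... */
out:
    regulator_unlock(rdev);
    return ret;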
Signed-off-by: David Collins Link: https://lore.kernel.org/r/1610068562-4410-1-git-send-email-collinsd@codeaurora.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 8a6ca06d9c16..fa8f5fc04d8f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1567,23 +1567,34 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) { struct regulator_dev *r; struct device *dev = rdev->dev.parent; - int ret; + int ret = 0; /* No supply to resovle? */ if (!rdev->supply_name) return 0; - /* Supply already resolved? */ + /* Supply already resolved? (fast-path without locking contention) */ if (rdev->supply) return 0; + /* + * Recheck rdev->supply with rdev->mutex lock held to avoid a race + * between rdev->supply null check and setting rdev->supply in + * set_supply() from concurrent tasks. + */ + regulator_lock(rdev); + + /* Supply just resolved by a concurrent task? */ + if (rdev->supply) + goto out; + r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); /* Did the lookup explicitly defer for us? */ if (ret == -EPROBE_DEFER) - return ret; + goto out; if (have_full_constraints()) { r = dummy_regulator_rdev; @@ -1591,15 +1602,18 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } else { dev_err(dev, "Failed to resolve %s-supply for %s\n", rdev->supply_name, rdev->desc->name); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out; } } if (r == rdev) { dev_err(dev, "Supply for %s (%s) resolved to itself\n", rdev->desc->name, rdev->supply_name); - if (!have_full_constraints()) - return -EINVAL; + if (!have_full_constraints()) { + ret = -EINVAL; + goto out; + } r = dummy_regulator_rdev; get_device(&r->dev); } @@ -1613,7 +1627,8 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (r->dev.parent && r->dev.parent != rdev->dev.parent) { if (!device_is_bound(r->dev.parent)) { put_device(&r->dev); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out; } } @@ -1621,13 +1636,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) ret = regulator_resolve_supply(r); if (ret < 0) { put_device(&r->dev); - return ret; + goto out; } ret = set_supply(rdev, r); if (ret < 0) { put_device(&r->dev); - return ret; + goto out; } /* Cascade always-on state to supply */ @@ -1636,11 +1651,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; - return ret; + goto out; } } - return 0; +out: + regulator_unlock(rdev); + return ret; } /* Internal regulator request function */ From 6259963da5adcbbe4992412ccf8375bd380c5c72 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Jan 2021 06:57:38 -0800 Subject: [PATCH 08/28] chtls: Fix potential resource leak [ Upstream commit b6011966ac6f402847eb5326beee8da3a80405c7 ] The dst entry should be released if no neighbour is found. Goto label free_dst to fix the issue. Besides, the check of ndev against NULL is redundant. 
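Condensed from the diff below (surrounding code omitted), the point is that once dst_neigh_lookup() has run, a failure must unwind through the label that drops the dst reference:

    n = dst_neigh_lookup(dst, &iph->saddr);
    if (!n || !n->dev)
        goto free_dst;      /* was "goto free_sk", leaking dst */
    ...
free_dst:
    if (n)
        neigh_release(n);
    dst_release(dst);
free_sk:
    inet_csk_prepare_forced_close(newsk);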
Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210121145738.51091-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/crypto/chelsio/chtls/chtls_cm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index fd3092a4378e..08ed3ff8b255 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1051,11 +1051,9 @@ static struct sock *chtls_recv_sock(struct sock *lsk, tcph = (struct tcphdr *)(iph + 1); n = dst_neigh_lookup(dst, &iph->saddr); if (!n || !n->dev) - goto free_sk; + goto free_dst; ndev = n->dev; - if (!ndev) - goto free_dst; if (is_vlan_dev(ndev)) ndev = vlan_dev_real_dev(ndev); @@ -1117,7 +1115,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, free_csk: chtls_sock_release(&csk->kref); free_dst: - neigh_release(n); + if (n) + neigh_release(n); dst_release(dst); free_sk: inet_csk_prepare_forced_close(newsk); From 682821d905f77005b1b85684608dfc75e75422a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jan 2021 17:11:42 -0500 Subject: [PATCH 09/28] pNFS/NFSv4: Try to return invalid layout in pnfs_layout_process() [ Upstream commit 08bd8dbe88825760e953759d7ec212903a026c75 ] If the server returns a new stateid that does not match the one in our cache, then try to return the one we hold instead of just invalidating it on the client side. This ensures that both client and server will agree that the stateid is invalid. Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/pnfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4b165aa5a256..55965e8e9a2e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2301,7 +2301,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) * We got an entirely new state ID. Mark all segments for the * inode invalid, and retry the layoutget */ - pnfs_mark_layout_stateid_invalid(lo, &free_me); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .length = NFS4_MAX_UINT64, + }; + pnfs_set_plh_return_info(lo, IOMODE_ANY, 0); + pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + &range, 0); goto out_forget; } From dc64ebad85455ae43fa589687184c547fae1714d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:48 +0200 Subject: [PATCH 10/28] iwlwifi: mvm: take mutex for calling iwl_mvm_get_sync_time() [ Upstream commit 5c56d862c749669d45c256f581eac4244be00d4d ] We need to take the mutex to call iwl_mvm_get_sync_time(), do it. 
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.4bb5ccf881a6.I62973cbb081e80aa5b0447a5c3b9c3251a65cf6b@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 798605c4f122..5287f21d7ba6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -520,7 +520,10 @@ static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file, const size_t bufsz = sizeof(buf); int pos = 0; + mutex_lock(&mvm->mutex); iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os); + mutex_unlock(&mvm->mutex); + do_div(curr_os, NSEC_PER_USEC); diff = curr_os - curr_gp2; pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff); From 896cf561fc605391eea05c0e5dd8b217c0c40bad Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 15 Jan 2021 13:05:55 +0200 Subject: [PATCH 11/28] iwlwifi: pcie: add a NULL check in iwl_pcie_txq_unmap [ Upstream commit 98c7d21f957b10d9c07a3a60a3a5a8f326a197e5 ] I hit a NULL pointer exception in this function when the init flow went really bad. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.2e8da9f2c132.I0234d4b8ddaf70aaa5028a20c863255e05bc1f84@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index b73582ec03a0..b1a71539ca3e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -631,6 +631,11 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = trans_pcie->txq[txq_id]; + if (!txq) { + IWL_ERR(trans, "Trying to free a queue that wasn't allocated?\n"); + return; + } + spin_lock_bh(&txq->lock); while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", From 182b4310af89bd8980976d60e4d290f4c6635ed9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:56 +0200 Subject: [PATCH 12/28] iwlwifi: pcie: fix context info memory leak [ Upstream commit 2d6bc752cc2806366d9a4fd577b3f6c1f7a7e04e ] If the image loader allocation fails, we leak all the previously allocated memory. Fix this. 
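The shape of the fix, as a generic sketch (hypothetical alloc_a()/free_a() names, not the driver's code): unwind through labels in reverse order of allocation so a late failure frees everything set up before it:

    a = alloc_a();
    if (!a)
        return -ENOMEM;

    b = alloc_b();
    if (!b) {
        ret = -ENOMEM;
        goto err_free_a;    /* don't just return: 'a' would leak */
    }
    ...
    return 0;

err_free_b:                 /* later failure points jump here */
    free_b(b);
err_free_a:
    free_a(a);
    return ret;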
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.97172cbaa67c.I3473233d0ad01a71aa9400832fb2b9f494d88a11@changeid Signed-off-by: Sasha Levin --- .../net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 6783b20d9681..a1cecf4a0e82 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -159,8 +159,10 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, /* Allocate IML */ iml_img = dma_alloc_coherent(trans->dev, trans->iml_len, &trans_pcie->iml_dma_addr, GFP_KERNEL); - if (!iml_img) - return -ENOMEM; + if (!iml_img) { + ret = -ENOMEM; + goto err_free_ctxt_info; + } memcpy(iml_img, trans->iml, trans->iml_len); @@ -177,6 +179,11 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, return 0; +err_free_ctxt_info: + dma_free_coherent(trans->dev, sizeof(*trans_pcie->ctxt_info_gen3), + trans_pcie->ctxt_info_gen3, + trans_pcie->ctxt_info_dma_addr); + trans_pcie->ctxt_info_gen3 = NULL; err_free_prph_info: dma_free_coherent(trans->dev, sizeof(*prph_info), From 45115259782a6b18566f378c8d9b16b25869444c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jan 2021 14:52:41 +0200 Subject: [PATCH 13/28] iwlwifi: mvm: guard against device removal in reprobe [ Upstream commit 7a21b1d4a728a483f07c638ccd8610d4b4f12684 ] If we get into a problem severe enough to attempt a reprobe, we schedule a worker to do that. However, if the problem gets more severe and the device is actually destroyed before this worker has a chance to run, we use a free device. Bump up the reference count of the device until the worker runs to avoid this situation. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.871f0892e4b2.I94819e11afd68d875f3e242b98bef724b8236f1e@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 0e26619fb330..d932171617e6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1192,6 +1192,7 @@ static void iwl_mvm_reprobe_wk(struct work_struct *wk) reprobe = container_of(wk, struct iwl_mvm_reprobe, work); if (device_reprobe(reprobe->dev)) dev_err(reprobe->dev, "reprobe failed!\n"); + put_device(reprobe->dev); kfree(reprobe); module_put(THIS_MODULE); } @@ -1242,7 +1243,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error) module_put(THIS_MODULE); return; } - reprobe->dev = mvm->trans->dev; + reprobe->dev = get_device(mvm->trans->dev); INIT_WORK(&reprobe->work, iwl_mvm_reprobe_wk); schedule_work(&reprobe->work); } else if (mvm->fwrt.cur_fw_img == IWL_UCODE_REGULAR && From b5c6efba2b0d35b0f8920d93c5510ea029d17976 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 21 Jan 2021 16:17:23 -0500 Subject: [PATCH 14/28] SUNRPC: Move simple_get_bytes and simple_get_netobj into private header [ Upstream commit ba6dfce47c4d002d96cd02a304132fca76981172 ] Remove duplicated helper functions to parse opaque XDR objects and place inside new file net/sunrpc/auth_gss/auth_gss_internal.h. 
In the new file carry the license and copyright from the source file net/sunrpc/auth_gss/auth_gss.c. Finally, update the comment inside include/linux/sunrpc/xdr.h since lockd is not the only user of struct xdr_netobj. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- include/linux/sunrpc/xdr.h | 3 +- net/sunrpc/auth_gss/auth_gss.c | 30 +----------------- net/sunrpc/auth_gss/auth_gss_internal.h | 42 +++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 31 ++---------------- 4 files changed, 46 insertions(+), 60 deletions(-) create mode 100644 net/sunrpc/auth_gss/auth_gss_internal.h diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2bd68177a442..33580cc72a43 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -26,8 +26,7 @@ struct rpc_rqst; #define XDR_QUADLEN(l) (((l) + 3) >> 2) /* - * Generic opaque `network object.' At the kernel level, this type - * is used only by lockd. + * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8cb7d812ccb8..e61c48c1b37d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -53,6 +53,7 @@ #include #include +#include "auth_gss_internal.h" #include "../netns.h" static const struct rpc_authops authgss_ops; @@ -147,35 +148,6 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); } -static const void * -simple_get_bytes(const void *p, const void *end, void *res, size_t len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static inline const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) -{ - const void *q; - unsigned int len; - - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(dest->data == NULL)) - return ERR_PTR(-ENOMEM); - dest->len = len; - return q; -} - static struct gss_cl_ctx * gss_cred_get_ctx(struct rpc_cred *cred) { diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h new file mode 100644 index 000000000000..c5603242b54b --- /dev/null +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * linux/net/sunrpc/auth_gss/auth_gss_internal.h + * + * Internal definitions for RPCSEC_GSS client authentication + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. 
+ * + */ +#include +#include +#include + +static inline const void * +simple_get_bytes(const void *p, const void *end, void *res, size_t len) +{ + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + memcpy(res, p, len); + return q; +} + +static inline const void * +simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) +{ + const void *q; + unsigned int len; + + p = simple_get_bytes(p, end, &len, sizeof(len)); + if (IS_ERR(p)) + return p; + q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + dest->data = kmemdup(p, len, GFP_NOFS); + if (unlikely(dest->data == NULL)) + return ERR_PTR(-ENOMEM); + dest->len = len; + return q; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 7bb2514aadd9..14f2823ad6c2 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -46,6 +46,8 @@ #include #include +#include "auth_gss_internal.h" + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -187,35 +189,6 @@ get_gss_krb5_enctype(int etype) return NULL; } -static const void * -simple_get_bytes(const void *p, const void *end, void *res, int len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) -{ - const void *q; - unsigned int len; - - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(res->data == NULL)) - return ERR_PTR(-ENOMEM); - res->len = len; - return q; -} - static inline const void * get_key(const void *p, const void *end, struct krb5_ctx *ctx, struct crypto_skcipher **res) From 9848962bfb28515c476bcf34a8ec9655ccee08e4 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 21 Jan 2021 16:17:24 -0500 Subject: [PATCH 15/28] SUNRPC: Handle 0 length opaque XDR object data properly [ Upstream commit e4a7d1f7707eb44fd953a31dd59eff82009d879c ] When handling an auth_gss downcall, it's possible to get 0-length opaque object for the acceptor. In the case of a 0-length XDR object, make sure simple_get_netobj() fills in dest->data = NULL, and does not continue to kmemdup() which will set dest->data = ZERO_SIZE_PTR for the acceptor. 
The trace event code can handle NULL but not ZERO_SIZE_PTR for a string, and so without this patch the rpcgss_context trace event will crash the kernel as follows: [ 162.887992] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 162.898693] #PF: supervisor read access in kernel mode [ 162.900830] #PF: error_code(0x0000) - not-present page [ 162.902940] PGD 0 P4D 0 [ 162.904027] Oops: 0000 [#1] SMP PTI [ 162.905493] CPU: 4 PID: 4321 Comm: rpc.gssd Kdump: loaded Not tainted 5.10.0 #133 [ 162.908548] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 162.910978] RIP: 0010:strlen+0x0/0x20 [ 162.912505] Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 [ 162.920101] RSP: 0018:ffffaec900c77d90 EFLAGS: 00010202 [ 162.922263] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000fffde697 [ 162.925158] RDX: 000000000000002f RSI: 0000000000000080 RDI: 0000000000000010 [ 162.928073] RBP: 0000000000000010 R08: 0000000000000e10 R09: 0000000000000000 [ 162.930976] R10: ffff8e698a590cb8 R11: 0000000000000001 R12: 0000000000000e10 [ 162.933883] R13: 00000000fffde697 R14: 000000010034d517 R15: 0000000000070028 [ 162.936777] FS: 00007f1e1eb93700(0000) GS:ffff8e6ab7d00000(0000) knlGS:0000000000000000 [ 162.940067] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 162.942417] CR2: 0000000000000010 CR3: 0000000104eba000 CR4: 00000000000406e0 [ 162.945300] Call Trace: [ 162.946428] trace_event_raw_event_rpcgss_context+0x84/0x140 [auth_rpcgss] [ 162.949308] ? __kmalloc_track_caller+0x35/0x5a0 [ 162.951224] ? gss_pipe_downcall+0x3a3/0x6a0 [auth_rpcgss] [ 162.953484] gss_pipe_downcall+0x585/0x6a0 [auth_rpcgss] [ 162.955953] rpc_pipe_write+0x58/0x70 [sunrpc] [ 162.957849] vfs_write+0xcb/0x2c0 [ 162.959264] ksys_write+0x68/0xe0 [ 162.960706] do_syscall_64+0x33/0x40 [ 162.962238] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 162.964346] RIP: 0033:0x7f1e1f1e57df Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- net/sunrpc/auth_gss/auth_gss_internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h index c5603242b54b..f6d9631bd9d0 100644 --- a/net/sunrpc/auth_gss/auth_gss_internal.h +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -34,9 +34,12 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(dest->data == NULL)) - return ERR_PTR(-ENOMEM); + if (len) { + dest->data = kmemdup(p, len, GFP_NOFS); + if (unlikely(dest->data == NULL)) + return ERR_PTR(-ENOMEM); + } else + dest->data = NULL; dest->len = len; return q; } From 2a3687034e47c51cd4f01ce82ed16f1964008b9e Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 5 Apr 2019 12:58:58 +1100 Subject: [PATCH 16/28] lib/string: Add strscpy_pad() function [ Upstream commit 458a3bf82df4fe1f951d0f52b1e0c1e9d5a88a3b ] We have a function to copy strings safely and we have a function to copy strings and zero the tail of the destination (if source string is shorter than destination buffer) but we do not have a function to do both at once. This means developers must write this themselves if they desire this functionality. 
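For illustration, the open-coded version a caller has to write today looks roughly like this (a sketch; the tail arithmetic is the part that is easy to get wrong):

    ssize_t len = strscpy(dest, src, size);
    if (len >= 0)
        memset(dest + len + 1, 0, size - len - 1);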
This is a chore, and also leaves us open to off by one errors unnecessarily. Add a function that calls strscpy() then memset()s the tail to zero if the source string is shorter than the destination buffer. Acked-by: Kees Cook Signed-off-by: Tobin C. Harding Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- include/linux/string.h | 4 ++++ lib/string.c | 47 +++++++++++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/include/linux/string.h b/include/linux/string.h index 4db285b83f44..1e0c442b941e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -31,6 +31,10 @@ size_t strlcpy(char *, const char *, size_t); #ifndef __HAVE_ARCH_STRSCPY ssize_t strscpy(char *, const char *, size_t); #endif + +/* Wraps calls to strscpy()/memset(), no arch specific code required */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count); + #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif diff --git a/lib/string.c b/lib/string.c index edf4907ec946..f7f7770444bf 100644 --- a/lib/string.c +++ b/lib/string.c @@ -158,11 +158,9 @@ EXPORT_SYMBOL(strlcpy); * @src: Where to copy the string from * @count: Size of destination buffer * - * Copy the string, or as much of it as fits, into the dest buffer. - * The routine returns the number of characters copied (not including - * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. - * The behavior is undefined if the string buffers overlap. - * The destination buffer is always NUL terminated, unless it's zero-sized. + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always NUL terminated, unless it's zero-sized. * * Preferred to strlcpy() since the API doesn't require reading memory * from the src string beyond the specified "count" bytes, and since @@ -172,8 +170,10 @@ EXPORT_SYMBOL(strlcpy); * * Preferred to strncpy() since it always returns a valid string, and * doesn't unnecessarily force the tail of the destination buffer to be - * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() - * with an overflow test, then just memset() the tail of the dest buffer. + * zeroed. If zeroing is desired please use strscpy_pad(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. */ ssize_t strscpy(char *dest, const char *src, size_t count) { @@ -260,6 +260,39 @@ char *stpcpy(char *__restrict__ dest, const char *__restrict__ src) } EXPORT_SYMBOL(stpcpy); +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, zeros + * the tail of the destination buffer. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Return: The number of characters copied (not including the trailing + * %NUL) or -E2BIG if the destination buffer wasn't big enough. 
+ */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count) +{ + ssize_t written; + + written = strscpy(dest, src, count); + if (written < 0 || written == count - 1) + return written; + + memset(dest + written + 1, 0, count - written - 1); + + return written; +} +EXPORT_SYMBOL(strscpy_pad); + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another From d3fcb6d76c4fb3d2c335cc78f982f3bd3de9e66f Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 25 Sep 2019 16:46:16 -0700 Subject: [PATCH 17/28] include/trace/events/writeback.h: fix -Wstringop-truncation warnings [ Upstream commit d1a445d3b86c9341ce7a0954c23be0edb5c9bec5 ] There are many of those warnings. In file included from ./arch/powerpc/include/asm/paca.h:15, from ./arch/powerpc/include/asm/current.h:13, from ./include/linux/thread_info.h:21, from ./include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from ./include/linux/preempt.h:78, from ./include/linux/spinlock.h:51, from fs/fs-writeback.c:19: In function 'strncpy', inlined from 'perf_trace_writeback_page_template' at ./include/trace/events/writeback.h:56:1: ./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified bound 32 equals destination size [-Wstringop-truncation] return __builtin_strncpy(p, q, size); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix it by using the new strscpy_pad() which was introduced in "lib/string: Add strscpy_pad() function" and will always be NUL-terminated instead of strncpy(). Also, change strlcpy() to use strscpy_pad() in this file for consistency. Link: http://lkml.kernel.org/r/1564075099-27750-1-git-send-email-cai@lca.pw Fixes: 455b2864686d ("writeback: Initial tracing support") Fixes: 028c2dd184c0 ("writeback: Add tracing to balance_dirty_pages") Fixes: e84d0a4f8e39 ("writeback: trace event writeback_queue_io") Fixes: b48c104d2211 ("writeback: trace event bdi_dirty_ratelimit") Fixes: cc1676d917f3 ("writeback: Move requeueing when I_SYNC set to writeback_sb_inodes()") Fixes: 9fb0a7da0c52 ("writeback: add more tracepoints") Signed-off-by: Qian Cai Reviewed-by: Jan Kara Cc: Tobin C. Harding Cc: Steven Rostedt (VMware) Cc: Ingo Molnar Cc: Tejun Heo Cc: Dave Chinner Cc: Fengguang Wu Cc: Jens Axboe Cc: Joe Perches Cc: Kees Cook Cc: Jann Horn Cc: Jonathan Corbet Cc: Nitin Gote Cc: Rasmus Villemoes Cc: Stephen Kitt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/trace/events/writeback.h | 38 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 146e7b3faa85..b463e2575117 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -65,8 +65,9 @@ TRACE_EVENT(writeback_dirty_page, ), TP_fast_assign( - strncpy(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", + 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -95,8 +96,8 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ - strncpy(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + bdi->dev ? 
dev_name(bdi->dev) : "(unknown)", 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -175,8 +176,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); @@ -219,8 +220,9 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strncpy(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + wb->bdi->dev ? dev_name(wb->bdi->dev) : + "(unknown)", 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -273,7 +275,7 @@ DECLARE_EVENT_CLASS(writeback_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%u", @@ -295,7 +297,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, dev_name(bdi->dev), 32); ), TP_printk("bdi %s", __entry->name @@ -320,7 +322,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, dev_name(bdi->dev), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -371,7 +373,7 @@ TRACE_EVENT(writeback_queue_io, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; @@ -456,7 +458,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ), TP_fast_assign( - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -521,7 +523,7 @@ TRACE_EVENT(balance_dirty_pages, TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -581,8 +583,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -655,8 +657,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; From 050462f040b9cdb92b63f35aca76c6c873b0b5ab Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 30 Jan 2020 22:11:04 -0800 Subject: [PATCH 18/28] memcg: fix a
crash in wb_workfn when a device disappears [ Upstream commit 68f23b89067fdf187763e75a56087550624fdbee ] Without memcg, there is a one-to-one mapping between the bdi and bdi_writeback structures. In this world, things are fairly straightforward; the first thing bdi_unregister() does is to shutdown the bdi_writeback structure (or wb), and part of that writeback ensures that no other work queued against the wb, and that the wb is fully drained. With memcg, however, there is a one-to-many relationship between the bdi and bdi_writeback structures; that is, there are multiple wb objects which can all point to a single bdi. There is a refcount which prevents the bdi object from being released (and hence, unregistered). So in theory, the bdi_unregister() *should* only get called once its refcount goes to zero (bdi_put will drop the refcount, and when it is zero, release_bdi gets called, which calls bdi_unregister). Unfortunately, del_gendisk() in block/gen_hd.c never got the memo about the Brave New memcg World, and calls bdi_unregister directly. It does this without informing the file system, or the memcg code, or anything else. This causes the root wb associated with the bdi to be unregistered, but none of the memcg-specific wb's are shutdown. So when one of these wb's are woken up to do delayed work, they try to dereference their wb->bdi->dev to fetch the device name, but unfortunately bdi->dev is now NULL, thanks to the bdi_unregister() called by del_gendisk(). As a result, *boom*. Fortunately, it looks like the rest of the writeback path is perfectly happy with bdi->dev and bdi->owner being NULL, so the simplest fix is to create a bdi_dev_name() function which can handle bdi->dev being NULL. This also allows us to bulletproof the writeback tracepoints to prevent them from dereferencing a NULL pointer and crashing the kernel if one is tracing with memcg's enabled, and an iSCSI device dies or a USB storage stick is pulled. The most common way of triggering this will be hotremoval of a device while writeback with memcg enabled is going on. It was triggering several times a day in a heavily loaded production environment. 
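In sketch form (taken from the fs-writeback.c hunk below), the crash site and its fix look like:

    /* wb_workfn() used to do this, and oopses once bdi->dev is gone: */
    set_worker_desc("flush-%s", dev_name(wb->bdi->dev));

    /* with the NULL-tolerant helper it becomes: */
    set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));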
Google Bug Id: 145475544 Link: https://lore.kernel.org/r/20191227194829.150110-1-tytso@mit.edu Link: http://lkml.kernel.org/r/20191228005211.163952-1-tytso@mit.edu Signed-off-by: Theodore Ts'o Cc: Chris Mason Cc: Tejun Heo Cc: Jens Axboe Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/fs-writeback.c | 2 +- include/linux/backing-dev.h | 10 ++++++++++ include/trace/events/writeback.h | 29 +++++++++++++---------------- mm/backing-dev.c | 1 + 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f2d0c4acb3cb..a247cb4b00e2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1986,7 +1986,7 @@ void wb_workfn(struct work_struct *work) struct bdi_writeback, dwork); long pages_written; - set_worker_desc("flush-%s", dev_name(wb->bdi->dev)); + set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); current->flags |= PF_SWAPWRITE; if (likely(!current_is_workqueue_rescuer() || diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c28a47cbe355..1ef4aca7b953 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -498,4 +499,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } +extern const char *bdi_unknown_name; + +static inline const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return dev_name(bdi->dev); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index b463e2575117..300afa559f46 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -66,8 +66,8 @@ TRACE_EVENT(writeback_dirty_page, TP_fast_assign( strscpy_pad(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", - 32); + bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : + NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -96,8 +96,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ - strscpy_pad(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -177,7 +176,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); @@ -220,9 +219,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strscpy_pad(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : - "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? 
work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -275,7 +272,7 @@ DECLARE_EVENT_CLASS(writeback_class, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%u", @@ -297,7 +294,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name @@ -322,7 +319,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -373,7 +370,7 @@ TRACE_EVENT(writeback_queue_io, __field(unsigned int, cgroup_ino) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = dirtied_before; __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; @@ -458,7 +455,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ), TP_fast_assign( - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -523,7 +520,7 @@ TRACE_EVENT(balance_dirty_pages, TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -584,7 +581,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -658,7 +655,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 72e6d0c55cfa..0a970be24a28 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -19,6 +19,7 @@ struct backing_dev_info noop_backing_dev_info = { EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; +const char *bdi_unknown_name = "(unknown)"; /* * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side From f1ecde00ce1694597f923f0d25f7a797c5243d99 Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Wed, 27 Jan 2021 08:15:24 -0800 Subject: [PATCH 19/28] Fix unsynchronized access to sev members through svm_register_enc_region commit 19a23da53932bc8011220bd8c410cb76012de004 upstream. Grab kvm->lock before pinning memory when registering an encrypted region; sev_pin_memory() relies on kvm->lock being held to ensure correctness when checking and updating the number of pinned pages. Add a lockdep assertion to help prevent future regressions. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H.
Peter Anvin" Cc: Paolo Bonzini Cc: Joerg Roedel Cc: Tom Lendacky Cc: Brijesh Singh Cc: Sean Christopherson Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org Cc: linux-kernel@vger.kernel.org Fixes: 1e80fdc09d12 ("KVM: SVM: Pin guest memory when SEV is active") Signed-off-by: Peter Gonda V2 - Fix up patch description - Correct file paths svm.c -> sev.c - Add unlock of kvm->lock on sev_pin_memory error V1 - https://lore.kernel.org/kvm/20210126185431.1824530-1-pgonda@google.com/ Message-Id: <20210127161524.2832400-1-pgonda@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f1e86051aa5f..b34d11f22213 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1832,6 +1832,8 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, struct page **pages; unsigned long first, last; + lockdep_assert_held(&kvm->lock); + if (ulen == 0 || uaddr + ulen < uaddr) return NULL; @@ -7084,12 +7086,21 @@ static int svm_register_enc_region(struct kvm *kvm, if (!region) return -ENOMEM; + mutex_lock(&kvm->lock); region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1); if (!region->pages) { ret = -ENOMEM; + mutex_unlock(&kvm->lock); goto e_free; } + region->uaddr = range->addr; + region->size = range->size; + + mutex_lock(&kvm->lock); + list_add_tail(®ion->list, &sev->regions_list); + mutex_unlock(&kvm->lock); + /* * The guest may change the memory encryption attribute from C=0 -> C=1 * or vice versa for this memory range. Lets make sure caches are @@ -7098,13 +7109,6 @@ static int svm_register_enc_region(struct kvm *kvm, */ sev_clflush_pages(region->pages, region->npages); - region->uaddr = range->addr; - region->size = range->size; - - mutex_lock(&kvm->lock); - list_add_tail(®ion->list, &sev->regions_list); - mutex_unlock(&kvm->lock); - return ret; e_free: From 44b07600cec4df051b7ecbd6831febdeca252e54 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 27 Aug 2019 19:01:45 +0800 Subject: [PATCH 20/28] block: don't hold q->sysfs_lock in elevator_init_mq commit c48dac137a62a5d6fa1ef3fa445cbd9c43655a76 upstream. The original comment says: q->sysfs_lock must be held to provide mutual exclusion between elevator_switch() and here. Which is simply wrong. elevator_init_mq() is only called from blk_mq_init_allocated_queue, which is always called before the request queue is registered via blk_register_queue(), for dm-rq or normal rq based driver. However, queue's kobject is only exposed and added to sysfs in blk_register_queue(). So there isn't such race between elevator_switch() and elevator_init_mq(). So avoid to hold q->sysfs_lock in elevator_init_mq(). Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Greg KH Cc: Mike Snitzer Cc: Bart Van Assche Cc: Damien Le Moal Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- block/elevator.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/block/elevator.c b/block/elevator.c index fae58b2f906f..ddbcd36616a8 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -980,23 +980,19 @@ int elevator_init_mq(struct request_queue *q) if (q->nr_hw_queues != 1) return 0; - /* - * q->sysfs_lock must be held to provide mutual exclusion between - * elevator_switch() and here. 
- */ - mutex_lock(&q->sysfs_lock); + WARN_ON_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)); + if (unlikely(q->elevator)) - goto out_unlock; + goto out; e = elevator_get(q, "mq-deadline", false); if (!e) - goto out_unlock; + goto out; err = blk_mq_init_sched(q, e); if (err) elevator_put(e); -out_unlock: - mutex_unlock(&q->sysfs_lock); +out: return err; } From 6ff18507a7c165b90f1fe51822c65d10265f5714 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 27 Aug 2019 19:01:46 +0800 Subject: [PATCH 21/28] blk-mq: don't hold q->sysfs_lock in blk_mq_map_swqueue commit c6ba933358f0d7a6a042b894dba20cc70396a6d3 upstream. blk_mq_map_swqueue() is called from blk_mq_init_allocated_queue() and blk_mq_update_nr_hw_queues(). For the former caller, the kobject isn't exposed to userspace yet. For the latter caller, hctx sysfs entries and debugfs are un-registered before updating nr_hw_queues. On the other hand, commit 2f8f1336a48b ("blk-mq: always free hctx after request queue is freed") moves freeing hctx into queue's release handler, so there won't be race with queue release path too. So don't hold q->sysfs_lock in blk_mq_map_swqueue(). Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Greg KH Cc: Mike Snitzer Cc: Bart Van Assche Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Jack Wang Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0df43515ff94..195526b93895 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2324,11 +2324,6 @@ static void blk_mq_map_swqueue(struct request_queue *q) struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; - /* - * Avoid others reading imcomplete hctx->cpumask through sysfs - */ - mutex_lock(&q->sysfs_lock); - queue_for_each_hw_ctx(q, hctx, i) { cpumask_clear(hctx->cpumask); hctx->nr_ctx = 0; @@ -2362,8 +2357,6 @@ static void blk_mq_map_swqueue(struct request_queue *q) hctx->ctxs[hctx->nr_ctx++] = ctx; } - mutex_unlock(&q->sysfs_lock); - queue_for_each_hw_ctx(q, hctx, i) { /* * If no software queues are mapped to this hardware queue, From e5099c0e851a801d04831db64d140bd4f3a014db Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 9 Feb 2021 13:41:53 -0800 Subject: [PATCH 22/28] squashfs: add more sanity checks in id lookup commit f37aa4c7366e23f91b81d00bafd6a7ab54e4a381 upstream. Sysbot has reported a number of "slab-out-of-bounds reads" and "use-after-free read" errors which has been identified as being caused by a corrupted index value read from the inode. This could be because the metadata block is uncompressed, or because the "compression" bit has been corrupted (turning a compressed block into an uncompressed block). This patch adds additional sanity checks to detect this, and the following corruption. 1. It checks against corruption of the ids count. This can either lead to a larger table to be read, or a smaller than expected table to be read. In the case of a too large ids count, this would often have been trapped by the existing sanity checks, but this patch introduces a more exact check, which can identify too small values. 2. It checks the contents of the index table for corruption. 
Link: https://lkml.kernel.org/r/20210204130249.4495-3-phillip@squashfs.org.uk Signed-off-by: Phillip Lougher Reported-by: syzbot+b06d57ba83f604522af2@syzkaller.appspotmail.com Reported-by: syzbot+c021ba012da41ee9807c@syzkaller.appspotmail.com Reported-by: syzbot+5024636e8b5fd19f0f19@syzkaller.appspotmail.com Reported-by: syzbot+bcbc661df46657d0fa4f@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/id.c | 40 ++++++++++++++++++++++++++++-------- fs/squashfs/squashfs_fs_sb.h | 1 + fs/squashfs/super.c | 6 +++--- fs/squashfs/xattr.h | 10 ++++++++- 4 files changed, 45 insertions(+), 12 deletions(-) diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index d38ea3dab951..8ccc0e3f6ea5 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -48,10 +48,15 @@ int squashfs_get_id(struct super_block *sb, unsigned int index, struct squashfs_sb_info *msblk = sb->s_fs_info; int block = SQUASHFS_ID_BLOCK(index); int offset = SQUASHFS_ID_BLOCK_OFFSET(index); - u64 start_block = le64_to_cpu(msblk->id_table[block]); + u64 start_block; __le32 disk_id; int err; + if (index >= msblk->ids) + return -EINVAL; + + start_block = le64_to_cpu(msblk->id_table[block]); + err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset, sizeof(disk_id)); if (err < 0) @@ -69,7 +74,10 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, u64 id_table_start, u64 next_table, unsigned short no_ids) { unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids); + unsigned int indexes = SQUASHFS_ID_BLOCKS(no_ids); + int n; __le64 *table; + u64 start, end; TRACE("In read_id_index_table, length %d\n", length); @@ -80,20 +88,36 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, return ERR_PTR(-EINVAL); /* - * length bytes should not extend into the next table - this check - * also traps instances where id_table_start is incorrectly larger - * than the next table start + * The computed size of the index table (length bytes) should exactly + * match the table start and end points */ - if (id_table_start + length > next_table) + if (length != (next_table - id_table_start)) return ERR_PTR(-EINVAL); table = squashfs_read_table(sb, id_table_start, length); + if (IS_ERR(table)) + return table; /* - * table[0] points to the first id lookup table metadata block, this - * should be less than id_table_start + * table[0], table[1], ... table[indexes - 1] store the locations + * of the compressed id blocks. Each entry should be less than + * the next (i.e. table[0] < table[1]), and the difference between them + * should be SQUASHFS_METADATA_SIZE or less. 
table[indexes - 1] + * should be less than id_table_start, and again the difference + * should be SQUASHFS_METADATA_SIZE or less */ - if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) { + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index ef69c31947bf..5234c19a0eab 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -77,5 +77,6 @@ struct squashfs_sb_info { unsigned int inodes; unsigned int fragments; int xattr_ids; + unsigned int ids; }; #endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 40e657386fa5..728b2d72f3f0 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -176,6 +176,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->directory_table = le64_to_cpu(sblk->directory_table_start); msblk->inodes = le32_to_cpu(sblk->inodes); msblk->fragments = le32_to_cpu(sblk->fragments); + msblk->ids = le16_to_cpu(sblk->no_ids); flags = le16_to_cpu(sblk->flags); TRACE("Found valid superblock on %pg\n", sb->s_bdev); @@ -187,7 +188,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) TRACE("Block size %d\n", msblk->block_size); TRACE("Number of inodes %d\n", msblk->inodes); TRACE("Number of fragments %d\n", msblk->fragments); - TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); + TRACE("Number of ids %d\n", msblk->ids); TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); TRACE("sblk->fragment_table_start %llx\n", @@ -244,8 +245,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) allocate_id_index_table: /* Allocate and read id index table */ msblk->id_table = squashfs_read_id_index_table(sb, - le64_to_cpu(sblk->id_table_start), next_table, - le16_to_cpu(sblk->no_ids)); + le64_to_cpu(sblk->id_table_start), next_table, msblk->ids); if (IS_ERR(msblk->id_table)) { ERROR("unable to read id index table\n"); err = PTR_ERR(msblk->id_table); diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index afe70f815e3d..86b0a0073e51 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h @@ -30,8 +30,16 @@ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, u64 *xattr_table_start, int *xattr_ids) { + struct squashfs_xattr_id_table *id_table; + + id_table = squashfs_read_table(sb, start, sizeof(*id_table)); + if (IS_ERR(id_table)) + return (__le64 *) id_table; + + *xattr_table_start = le64_to_cpu(id_table->xattr_table_start); + kfree(id_table); + ERROR("Xattrs in filesystem, these will be ignored\n"); - *xattr_table_start = start; return ERR_PTR(-ENOTSUPP); } From a6f933a3036313a3c4b56c68eccbf84d7546b49e Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 9 Feb 2021 13:41:56 -0800 Subject: [PATCH 23/28] squashfs: add more sanity checks in inode lookup commit eabac19e40c095543def79cb6ffeb3a8588aaff4 upstream. 
Sysbot has reported an "slab-out-of-bounds read" error which has been identified as being caused by a corrupted "ino_num" value read from the inode. This could be because the metadata block is uncompressed, or because the "compression" bit has been corrupted (turning a compressed block into an uncompressed block). This patch adds additional sanity checks to detect this, and the following corruption. 1. It checks against corruption of the inodes count. This can either lead to a larger table to be read, or a smaller than expected table to be read. In the case of a too large inodes count, this would often have been trapped by the existing sanity checks, but this patch introduces a more exact check, which can identify too small values. 2. It checks the contents of the index table for corruption. [phillip@squashfs.org.uk: fix checkpatch issue] Link: https://lkml.kernel.org/r/527909353.754618.1612769948607@webmail.123-reg.co.uk Link: https://lkml.kernel.org/r/20210204130249.4495-4-phillip@squashfs.org.uk Signed-off-by: Phillip Lougher Reported-by: syzbot+04419e3ff19d2970ea28@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/export.c | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 8073b6532cf0..d2a806416c3a 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -54,12 +54,17 @@ static long long squashfs_inode_lookup(struct super_block *sb, int ino_num) struct squashfs_sb_info *msblk = sb->s_fs_info; int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1); int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1); - u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]); + u64 start; __le64 ino; int err; TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num); + if (ino_num == 0 || (ino_num - 1) >= msblk->inodes) + return -EINVAL; + + start = le64_to_cpu(msblk->inode_lookup_table[blk]); + err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino)); if (err < 0) return err; @@ -124,7 +129,10 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, u64 lookup_table_start, u64 next_table, unsigned int inodes) { unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes); + unsigned int indexes = SQUASHFS_LOOKUP_BLOCKS(inodes); + int n; __le64 *table; + u64 start, end; TRACE("In read_inode_lookup_table, length %d\n", length); @@ -134,20 +142,37 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, if (inodes == 0) return ERR_PTR(-EINVAL); - /* length bytes should not extend into the next table - this check - * also traps instances where lookup_table_start is incorrectly larger - * than the next table start + /* + * The computed size of the lookup table (length bytes) should exactly + * match the table start and end points */ - if (lookup_table_start + length > next_table) + if (length != (next_table - lookup_table_start)) return ERR_PTR(-EINVAL); table = squashfs_read_table(sb, lookup_table_start, length); + if (IS_ERR(table)) + return table; /* - * table[0] points to the first inode lookup table metadata block, - * this should be less than lookup_table_start + * table0], table[1], ... table[indexes - 1] store the locations + * of the compressed inode lookup blocks. Each entry should be + * less than the next (i.e. table[0] < table[1]), and the difference + * between them should be SQUASHFS_METADATA_SIZE or less. 
+ * table[indexes - 1] should be less than lookup_table_start, and + * again the difference should be SQUASHFS_METADATA_SIZE or less */ - if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) { + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); + + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) { kfree(table); return ERR_PTR(-EINVAL); } From a8717b34003f4f7353b23826617ad872f85d85d8 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 9 Feb 2021 13:42:00 -0800 Subject: [PATCH 24/28] squashfs: add more sanity checks in xattr id lookup commit 506220d2ba21791314af569211ffd8870b8208fa upstream. Sysbot has reported a warning where a kmalloc() attempt exceeds the maximum limit. This has been identified as corruption of the xattr_ids count when reading the xattr id lookup table. This patch adds a number of additional sanity checks to detect this corruption and others. 1. It checks for a corrupted xattr index read from the inode. This could be because the metadata block is uncompressed, or because the "compression" bit has been corrupted (turning a compressed block into an uncompressed block). This would cause an out of bounds read. 2. It checks against corruption of the xattr_ids count. This can either lead to the above kmalloc failure, or a smaller than expected table to be read. 3. It checks the contents of the index table for corruption. [phillip@squashfs.org.uk: fix checkpatch issue] Link: https://lkml.kernel.org/r/270245655.754655.1612770082682@webmail.123-reg.co.uk Link: https://lkml.kernel.org/r/20210204130249.4495-5-phillip@squashfs.org.uk Signed-off-by: Phillip Lougher Reported-by: syzbot+2ccea6339d368360800d@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/squashfs/xattr_id.c | 66 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index c89607d690c4..3a655d879600 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -44,10 +44,15 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, struct squashfs_sb_info *msblk = sb->s_fs_info; int block = SQUASHFS_XATTR_BLOCK(index); int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index); - u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]); + u64 start_block; struct squashfs_xattr_id id; int err; + if (index >= msblk->xattr_ids) + return -EINVAL; + + start_block = le64_to_cpu(msblk->xattr_id_table[block]); + err = squashfs_read_metadata(sb, &id, &start_block, &offset, sizeof(id)); if (err < 0) @@ -63,13 +68,17 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, /* * Read uncompressed xattr id lookup table indexes from disk into memory */ -__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, +__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, u64 *xattr_table_start, int *xattr_ids) { - unsigned int len; + struct squashfs_sb_info *msblk = sb->s_fs_info; + unsigned int len, indexes; struct squashfs_xattr_id_table *id_table; + __le64 *table; + u64 start, end; + int n; - id_table = squashfs_read_table(sb, start, sizeof(*id_table)); + id_table = squashfs_read_table(sb, table_start, sizeof(*id_table)); if 
(IS_ERR(id_table)) return (__le64 *) id_table; @@ -83,13 +92,52 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, if (*xattr_ids == 0) return ERR_PTR(-EINVAL); - /* xattr_table should be less than start */ - if (*xattr_table_start >= start) + len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); + indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids); + + /* + * The computed size of the index table (len bytes) should exactly + * match the table start and end points + */ + start = table_start + sizeof(*id_table); + end = msblk->bytes_used; + + if (len != (end - start)) return ERR_PTR(-EINVAL); - len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); + table = squashfs_read_table(sb, start, len); + if (IS_ERR(table)) + return table; - TRACE("In read_xattr_index_table, length %d\n", len); + /* table[0], table[1], ... table[indexes - 1] store the locations + * of the compressed xattr id blocks. Each entry should be less than + * the next (i.e. table[0] < table[1]), and the difference between them + * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1] + * should be less than table_start, and again the difference + * shouls be SQUASHFS_METADATA_SIZE or less. + * + * Finally xattr_table_start should be less than table[0]. + */ + for (n = 0; n < (indexes - 1); n++) { + start = le64_to_cpu(table[n]); + end = le64_to_cpu(table[n + 1]); - return squashfs_read_table(sb, start + sizeof(*id_table), len); + if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + } + + start = le64_to_cpu(table[indexes - 1]); + if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) { + kfree(table); + return ERR_PTR(-EINVAL); + } + + if (*xattr_table_start >= le64_to_cpu(table[0])) { + kfree(table); + return ERR_PTR(-EINVAL); + } + + return table; } From cb19da4d5ccfa731bf5dd17e6161b6d7d1b32cbe Mon Sep 17 00:00:00 2001 From: Olliver Schinagl Date: Mon, 26 Nov 2018 17:27:44 +0200 Subject: [PATCH 25/28] regulator: core: enable power when setting up constraints [ Upstream commit 2bb1666369339f69f227ad060c250afde94d5c69 ] When a regulator is marked as always on, it is enabled early on, when checking and setting up constraints. It makes the assumption that the bootloader properly initialized the regulator, and just in case enables the regulator anyway. Some constraints however currently get missed, such as the soft-start and ramp-delay. This causes the regulator to be enabled, without the soft-start and ramp-delay being applied, which in turn can cause high-currents or other start-up problems. By moving the always-enabled constraints later in the constraints check, we can at least ensure all constraints for the regulator are followed. Signed-off-by: Olliver Schinagl Signed-off-by: Priit Laes Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index fa8f5fc04d8f..870baa7036ec 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1142,17 +1142,6 @@ static int set_machine_constraints(struct regulator_dev *rdev) } } - /* If the constraints say the regulator should be on at this point - * and we have control then make sure it is enabled. 
- */ - if (rdev->constraints->always_on || rdev->constraints->boot_on) { - ret = _regulator_do_enable(rdev); - if (ret < 0 && ret != -EINVAL) { - rdev_err(rdev, "failed to enable\n"); - return ret; - } - } - if ((rdev->constraints->ramp_delay || rdev->constraints->ramp_disable) && ops->set_ramp_delay) { ret = ops->set_ramp_delay(rdev, rdev->constraints->ramp_delay); @@ -1198,6 +1187,17 @@ static int set_machine_constraints(struct regulator_dev *rdev) } } + /* If the constraints say the regulator should be on at this point + * and we have control then make sure it is enabled. + */ + if (rdev->constraints->always_on || rdev->constraints->boot_on) { + ret = _regulator_do_enable(rdev); + if (ret < 0 && ret != -EINVAL) { + rdev_err(rdev, "failed to enable\n"); + return ret; + } + } + print_constraints(rdev); return 0; } From 45f9c1b2e57cb06b732dc21fda6e47477e802233 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 6 Dec 2018 14:23:18 -0800 Subject: [PATCH 26/28] regulator: core: Clean enabling always-on regulators + their supplies [ Upstream commit 05f224ca669398b567d09feb6e2ceefcb7d7f945 ] At the end of regulator_resolve_supply() we have historically turned on our supply in some cases. This could be for one of two reasons: 1. If resolving supplies was happening before the call to set_machine_constraints() we needed to predict if set_machine_constraints() was going to turn the regulator on and we needed to preemptively turn the supply on. 2. Maybe set_machine_constraints() happened before we could resolve supplies (because we failed the first time to resolve) and thus we might need to propagate an enable that already happened up to our supply. Historically regulator_resolve_supply() used _regulator_is_enabled() to decide whether to turn on the supply. Let's change things a little bit. Specifically: 1. Let's try to enable the supply and the regulator in the same place, both in set_machine_constraints(). This means that we have exactly the same logic for enabling the supply and the regulator. 2. Let's properly set use_count when we enable always-on or boot-on regulators even for those that don't have supplies. The previous commit 1fc12b05895e ("regulator: core: Avoid propagating to supplies when possible") only did this right for regulators with supplies. 3. Let's make it clear that the only time we need to enable the supply in regulator_resolve_supply() is if the main regulator is currently in use. By using use_count (like the rest of the code) to decide if we're going to enable our supply we keep everything consistent. Overall the new scheme should be cleaner and easier to reason about. In addition to fixing regulator_summary to be more correct (because of the more correct use_count), this change also has the effect of no longer using _regulator_is_enabled() in this code path. _regulator_is_enabled() could return an error code for some regulators at bootup (like RPMh) that can't read their initial state. While one can argue that the design of those regulators is sub-optimal, the new logic sidesteps this brokenness. This fix in particular fixes observed problems on Qualcomm sdm845 boards which use the above-mentioned RPMh regulator. Those problems were made worse by commit 1fc12b05895e ("regulator: core: Avoid propagating to supplies when possible") because now we'd think at bootup that the SD regulators were already enabled and we'd never try them again. 
Fixes: 1fc12b05895e ("regulator: core: Avoid propagating to supplies when possible") Reported-by: Evan Green Signed-off-by: Douglas Anderson Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 870baa7036ec..b79ecf37e40f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1191,11 +1191,21 @@ static int set_machine_constraints(struct regulator_dev *rdev) * and we have control then make sure it is enabled. */ if (rdev->constraints->always_on || rdev->constraints->boot_on) { + if (rdev->supply) { + ret = regulator_enable(rdev->supply); + if (ret < 0) { + _regulator_put(rdev->supply); + rdev->supply = NULL; + return ret; + } + } + ret = _regulator_do_enable(rdev); if (ret < 0 && ret != -EINVAL) { rdev_err(rdev, "failed to enable\n"); return ret; } + rdev->use_count++; } print_constraints(rdev); @@ -1645,8 +1655,12 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) goto out; } - /* Cascade always-on state to supply */ - if (_regulator_is_enabled(rdev)) { + /* + * In set_machine_constraints() we may have turned this regulator on + * but we couldn't propagate to the supply if it hadn't been resolved + * yet. Do it now. + */ + if (rdev->use_count) { ret = regulator_enable(rdev->supply); if (ret < 0) { _regulator_put(rdev->supply); From 9dcaae42b61d8106056f027c4a09f443af0a6c7f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Jan 2021 13:20:42 +0000 Subject: [PATCH 27/28] regulator: Fix lockdep warning resolving supplies [ Upstream commit 14a71d509ac809dcf56d7e3ca376b15d17bd0ddd ] With commit eaa7995c529b54 (regulator: core: avoid regulator_resolve_supply() race condition) we started holding the rdev lock while resolving supplies, an operation that requires holding the regulator_list_mutex. This results in lockdep warnings since in other places we take the list mutex then the mutex on an individual rdev. Since the goal is to make sure that we don't call set_supply() twice rather than a concern about the cost of resolution pull the rdev lock and check for duplicate resolution down to immediately before we do the set_supply() and drop it again once the allocation is done. Fixes: eaa7995c529b54 (regulator: core: avoid regulator_resolve_supply() race condition) Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210122132042.10306-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b79ecf37e40f..65e1cde13d59 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1587,17 +1587,6 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (rdev->supply) return 0; - /* - * Recheck rdev->supply with rdev->mutex lock held to avoid a race - * between rdev->supply null check and setting rdev->supply in - * set_supply() from concurrent tasks. - */ - regulator_lock(rdev); - - /* Supply just resolved by a concurrent task? 
*/ - if (rdev->supply) - goto out; - r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); @@ -1649,12 +1638,29 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) goto out; } - ret = set_supply(rdev, r); - if (ret < 0) { + /* + * Recheck rdev->supply with rdev->mutex lock held to avoid a race + * between rdev->supply null check and setting rdev->supply in + * set_supply() from concurrent tasks. + */ + regulator_lock(rdev); + + /* Supply just resolved by a concurrent task? */ + if (rdev->supply) { + regulator_unlock(rdev); put_device(&r->dev); goto out; } + ret = set_supply(rdev, r); + if (ret < 0) { + regulator_unlock(rdev); + put_device(&r->dev); + goto out; + } + + regulator_unlock(rdev); + /* * In set_machine_constraints() we may have turned this regulator on * but we couldn't propagate to the supply if it hadn't been resolved @@ -1670,7 +1676,6 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } out: - regulator_unlock(rdev); return ret; } From 255b58a2b3af0baa0ee11507390349217b8b73b0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 13 Feb 2021 13:51:16 +0100 Subject: [PATCH 28/28] Linux 4.19.176 Tested-by: Jason Self Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Tested-by: Pavel Machek (CIP) Tested-by: Ross Schmidt Link: https://lore.kernel.org/r/20210212074240.963766197@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index db37b39fae69..6bebe3b22b45 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 175 +SUBLEVEL = 176 EXTRAVERSION = NAME = "People's Front"