android_kernel_xiaomi_sm7250/drivers/soc/qcom/rpmh.c

646 lines
14 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2021, The Linux Foundation. All rights reserved.
*/
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <soc/qcom/rpmh.h>
#include "rpmh-internal.h"
#define RPMH_TIMEOUT_MS msecs_to_jiffies(10000)
#define DEFINE_RPMH_MSG_ONSTACK(dev, s, q, name) \
struct rpmh_request name = { \
.msg = { \
.state = s, \
.cmds = name.cmd, \
.num_cmds = 0, \
.wait_for_compl = true, \
}, \
.cmd = { { 0 } }, \
.completion = q, \
.dev = dev, \
.needs_free = false, \
}
#define ctrlr_to_drv(ctrlr) container_of(ctrlr, struct rsc_drv, client)
/**
* struct cache_req: the request object for caching
*
* @addr: the address of the resource
* @sleep_val: the sleep vote
* @wake_val: the wake vote
* @list: linked list obj
*/
struct cache_req {
u32 addr;
u32 sleep_val;
u32 wake_val;
struct list_head list;
};
/**
* struct batch_cache_req - An entry in our batch catch
*
* @list: linked list obj
* @count: number of messages
* @rpm_msgs: the messages
*/
struct batch_cache_req {
struct list_head list;
int count;
struct rpmh_request *rpm_msgs;
};
static struct rpmh_ctrlr *get_rpmh_ctrlr(const struct device *dev)
{
struct rsc_drv *drv = dev_get_drvdata(dev->parent);
return &drv->client;
}
static int check_ctrlr_state(struct rpmh_ctrlr *ctrlr, enum rpmh_state state)
{
int ret = 0;
/* Do not allow setting active votes when in solver mode */
spin_lock(&ctrlr->cache_lock);
if (ctrlr->in_solver_mode && state == RPMH_ACTIVE_ONLY_STATE)
ret = -EBUSY;
spin_unlock(&ctrlr->cache_lock);
return ret;
}
/**
* rpmh_mode_solver_set: Indicate that the RSC controller hardware has
* been configured to be in solver mode
*
* @dev: the device making the request
* @enable: Boolean value indicating if the controller is in solver mode.
*
* When solver mode is enabled, passthru API will not be able to send wake
* votes, just awake and active votes.
*/
int rpmh_mode_solver_set(const struct device *dev, bool enable)
{
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
spin_lock(&ctrlr->cache_lock);
rpmh_rsc_mode_solver_set(ctrlr_to_drv(ctrlr), enable);
ctrlr->in_solver_mode = enable;
spin_unlock(&ctrlr->cache_lock);
return 0;
}
EXPORT_SYMBOL(rpmh_mode_solver_set);
void rpmh_tx_done(const struct tcs_request *msg, int r)
{
struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
msg);
struct completion *compl = rpm_msg->completion;
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
bool free = rpm_msg->needs_free;
rpm_msg->err = r;
if (r)
dev_err(rpm_msg->dev, "RPMH TX fail in msg addr=%#x, err=%d\n",
rpm_msg->msg.cmds[0].addr, r);
if (!compl)
goto exit;
/* Signal the blocking thread we are done */
complete(compl);
exit:
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
if (free)
kfree(rpm_msg);
}
static struct cache_req *__find_req(struct rpmh_ctrlr *ctrlr, u32 addr)
{
struct cache_req *p, *req = NULL;
list_for_each_entry(p, &ctrlr->cache, list) {
if (p->addr == addr) {
req = p;
break;
}
}
return req;
}
static struct cache_req *cache_rpm_request(struct rpmh_ctrlr *ctrlr,
enum rpmh_state state,
struct tcs_cmd *cmd)
{
struct cache_req *req;
spin_lock(&ctrlr->cache_lock);
req = __find_req(ctrlr, cmd->addr);
if (req)
goto existing;
req = kzalloc(sizeof(*req), GFP_ATOMIC);
if (!req) {
req = ERR_PTR(-ENOMEM);
goto unlock;
}
req->addr = cmd->addr;
req->sleep_val = req->wake_val = UINT_MAX;
INIT_LIST_HEAD(&req->list);
list_add_tail(&req->list, &ctrlr->cache);
existing:
switch (state) {
case RPMH_ACTIVE_ONLY_STATE:
if (req->sleep_val != UINT_MAX) {
req->wake_val = cmd->data;
ctrlr->dirty = true;
}
break;
case RPMH_WAKE_ONLY_STATE:
if (req->wake_val != cmd->data) {
req->wake_val = cmd->data;
ctrlr->dirty = true;
}
break;
case RPMH_SLEEP_STATE:
if (req->sleep_val != cmd->data) {
req->sleep_val = cmd->data;
ctrlr->dirty = true;
}
break;
default:
break;
}
unlock:
spin_unlock(&ctrlr->cache_lock);
return req;
}
/**
* __rpmh_write: Cache and send the RPMH request
*
* @dev: The device making the request
* @state: Active/Sleep request type
* @rpm_msg: The data that needs to be sent (cmds).
*
* Cache the RPMH request and send if the state is ACTIVE_ONLY.
* SLEEP/WAKE_ONLY requests are not sent to the controller at
* this time. Use rpmh_flush() to send them to the controller.
*/
static int __rpmh_write(const struct device *dev, enum rpmh_state state,
struct rpmh_request *rpm_msg)
{
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
int ret = -EINVAL;
struct cache_req *req;
int i;
rpm_msg->msg.state = state;
/* Cache the request in our store and link the payload */
for (i = 0; i < rpm_msg->msg.num_cmds; i++) {
req = cache_rpm_request(ctrlr, state, &rpm_msg->msg.cmds[i]);
if (IS_ERR(req))
return PTR_ERR(req);
}
rpm_msg->msg.state = state;
if (state == RPMH_ACTIVE_ONLY_STATE) {
WARN_ON(irqs_disabled());
ret = rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msg->msg);
} else {
/* Clean up our call by spoofing tx_done */
ret = 0;
rpmh_tx_done(&rpm_msg->msg, ret);
}
return ret;
}
static int __fill_rpmh_msg(struct rpmh_request *req, enum rpmh_state state,
const struct tcs_cmd *cmd, u32 n)
{
if (!cmd || !n || n > MAX_RPMH_PAYLOAD)
return -EINVAL;
memcpy(req->cmd, cmd, n * sizeof(*cmd));
req->msg.state = state;
req->msg.cmds = req->cmd;
req->msg.num_cmds = n;
return 0;
}
/**
* rpmh_write_async: Write a set of RPMH commands
*
* @dev: The device making the request
* @state: Active/sleep set
* @cmd: The payload data
* @n: The number of elements in payload
*
* Write a set of RPMH commands, the order of commands is maintained
* and will be sent as a single shot.
*/
int rpmh_write_async(const struct device *dev, enum rpmh_state state,
const struct tcs_cmd *cmd, u32 n)
{
struct rpmh_request *rpm_msg;
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
int ret;
if (rpmh_standalone)
return 0;
ret = check_ctrlr_state(ctrlr, state);
if (ret)
return ret;
rpm_msg = kzalloc(sizeof(*rpm_msg), GFP_ATOMIC);
if (!rpm_msg)
return -ENOMEM;
rpm_msg->needs_free = true;
ret = __fill_rpmh_msg(rpm_msg, state, cmd, n);
if (ret) {
kfree(rpm_msg);
return ret;
}
return __rpmh_write(dev, state, rpm_msg);
}
EXPORT_SYMBOL(rpmh_write_async);
/**
* rpmh_write: Write a set of RPMH commands and block until response
*
* @rc: The RPMH handle got from rpmh_get_client
* @state: Active/sleep set
* @cmd: The payload data
* @n: The number of elements in @cmd
*
* May sleep. Do not call from atomic contexts.
*/
int rpmh_write(const struct device *dev, enum rpmh_state state,
const struct tcs_cmd *cmd, u32 n)
{
DECLARE_COMPLETION_ONSTACK(compl);
DEFINE_RPMH_MSG_ONSTACK(dev, state, &compl, rpm_msg);
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
int ret;
if (!cmd || !n || n > MAX_RPMH_PAYLOAD)
return -EINVAL;
if (rpmh_standalone)
return 0;
ret = check_ctrlr_state(ctrlr, state);
if (ret)
return ret;
memcpy(rpm_msg.cmd, cmd, n * sizeof(*cmd));
rpm_msg.msg.num_cmds = n;
ret = __rpmh_write(dev, state, &rpm_msg);
if (ret)
return ret;
ret = wait_for_completion_timeout(&compl, RPMH_TIMEOUT_MS);
if (!ret) {
rpmh_rsc_debug(ctrlr_to_drv(ctrlr), &compl);
return -ETIMEDOUT;
}
return 0;
}
EXPORT_SYMBOL(rpmh_write);
static void cache_batch(struct rpmh_ctrlr *ctrlr, struct batch_cache_req *req)
{
spin_lock(&ctrlr->cache_lock);
list_add_tail(&req->list, &ctrlr->batch_cache);
spin_unlock(&ctrlr->cache_lock);
}
static int flush_batch(struct rpmh_ctrlr *ctrlr)
{
struct batch_cache_req *req;
const struct rpmh_request *rpm_msg;
int ret = 0;
int i;
/* Send Sleep/Wake requests to the controller, expect no response */
spin_lock(&ctrlr->cache_lock);
list_for_each_entry(req, &ctrlr->batch_cache, list) {
for (i = 0; i < req->count; i++) {
rpm_msg = req->rpm_msgs + i;
ret = rpmh_rsc_write_ctrl_data(ctrlr_to_drv(ctrlr),
&rpm_msg->msg);
if (ret)
break;
}
}
spin_unlock(&ctrlr->cache_lock);
return ret;
}
static void invalidate_batch(struct rpmh_ctrlr *ctrlr)
{
struct batch_cache_req *req, *tmp;
spin_lock(&ctrlr->cache_lock);
list_for_each_entry_safe(req, tmp, &ctrlr->batch_cache, list) {
list_del(&req->list);
kfree(req);
}
INIT_LIST_HEAD(&ctrlr->batch_cache);
spin_unlock(&ctrlr->cache_lock);
}
/**
* rpmh_write_batch: Write multiple sets of RPMH commands and wait for the
* batch to finish.
*
* @dev: the device making the request
* @state: Active/sleep set
* @cmd: The payload data
* @n: The array of count of elements in each batch, 0 terminated.
*
* Write a request to the RSC controller without caching. If the request
* state is ACTIVE, then the requests are treated as completion request
* and sent to the controller immediately. The function waits until all the
* commands are complete. If the request was to SLEEP or WAKE_ONLY, then the
* request is sent as fire-n-forget and no ack is expected.
*
* May sleep. Do not call from atomic contexts for ACTIVE_ONLY requests.
*/
int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
const struct tcs_cmd *cmd, u32 *n)
{
struct batch_cache_req *req;
struct rpmh_request *rpm_msgs;
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
struct completion *compls;
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
unsigned long time_left;
int count = 0;
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
int ret, i;
void *ptr;
if (!cmd || !n)
return -EINVAL;
if (rpmh_standalone)
return 0;
ret = check_ctrlr_state(ctrlr, state);
if (ret)
return ret;
while (n[count] > 0)
count++;
if (!count)
return -EINVAL;
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
ptr = kzalloc(sizeof(*req) +
count * (sizeof(req->rpm_msgs[0]) + sizeof(*compls)),
GFP_ATOMIC);
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
if (!ptr)
return -ENOMEM;
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
req = ptr;
rpm_msgs = ptr + sizeof(*req);
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
compls = ptr + sizeof(*req) + count * sizeof(*rpm_msgs);
req->count = count;
req->rpm_msgs = rpm_msgs;
for (i = 0; i < count; i++) {
__fill_rpmh_msg(rpm_msgs + i, state, cmd, n[i]);
cmd += n[i];
}
if (state != RPMH_ACTIVE_ONLY_STATE) {
cache_batch(ctrlr, req);
return 0;
}
for (i = 0; i < count; i++) {
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
struct completion *compl = &compls[i];
init_completion(compl);
rpm_msgs[i].completion = compl;
ret = rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msgs[i].msg);
if (ret) {
pr_err("Error(%d) sending RPMH message addr=%#x\n",
ret, rpm_msgs[i].msg.cmds[0].addr);
break;
}
}
time_left = RPMH_TIMEOUT_MS;
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
while (i--) {
time_left = wait_for_completion_timeout(&compls[i], time_left);
if (!time_left) {
/*
* Better hope they never finish because they'll signal
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
* the completion that we're going to free once
* we've returned from this function.
*/
rpmh_rsc_debug(ctrlr_to_drv(ctrlr), &compls[i]);
BUG_ON(1);
}
}
soc: qcom: rpmh: Avoid accessing freed memory from batch API Using the batch API from the interconnect driver sometimes leads to a KASAN error due to an access to freed memory. This is easier to trigger with threadirqs on the kernel commandline. BUG: KASAN: use-after-free in rpmh_tx_done+0x114/0x12c Read of size 1 at addr fffffff51414ad84 by task irq/110-apps_rs/57 CPU: 0 PID: 57 Comm: irq/110-apps_rs Tainted: G W 4.19.10 #72 Call trace: dump_backtrace+0x0/0x2f8 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0xcc/0x10c print_address_description+0x74/0x240 kasan_report+0x250/0x26c __asan_report_load1_noabort+0x20/0x2c rpmh_tx_done+0x114/0x12c tcs_tx_done+0x450/0x768 irq_forced_thread_fn+0x58/0x9c irq_thread+0x120/0x1dc kthread+0x248/0x260 ret_from_fork+0x10/0x18 Allocated by task 385: kasan_kmalloc+0xac/0x148 __kmalloc+0x170/0x1e4 rpmh_write_batch+0x174/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 Freed by task 385: __kasan_slab_free+0x12c/0x1e0 kasan_slab_free+0x10/0x1c kfree+0x134/0x588 rpmh_write_batch+0x49c/0x540 qcom_icc_set+0x8dc/0x9ac icc_set+0x288/0x2e8 a6xx_gmu_stop+0x320/0x3c0 a6xx_pm_suspend+0x108/0x124 adreno_suspend+0x50/0x60 cr50_spi spi5.0: SPI transfer timed out pm_generic_runtime_suspend+0x60/0x78 __rpm_callback+0x214/0x32c rpm_callback+0x54/0x184 rpm_suspend+0x3f8/0xa90 pm_runtime_work+0xb4/0x178 process_one_work+0x544/0xbc0 worker_thread+0x514/0x7d0 kthread+0x248/0x260 ret_from_fork+0x10/0x18 The fault address belongs to the object at fffffff51414ac80 which belongs to the cache kmalloc-512 of size 512 The fault address is located 260 bytes inside of 512-byte region [fffffff51414ac80, fffffff51414ae80) The fault address belongs to the page: page:ffffffbfd4505200 count:1 mapcount:0 mapping:fffffff51e00c680 index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 4000000000008100 ffffffbfd4529008 ffffffbfd44f9208 fffffff51e00c680 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the fault address: fffffff51414ac80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ad00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >fffffff51414ad80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ fffffff51414ae00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fffffff51414ae80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc The batch API sets the same completion for each rpmh message that's sent and then loops through all the messages and waits for that single completion declared on the stack to be completed before returning from the function and freeing the message structures. Unfortunately, some messages may still be in process and 'stuck' in the TCS. At some later point, the tcs_tx_done() interrupt will run and try to process messages that have already been freed at the end of rpmh_write_batch(). This will in turn access the 'needs_free' member of the rpmh_request structure and cause KASAN to complain. Furthermore, if there's a message that's completed in rpmh_tx_done() and freed immediately after the complete() call is made we'll be racing with potentially freed memory when accessing the 'needs_free' member: CPU0 CPU1 ---- ---- rpmh_tx_done() complete(&compl) wait_for_completion(&compl) kfree(rpm_msg) if (rpm_msg->needs_free) <KASAN warning splat> Let's fix this by allocating a chunk of completions for each message and waiting for all of them to be completed before returning from the batch API. Alternatively, we could wait for the last message in the batch, but that may be a more complicated change because it looks like tcs_tx_done() just iterates through the indices of the queue and completes each message instead of tracking the last inserted message and completing that first. Change-Id: Ie4a771f0ce6799ea620226b8ccbb5cd1d0c6fd39 Cc: Lina Iyer <ilina@codeaurora.org> Cc: "Raju P.L.S.S.S.N" <rplsssn@codeaurora.org> Cc: Matthias Kaehlcke <mka@chromium.org> Cc: Evan Green <evgreen@chromium.org> Fixes: c8790cb6da58 ("drivers: qcom: rpmh: add support for batch RPMH request") Patch-mainline: https://patchwork.codeaurora.org/patch/700833/ Signed-off-by: Stephen Boyd <swboyd@chromium.org> [ilina@codeaurora.org: Updated for restricted words in commit text] Signed-off-by: Lina Iyer <ilina@codeaurora.org>
2019-01-15 23:54:47 +01:00
kfree(ptr);
return ret;
}
EXPORT_SYMBOL(rpmh_write_batch);
/**
* rpmh_write_pdc_data: Write PDC data to the controller
*
* @dev: the device making the request
* @cmd: The payload data
* @n: The number of elements in payload
*
* Write PDC data to the controller. The messages are always sent async.
*
* May be called from atomic contexts.
*/
int rpmh_write_pdc_data(const struct device *dev,
const struct tcs_cmd *cmd, u32 n)
{
DEFINE_RPMH_MSG_ONSTACK(dev, 0, NULL, rpm_msg);
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
if (!n || n > MAX_RPMH_PAYLOAD)
return -EINVAL;
if (rpmh_standalone)
return 0;
memcpy(rpm_msg.cmd, cmd, n * sizeof(*cmd));
rpm_msg.msg.num_cmds = n;
rpm_msg.msg.wait_for_compl = false;
return rpmh_rsc_write_pdc_data(ctrlr_to_drv(ctrlr), &rpm_msg.msg);
}
EXPORT_SYMBOL(rpmh_write_pdc_data);
static int is_req_valid(struct cache_req *req)
{
return (req->sleep_val != UINT_MAX &&
req->wake_val != UINT_MAX &&
req->sleep_val != req->wake_val);
}
static int send_single(const struct device *dev, enum rpmh_state state,
u32 addr, u32 data)
{
DEFINE_RPMH_MSG_ONSTACK(dev, state, NULL, rpm_msg);
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
/* Wake sets are always complete and sleep sets are not */
rpm_msg.msg.wait_for_compl = (state == RPMH_WAKE_ONLY_STATE);
rpm_msg.cmd[0].addr = addr;
rpm_msg.cmd[0].data = data;
rpm_msg.msg.num_cmds = 1;
return rpmh_rsc_write_ctrl_data(ctrlr_to_drv(ctrlr), &rpm_msg.msg);
}
/**
* rpmh_flush: Flushes the buffered active and sleep sets to TCS
*
* @dev: The device making the request
*
* Return: -EBUSY if the controller is busy, probably waiting on a response
* to a RPMH request sent earlier.
*
* This function is always called from the sleep code from the last CPU
* that is powering down the entire system. Since no other RPMH API would be
* executing at this time, it is safe to run lockless.
*/
int rpmh_flush(const struct device *dev)
{
struct cache_req *p;
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
int ret;
if (rpmh_standalone)
return 0;
if (!ctrlr->dirty) {
pr_debug("Skipping flush, TCS has latest data.\n");
return 0;
}
do {
ret = rpmh_rsc_invalidate(ctrlr_to_drv(ctrlr));
} while (ret == -EAGAIN);
/* First flush the cached batch requests */
ret = flush_batch(ctrlr);
if (ret)
return ret;
/*
* Nobody else should be calling this function other than system PM,
* hence we can run without locks.
*/
list_for_each_entry(p, &ctrlr->cache, list) {
if (!is_req_valid(p)) {
pr_debug("%s: skipping RPMH req: a:%#x s:%#x w:%#x",
__func__, p->addr, p->sleep_val, p->wake_val);
continue;
}
ret = send_single(dev, RPMH_SLEEP_STATE, p->addr, p->sleep_val);
if (ret)
return ret;
ret = send_single(dev, RPMH_WAKE_ONLY_STATE,
p->addr, p->wake_val);
if (ret)
return ret;
}
ctrlr->dirty = false;
return 0;
}
EXPORT_SYMBOL(rpmh_flush);
/**
* rpmh_invalidate: Invalidate all sleep and active sets
* sets.
*
* @dev: The device making the request
*
* Invalidate the sleep and active values in the TCS blocks.
*/
int rpmh_invalidate(const struct device *dev)
{
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
int ret;
if (rpmh_standalone)
return 0;
invalidate_batch(ctrlr);
ctrlr->dirty = true;
do {
ret = rpmh_rsc_invalidate(ctrlr_to_drv(ctrlr));
} while (ret == -EAGAIN);
return ret;
}
EXPORT_SYMBOL(rpmh_invalidate);
/**
* rpmh_ctrlr_idle: Return the controller idle status
*
* @dev: the device making the request
*/
int rpmh_ctrlr_idle(const struct device *dev)
{
struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
if (rpmh_standalone)
return 0;
return rpmh_rsc_ctrlr_is_idle(ctrlr_to_drv(ctrlr));
}
EXPORT_SYMBOL(rpmh_ctrlr_idle);