e150be1c85
This adds support to arm64 for fast refcount checking, as contributed
by Kees for x86 based on the implementation by grsecurity/PaX.

The general approach is identical: the existing atomic_t helpers are
cloned for refcount_t, with the arithmetic instruction modified to set
the PSTATE flags, and one or two branch instructions added that jump to
an out of line handler if overflow, decrement to zero or increment from
zero are detected.

One complication that we have to deal with on arm64 is the fact that
it has two atomics implementations: the original LL/SC implementation
using load/store exclusive loops, and the newer LSE one that does mostly
the same in a single instruction. So we need to clone some parts of
both for the refcount handlers, but we also need to deal with the way
LSE builds fall back to LL/SC at runtime if the hardware does not
support it.

As is the case with the x86 version, the performance gain is substantial
(ThunderX2 @ 2.2 GHz, using LSE), even though the arm64 implementation
incorporates an add-from-zero check as well:

  perf stat -B -- echo ATOMIC_TIMING >/sys/kernel/debug/provoke-crash/DIRECT

      116252672661      cycles                    #    2.207 GHz

      52.689793525 seconds time elapsed

  perf stat -B -- echo REFCOUNT_TIMING >/sys/kernel/debug/provoke-crash/DIRECT

      127060259162      cycles                    #    2.207 GHz

      57.243690077 seconds time elapsed

For comparison, the numbers below were captured using CONFIG_REFCOUNT_FULL,
which uses the validation routines implemented in C using cmpxchg():

  perf stat -B -- echo REFCOUNT_TIMING >/sys/kernel/debug/provoke-crash/DIRECT

   Performance counter stats for 'cat /dev/fd/63':

      191057942484      cycles                    #    2.207 GHz

      86.568269904 seconds time elapsed

As a bonus, this code has been found to perform significantly better on
systems with many CPUs, due to the fact that it no longer relies on the
load/compare-and-swap combo performed in a tight loop, which is what we
emit for cmpxchg() on arm64.
Cc: Will Deacon <will.deacon@arm.com>
Cc: Jayachandran Chandrasekharan Nair <jnair@marvell.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jan Glauber <jglauber@cavium.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

[kdrag0n]
 - Backported to k4.14 from:
     https://www.spinics.net/lists/arm-kernel/msg735992.html
 - Benchmarked on sm8150 using perf and LKDTM REFCOUNT_TIMING:
     https://docs.google.com/spreadsheets/d/14CctCmWzQAGhOmpHrBJfXQy_HuNFTpEkMEYSUGKOZR8/edit

         | Fast checking      | Generic checking
---------+--------------------+-----------------------
Cycles   | 79235532616        | 102554062037
         | 79391767237        | 99625955749
Time     | 32.99879212 sec    | 42.5354029 sec
         | 32.97133254 sec    | 41.31902045 sec

Average:
Cycles   | 79313649927        | 101090008893
Time     | 33 sec             | 42 sec

Signed-off-by: Danny Lin <danny@kdrag0n.dev>
190 lines
6.9 KiB
C
190 lines
6.9 KiB
C
/*
 * Based on arch/arm/include/asm/atomic.h
 *
 * Copyright (C) 1996 Russell King.
 * Copyright (C) 2002 Deep Blue Solutions Ltd.
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
#ifndef __ASM_ATOMIC_H
|
|
#define __ASM_ATOMIC_H
|
|
|
|
#include <linux/compiler.h>
|
|
#include <linux/stringify.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <asm/barrier.h>
|
|
#include <asm/brk-imm.h>
|
|
#include <asm/lse.h>
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
/*
 * To avoid having to allocate registers that pass the counter address and
 * address of the call site to the overflow handler, encode the register and
 * call site offset in a dummy cbz instruction that we can decode later.
 *
 * REFCOUNT_CHECK_TAIL is the out-of-line part of a refcount check. It is an
 * inline-asm string fragment that emits, into subsection 1:
 *
 *   33: a brk carrying REFCOUNT_BRK_IMM; this is the target of the "33f"
 *       branches in the REFCOUNT_POST_CHECK_* fragments;
 *       a cbz on the %[counter] operand that targets local label 22. It
 *       follows the brk, is marked as never reached, and only carries the
 *       encoded information described above.
 *
 * and then switches back with .previous. The enclosing asm statement must
 * therefore provide a [counter] operand and define label 22 ahead of this
 * fragment (the cbz uses the backward reference "22b").
 */
#define REFCOUNT_CHECK_TAIL						\
" .subsection 1\n"							\
"33: brk " __stringify(REFCOUNT_BRK_IMM) "\n"				\
" cbz %[counter], 22b\n"	/* never reached */			\
" .previous\n"
|
|
|
|
/*
 * Post-operation check for a negative result.
 *
 * Defines local label 22 and, at that label, branches to the out-of-line
 * handler at label 33 when the N flag is set (b.mi). The handler itself is
 * provided by REFCOUNT_CHECK_TAIL, which is appended here; the flags are
 * expected to have been set by the instruction(s) preceding this fragment.
 */
#define REFCOUNT_POST_CHECK_NEG						\
"22: b.mi 33f\n"							\
	REFCOUNT_CHECK_TAIL
|
|
|
|
/*
 * Post-operation check for a zero or negative result.
 *
 * First branches to the out-of-line handler at label 33 when the Z flag is
 * set (b.eq), then continues with REFCOUNT_POST_CHECK_NEG, which adds the
 * N-flag branch and the handler itself.
 */
#define REFCOUNT_POST_CHECK_NEG_OR_ZERO					\
" b.eq 33f\n"								\
	REFCOUNT_POST_CHECK_NEG
|
|
|
|
/*
 * Optional pre-check fragments; `reg` is pasted verbatim into the asm text.
 *
 * REFCOUNT_PRE_CHECK_ZERO(reg) emits "ccmp reg, wzr, #8, pl": if the current
 * flags satisfy "pl" (N clear), reg is compared against the zero register;
 * otherwise NZCV is loaded with #8 (N set, Z clear). A following b.eq/b.mi
 * pair thus sees Z when reg is zero and N when the earlier result was
 * negative. NOTE(review): presumably `reg` holds the counter value from
 * before the update (the "increment from zero" case) -- confirm at the users
 * in the atomic_lse.h / atomic_ll_sc.h implementations.
 *
 * REFCOUNT_PRE_CHECK_NONE(reg) expands to nothing, for operations that need
 * no pre-check.
 */
#define REFCOUNT_PRE_CHECK_ZERO(reg)	"ccmp " #reg ", wzr, #8, pl\n"
#define REFCOUNT_PRE_CHECK_NONE(reg)
|
|
|
|
/*
 * Pull in exactly one implementation of the atomic primitives: the LSE
 * variant when both CONFIG_ARM64_LSE_ATOMICS and CONFIG_AS_LSE are defined,
 * the LL/SC variant otherwise.
 *
 * __ARM64_IN_ATOMIC_IMPL is defined only for the duration of that include.
 * NOTE(review): presumably the implementation headers test it to reject
 * direct inclusion -- confirm in atomic_lse.h / atomic_ll_sc.h.
 */
#define __ARM64_IN_ATOMIC_IMPL

#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
#include <asm/atomic_lse.h>
#else
#include <asm/atomic_ll_sc.h>
#endif

#undef __ARM64_IN_ATOMIC_IMPL
|
|
|
|
#include <asm/cmpxchg.h>
|
|
|
|
/* Brace initializer for an atomic object: (i) initializes its first member. */
#define ATOMIC_INIT(i)		{ (i) }

/*
 * Plain load/store of the counter member through READ_ONCE()/WRITE_ONCE().
 * `v` is a pointer to an object with a `counter` member.
 */
#define atomic_read(v)		READ_ONCE((v)->counter)
#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
|
|
|
|
/*
 * Self-referential definitions for the 32-bit add/sub-and-return operations
 * in all four ordering variants (relaxed, acquire, release, fully ordered).
 * Each name expands to itself, so the only effect is that the name becomes
 * visible to #ifdef/#ifndef. NOTE(review): presumably generic atomic code
 * uses this to skip generating fallbacks -- confirm against linux/atomic.h.
 */
#define atomic_add_return_relaxed	atomic_add_return_relaxed
#define atomic_add_return_acquire	atomic_add_return_acquire
#define atomic_add_return_release	atomic_add_return_release
#define atomic_add_return		atomic_add_return

#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
#define atomic_sub_return_acquire	atomic_sub_return_acquire
#define atomic_sub_return_release	atomic_sub_return_release
#define atomic_sub_return		atomic_sub_return
|
|
|
|
/*
 * Self-referential definitions for the 32-bit fetch-and-modify operations
 * (add, sub, and, andnot, or, xor), each in its relaxed, acquire, release
 * and fully ordered variant. Each name expands to itself, so the only effect
 * is that the name becomes visible to #ifdef/#ifndef. NOTE(review):
 * presumably generic atomic code uses this to skip generating fallbacks --
 * confirm against linux/atomic.h.
 */
#define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
#define atomic_fetch_add_acquire	atomic_fetch_add_acquire
#define atomic_fetch_add_release	atomic_fetch_add_release
#define atomic_fetch_add		atomic_fetch_add

#define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
#define atomic_fetch_sub_acquire	atomic_fetch_sub_acquire
#define atomic_fetch_sub_release	atomic_fetch_sub_release
#define atomic_fetch_sub		atomic_fetch_sub

#define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
#define atomic_fetch_and_acquire	atomic_fetch_and_acquire
#define atomic_fetch_and_release	atomic_fetch_and_release
#define atomic_fetch_and		atomic_fetch_and

#define atomic_fetch_andnot_relaxed	atomic_fetch_andnot_relaxed
#define atomic_fetch_andnot_acquire	atomic_fetch_andnot_acquire
#define atomic_fetch_andnot_release	atomic_fetch_andnot_release
#define atomic_fetch_andnot		atomic_fetch_andnot

#define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
#define atomic_fetch_or_acquire		atomic_fetch_or_acquire
#define atomic_fetch_or_release		atomic_fetch_or_release
#define atomic_fetch_or			atomic_fetch_or

#define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
#define atomic_fetch_xor_acquire	atomic_fetch_xor_acquire
#define atomic_fetch_xor_release	atomic_fetch_xor_release
#define atomic_fetch_xor		atomic_fetch_xor
|
|
|
|
/*
 * Exchange and compare-exchange on the counter member. Each wrapper forwards
 * to the xchg*()/cmpxchg*() primitive with the same ordering suffix, passing
 * the address of (v)->counter. `v` is a pointer to an object with a
 * `counter` member.
 */
#define atomic_xchg_relaxed(v, new)	xchg_relaxed(&((v)->counter), (new))
#define atomic_xchg_acquire(v, new)	xchg_acquire(&((v)->counter), (new))
#define atomic_xchg_release(v, new)	xchg_release(&((v)->counter), (new))
#define atomic_xchg(v, new)		xchg(&((v)->counter), (new))

#define atomic_cmpxchg_relaxed(v, old, new)				\
	cmpxchg_relaxed(&((v)->counter), (old), (new))
#define atomic_cmpxchg_acquire(v, old, new)				\
	cmpxchg_acquire(&((v)->counter), (old), (new))
#define atomic_cmpxchg_release(v, old, new)				\
	cmpxchg_release(&((v)->counter), (old), (new))
#define atomic_cmpxchg(v, old, new)	cmpxchg(&((v)->counter), (old), (new))

/* Self-referential: only makes the name visible to #ifdef/#ifndef. */
#define atomic_andnot			atomic_andnot
|
|
|
|
/*
 * 64-bit atomic operations.
 *
 * The initializer and the plain read/set accessors are aliases of the 32-bit
 * macros; those only touch the `counter` member, so the same expansion is
 * reused for the 64-bit type.
 */
#define ATOMIC64_INIT			ATOMIC_INIT
#define atomic64_read			atomic_read
#define atomic64_set			atomic_set
|
|
|
|
/*
 * Self-referential definitions for the 64-bit add/sub-and-return operations
 * in all four ordering variants (relaxed, acquire, release, fully ordered).
 * Each name expands to itself, so the only effect is that the name becomes
 * visible to #ifdef/#ifndef. NOTE(review): presumably generic atomic code
 * uses this to skip generating fallbacks -- confirm against linux/atomic.h.
 */
#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
#define atomic64_add_return_acquire	atomic64_add_return_acquire
#define atomic64_add_return_release	atomic64_add_return_release
#define atomic64_add_return		atomic64_add_return

#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
#define atomic64_sub_return_acquire	atomic64_sub_return_acquire
#define atomic64_sub_return_release	atomic64_sub_return_release
#define atomic64_sub_return		atomic64_sub_return
|
|
|
|
/*
 * Self-referential definitions for the 64-bit fetch-and-modify operations
 * (add, sub, and, andnot, or, xor), each in its relaxed, acquire, release
 * and fully ordered variant. Each name expands to itself, so the only effect
 * is that the name becomes visible to #ifdef/#ifndef. NOTE(review):
 * presumably generic atomic code uses this to skip generating fallbacks --
 * confirm against linux/atomic.h.
 */
#define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
#define atomic64_fetch_add_acquire	atomic64_fetch_add_acquire
#define atomic64_fetch_add_release	atomic64_fetch_add_release
#define atomic64_fetch_add		atomic64_fetch_add

#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
#define atomic64_fetch_sub_acquire	atomic64_fetch_sub_acquire
#define atomic64_fetch_sub_release	atomic64_fetch_sub_release
#define atomic64_fetch_sub		atomic64_fetch_sub

#define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
#define atomic64_fetch_and_acquire	atomic64_fetch_and_acquire
#define atomic64_fetch_and_release	atomic64_fetch_and_release
#define atomic64_fetch_and		atomic64_fetch_and

#define atomic64_fetch_andnot_relaxed	atomic64_fetch_andnot_relaxed
#define atomic64_fetch_andnot_acquire	atomic64_fetch_andnot_acquire
#define atomic64_fetch_andnot_release	atomic64_fetch_andnot_release
#define atomic64_fetch_andnot		atomic64_fetch_andnot

#define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
#define atomic64_fetch_or_acquire	atomic64_fetch_or_acquire
#define atomic64_fetch_or_release	atomic64_fetch_or_release
#define atomic64_fetch_or		atomic64_fetch_or

#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
#define atomic64_fetch_xor_acquire	atomic64_fetch_xor_acquire
#define atomic64_fetch_xor_release	atomic64_fetch_xor_release
#define atomic64_fetch_xor		atomic64_fetch_xor
|
|
|
|
/*
 * 64-bit exchange and compare-exchange are aliases of the 32-bit wrappers:
 * each atomic64_* name expands to the atomic_* name with the same ordering
 * suffix.
 */
#define atomic64_xchg_relaxed		atomic_xchg_relaxed
#define atomic64_xchg_acquire		atomic_xchg_acquire
#define atomic64_xchg_release		atomic_xchg_release
#define atomic64_xchg			atomic_xchg

#define atomic64_cmpxchg_relaxed	atomic_cmpxchg_relaxed
#define atomic64_cmpxchg_acquire	atomic_cmpxchg_acquire
#define atomic64_cmpxchg_release	atomic_cmpxchg_release
#define atomic64_cmpxchg		atomic_cmpxchg
|
|
|
|
/*
 * Self-referential definitions: each name expands to itself, so the only
 * effect is that atomic64_andnot and atomic64_dec_if_positive become visible
 * to #ifdef/#ifndef.
 */
#define atomic64_andnot			atomic64_andnot

#define atomic64_dec_if_positive	atomic64_dec_if_positive
|
|
|
|
#endif
|
|
#endif
|