android_kernel_xiaomi_sm7250/arch/arm64/include/asm/atomic.h
Ard Biesheuvel e150be1c85 FROMLIST: arm64: kernel: implement fast refcount checking
This adds support to arm64 for fast refcount checking, as contributed
by Kees for x86 based on the implementation by grsecurity/PaX.

The general approach is identical: the existing atomic_t helpers are
cloned for refcount_t, with the arithmetic instruction modified to set
the PSTATE flags, and one or two branch instructions added that jump to
an out of line handler if overflow, decrement to zero or increment from
zero are detected.
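
For a refcount_t increment this boils down to something like the sketch
below (illustrative only, not the code added by this patch; it shows the
LL/SC flavour, with a placeholder brk immediate standing in for
REFCOUNT_BRK_IMM): the add itself sets the PSTATE flags, and a single
conditional branch diverts to an out-of-line brk that the trap handler
turns into a refcount error report.

  /* rough sketch of a checked LL/SC refcount increment */
  static inline void refcount_inc_sketch(int *counter)
  {
  	int val;
  	unsigned int tmp;

  	asm volatile(
  	"1:	ldxr	%w[val], %[cnt]\n"	/* load-exclusive old value */
  	"	adds	%w[val], %w[val], #1\n"	/* add, setting PSTATE flags */
  	"	stxr	%w[tmp], %w[val], %[cnt]\n"
  	"	cbnz	%w[tmp], 1b\n"		/* lost exclusivity: retry */
  	"	b.mi	3f\n"			/* negative result: trap */
  	"2:\n"
  	"	.subsection 1\n"		/* out-of-line slow path */
  	"3:	brk	#0x100\n"		/* placeholder immediate */
  	"	b	2b\n"
  	"	.previous\n"
  	: [val] "=&r" (val), [tmp] "=&r" (tmp), [cnt] "+Q" (*counter)
  	:
  	: "cc");
  }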

One complication that we have to deal with on arm64 is the fact that
it has two atomics implementations: the original LL/SC implementation
using load/store exclusive loops, and the newer LSE one that does mostly
the same in a single instruction. So we need to clone some parts of
both for the refcount handlers, but we also need to deal with the way
LSE builds fall back to LL/SC at runtime if the hardware does not
support it.
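
The runtime fallback reuses the existing alternatives machinery.
Assuming the ARM64_LSE_ATOMIC_INSN() helper from <asm/lse.h>, a plain
atomic add looks conceptually like the sketch below: the LL/SC loop is
what the compiler emits, and it gets live-patched to the single LSE
instruction (padded with NOPs to the same length) on CPUs that have
ARM64_HAS_LSE_ATOMICS. The in-tree code additionally out-of-lines the
LL/SC bodies and branch-links to them, which this sketch glosses over.

  /* conceptual sketch only */
  static inline void atomic_add_sketch(int i, atomic_t *v)
  {
  	unsigned long tmp;
  	int result;

  	asm volatile(ARM64_LSE_ATOMIC_INSN(
  	/* LL/SC: exclusive load/modify/store retry loop */
  	"1:	ldxr	%w0, %2\n"
  	"	add	%w0, %w0, %w3\n"
  	"	stxr	%w1, %w0, %2\n"
  	"	cbnz	%w1, 1b",
  	/* LSE: one instruction, padded to the same size */
  	"	stadd	%w3, %2\n"
  	__nops(3))
  	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
  	: "r" (i));
  }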

As is the case with the x86 version, the performance gain is substantial
(ThunderX2 @ 2.2 GHz, using LSE), even though the arm64 implementation
incorporates an add-from-zero check as well:

perf stat -B -- echo ATOMIC_TIMING >/sys/kernel/debug/provoke-crash/DIRECT

      116252672661      cycles                    #    2.207 GHz

      52.689793525 seconds time elapsed

perf stat -B -- echo REFCOUNT_TIMING >/sys/kernel/debug/provoke-crash/DIRECT

      127060259162      cycles                    #    2.207 GHz

      57.243690077 seconds time elapsed

For comparison, the numbers below were captured using CONFIG_REFCOUNT_FULL,
which uses the validation routines implemented in C using cmpxchg():

perf stat -B -- echo REFCOUNT_TIMING >/sys/kernel/debug/provoke-crash/DIRECT

 Performance counter stats for 'cat /dev/fd/63':

      191057942484      cycles                    #    2.207 GHz

      86.568269904 seconds time elapsed

As a bonus, this code has been found to perform significantly better on
systems with many CPUs, due to the fact that it no longer relies on the
load/compare-and-swap combo performed in a tight loop, which is what we
emit for cmpxchg() on arm64.
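
For reference, the CONFIG_REFCOUNT_FULL-style check that the numbers
above compare against is done entirely in C around such a loop; a
simplified sketch (modeled on the generic refcount code, not copied
from it) looks like this:

  static inline bool refcount_inc_not_zero_sketch(atomic_t *r)
  {
  	unsigned int val, old;

  	do {
  		val = atomic_read(r);
  		if (!val)		/* increment-from-zero: refuse */
  			return false;
  		if (val == UINT_MAX)	/* already saturated */
  			return true;
  		/* raced with another update: reload and retry */
  		old = atomic_cmpxchg_relaxed(r, val, val + 1);
  	} while (old != val);

  	return true;
  }

Under contention each failed cmpxchg() retries the whole
read/check/update sequence, which is where the scaling penalty on large
systems comes from.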

Cc: Will Deacon <will.deacon@arm.com>
Cc: Jayachandran Chandrasekharan Nair <jnair@marvell.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jan Glauber <jglauber@cavium.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

[kdrag0n]
 - Backported to k4.14 from:
     https://www.spinics.net/lists/arm-kernel/msg735992.html
 - Benchmarked on sm8150 using perf and LKDTM REFCOUNT_TIMING:
     https://docs.google.com/spreadsheets/d/14CctCmWzQAGhOmpHrBJfXQy_HuNFTpEkMEYSUGKOZR8/edit

         | Fast checking      | Generic checking
---------+--------------------+-----------------------
Cycles   | 79235532616        | 102554062037
         | 79391767237        | 99625955749
Time     | 32.99879212 sec    | 42.5354029 sec
         | 32.97133254 sec    | 41.31902045 sec

Average:
Cycles   | 79313649927        | 101090008893
Time     | 33 sec             | 42 sec

Signed-off-by: Danny Lin <danny@kdrag0n.dev>
2022-11-12 11:19:29 +00:00

/*
 * Based on arch/arm/include/asm/atomic.h
 *
 * Copyright (C) 1996 Russell King.
 * Copyright (C) 2002 Deep Blue Solutions Ltd.
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __ASM_ATOMIC_H
#define __ASM_ATOMIC_H

#include <linux/compiler.h>
#include <linux/stringify.h>
#include <linux/types.h>

#include <asm/barrier.h>
#include <asm/brk-imm.h>
#include <asm/lse.h>

#ifdef __KERNEL__

/*
 * To avoid having to allocate registers that pass the counter address and
 * address of the call site to the overflow handler, encode the register and
 * call site offset in a dummy cbz instruction that we can decode later.
 */
#define REFCOUNT_CHECK_TAIL \
" .subsection 1\n" \
"33: brk " __stringify(REFCOUNT_BRK_IMM) "\n" \
" cbz %[counter], 22b\n" /* never reached */ \
" .previous\n"

#define REFCOUNT_POST_CHECK_NEG \
"22: b.mi 33f\n" \
REFCOUNT_CHECK_TAIL

#define REFCOUNT_POST_CHECK_NEG_OR_ZERO \
" b.eq 33f\n" \
REFCOUNT_POST_CHECK_NEG
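
/*
 * These fragments are spliced into the refcount_t helpers cloned from the
 * LL/SC and LSE atomics; the arithmetic instruction there is flag-setting,
 * so the post-checks above only need conditional branches.
 * REFCOUNT_PRE_CHECK_ZERO folds an increment-from-zero test into the same
 * flags: if the flag-setting op produced a non-negative result (pl), the
 * ccmp compares the pre-op counter value against wzr, so the b.eq in
 * REFCOUNT_POST_CHECK_NEG_OR_ZERO fires when the count started at zero;
 * otherwise it loads NZCV with #8 (N set) and the b.mi fires as before.
 */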
#define REFCOUNT_PRE_CHECK_ZERO(reg) "ccmp " #reg ", wzr, #8, pl\n"
#define REFCOUNT_PRE_CHECK_NONE(reg)

#define __ARM64_IN_ATOMIC_IMPL

#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
#include <asm/atomic_lse.h>
#else
#include <asm/atomic_ll_sc.h>
#endif

#undef __ARM64_IN_ATOMIC_IMPL

#include <asm/cmpxchg.h>

#define ATOMIC_INIT(i) { (i) }

#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))

#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_add_return_acquire atomic_add_return_acquire
#define atomic_add_return_release atomic_add_return_release
#define atomic_add_return atomic_add_return

#define atomic_sub_return_relaxed atomic_sub_return_relaxed
#define atomic_sub_return_acquire atomic_sub_return_acquire
#define atomic_sub_return_release atomic_sub_return_release
#define atomic_sub_return atomic_sub_return

#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
#define atomic_fetch_add_acquire atomic_fetch_add_acquire
#define atomic_fetch_add_release atomic_fetch_add_release
#define atomic_fetch_add atomic_fetch_add

#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
#define atomic_fetch_sub_release atomic_fetch_sub_release
#define atomic_fetch_sub atomic_fetch_sub

#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
#define atomic_fetch_and_acquire atomic_fetch_and_acquire
#define atomic_fetch_and_release atomic_fetch_and_release
#define atomic_fetch_and atomic_fetch_and

#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
#define atomic_fetch_andnot_release atomic_fetch_andnot_release
#define atomic_fetch_andnot atomic_fetch_andnot

#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
#define atomic_fetch_or_acquire atomic_fetch_or_acquire
#define atomic_fetch_or_release atomic_fetch_or_release
#define atomic_fetch_or atomic_fetch_or

#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
#define atomic_fetch_xor_release atomic_fetch_xor_release
#define atomic_fetch_xor atomic_fetch_xor

#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new))
#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new))
#define atomic_xchg(v, new) xchg(&((v)->counter), (new))

#define atomic_cmpxchg_relaxed(v, old, new) \
	cmpxchg_relaxed(&((v)->counter), (old), (new))
#define atomic_cmpxchg_acquire(v, old, new) \
	cmpxchg_acquire(&((v)->counter), (old), (new))
#define atomic_cmpxchg_release(v, old, new) \
	cmpxchg_release(&((v)->counter), (old), (new))
#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new))

#define atomic_andnot atomic_andnot

/*
 * 64-bit atomic operations.
 */
#define ATOMIC64_INIT ATOMIC_INIT
#define atomic64_read atomic_read
#define atomic64_set atomic_set

#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_add_return_acquire atomic64_add_return_acquire
#define atomic64_add_return_release atomic64_add_return_release
#define atomic64_add_return atomic64_add_return

#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
#define atomic64_sub_return_acquire atomic64_sub_return_acquire
#define atomic64_sub_return_release atomic64_sub_return_release
#define atomic64_sub_return atomic64_sub_return

#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
#define atomic64_fetch_add_release atomic64_fetch_add_release
#define atomic64_fetch_add atomic64_fetch_add

#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
#define atomic64_fetch_sub_release atomic64_fetch_sub_release
#define atomic64_fetch_sub atomic64_fetch_sub

#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
#define atomic64_fetch_and_release atomic64_fetch_and_release
#define atomic64_fetch_and atomic64_fetch_and

#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
#define atomic64_fetch_andnot atomic64_fetch_andnot

#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
#define atomic64_fetch_or_release atomic64_fetch_or_release
#define atomic64_fetch_or atomic64_fetch_or

#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
#define atomic64_fetch_xor_release atomic64_fetch_xor_release
#define atomic64_fetch_xor atomic64_fetch_xor

#define atomic64_xchg_relaxed atomic_xchg_relaxed
#define atomic64_xchg_acquire atomic_xchg_acquire
#define atomic64_xchg_release atomic_xchg_release
#define atomic64_xchg atomic_xchg

#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed
#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire
#define atomic64_cmpxchg_release atomic_cmpxchg_release
#define atomic64_cmpxchg atomic_cmpxchg

#define atomic64_andnot atomic64_andnot

#define atomic64_dec_if_positive atomic64_dec_if_positive

#endif
#endif