arm64: bpf: optimize modulo operation

Optimize code generation for the modulo operation by emitting a
single MSUB instruction (dst = dst - tmp * src) after the UDIV,
instead of a MUL followed by a separate SUB instruction.

Signed-off-by: Jerin Jacob <jerinj@marvell.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Adam W. Willis <return.of.octobot@gmail.com>
This commit is contained in:
Jerin Jacob 2019-09-02 11:44:48 +05:30 committed by spakkkk
parent 1802058d43
commit c099ad7b62
2 changed files with 5 additions and 4 deletions

View File

@ -182,6 +182,9 @@
/* Rd = Ra + Rn * Rm */ /* Rd = Ra + Rn * Rm */
#define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \ #define A64_MADD(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD) A64_VARIANT(sf), AARCH64_INSN_DATA3_MADD)
/* Rd = Ra - Rn * Rm */
#define A64_MSUB(sf, Rd, Ra, Rn, Rm) aarch64_insn_gen_data3(Rd, Ra, Rn, Rm, \
A64_VARIANT(sf), AARCH64_INSN_DATA3_MSUB)
/* Rd = Rn * Rm */ /* Rd = Rn * Rm */
#define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm) #define A64_MUL(sf, Rd, Rn, Rm) A64_MADD(sf, Rd, A64_ZR, Rn, Rm)

View File

@ -419,8 +419,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break; break;
case BPF_MOD: case BPF_MOD:
emit(A64_UDIV(is64, tmp, dst, src), ctx); emit(A64_UDIV(is64, tmp, dst, src), ctx);
emit(A64_MUL(is64, tmp, tmp, src), ctx); emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
emit(A64_SUB(is64, dst, dst, tmp), ctx);
break; break;
} }
break; break;
@ -526,8 +525,7 @@ emit_bswap_uxt:
case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K:
emit_a64_mov_i(is64, tmp2, imm, ctx); emit_a64_mov_i(is64, tmp2, imm, ctx);
emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
emit(A64_MUL(is64, tmp, tmp, tmp2), ctx); emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
emit(A64_SUB(is64, dst, dst, tmp), ctx);
break; break;
case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU64 | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: