RISC-V: add zbb support to string functions

A patch from »Zbb string optimizations and call support in alternatives« in state Mainline for linux-kernel

From: Heiko Stuebner <heiko.stuebner@...> Date: Wed, 19 Oct 2022 15:36:22 +0200

Commit-Message

Add handling for ZBB extension and add support for using it as a variant for optimized string functions. Support for the Zbb-str-variants is limited to the GNU-assembler for now, as LLVM has not yet acquired the functionality to selectively change the arch option in assembler code. This is still under review at https://reviews.llvm.org/D123515 Co-developed-by: Christoph Muellner <christoph.muellner@...> Signed-off-by: Christoph Muellner <christoph.muellner@...> Signed-off-by: Heiko Stuebner <heiko.stuebner@...>

Patch-Comment

arch/riscv/Kconfig | 24 ++++++ arch/riscv/include/asm/errata_list.h | 3 +- arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/include/asm/string.h | 2 + arch/riscv/kernel/cpu.c | 1 + arch/riscv/kernel/cpufeature.c | 18 +++++ arch/riscv/lib/strcmp.S | 94 ++++++++++++++++++++++ arch/riscv/lib/strlen.S | 114 +++++++++++++++++++++++++++ arch/riscv/lib/strncmp.S | 111 ++++++++++++++++++++++++++ 9 files changed, 367 insertions(+), 1 deletion(-)

Statistics

  • 367 lines added
  • 1 lines removed

Changes

------------------------------ arch/riscv/Kconfig ------------------------------
index e2b656043abf..7c814fbf9527 100644
@@ -416,6 +416,30 @@ config RISCV_ISA_SVPBMT
If you don't know what to do here, say Y.
+config TOOLCHAIN_HAS_ZBB
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
+ depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
+ depends on AS_IS_GNU
+
+config RISCV_ISA_ZBB
+ bool "Zbb extension support for bit manipulation instructions"
+ depends on TOOLCHAIN_HAS_ZBB
+ depends on !XIP_KERNEL && MMU
+ select RISCV_ALTERNATIVE
+ default y
+ help
+ Adds support to dynamically detect the presence of the ZBB
+ extension (basic bit manipulation) and enable its usage.
+
+ The Zbb extension provides instructions to accelerate a number
+ of bit-specific operations (count bit population, sign extending,
+ bitrotation, etc).
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZICBOM
bool
default y
--------------------- arch/riscv/include/asm/errata_list.h ---------------------
index 4180312d2a70..95e626b7281e 100644
@@ -24,7 +24,8 @@
#define CPUFEATURE_SVPBMT 0
#define CPUFEATURE_ZICBOM 1
-#define CPUFEATURE_NUMBER 2
+#define CPUFEATURE_ZBB 2
+#define CPUFEATURE_NUMBER 3
#ifdef __ASSEMBLY__
------------------------ arch/riscv/include/asm/hwcap.h ------------------------
index 86328e3acb02..b727491fb100 100644
@@ -59,6 +59,7 @@ enum riscv_isa_ext_id {
RISCV_ISA_EXT_ZIHINTPAUSE,
RISCV_ISA_EXT_SSTC,
RISCV_ISA_EXT_SVINVAL,
+ RISCV_ISA_EXT_ZBB,
RISCV_ISA_EXT_ID_MAX
};
static_assert(RISCV_ISA_EXT_ID_MAX <= RISCV_ISA_EXT_MAX);
----------------------- arch/riscv/include/asm/string.h ------------------------
index a96b1fea24fe..17dfc4ab4c80 100644
@@ -6,6 +6,8 @@
#ifndef _ASM_RISCV_STRING_H
#define _ASM_RISCV_STRING_H
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
#include <linux/types.h>
#include <linux/linkage.h>
--------------------------- arch/riscv/kernel/cpu.c ----------------------------
index 1b9a5a66e55a..c4d1aa166f8b 100644
@@ -162,6 +162,7 @@ arch_initcall(riscv_cpuinfo_init);
* extensions by an underscore.
*/
static struct riscv_isa_ext_data isa_ext_arr[] = {
+ __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
------------------------ arch/riscv/kernel/cpufeature.c ------------------------
index 205bbd6b1fce..bf3a791d7110 100644
@@ -222,6 +222,7 @@ void __init riscv_fill_hwcap(void)
set_bit(nr, this_isa);
}
} else {
+ SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
@@ -301,6 +302,20 @@ static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
return true;
}
+static bool __init_or_module cpufeature_probe_zbb(unsigned int stage)
+{
+ if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB))
+ return false;
+
+ if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+ return false;
+
+ if (!riscv_isa_extension_available(NULL, ZBB))
+ return false;
+
+ return true;
+}
+
/*
* Probe presence of individual extensions.
*
@@ -318,6 +333,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
if (cpufeature_probe_zicbom(stage))
cpu_req_feature |= BIT(CPUFEATURE_ZICBOM);
+ if (cpufeature_probe_zbb(stage))
+ cpu_req_feature |= BIT(CPUFEATURE_ZBB);
+
return cpu_req_feature;
}
--------------------------- arch/riscv/lib/strcmp.S ----------------------------
index 94440fb8390c..5428a8f2eb84 100644
@@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)
+
+ ALTERNATIVE("nop", "j variant_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+
/*
* Returns
* a0 - comparison result, value like strcmp
@@ -34,4 +39,93 @@ SYM_FUNC_START(strcmp)
bnez t1, 1b
li a0, 0
j 2b
+
+/*
+ * Variant of strcmp using the ZBB extension if available
+ */
+#ifdef CONFIG_RISCV_ISA_ZBB
+variant_zbb:
+#define src1 a0
+#define result a0
+#define src2 t5
+#define data1 t0
+#define data2 t1
+#define align t2
+#define data1_orcb t3
+#define m1 t4
+
+.option push
+.option arch,+zbb
+
+ /*
+ * Returns
+ * a0 - comparison result, value like strcmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4, t5
+ */
+ mv src2, a1
+
+ or align, src1, src2
+ li m1, -1
+ and align, align, SZREG-1
+ bnez align, 3f
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ REG_L data1, 0(src1)
+ REG_L data2, 0(src2)
+ orc.b data1_orcb, data1
+ bne data1_orcb, m1, 2f
+ addi src1, src1, SZREG
+ addi src2, src2, SZREG
+ beq data1, data2, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ rev8 data1, data1
+ rev8 data2, data2
+#endif
+
+ /* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence. */
+ sltu result, data1, data2
+ neg result, result
+ ori result, result, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne data1, data2, 3f
+
+ /* Otherwise, strings are equal. */
+ li result, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+ .p2align 3
+3:
+ lbu data1, 0(src1)
+ lbu data2, 0(src2)
+ addi src1, src1, 1
+ addi src2, src2, 1
+ bne data1, data2, 4f
+ bnez data1, 3b
+
+4:
+ sub result, data1, data2
+ ret
+
+.option pop
+#endif
SYM_FUNC_END(strcmp)
--------------------------- arch/riscv/lib/strlen.S ----------------------------
index 09a7aaff26c8..738efb04307d 100644
@@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
/* int strlen(const char *s) */
SYM_FUNC_START(strlen)
+
+ ALTERNATIVE("nop", "j variant_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+
/*
* Returns
* a0 - string length
@@ -25,4 +30,113 @@ SYM_FUNC_START(strlen)
2:
addi t1, t1, 1
j 1b
+
+/*
+ * Variant of strlen using the ZBB extension if available
+ */
+#ifdef CONFIG_RISCV_ISA_ZBB
+variant_zbb:
+
+#define src a0
+#define result a0
+#define addr t0
+#define data t1
+#define offset t2
+#define offset_bits t2
+#define valid_bytes t3
+#define m1 t3
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+# define CZ clz
+# define SHIFT sll
+#else
+# define CZ ctz
+# define SHIFT srl
+#endif
+
+.option push
+.option arch,+zbb
+
+ /*
+ * Returns
+ * a0 - string length
+ *
+ * Parameters
+ * a0 - String to measure
+ *
+ * Clobbers
+ * t0, t1, t2, t3
+ */
+
+ /* Number of irrelevant bytes in the first word. */
+ andi offset, src, SZREG-1
+
+ /* Align pointer. */
+ andi addr, src, -SZREG
+
+ li valid_bytes, SZREG
+ sub valid_bytes, valid_bytes, offset
+ slli offset_bits, offset, RISCV_LGPTR
+
+ /* Get the first word. */
+ REG_L data, 0(addr)
+
+ /*
+ * Shift away the partial data we loaded to remove the irrelevant bytes
+ * preceding the string with the effect of adding NUL bytes at the
+ * end of the string.
+ */
+ SHIFT data, data, offset_bits
+
+ /* Convert non-NUL into 0xff and NUL into 0x00. */
+ orc.b data, data
+
+ /* Convert non-NUL into 0x00 and NUL into 0xff. */
+ not data, data
+
+ /*
+ * Search for the first set bit (corresponding to a NUL byte in the
+ * original chunk).
+ */
+ CZ data, data
+
+ /*
+ * The first chunk is special: commpare against the number
+ * of valid bytes in this chunk.
+ */
+ srli result, data, 3
+ bgtu valid_bytes, result, 3f
+
+ /* Prepare for the word comparison loop. */
+ addi offset, addr, SZREG
+ li m1, -1
+
+ /*
+ * Our critical loop is 4 instructions and processes data in
+ * 4 byte or 8 byte chunks.
+ */
+ .p2align 3
+1:
+ REG_L data, SZREG(addr)
+ addi addr, addr, SZREG
+ orc.b data, data
+ beq data, m1, 1b
+2:
+ not data, data
+ CZ data, data
+
+ /* Get number of processed words. */
+ sub offset, addr, offset
+
+ /* Add number of characters in the first word. */
+ add result, result, offset
+ srli data, data, 3
+
+ /* Add number of characters in the last word. */
+ add result, result, data
+3:
+ ret
+
+.option pop
+#endif
SYM_FUNC_END(strlen)
--------------------------- arch/riscv/lib/strncmp.S ---------------------------
index 493ab6febcb2..851428b439dc 100644
@@ -3,9 +3,14 @@
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm-generic/export.h>
+#include <asm/alternative-macros.h>
+#include <asm/errata_list.h>
/* int strncmp(const char *cs, const char *ct, size_t count) */
SYM_FUNC_START(strncmp)
+
+ ALTERNATIVE("nop", "j variant_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+
/*
* Returns
* a0 - comparison result, value like strncmp
@@ -37,4 +42,110 @@ SYM_FUNC_START(strncmp)
4:
li a0, 0
j 2b
+
+/*
+ * Variant of strncmp using the ZBB extension if available
+ */
+#ifdef CONFIG_RISCV_ISA_ZBB
+variant_zbb:
+
+#define src1 a0
+#define result a0
+#define src2 t6
+#define len a2
+#define data1 t0
+#define data2 t1
+#define align t2
+#define data1_orcb t3
+#define limit t4
+#define m1 t5
+
+.option push
+.option arch,+zbb
+
+ /*
+ * Returns
+ * a0 - comparison result, like strncmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ * a2 - number of characters to compare
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4, t5, t6
+ */
+ mv src2, a1
+
+ or align, src1, src2
+ li m1, -1
+ and align, align, SZREG-1
+ add limit, src1, len
+ bnez align, 4f
+
+ /* Adjust limit for fast-path. */
+ addi limit, limit, -SZREG
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ bgt src1, limit, 3f
+ REG_L data1, 0(src1)
+ REG_L data2, 0(src2)
+ orc.b data1_orcb, data1
+ bne data1_orcb, m1, 2f
+ addi src1, src1, SZREG
+ addi src2, src2, SZREG
+ beq data1, data2, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ rev8 data1, data1
+ rev8 data2, data2
+#endif
+
+ /* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence. */
+ sltu result, data1, data2
+ neg result, result
+ ori result, result, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne data1, data2, 3f
+
+ /* Otherwise, strings are equal. */
+ li result, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+3:
+ /* Restore limit for slow-path. */
+ addi limit, limit, SZREG
+ .p2align 3
+4:
+ bge src1, limit, 6f
+ lbu data1, 0(src1)
+ lbu data2, 0(src2)
+ addi src1, src1, 1
+ addi src2, src2, 1
+ bne data1, data2, 5f
+ bnez data1, 4b
+
+5:
+ sub result, data1, data2
+ ret
+
+6:
+ li result, 0
+ ret
+
+.option pop
+#endif
SYM_FUNC_END(strncmp)
 
 

Recent Patches

About Us

Sed lacus. Donec lectus. Nullam pretium nibh ut turpis. Nam bibendum. In nulla tortor, elementum vel, tempor at, varius non, purus. Mauris vitae nisl nec metus placerat consectetuer.

Read More...