riscv: Add Zbb support for building U-Boot

This patch adds the Zbb extension to the ISA string passed via -march
so that the toolchain can generate Zbb instructions for U-Boot
binaries, and imports the Zbb-optimized string routines from the
Linux kernel.

Signed-off-by: Yu Chien Peter Lin <peterlin@andestech.com>
Reviewed-by: Leo Yu-Chi Liang <ycliang@andestech.com>
Yu Chien Peter Lin 2023-08-09 18:49:30 +08:00 committed by Leo Yu-Chi Liang
parent 03a4504659
commit bc5a50452b
7 changed files with 392 additions and 1 deletion

arch/riscv/Kconfig

@@ -187,6 +187,97 @@ config RISCV_ISA_D
riscv32 ABI from ilp32 to ilp32d and the riscv64 ABI from lp64 to
lp64d.
config RISCV_ISA_ZBB
bool "Zbb extension support for bit manipulation instructions"
help
Adds the Zbb extension (basic bit manipulation) to the ISA subsets
that the toolchain is allowed to emit when building U-Boot.
The Zbb extension provides instructions to accelerate a number
of bit-specific operations (population count, sign extension,
bit rotation, etc.) and enables optimized string routines.
menu "Use assembly optimized implementation of string routines"
config USE_ARCH_STRLEN
bool "Use an assembly optimized implementation of strlen"
default y
depends on RISCV_ISA_ZBB
help
Enable the generation of an optimized version of strlen using
Zbb extension.
config SPL_USE_ARCH_STRLEN
bool "Use an assembly optimized implementation of strlen for SPL"
default y if USE_ARCH_STRLEN
depends on RISCV_ISA_ZBB
depends on SPL
help
Enable the generation of an optimized version of strlen using
Zbb extension.
config TPL_USE_ARCH_STRLEN
bool "Use an assembly optimized implementation of strlen for TPL"
default y if USE_ARCH_STRLEN
depends on RISCV_ISA_ZBB
depends on TPL
help
Enable the generation of an optimized version of strlen using
Zbb extension.
config USE_ARCH_STRCMP
bool "Use an assembly optimized implementation of strcmp"
default y
depends on RISCV_ISA_ZBB
help
Enable the generation of an optimized version of strcmp using
Zbb extension.
config SPL_USE_ARCH_STRCMP
bool "Use an assembly optimized implementation of strcmp for SPL"
default y if USE_ARCH_STRCMP
depends on RISCV_ISA_ZBB
depends on SPL
help
Enable the generation of an optimized version of strcmp using
Zbb extension.
config TPL_USE_ARCH_STRCMP
bool "Use an assembly optimized implementation of strcmp for TPL"
default y if USE_ARCH_STRCMP
depends on RISCV_ISA_ZBB
depends on TPL
help
Enable the generation of an optimized version of strcmp using
Zbb extension.
config USE_ARCH_STRNCMP
bool "Use an assembly optimized implementation of strncmp"
default y
depends on RISCV_ISA_ZBB
help
Enable the generation of an optimized version of strncmp using
Zbb extension.
config SPL_USE_ARCH_STRNCMP
bool "Use an assembly optimized implementation of strncmp for SPL"
default y if USE_ARCH_STRNCMP
depends on RISCV_ISA_ZBB
depends on SPL
help
Enable the generation of an optimized version of strncmp using
Zbb extension.
config TPL_USE_ARCH_STRNCMP
bool "Use an assembly optimized implementation of strncmp for TPL"
default y if USE_ARCH_STRNCMP
depends on RISCV_ISA_ZBB
depends on TPL
help
Enable the generation of an optimized version of strncmp using
Zbb extension.
endmenu
config RISCV_ISA_A
def_bool y
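
The bit operations named in the RISCV_ISA_ZBB help map onto single Zbb
instructions once "_zbb" is part of -march. A minimal C sketch of such
patterns (illustration only, not part of the patch; the helper names are
hypothetical and rely on standard GCC/Clang builtins):

#include <stdint.h>

/* With -march=..._zbb the compiler may lower these patterns to Zbb
 * instructions such as cpop, ctz, clz, sext.b and ror. */
static inline int popcount64(uint64_t x) { return __builtin_popcountll(x); }
static inline int ctz64(uint64_t x)      { return __builtin_ctzll(x); } /* x != 0 */
static inline int clz64(uint64_t x)      { return __builtin_clzll(x); } /* x != 0 */
static inline int64_t sext8(int8_t x)    { return (int64_t)x; }         /* sext.b */
static inline uint64_t rotr64(uint64_t x, unsigned int n)
{
        return (x >> (n & 63)) | (x << (-n & 63));                      /* ror */
}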

arch/riscv/Makefile

@@ -24,6 +24,9 @@ endif
ifeq ($(CONFIG_RISCV_ISA_C),y)
ARCH_C = c
endif
ifeq ($(CONFIG_RISCV_ISA_ZBB),y)
ARCH_ZBB = _zbb
endif
ifeq ($(CONFIG_CMODEL_MEDLOW),y)
CMODEL = medlow
endif
@@ -32,7 +35,7 @@ ifeq ($(CONFIG_CMODEL_MEDANY),y)
endif
RISCV_MARCH = $(ARCH_BASE)$(ARCH_A)$(ARCH_F)$(ARCH_D)$(ARCH_C)$(ARCH_ZBB)
ABI = $(ABI_BASE)$(ABI_D)
# Newer binutils versions default to ISA spec version 20191213 which moves some
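
With CONFIG_RISCV_ISA_ZBB=y the ISA string therefore gains a "_zbb"
suffix; multi-letter extensions are separated from the single-letter
base by an underscore, so a typical RV64 build ends up passing
something like -march=rv64imafdc_zbb to the compiler (the exact string
depends on which of the other ISA options are enabled).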

arch/riscv/include/asm/string.h

@@ -40,4 +40,22 @@ extern void *memmove(void *, const void *, __kernel_size_t);
#endif
extern void *memset(void *, int, __kernel_size_t);
#undef __HAVE_ARCH_STRLEN
#if CONFIG_IS_ENABLED(USE_ARCH_STRLEN)
#define __HAVE_ARCH_STRLEN
#endif
extern __kernel_size_t strlen(const char *);
#undef __HAVE_ARCH_STRCMP
#if CONFIG_IS_ENABLED(USE_ARCH_STRCMP)
#define __HAVE_ARCH_STRCMP
#endif
extern int strcmp(const char *, const char *);
#undef __HAVE_ARCH_STRNCMP
#if CONFIG_IS_ENABLED(USE_ARCH_STRNCMP)
#define __HAVE_ARCH_STRNCMP
#endif
extern int strncmp(const char *, const char *, __kernel_size_t);
#endif /* __ASM_RISCV_STRING_H */
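
The #undef/#define pairs above let each build phase (U-Boot proper, SPL,
TPL) decide independently, via CONFIG_IS_ENABLED(), whether the
architecture-specific routine is used. The generic string code keys off
these macros; a simplified sketch of the usual pattern (following the
lib/string.c convention, details elided):

#include <stddef.h>

/* When the architecture defines __HAVE_ARCH_STRLEN, this C fallback is
 * compiled out and the (weak) assembly implementation wins. */
#ifndef __HAVE_ARCH_STRLEN
size_t strlen(const char *s)
{
        const char *sc;

        for (sc = s; *sc != '\0'; ++sc)
                /* nothing */;
        return sc - s;
}
#endif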

arch/riscv/lib/Makefile

@@ -42,5 +42,8 @@ extra-$(CONFIG_EFI) += $(EFI_CRT0) $(EFI_RELOC)
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMSET) += memset.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMMOVE) += memmove.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMCPY) += memcpy.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRLEN) += strlen_zbb.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRCMP) += strcmp_zbb.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRNCMP) += strncmp_zbb.o
obj-$(CONFIG_$(SPL_TPL_)SEMIHOSTING) += semihosting.o
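
The $(SPL_TPL_) part of these rules expands to the empty string, SPL_ or
TPL_ depending on which phase is being built, so U-Boot proper, SPL and
TPL each pull in the Zbb string routines according to their own
USE_ARCH_STR* options from the Kconfig menu above.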

arch/riscv/lib/strcmp_zbb.S Normal file

@@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Taken from Linux arch/riscv/lib/strcmp.S
*/
#include <linux/linkage.h>
#include <asm/asm.h>
ENTRY(__strcmp)
WEAK(strcmp)
.option push
.option arch,+zbb
/*
* Returns
* a0 - comparison result, value like strcmp
*
* Parameters
* a0 - string1
* a1 - string2
*
* Clobbers
* t0, t1, t2, t3, t4
*/
or t2, a0, a1
li t4, -1
and t2, t2, SZREG-1
bnez t2, 3f
/* Main loop for aligned string. */
.p2align 3
1:
REG_L t0, 0(a0)
REG_L t1, 0(a1)
orc.b t3, t0
bne t3, t4, 2f
addi a0, a0, SZREG
addi a1, a1, SZREG
beq t0, t1, 1b
/*
* Words don't match, and no null byte in the first
* word. Get bytes in big-endian order and compare.
*/
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
rev8 t0, t0
rev8 t1, t1
#endif
/* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
sltu a0, t0, t1
neg a0, a0
ori a0, a0, 1
ret
2:
/*
* Found a null byte.
* If words don't match, fall back to simple loop.
*/
bne t0, t1, 3f
/* Otherwise, strings are equal. */
li a0, 0
ret
/* Simple loop for misaligned strings. */
.p2align 3
3:
lbu t0, 0(a0)
lbu t1, 0(a1)
addi a0, a0, 1
addi a1, a1, 1
bne t0, t1, 4f
bnez t0, 3b
4:
sub a0, t0, t1
ret
.option pop
END(__strcmp)
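
The tail of the fast path is worth spelling out: once two word-sized
chunks are known to differ and to contain no NUL byte, rev8 puts them in
big-endian order so that an unsigned integer comparison matches
lexicographic byte order, and the sltu/neg/ori sequence turns that
comparison into -1 or 1 without a branch. A small C model of that
sequence (a sketch; cmp_words is a hypothetical helper, not part of the
patch):

#include <assert.h>
#include <stdint.h>

/* Branchless (t0 >= t1) ? 1 : -1, mirroring "sltu; neg; ori" above.
 * t0 and t1 are the differing words after byte-reversal (rev8). */
static long cmp_words(uint64_t t0, uint64_t t1)
{
        long a0 = t0 < t1;      /* sltu: 1 if t0 < t1, else 0 */

        a0 = -a0;               /* neg:  -1 or 0 */
        return a0 | 1;          /* ori:  -1 stays -1, 0 becomes 1 */
}

int main(void)
{
        /* Byte-reversed chunks of "abd..." vs "abc...": 'a' is the MSB. */
        assert(cmp_words(0x6162640000000000ULL, 0x6162630000000000ULL) == 1);
        assert(cmp_words(0x6162630000000000ULL, 0x6162640000000000ULL) == -1);
        return 0;
}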

arch/riscv/lib/strlen_zbb.S Normal file

@@ -0,0 +1,101 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Taken from Linux arch/riscv/lib/strlen.S
*/
#include <linux/linkage.h>
#include <asm/asm.h>
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define CZ ctz
# define SHIFT srl
#else
# define CZ clz
# define SHIFT sll
#endif
ENTRY(__strlen)
WEAK(strlen)
.option push
.option arch,+zbb
/*
* Returns
* a0 - string length
*
* Parameters
* a0 - String to measure
*
* Clobbers
* t0, t1, t2, t3
*/
/* Number of irrelevant bytes in the first word. */
andi t2, a0, SZREG-1
/* Align pointer. */
andi t0, a0, -SZREG
li t3, SZREG
sub t3, t3, t2
slli t2, t2, 3
/* Get the first word. */
REG_L t1, 0(t0)
/*
* Shift away the partial data we loaded to remove the irrelevant bytes
* preceding the string with the effect of adding NUL bytes at the
* end of the string's first word.
*/
SHIFT t1, t1, t2
/* Convert non-NUL into 0xff and NUL into 0x00. */
orc.b t1, t1
/* Convert non-NUL into 0x00 and NUL into 0xff. */
not t1, t1
/*
* Search for the first set bit (corresponding to a NUL byte in the
* original chunk).
*/
CZ t1, t1
/*
* The first chunk is special: compare against the number
* of valid bytes in this chunk.
*/
srli a0, t1, 3
bgtu t3, a0, 2f
/* Prepare for the word comparison loop. */
addi t2, t0, SZREG
li t3, -1
/*
* Our critical loop is 4 instructions and processes data in
* 4 byte or 8 byte chunks.
*/
.p2align 3
1:
REG_L t1, SZREG(t0)
addi t0, t0, SZREG
orc.b t1, t1
beq t1, t3, 1b
not t1, t1
CZ t1, t1
srli t1, t1, 3
/* Get number of processed bytes. */
sub t2, t0, t2
/* Add number of characters in the first word. */
add a0, a0, t2
/* Add number of characters in the last word. */
add a0, a0, t1
2:
ret
.option pop
END(__strlen)
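
The core trick here: orc.b turns every non-NUL byte into 0xff and leaves
NUL bytes as 0x00, so after inverting the result the first set bit marks
the first NUL, and ctz divided by 8 gives its byte index. A small C model
with a worked example (a sketch; orc_b is a software stand-in for the
instruction, little-endian assumed):

#include <stdint.h>
#include <stdio.h>

/* Software model of orc.b: each byte becomes 0x00 if it was zero,
 * 0xff otherwise. */
static uint64_t orc_b(uint64_t x)
{
        uint64_t r = 0;

        for (int i = 0; i < 8; i++)
                if ((x >> (8 * i)) & 0xff)
                        r |= 0xffULL << (8 * i);
        return r;
}

int main(void)
{
        uint64_t w = 0x0000000000636261ULL;     /* "abc" + NULs, little endian */
        uint64_t m = ~orc_b(w);                 /* 0xff exactly at NUL bytes */
        int first_nul = __builtin_ctzll(m) / 8; /* ctz, then bits -> bytes */

        printf("first NUL at byte %d\n", first_nul);    /* prints 3 */
        return 0;
}

The first-word handling in the assembly deals with misalignment by
shifting the irrelevant leading bytes out of the first loaded word
before applying the same test, as its comments describe.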

arch/riscv/lib/strncmp_zbb.S Normal file

@@ -0,0 +1,94 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Taken from Linux arch/riscv/lib/strncmp.S
*/
#include <linux/linkage.h>
#include <asm/asm.h>
ENTRY(__strncmp)
WEAK(strncmp)
.option push
.option arch,+zbb
/*
* Returns
* a0 - comparison result, like strncmp
*
* Parameters
* a0 - string1
* a1 - string2
* a2 - number of characters to compare
*
* Clobbers
* t0, t1, t2, t3, t4, t5, t6
*/
or t2, a0, a1
li t5, -1
and t2, t2, SZREG-1
add t4, a0, a2
bnez t2, 3f
/* Adjust limit for fast-path. */
andi t6, t4, -SZREG
/* Main loop for aligned string. */
.p2align 3
1:
bge a0, t6, 3f
REG_L t0, 0(a0)
REG_L t1, 0(a1)
orc.b t3, t0
bne t3, t5, 2f
orc.b t3, t1
bne t3, t5, 2f
addi a0, a0, SZREG
addi a1, a1, SZREG
beq t0, t1, 1b
/*
* Words don't match, and no null byte in the first
* word. Get bytes in big-endian order and compare.
*/
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
rev8 t0, t0
rev8 t1, t1
#endif
/* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
sltu a0, t0, t1
neg a0, a0
ori a0, a0, 1
ret
2:
/*
* Found a null byte.
* If words don't match, fall back to simple loop.
*/
bne t0, t1, 3f
/* Otherwise, strings are equal. */
li a0, 0
ret
/* Simple loop for misaligned strings. */
.p2align 3
3:
bge a0, t4, 5f
lbu t0, 0(a0)
lbu t1, 0(a1)
addi a0, a0, 1
addi a1, a1, 1
bne t0, t1, 4f
bnez t0, 3b
4:
sub a0, t0, t1
ret
5:
li a0, 0
ret
.option pop
END(__strncmp)
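
Compared with strcmp, the extra bookkeeping here is the limit handling:
t4 = a0 + a2 is the one-past-the-end pointer for the byte loop, and t6
rounds it down to a SZREG boundary so the word loop never loads past the
allowed range. A C sketch of the overall structure (strncmp_sketch is a
hypothetical helper; the memchr-based NUL test stands in for what the
assembly does with orc.b, little-endian assumed):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define SZREG sizeof(unsigned long)

/* Sketch of the bounded word-at-a-time compare. */
static int strncmp_sketch(const char *s1, const char *s2, size_t n)
{
        const char *end = s1 + n;                       /* t4 = a0 + a2 */

        if ((((uintptr_t)s1 | (uintptr_t)s2) & (SZREG - 1)) == 0) {
                /* Word-loop limit, rounded down: andi t6, t4, -SZREG */
                const char *limit = s1 + (n & ~(SZREG - 1));

                while (s1 < limit) {
                        unsigned long w1, w2;

                        memcpy(&w1, s1, SZREG);
                        memcpy(&w2, s2, SZREG);
                        /* A NUL in either word, or any difference:
                         * finish byte by byte. */
                        if (memchr(&w1, 0, SZREG) || memchr(&w2, 0, SZREG) ||
                            w1 != w2)
                                break;
                        s1 += SZREG;
                        s2 += SZREG;
                }
        }

        /* Byte tail; also the whole path for misaligned inputs. */
        while (s1 < end) {
                unsigned char c1 = *s1++, c2 = *s2++;

                if (c1 != c2)
                        return c1 - c2;
                if (!c1)
                        return 0;
        }
        return 0;
}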