feat(sme): enable SME2 functionality for NS world

FEAT_SME2 is an extension of FEAT_SME and an optional feature
from v9.2. It's an extension of SME, wherein it not only
processes matrix operations efficiently, but also provides
outer-product instructions to accelerate matrix operations.
It affords instructions for multi-vector operations.
Further, it adds a 512-bit architectural register, ZT0.

This patch implements all the changes introduced with FEAT_SME2
to ensure that instructions are allowed to access the ZT0
register from Non-secure lower exception levels.

Additionally, it adds support to ensure FEAT_SME2 is aligned
with the existing FEATURE DETECTION mechanism, and documented.

Change-Id: Iee0f61943304a9cfc3db8f986047b1321d0a6463
Signed-off-by: Jayanth Dodderi Chidanand <jayanthdodderi.chidanand@arm.com>
This commit is contained in:
Jayanth Dodderi Chidanand 2022-11-08 10:31:07 +00:00
parent d5f19c49ba
commit 03d3c0d729
9 changed files with 59 additions and 5 deletions

View file

@ -861,8 +861,12 @@ ifeq ($(FEATURE_DETECTION),1)
$(info FEATURE_DETECTION is an experimental feature) $(info FEATURE_DETECTION is an experimental feature)
endif endif
ifneq ($(ENABLE_SME_FOR_NS), 0) ifneq ($(ENABLE_SME2_FOR_NS), 0)
$(info ENABLE_SME_FOR_NS is an experimental feature) ifeq (${ENABLE_SME_FOR_NS}, 0)
$(warning "ENABLE_SME2_FOR_NS requires ENABLE_SME_FOR_NS also to be set")
$(warning "Forced ENABLE_SME_FOR_NS=1")
override ENABLE_SME_FOR_NS := 1
endif
endif endif
ifeq (${ARM_XLAT_TABLES_LIB_V1}, 1) ifeq (${ARM_XLAT_TABLES_LIB_V1}, 1)
@ -884,6 +888,7 @@ ifeq (${ARCH},aarch32)
ifneq (${ENABLE_SME_FOR_NS},0) ifneq (${ENABLE_SME_FOR_NS},0)
$(error "ENABLE_SME_FOR_NS cannot be used with ARCH=aarch32") $(error "ENABLE_SME_FOR_NS cannot be used with ARCH=aarch32")
endif endif
ifeq (${ENABLE_SVE_FOR_NS},1) ifeq (${ENABLE_SVE_FOR_NS},1)
# Warning instead of error due to CI dependency on this # Warning instead of error due to CI dependency on this
$(error "ENABLE_SVE_FOR_NS cannot be used with ARCH=aarch32") $(error "ENABLE_SVE_FOR_NS cannot be used with ARCH=aarch32")
@ -925,6 +930,7 @@ ifeq (${CTX_INCLUDE_FPREGS},1)
ifneq (${ENABLE_SME_FOR_NS},0) ifneq (${ENABLE_SME_FOR_NS},0)
$(error "ENABLE_SME_FOR_NS cannot be used with CTX_INCLUDE_FPREGS") $(error "ENABLE_SME_FOR_NS cannot be used with CTX_INCLUDE_FPREGS")
endif endif
ifeq (${ENABLE_SVE_FOR_NS},1) ifeq (${ENABLE_SVE_FOR_NS},1)
# Warning instead of error due to CI dependency on this # Warning instead of error due to CI dependency on this
$(warning "ENABLE_SVE_FOR_NS cannot be used with CTX_INCLUDE_FPREGS") $(warning "ENABLE_SVE_FOR_NS cannot be used with CTX_INCLUDE_FPREGS")
@ -1196,6 +1202,7 @@ $(eval $(call assert_numerics,\
ENABLE_SPE_FOR_NS \ ENABLE_SPE_FOR_NS \
ENABLE_SYS_REG_TRACE_FOR_NS \ ENABLE_SYS_REG_TRACE_FOR_NS \
ENABLE_SME_FOR_NS \ ENABLE_SME_FOR_NS \
ENABLE_SME2_FOR_NS \
ENABLE_SVE_FOR_NS \ ENABLE_SVE_FOR_NS \
ENABLE_TRF_FOR_NS \ ENABLE_TRF_FOR_NS \
FW_ENC_STATUS \ FW_ENC_STATUS \
@ -1251,6 +1258,7 @@ $(eval $(call add_defines,\
ENABLE_RME \ ENABLE_RME \
ENABLE_RUNTIME_INSTRUMENTATION \ ENABLE_RUNTIME_INSTRUMENTATION \
ENABLE_SME_FOR_NS \ ENABLE_SME_FOR_NS \
ENABLE_SME2_FOR_NS \
ENABLE_SME_FOR_SWD \ ENABLE_SME_FOR_SWD \
ENABLE_SPE_FOR_NS \ ENABLE_SPE_FOR_NS \
ENABLE_SVE_FOR_NS \ ENABLE_SVE_FOR_NS \

View file

@ -128,7 +128,7 @@ subsections:
- title: Trapping support for RNDR/RNDRRS (FEAT_RNG_TRAP) - title: Trapping support for RNDR/RNDRRS (FEAT_RNG_TRAP)
scope: rng-trap scope: rng-trap
- title: Scalable Matrix Extension (FEAT_SME) - title: Scalable Matrix Extension (FEAT_SME, FEAT_SME2)
scope: sme scope: sme
- title: Statistical profiling Extension (FEAT_SPE) - title: Statistical profiling Extension (FEAT_SPE)

View file

@ -218,6 +218,8 @@ void detect_arch_features(void)
/* v9.2 features */ /* v9.2 features */
check_feature(ENABLE_SME_FOR_NS, read_feat_sme_id_field(), check_feature(ENABLE_SME_FOR_NS, read_feat_sme_id_field(),
"SME", 1, 2); "SME", 1, 2);
check_feature(ENABLE_SME2_FOR_NS, read_feat_sme_id_field(),
"SME2", 2, 2);
/* v9.4 features */ /* v9.4 features */
check_feature(ENABLE_FEAT_GCS, read_feat_gcs_id_field(), "GCS", 1, 1); check_feature(ENABLE_FEAT_GCS, read_feat_gcs_id_field(), "GCS", 1, 1);

View file

@ -442,6 +442,13 @@ Common build options
build with these options will fail. This flag can take the values 0 to 2, to build with these options will fail. This flag can take the values 0 to 2, to
align with the ``FEATURE_DETECTION`` mechanism. Default is 0. align with the ``FEATURE_DETECTION`` mechanism. Default is 0.
- ``ENABLE_SME2_FOR_NS``: Numeric value to enable Scalable Matrix Extension
version 2 (SME2) for the non-secure world only. SME2 is an optional
architectural feature for AArch64 and TF-A support is experimental.
This should be set along with ``ENABLE_SME_FOR_NS=1``; otherwise, the default SME
accesses will still be trapped. This flag can take the values 0 to 2, to
align with the ``FEATURE_DETECTION`` mechanism. Default is 0.
- ``ENABLE_SME_FOR_SWD``: Boolean option to enable the Scalable Matrix - ``ENABLE_SME_FOR_SWD``: Boolean option to enable the Scalable Matrix
Extension for secure world use along with SVE and FPU/SIMD, ENABLE_SME_FOR_NS Extension for secure world use along with SVE and FPU/SIMD, ENABLE_SME_FOR_NS
must also be set to use this. If enabling this, the secure world MUST must also be set to use this. If enabling this, the secure world MUST

View file

@ -412,6 +412,7 @@
#define ID_AA64PFR1_EL1_SME_MASK ULL(0xf) #define ID_AA64PFR1_EL1_SME_MASK ULL(0xf)
#define ID_AA64PFR1_EL1_SME_NOT_SUPPORTED ULL(0x0) #define ID_AA64PFR1_EL1_SME_NOT_SUPPORTED ULL(0x0)
#define ID_AA64PFR1_EL1_SME_SUPPORTED ULL(0x1) #define ID_AA64PFR1_EL1_SME_SUPPORTED ULL(0x1)
#define ID_AA64PFR1_EL1_SME2_SUPPORTED ULL(0x2)
/* ID_PFR1_EL1 definitions */ /* ID_PFR1_EL1 definitions */
#define ID_PFR1_VIRTEXT_SHIFT U(12) #define ID_PFR1_VIRTEXT_SHIFT U(12)
@ -519,6 +520,8 @@
#define CPACR_EL1_FP_TRAP_EL0 UL(0x1) #define CPACR_EL1_FP_TRAP_EL0 UL(0x1)
#define CPACR_EL1_FP_TRAP_ALL UL(0x2) #define CPACR_EL1_FP_TRAP_ALL UL(0x2)
#define CPACR_EL1_FP_TRAP_NONE UL(0x3) #define CPACR_EL1_FP_TRAP_NONE UL(0x3)
#define CPACR_EL1_SMEN_SHIFT U(24)
#define CPACR_EL1_SMEN_MASK ULL(0x3)
/* SCR definitions */ /* SCR definitions */
#define SCR_RES1_BITS ((U(1) << 4) | (U(1) << 5)) #define SCR_RES1_BITS ((U(1) << 4) | (U(1) << 5))
@ -1026,11 +1029,16 @@
#define ID_AA64SMFR0_EL1_SME_FA64_SHIFT U(63) #define ID_AA64SMFR0_EL1_SME_FA64_SHIFT U(63)
#define ID_AA64SMFR0_EL1_SME_FA64_MASK U(0x1) #define ID_AA64SMFR0_EL1_SME_FA64_MASK U(0x1)
#define ID_AA64SMFR0_EL1_SME_FA64_SUPPORTED U(0x1) #define ID_AA64SMFR0_EL1_SME_FA64_SUPPORTED U(0x1)
/*
 * SMEver field of ID_AA64SMFR0_EL1. The field occupies bits [59:56], so the
 * shift must be 56; a shift of 55 would mix in bit 55 of the neighbouring
 * field and yield a wrong version value.
 * Field value 0x0 means only the base SME instructions are implemented,
 * 0x1 means the SME2 instructions are implemented as well.
 */
#define ID_AA64SMFR0_EL1_SME_VER_SHIFT U(56)
#define ID_AA64SMFR0_EL1_SME_VER_MASK ULL(0xf)
#define ID_AA64SMFR0_EL1_SME_INST_SUPPORTED ULL(0x0)
#define ID_AA64SMFR0_EL1_SME2_INST_SUPPORTED ULL(0x1)
/* SMCR_ELx definitions */ /* SMCR_ELx definitions */
#define SMCR_ELX_LEN_SHIFT U(0) #define SMCR_ELX_LEN_SHIFT U(0)
#define SMCR_ELX_LEN_MASK U(0x1ff) #define SMCR_ELX_LEN_MAX U(0x1ff)
#define SMCR_ELX_FA64_BIT (U(1) << 31) #define SMCR_ELX_FA64_BIT (U(1) << 31)
#define SMCR_ELX_EZT0_BIT (U(1) << 30)
/******************************************************************************* /*******************************************************************************
* Definitions of MAIR encodings for device and normal memory * Definitions of MAIR encodings for device and normal memory

View file

@ -657,4 +657,17 @@ static inline bool is_feat_sme_supported(void)
return read_feat_sme_id_field() >= ID_AA64PFR1_EL1_SME_SUPPORTED; return read_feat_sme_id_field() >= ID_AA64PFR1_EL1_SME_SUPPORTED;
} }
/*
 * Report whether SME2 should be treated as available.
 *
 * The build option ENABLE_SME2_FOR_NS decides the answer up front when it is
 * FEAT_STATE_DISABLED (always false) or FEAT_STATE_ALWAYS (always true).
 * For any other setting the SME ID field is read and compared against
 * ID_AA64PFR1_EL1_SME2_SUPPORTED.
 */
static inline bool is_feat_sme2_supported(void)
{
	bool sme2_present;

	if (ENABLE_SME2_FOR_NS == FEAT_STATE_DISABLED) {
		/* Feature compiled out: never touch the ID register. */
		sme2_present = false;
	} else if (ENABLE_SME2_FOR_NS == FEAT_STATE_ALWAYS) {
		/* Feature unconditionally enabled by the build. */
		sme2_present = true;
	} else {
		/* Otherwise decide from the ID register field value. */
		sme2_present = (read_feat_sme_id_field() >=
				ID_AA64PFR1_EL1_SME2_SUPPORTED);
	}

	return sme2_present;
}
#endif /* ARCH_FEATURES_H */ #endif /* ARCH_FEATURES_H */

View file

@ -43,11 +43,23 @@ void sme_enable(cpu_context_t *context)
* to be the least restrictive, then lower ELs can restrict as needed * to be the least restrictive, then lower ELs can restrict as needed
* using SMCR_EL2 and SMCR_EL1. * using SMCR_EL2 and SMCR_EL1.
*/ */
reg = SMCR_ELX_LEN_MASK; reg = SMCR_ELX_LEN_MAX;
if (read_feat_sme_fa64_id_field() != 0U) { if (read_feat_sme_fa64_id_field() != 0U) {
VERBOSE("[SME] FA64 enabled\n"); VERBOSE("[SME] FA64 enabled\n");
reg |= SMCR_ELX_FA64_BIT; reg |= SMCR_ELX_FA64_BIT;
} }
/*
* Enable access to ZT0 register.
* Make sure FEAT_SME2 is supported by the hardware before continuing.
* If supported, Set the EZT0 bit in SMCR_EL3 to allow instructions to
* access ZT0 register without trapping.
*/
if (is_feat_sme2_supported()) {
VERBOSE("SME2 enabled\n");
reg |= SMCR_ELX_EZT0_BIT;
}
write_smcr_el3(reg); write_smcr_el3(reg);
/* Reset CPTR_EL3 value. */ /* Reset CPTR_EL3 value. */

View file

@ -405,6 +405,9 @@ ifneq (${ENABLE_SME_FOR_NS},0)
override ENABLE_SVE_FOR_SWD := 0 override ENABLE_SVE_FOR_SWD := 0
endif endif
# SME2 defaults to disabled
ENABLE_SME2_FOR_NS := 0
SANITIZE_UB := off SANITIZE_UB := off
# For ARMv8.1 (AArch64) platforms, enabling this option selects the spinlock # For ARMv8.1 (AArch64) platforms, enabling this option selects the spinlock

View file

@ -55,6 +55,7 @@ ifeq (${SPM_MM}, 0)
ifeq (${ENABLE_RME}, 0) ifeq (${ENABLE_RME}, 0)
ifeq (${CTX_INCLUDE_FPREGS}, 0) ifeq (${CTX_INCLUDE_FPREGS}, 0)
ENABLE_SME_FOR_NS := 2 ENABLE_SME_FOR_NS := 2
ENABLE_SME2_FOR_NS := 2
endif endif
endif endif
endif endif