From 0a580b5128d6da0e3885e2a541aae47a7f1f7365 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Fri, 15 Nov 2024 15:03:02 +0000 Subject: [PATCH] perf(cm): drop ZCR_EL3 saving and some ISBs and replace them with root context SVE and SME aren't enabled symmetrically for all worlds, but EL3 needs to context switch them nonetheless. Previously, this had to happen by writing the enable bits just before reading/writing the relevant context. But since the introduction of root context, this need not be the case. We can have these enables always be present for EL3 and save on some work (and ISBs!) on every context switch. We can also hoist ZCR_EL3 to a never changing register, as we set its value to be identical for every world, which happens to be the one we want for EL3 too. Change-Id: I3d950e72049a298008205ba32f230d5a5c02f8b0 Signed-off-by: Boyan Karatotev --- .../components/context-management-library.rst | 3 +- include/arch/aarch64/el3_common_macros.S | 10 ++++ include/lib/el3_runtime/aarch64/context.h | 8 ++- include/lib/extensions/sve.h | 6 ++- lib/el3_runtime/aarch64/context.S | 50 +------------------ lib/el3_runtime/aarch64/context_mgmt.c | 4 ++ lib/extensions/sme/sme.c | 11 +--- lib/extensions/sve/sve.c | 11 ++-- 8 files changed, 33 insertions(+), 70 deletions(-) diff --git a/docs/components/context-management-library.rst b/docs/components/context-management-library.rst index 6a76ada31..8cb1ace64 100644 --- a/docs/components/context-management-library.rst +++ b/docs/components/context-management-library.rst @@ -498,7 +498,6 @@ structure and is intended to manage specific EL3 registers. typedef struct per_world_context { uint64_t ctx_cptr_el3; - uint64_t ctx_zcr_el3; uint64_t ctx_mpam3_el3; } per_world_context_t; @@ -555,7 +554,7 @@ EL3. EL3 execution context needs to setup at both boot time (cold and warm boot) entrypaths and at all the possible exception handlers routing to EL3 at runtime. -*Copyright (c) 2024, Arm Limited and Contributors. 
All rights reserved.* +*Copyright (c) 2024-2025, Arm Limited and Contributors. All rights reserved.* .. |Context Memory Allocation| image:: ../resources/diagrams/context_memory_allocation.png .. |CPU Context Memory Configuration| image:: ../resources/diagrams/cpu_data_config_context_memory.png diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S index 4864596ac..2f2aeaf26 100644 --- a/include/arch/aarch64/el3_common_macros.S +++ b/include/arch/aarch64/el3_common_macros.S @@ -454,6 +454,10 @@ * Necessary on PMUv3 <= p7 where MDCR_EL3.{SCCD,MCCD} are not * available. * + * CPTR_EL3.EZ: Set to one so that accesses to ZCR_EL3 do not trap + * CPTR_EL3.TFP: Set to zero so that advanced SIMD operations don't trap + * CPTR_EL3.ESM: Set to one so that SME related registers don't trap + * * PSTATE.DIT: Set to one to enable the Data Independent Timing (DIT) * functionality, if implemented in EL3. * --------------------------------------------------------------------- @@ -473,6 +477,12 @@ orr x15, x15, #PMCR_EL0_DP_BIT msr pmcr_el0, x15 + mrs x15, cptr_el3 + orr x15, x15, #CPTR_EZ_BIT + orr x15, x15, #ESM_BIT + bic x15, x15, #TFP_BIT + msr cptr_el3, x15 + #if ENABLE_FEAT_DIT #if ENABLE_FEAT_DIT > 1 mrs x15, id_aa64pfr0_el1 diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h index 87f154130..15d5204b8 100644 --- a/include/lib/el3_runtime/aarch64/context.h +++ b/include/lib/el3_runtime/aarch64/context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -167,9 +167,8 @@ * Registers initialised in a per-world context. 
******************************************************************************/ #define CTX_CPTR_EL3 U(0x0) -#define CTX_ZCR_EL3 U(0x8) -#define CTX_MPAM3_EL3 U(0x10) -#define CTX_PERWORLD_EL3STATE_END U(0x18) +#define CTX_MPAM3_EL3 U(0x8) +#define CTX_PERWORLD_EL3STATE_END U(0x10) #ifndef __ASSEMBLER__ @@ -278,7 +277,6 @@ typedef struct cpu_context { */ typedef struct per_world_context { uint64_t ctx_cptr_el3; - uint64_t ctx_zcr_el3; uint64_t ctx_mpam3_el3; } per_world_context_t; diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h index 2979efb1d..a471efb13 100644 --- a/include/lib/extensions/sve.h +++ b/include/lib/extensions/sve.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2024, Arm Limited and Contributors. All rights reserved. + * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -11,10 +11,14 @@ #if (ENABLE_SME_FOR_NS || ENABLE_SVE_FOR_NS) +void sve_init_el3(void); void sve_init_el2_unused(void); void sve_enable_per_world(per_world_context_t *per_world_ctx); void sve_disable_per_world(per_world_context_t *per_world_ctx); #else +static inline void sve_init_el3(void) +{ +} static inline void sve_init_el2_unused(void) { } diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S index 6415cb346..1e6a42ecc 100644 --- a/lib/el3_runtime/aarch64/context.S +++ b/lib/el3_runtime/aarch64/context.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -209,19 +209,6 @@ endfunc fpregs_context_restore */ func sve_context_save .arch_extension sve - /* Temporarily enable SVE */ - mrs x10, cptr_el3 - orr x11, x10, #CPTR_EZ_BIT - bic x11, x11, #TFP_BIT - msr cptr_el3, x11 - isb - - /* zcr_el3 */ - mrs x12, S3_6_C1_C2_0 - mov x13, #((SVE_VECTOR_LEN >> 7) - 1) - msr S3_6_C1_C2_0, x13 - isb - /* Predicate registers */ mov x13, #CTX_SIMD_PREDICATES add x9, x0, x13 @@ -237,11 +224,6 @@ func sve_context_save mov x13, #CTX_SIMD_VECTORS add x9, x0, x13 sve_vectors_op str, x9 - - /* Restore SVE enablement */ - msr S3_6_C1_C2_0, x12 /* zcr_el3 */ - msr cptr_el3, x10 - isb .arch_extension nosve /* Save FPSR, FPCR and FPEXC32 */ @@ -260,19 +242,6 @@ endfunc sve_context_save */ func sve_context_restore .arch_extension sve - /* Temporarily enable SVE for EL3 */ - mrs x10, cptr_el3 - orr x11, x10, #CPTR_EZ_BIT - bic x11, x11, #TFP_BIT - msr cptr_el3, x11 - isb - - /* zcr_el3 */ - mrs x12, S3_6_C1_C2_0 - mov x13, #((SVE_VECTOR_LEN >> 7) - 1) - msr S3_6_C1_C2_0, x13 - isb - /* Restore FFR register before predicates */ mov x13, #CTX_SIMD_FFR add x9, x0, x13 @@ -288,11 +257,6 @@ func sve_context_restore mov x13, #CTX_SIMD_VECTORS add x9, x0, x13 sve_vectors_op ldr, x9 - - /* Restore SVE enablement */ - msr S3_6_C1_C2_0, x12 /* zcr_el3 */ - msr cptr_el3, x10 - isb .arch_extension nosve /* Restore FPSR, FPCR and FPEXC32 */ @@ -604,10 +568,7 @@ func el3_exit /* ---------------------------------------------------------- * Restore CPTR_EL3. - * ZCR is only restored if SVE is supported and enabled. - * Synchronization is required before zcr_el3 is addressed. 
- * ---------------------------------------------------------- - */ + * ---------------------------------------------------------- */ /* The address of the per_world context is stored in x9 */ get_per_world_context x9 @@ -616,13 +577,6 @@ func el3_exit msr cptr_el3, x19 #if IMAGE_BL31 - ands x19, x19, #CPTR_EZ_BIT - beq sve_not_enabled - - isb - msr S3_6_C1_C2_0, x20 /* zcr_el3 */ -sve_not_enabled: - restore_mpam3_el3 #endif /* IMAGE_BL31 */ diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c index 5bc933636..e28e1c34e 100644 --- a/lib/el3_runtime/aarch64/context_mgmt.c +++ b/lib/el3_runtime/aarch64/context_mgmt.c @@ -666,6 +666,10 @@ void cm_setup_context(cpu_context_t *ctx, const entry_point_info_t *ep) #if IMAGE_BL31 void cm_manage_extensions_el3(unsigned int my_idx) { + if (is_feat_sve_supported()) { + sve_init_el3(); + } + if (is_feat_amu_supported()) { amu_init_el3(my_idx); } diff --git a/lib/extensions/sme/sme.c b/lib/extensions/sme/sme.c index 98d57e928..e477da36e 100644 --- a/lib/extensions/sme/sme.c +++ b/lib/extensions/sme/sme.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, Arm Limited and Contributors. All rights reserved. + * Copyright (c) 2021-2025, Arm Limited and Contributors. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -40,13 +40,8 @@ void sme_enable_per_world(per_world_context_t *per_world_ctx) void sme_init_el3(void) { - u_register_t cptr_el3 = read_cptr_el3(); u_register_t smcr_el3; - /* Set CPTR_EL3.ESM bit so we can access SMCR_EL3 without trapping. */ - write_cptr_el3(cptr_el3 | ESM_BIT); - isb(); - /* * Set the max LEN value and FA64 bit. This register is set up per_world * to be the least restrictive, then lower ELs can restrict as needed @@ -69,10 +64,6 @@ void sme_init_el3(void) smcr_el3 |= SMCR_ELX_EZT0_BIT; } write_smcr_el3(smcr_el3); - - /* Reset CPTR_EL3 value. 
*/ - write_cptr_el3(cptr_el3); - isb(); } void sme_init_el2_unused(void) diff --git a/lib/extensions/sve/sve.c b/lib/extensions/sve/sve.c index 143717e33..4e18cdf50 100644 --- a/lib/extensions/sve/sve.c +++ b/lib/extensions/sve/sve.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved. + * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -22,6 +22,12 @@ CASSERT((SVE_VECTOR_LEN % 128) == 0, assert_sve_vl_granule); */ #define CONVERT_SVE_LENGTH(x) (((x / 128) - 1)) +void sve_init_el3(void) +{ + /* Cap SVE vector length: ZCR_EL3.LEN = (SVE_VECTOR_LEN / 128) - 1. */ + write_zcr_el3(ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN)); +} + void sve_enable_per_world(per_world_context_t *per_world_ctx) { u_register_t cptr_el3; @@ -30,9 +36,6 @@ void sve_enable_per_world(per_world_context_t *per_world_ctx) cptr_el3 = per_world_ctx->ctx_cptr_el3; cptr_el3 = (cptr_el3 | CPTR_EZ_BIT) & ~(TFP_BIT); per_world_ctx->ctx_cptr_el3 = cptr_el3; - - /* Restrict maximum SVE vector length (SVE_VECTOR_LEN+1) * 128. */ - per_world_ctx->ctx_zcr_el3 = (ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN)); } void sve_init_el2_unused(void)