perf(cm): drop ZCR_EL3 saving and some ISBs and replace them with root context

SVE and SME aren't enabled symmetrically for all worlds, but EL3 needs
to context switch them nonetheless. Previously, this had to happen by
writing the enable bits just before reading/writing the relevant
context. But since the introduction of root context, this need not be
the case. We can have these enables always be present for EL3 and save
on some work (and ISBs!) on every context switch.

We can also hoist ZCR_EL3 to a never changing register, as we set its
value to be identical for every world, which happens to be the one we
want for EL3 too.

Change-Id: I3d950e72049a298008205ba32f230d5a5c02f8b0
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
This commit is contained in:

Author: Boyan Karatotev <boyan.karatotev@arm.com>
Date:   2024-11-15 15:03:02 +00:00
Parent: abf6666e26
Commit: 0a580b5128

8 changed files with 33 additions and 70 deletions

View file

@@ -498,7 +498,6 @@ structure and is intended to manage specific EL3 registers.
 typedef struct per_world_context {
 	uint64_t ctx_cptr_el3;
-	uint64_t ctx_zcr_el3;
 	uint64_t ctx_mpam3_el3;
 } per_world_context_t;
@@ -555,7 +554,7 @@ EL3.
 EL3 execution context needs to setup at both boot time (cold and warm boot)
 entrypaths and at all the possible exception handlers routing to EL3 at runtime.
-*Copyright (c) 2024, Arm Limited and Contributors. All rights reserved.*
+*Copyright (c) 2024-2025, Arm Limited and Contributors. All rights reserved.*

 .. |Context Memory Allocation| image:: ../resources/diagrams/context_memory_allocation.png
 .. |CPU Context Memory Configuration| image:: ../resources/diagrams/cpu_data_config_context_memory.png

View file

@@ -454,6 +454,10 @@
	 * Necessary on PMUv3 <= p7 where MDCR_EL3.{SCCD,MCCD} are not
	 * available.
	 *
+	 * CPTR_EL3.EZ: Set to one so that accesses to ZCR_EL3 do not trap
+	 * CPTR_EL3.TFP: Set to zero so that advanced SIMD operations don't trap
+	 * CPTR_EL3.ESM: Set to one so that SME related registers don't trap
+	 *
	 * PSTATE.DIT: Set to one to enable the Data Independent Timing (DIT)
	 * functionality, if implemented in EL3.
	 * ---------------------------------------------------------------------
@@ -473,6 +477,12 @@
	orr	x15, x15, #PMCR_EL0_DP_BIT
	msr	pmcr_el0, x15

+	mrs	x15, cptr_el3
+	orr	x15, x15, #CPTR_EZ_BIT
+	orr	x15, x15, #ESM_BIT
+	bic	x15, x15, #TFP_BIT
+	msr	cptr_el3, x15
+
 #if ENABLE_FEAT_DIT
 #if ENABLE_FEAT_DIT > 1
	mrs	x15, id_aa64pfr0_el1

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -167,9 +167,8 @@
  * Registers initialised in a per-world context.
  ******************************************************************************/
 #define CTX_CPTR_EL3			U(0x0)
-#define CTX_ZCR_EL3			U(0x8)
-#define CTX_MPAM3_EL3			U(0x10)
-#define CTX_PERWORLD_EL3STATE_END	U(0x18)
+#define CTX_MPAM3_EL3			U(0x8)
+#define CTX_PERWORLD_EL3STATE_END	U(0x10)

 #ifndef __ASSEMBLER__
@@ -278,7 +277,6 @@ typedef struct cpu_context {
  */
 typedef struct per_world_context {
	uint64_t ctx_cptr_el3;
-	uint64_t ctx_zcr_el3;
	uint64_t ctx_mpam3_el3;
 } per_world_context_t;

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -11,10 +11,14 @@
 #if (ENABLE_SME_FOR_NS || ENABLE_SVE_FOR_NS)
+void sve_init_el3(void);
 void sve_init_el2_unused(void);
 void sve_enable_per_world(per_world_context_t *per_world_ctx);
 void sve_disable_per_world(per_world_context_t *per_world_ctx);
 #else
+static inline void sve_init_el3(void)
+{
+}
+
 static inline void sve_init_el2_unused(void)
 {
 }

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -209,19 +209,6 @@ endfunc fpregs_context_restore
  */
 func sve_context_save
 .arch_extension sve
-	/* Temporarily enable SVE */
-	mrs	x10, cptr_el3
-	orr	x11, x10, #CPTR_EZ_BIT
-	bic	x11, x11, #TFP_BIT
-	msr	cptr_el3, x11
-	isb
-
-	/* zcr_el3 */
-	mrs	x12, S3_6_C1_C2_0
-	mov	x13, #((SVE_VECTOR_LEN >> 7) - 1)
-	msr	S3_6_C1_C2_0, x13
-	isb
-
	/* Predicate registers */
	mov	x13, #CTX_SIMD_PREDICATES
	add	x9, x0, x13
@@ -237,11 +224,6 @@ func sve_context_save
	mov	x13, #CTX_SIMD_VECTORS
	add	x9, x0, x13
	sve_vectors_op	str, x9
-
-	/* Restore SVE enablement */
-	msr	S3_6_C1_C2_0, x12 /* zcr_el3 */
-	msr	cptr_el3, x10
-	isb
 .arch_extension nosve

	/* Save FPSR, FPCR and FPEXC32 */
@@ -260,19 +242,6 @@ endfunc sve_context_save
  */
 func sve_context_restore
 .arch_extension sve
-	/* Temporarily enable SVE for EL3 */
-	mrs	x10, cptr_el3
-	orr	x11, x10, #CPTR_EZ_BIT
-	bic	x11, x11, #TFP_BIT
-	msr	cptr_el3, x11
-	isb
-
-	/* zcr_el3 */
-	mrs	x12, S3_6_C1_C2_0
-	mov	x13, #((SVE_VECTOR_LEN >> 7) - 1)
-	msr	S3_6_C1_C2_0, x13
-	isb
-
	/* Restore FFR register before predicates */
	mov	x13, #CTX_SIMD_FFR
	add	x9, x0, x13
@@ -288,11 +257,6 @@ func sve_context_restore
	mov	x13, #CTX_SIMD_VECTORS
	add	x9, x0, x13
	sve_vectors_op	ldr, x9
-
-	/* Restore SVE enablement */
-	msr	S3_6_C1_C2_0, x12 /* zcr_el3 */
-	msr	cptr_el3, x10
-	isb
 .arch_extension nosve

	/* Restore FPSR, FPCR and FPEXC32 */
@@ -604,10 +568,7 @@ func el3_exit
	/* ----------------------------------------------------------
	 * Restore CPTR_EL3.
-	 * ZCR is only restored if SVE is supported and enabled.
-	 * Synchronization is required before zcr_el3 is addressed.
-	 * ----------------------------------------------------------
-	 */
+	 * ---------------------------------------------------------- */

	/* The address of the per_world context is stored in x9 */
	get_per_world_context x9
@@ -616,13 +577,6 @@ func el3_exit
	msr	cptr_el3, x19

 #if IMAGE_BL31
-	ands	x19, x19, #CPTR_EZ_BIT
-	beq	sve_not_enabled
-
-	isb
-	msr	S3_6_C1_C2_0, x20 /* zcr_el3 */
-sve_not_enabled:
	restore_mpam3_el3
 #endif /* IMAGE_BL31 */

View file

@@ -666,6 +666,10 @@ void cm_setup_context(cpu_context_t *ctx, const entry_point_info_t *ep)
 #if IMAGE_BL31
 void cm_manage_extensions_el3(unsigned int my_idx)
 {
+	if (is_feat_sve_supported()) {
+		sve_init_el3();
+	}
+
	if (is_feat_amu_supported()) {
		amu_init_el3(my_idx);
	}

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2021-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -40,13 +40,8 @@ void sme_enable_per_world(per_world_context_t *per_world_ctx)
 void sme_init_el3(void)
 {
-	u_register_t cptr_el3 = read_cptr_el3();
	u_register_t smcr_el3;

-	/* Set CPTR_EL3.ESM bit so we can access SMCR_EL3 without trapping. */
-	write_cptr_el3(cptr_el3 | ESM_BIT);
-	isb();
-
	/*
	 * Set the max LEN value and FA64 bit. This register is set up per_world
	 * to be the least restrictive, then lower ELs can restrict as needed
@@ -69,10 +64,6 @@ void sme_init_el3(void)
		smcr_el3 |= SMCR_ELX_EZT0_BIT;
	}
	write_smcr_el3(smcr_el3);
-
-	/* Reset CPTR_EL3 value. */
-	write_cptr_el3(cptr_el3);
-	isb();
 }

 void sme_init_el2_unused(void)
void sme_init_el2_unused(void) void sme_init_el2_unused(void)

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -22,6 +22,12 @@ CASSERT((SVE_VECTOR_LEN % 128) == 0, assert_sve_vl_granule);
  */
 #define CONVERT_SVE_LENGTH(x)	(((x / 128) - 1))

+void sve_init_el3(void)
+{
+	/* Restrict maximum SVE vector length (SVE_VECTOR_LEN+1) * 128. */
+	write_zcr_el3(ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN));
+}
+
 void sve_enable_per_world(per_world_context_t *per_world_ctx)
 {
	u_register_t cptr_el3;
@@ -30,9 +36,6 @@ void sve_enable_per_world(per_world_context_t *per_world_ctx)
	cptr_el3 = per_world_ctx->ctx_cptr_el3;
	cptr_el3 = (cptr_el3 | CPTR_EZ_BIT) & ~(TFP_BIT);
	per_world_ctx->ctx_cptr_el3 = cptr_el3;
-
-	/* Restrict maximum SVE vector length (SVE_VECTOR_LEN+1) * 128. */
-	per_world_ctx->ctx_zcr_el3 = (ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN));
 }

 void sve_init_el2_unused(void)