From f808873372381a401dcd86d7d45a5ee6fd164d50 Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Thu, 21 Nov 2024 13:55:59 +0000 Subject: [PATCH 1/2] fix(spe): add a psb before updating context and remove context saving In the chapter about FEAT_SPE (D16.4 specifically) it is stated that "Sampling is always disabled at EL3". That means that disabling sampling (writing PMBLIMITR_EL1.E to 0) is redundant and can be removed. The only reason we save/restore SPE context is because of that disable, so those can be removed too. There's the issue of draining the profiling buffer though. No new samples will have been generated since entering EL3. However, old samples might still be in-flight. Unless synchronised by a psb csync, those might be affected by our extensive context mutation. Adding a psb in prepare_el3_entry should cater for that. Note that prior to the introduction of root context this was not a problem as context remained unchanged and the hooks took care of the rest. Then, the only time we care about the buffer actually making it to memory is when we exit coherency. On HW_ASSISTED_COHERENCY systems we don't have to do anything, it should be handled for us. Systems without it need a dsb to wait for them to complete. There should be one already in each cpu's powerdown hook which should work. While on the topic of barriers, the esb barrier is no longer used. Remove it. Change-Id: I9736fc7d109702c63e7d403dc9e2a4272828afb2 Signed-off-by: Boyan Karatotev --- include/arch/aarch64/asm_macros.S | 13 +++--- include/lib/extensions/spe.h | 4 -- lib/el3_runtime/aarch64/context.S | 8 ++++ lib/extensions/spe/spe.c | 75 ------------------------------- lib/psci/psci_common.c | 19 -------- 5 files changed, 13 insertions(+), 106 deletions(-) diff --git a/include/arch/aarch64/asm_macros.S b/include/arch/aarch64/asm_macros.S index ec2acd560..00e482b57 100644 --- a/include/arch/aarch64/asm_macros.S +++ b/include/arch/aarch64/asm_macros.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ @@ -224,13 +224,6 @@ .space SPINLOCK_ASM_SIZE .endm - /* - * With RAS extension executes esb instruction, else NOP - */ - .macro esb - .inst 0xd503221f - .endm - /* * Helper macro to read system register value into x0 */ @@ -265,6 +258,10 @@ msr SYSREG_SB, xzr .endm + .macro psb_csync + hint #17 /* use the hint synonym for compatibility */ + .endm + /* * Macro for using speculation barrier instruction introduced by * FEAT_SB, if it's enabled. diff --git a/include/lib/extensions/spe.h b/include/lib/extensions/spe.h index 4801a2206..0a41e1e3c 100644 --- a/include/lib/extensions/spe.h +++ b/include/lib/extensions/spe.h @@ -14,7 +14,6 @@ void spe_enable(cpu_context_t *ctx); void spe_disable(cpu_context_t *ctx); void spe_init_el2_unused(void); -void spe_stop(void); #else static inline void spe_enable(cpu_context_t *ctx) { @@ -25,9 +24,6 @@ static inline void spe_disable(cpu_context_t *ctx) static inline void spe_init_el2_unused(void) { } -static inline void spe_stop(void) -{ -} #endif /* ENABLE_SPE_FOR_NS */ #endif /* SPE_H */ diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S index a353a87d2..a37c7f49c 100644 --- a/lib/el3_runtime/aarch64/context.S +++ b/lib/el3_runtime/aarch64/context.S @@ -440,6 +440,14 @@ no_mpam: * ----------------------------------------------------------------- */ func prepare_el3_entry + /* + * context is about to mutate, so make sure we don't affect any still + * in-flight profiling operations. We don't care that they actually + * finish, that can still be later. NOP if not present + */ +#if ENABLE_SPE_FOR_NS + psb_csync +#endif save_gp_pmcr_pauth_regs setup_el3_execution_context ret diff --git a/lib/extensions/spe/spe.c b/lib/extensions/spe/spe.c index d6532224a..12eae355a 100644 --- a/lib/extensions/spe/spe.c +++ b/lib/extensions/spe/spe.c @@ -14,21 +14,6 @@ #include -typedef struct spe_ctx { - u_register_t pmblimitr_el1; -} spe_ctx_t; - -static struct spe_ctx spe_ctxs[PLATFORM_CORE_COUNT]; - -static inline void psb_csync(void) -{ - /* - * The assembler does not yet understand the psb csync mnemonic - * so use the equivalent hint instruction. - */ - __asm__ volatile("hint #17"); -} - void spe_enable(cpu_context_t *ctx) { el3_state_t *state = get_el3state_ctx(ctx); @@ -90,63 +75,3 @@ void spe_init_el2_unused(void) v |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1); write_mdcr_el2(v); } - -void spe_stop(void) -{ - uint64_t v; - - /* Drain buffered data */ - psb_csync(); - dsbnsh(); - - /* Disable profiling buffer */ - v = read_pmblimitr_el1(); - v &= ~(1ULL << 0); - write_pmblimitr_el1(v); - isb(); -} - -static void *spe_drain_buffers_hook(const void *arg) -{ - if (!is_feat_spe_supported()) - return (void *)-1; - - /* Drain buffered data */ - psb_csync(); - dsbnsh(); - - return (void *)0; -} - -static void *spe_context_save(const void *arg) -{ - unsigned int core_pos; - struct spe_ctx *ctx; - - if (is_feat_spe_supported()) { - core_pos = plat_my_core_pos(); - ctx = &spe_ctxs[core_pos]; - ctx->pmblimitr_el1 = read_pmblimitr_el1(); - } - - return NULL; -} - -static void *spe_context_restore(const void *arg) -{ - unsigned int core_pos; - struct spe_ctx *ctx; - - if (is_feat_spe_supported()) { - core_pos = plat_my_core_pos(); - ctx = &spe_ctxs[core_pos]; - write_pmblimitr_el1(ctx->pmblimitr_el1); - } - - return NULL; -} - -SUBSCRIBE_TO_EVENT(cm_entering_secure_world, spe_drain_buffers_hook); - -SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_start, spe_context_save); -SUBSCRIBE_TO_EVENT(psci_suspend_pwrdown_finish, spe_context_restore); diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c index 375cdbab5..5418666b0 100644 --- a/lib/psci/psci_common.c +++ b/lib/psci/psci_common.c @@ -1169,8 +1169,6 @@ int psci_secondaries_brought_up(void) ******************************************************************************/ void psci_pwrdown_cpu(unsigned int power_level) { - psci_do_manage_extensions(); - #if HW_ASSISTED_COHERENCY /* * With hardware-assisted coherency, the CPU drivers only initiate the @@ -1290,20 +1288,3 @@ bool psci_are_all_cpus_on_safe(void) return true; } - -/******************************************************************************* - * This function performs architectural feature specific management. - * It ensures the architectural features are disabled during cpu - * power off/suspend operations. - ******************************************************************************/ -void psci_do_manage_extensions(void) -{ - /* - * On power down we need to disable statistical profiling extensions - * before exiting coherency. - */ - if (is_feat_spe_supported()) { - spe_stop(); - } - -} From 73d98e37593f4a4044dd28f52127cdc890911c0c Mon Sep 17 00:00:00 2001 From: Boyan Karatotev Date: Mon, 2 Dec 2024 09:36:10 +0000 Subject: [PATCH 2/2] fix(trbe): add a tsb before context switching Just like for SPE, we need to synchronize TRBE samples before we change the context to ensure everything goes where it was intended to. If that is not done, the in-flight entries might use any piece of now incorrect context as there are no implicit ordering requirements. Prior to root context, the buffer drain hooks would have done that. But now that must happen much earlier. So add a tsb to prepare_el3_entry as well. Annoyingly, the barrier can be reordered relative to other instructions by default (rule RCKVWP). So add an isb after the psb/tsb to assure that they are ordered, at least as far as context is concerned. Then, drop the buffer draining hooks. Everything they need to do is already done by now. There's a notable difference in that there are no dsb-s now. Since EL3 does not access the buffers or the feature specific context, we don't need to wait for them to finish. Finally, drop a stray isb in the context saving macro. It is now absorbed into root context, but was missed. Change-Id: I30797a40ac7f91d0bb71ad271a1597e85092ccd5 Signed-off-by: Boyan Karatotev --- include/arch/aarch64/asm_macros.S | 4 ++++ lib/el3_runtime/aarch64/context.S | 5 ++++- lib/extensions/spe/spe.c | 1 - lib/extensions/trbe/trbe.c | 28 ---------------------------- 4 files changed, 8 insertions(+), 30 deletions(-) diff --git a/include/arch/aarch64/asm_macros.S b/include/arch/aarch64/asm_macros.S index 00e482b57..197ea0600 100644 --- a/include/arch/aarch64/asm_macros.S +++ b/include/arch/aarch64/asm_macros.S @@ -262,6 +262,10 @@ hint #17 /* use the hint synonym for compatibility */ .endm + .macro tsb_csync + hint #18 /* use the hint synonym for compatibility */ + .endm + /* * Macro for using speculation barrier instruction introduced by * FEAT_SB, if it's enabled. diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S index a37c7f49c..fed24f0f5 100644 --- a/lib/el3_runtime/aarch64/context.S +++ b/lib/el3_runtime/aarch64/context.S @@ -400,7 +400,6 @@ no_mpam: /* PMUv3 is presumed to be always present */ mrs x9, pmcr_el0 str x9, [sp, #CTX_EL3STATE_OFFSET + CTX_PMCR_EL0] - isb #if CTX_INCLUDE_PAUTH_REGS /* ---------------------------------------------------------- * Save the ARMv8.3-PAuth keys as they are not banked @@ -448,6 +447,10 @@ func prepare_el3_entry #if ENABLE_SPE_FOR_NS psb_csync #endif +#if ENABLE_TRBE_FOR_NS + tsb_csync +#endif + isb save_gp_pmcr_pauth_regs setup_el3_execution_context ret diff --git a/lib/extensions/spe/spe.c b/lib/extensions/spe/spe.c index 12eae355a..a8d42ab77 100644 --- a/lib/extensions/spe/spe.c +++ b/lib/extensions/spe/spe.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include diff --git a/lib/extensions/trbe/trbe.c b/lib/extensions/trbe/trbe.c index 8c1c42180..8775e40cc 100644 --- a/lib/extensions/trbe/trbe.c +++ b/lib/extensions/trbe/trbe.c @@ -7,18 +7,8 @@ #include #include #include -#include #include -static void tsb_csync(void) -{ - /* - * The assembler does not yet understand the tsb csync mnemonic - * so use the equivalent hint instruction. - */ - __asm__ volatile("hint #18"); -} - void trbe_enable(cpu_context_t *ctx) { el3_state_t *state = get_el3state_ctx(ctx); @@ -68,21 +58,3 @@ void trbe_init_el2_unused(void) */ write_mdcr_el2(read_mdcr_el2() & ~MDCR_EL2_E2TB(MDCR_EL2_E2TB_EL1)); } - -static void *trbe_drain_trace_buffers_hook(const void *arg __unused) -{ - if (is_feat_trbe_supported()) { - /* - * Before switching from normal world to secure world - * the trace buffers need to be drained out to memory. This is - * required to avoid an invalid memory access when TTBR is switched - * for entry to S-EL1. - */ - tsb_csync(); - dsbnsh(); - } - - return (void *)0; -} - -SUBSCRIBE_TO_EVENT(cm_entering_secure_world, trbe_drain_trace_buffers_hook);