arm-trusted-firmware/lib/cpus/aarch64/denver.S
Boyan Karatotev 89dba82dfa perf(cpus): make reset errata do fewer branches
Errata application is painful for performance. For a start, it's done
when the core has just come out of reset, which means branch predictors
and caches will be empty so a branch to a workaround function must be
fetched from memory and that round trip is very slow. Then it also runs
with the I-cache off, which means that the loop to iterate over the
workarounds must also be fetched from memory on each iteration.

We can remove both branches. First, we can simply apply every erratum
directly instead of defining a workaround function and jumping to it.
Currently, no errata that need to be applied at both reset and runtime,
with the same workaround function, exist. If the need arose in future,
this should be achievable with a reset + runtime wrapper combo.

Then, we can construct a function that applies each erratum linearly
instead of looping over the list. If this function is part of the reset
function, then the only "far" branches at reset will be for the checker
functions. Importantly, this mitigates the slowdown even when an erratum
is disabled.

The result is ~50% speedup on N1SDP and ~20% on AArch64 Juno on wakeup
from PSCI calls that end in powerdown. This is roughly back to the
baseline of v2.9, before the errata framework regressed on performance
(or a little better). It is important to note that there are other
slowdowns since then that remain unknown.

Change-Id: Ie4d5288a331b11fd648e5c4a0b652b74160b07b9
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
2025-02-24 09:36:11 +00:00

340 lines
9.2 KiB
ArmAsm

/*
* Copyright (c) 2015-2025, Arm Limited and Contributors. All rights reserved.
* Copyright (c) 2020-2022, NVIDIA Corporation. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <context.h>
#include <denver.h>
#include <cpu_macros.S>
#include <plat_macros.S>
cpu_reset_prologue denver
/* -------------------------------------------------
* CVE-2017-5715 mitigation
*
* Flush the indirect branch predictor and RSB on
* entry to EL3 by issuing a newly added instruction
* for Denver CPUs.
*
* To achieve this without performing any branch
* instruction, a per-cpu vbar is installed which
* executes the workaround and then branches off to
* the corresponding vector entry in the main vector
* table.
* -------------------------------------------------
*/
vector_base workaround_bpflush_runtime_exceptions
.macro apply_workaround
stp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
/* Disable cycle counter when event counting is prohibited */
mrs x1, pmcr_el0
orr x0, x1, #PMCR_EL0_DP_BIT
msr pmcr_el0, x0
isb
/* -------------------------------------------------
* A new write-only system register where a write of
* 1 to bit 0 will cause the indirect branch predictor
* and RSB to be flushed.
*
* A write of 0 to bit 0 will be ignored. A write of
* 1 to any other bit will cause an MCA.
* -------------------------------------------------
*/
mov x0, #1
msr s3_0_c15_c0_6, x0
isb
ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
.endm
/* ---------------------------------------------------------------------
* Current EL with SP_EL0 : 0x0 - 0x200
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpflush_sync_exception_sp_el0
b sync_exception_sp_el0
end_vector_entry workaround_bpflush_sync_exception_sp_el0
vector_entry workaround_bpflush_irq_sp_el0
b irq_sp_el0
end_vector_entry workaround_bpflush_irq_sp_el0
vector_entry workaround_bpflush_fiq_sp_el0
b fiq_sp_el0
end_vector_entry workaround_bpflush_fiq_sp_el0
vector_entry workaround_bpflush_serror_sp_el0
b serror_sp_el0
end_vector_entry workaround_bpflush_serror_sp_el0
/* ---------------------------------------------------------------------
* Current EL with SP_ELx: 0x200 - 0x400
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpflush_sync_exception_sp_elx
b sync_exception_sp_elx
end_vector_entry workaround_bpflush_sync_exception_sp_elx
vector_entry workaround_bpflush_irq_sp_elx
b irq_sp_elx
end_vector_entry workaround_bpflush_irq_sp_elx
vector_entry workaround_bpflush_fiq_sp_elx
b fiq_sp_elx
end_vector_entry workaround_bpflush_fiq_sp_elx
vector_entry workaround_bpflush_serror_sp_elx
b serror_sp_elx
end_vector_entry workaround_bpflush_serror_sp_elx
/* ---------------------------------------------------------------------
* Lower EL using AArch64 : 0x400 - 0x600
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpflush_sync_exception_aarch64
apply_workaround
b sync_exception_aarch64
end_vector_entry workaround_bpflush_sync_exception_aarch64
vector_entry workaround_bpflush_irq_aarch64
apply_workaround
b irq_aarch64
end_vector_entry workaround_bpflush_irq_aarch64
vector_entry workaround_bpflush_fiq_aarch64
apply_workaround
b fiq_aarch64
end_vector_entry workaround_bpflush_fiq_aarch64
vector_entry workaround_bpflush_serror_aarch64
apply_workaround
b serror_aarch64
end_vector_entry workaround_bpflush_serror_aarch64
/* ---------------------------------------------------------------------
* Lower EL using AArch32 : 0x600 - 0x800
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpflush_sync_exception_aarch32
apply_workaround
b sync_exception_aarch32
end_vector_entry workaround_bpflush_sync_exception_aarch32
vector_entry workaround_bpflush_irq_aarch32
apply_workaround
b irq_aarch32
end_vector_entry workaround_bpflush_irq_aarch32
vector_entry workaround_bpflush_fiq_aarch32
apply_workaround
b fiq_aarch32
end_vector_entry workaround_bpflush_fiq_aarch32
vector_entry workaround_bpflush_serror_aarch32
apply_workaround
b serror_aarch32
end_vector_entry workaround_bpflush_serror_aarch32
.global denver_disable_dco
/* ---------------------------------------------
* Disable debug interfaces
* ---------------------------------------------
*/
func denver_disable_ext_debug
mov x0, #1
msr osdlr_el1, x0
isb
dsb sy
ret
endfunc denver_disable_ext_debug
/* ----------------------------------------------------
* Enable dynamic code optimizer (DCO)
* ----------------------------------------------------
*/
func denver_enable_dco
/* DCO is not supported on PN5 and later */
mrs x1, midr_el1
mov_imm x2, DENVER_MIDR_PN4
cmp x1, x2
b.hi 1f
mov x18, x30
bl plat_my_core_pos
mov x1, #1
lsl x1, x1, x0
msr s3_0_c15_c0_2, x1
mov x30, x18
1: ret
endfunc denver_enable_dco
/* ----------------------------------------------------
* Disable dynamic code optimizer (DCO)
* ----------------------------------------------------
*/
func denver_disable_dco
/* DCO is not supported on PN5 and later */
mrs x1, midr_el1
mov_imm x2, DENVER_MIDR_PN4
cmp x1, x2
b.hi 2f
/* turn off background work */
mov x18, x30
bl plat_my_core_pos
mov x1, #1
lsl x1, x1, x0
lsl x2, x1, #16
msr s3_0_c15_c0_2, x2
isb
/* wait till the background work turns off */
1: mrs x2, s3_0_c15_c0_2
lsr x2, x2, #32
and w2, w2, 0xFFFF
and x2, x2, x1
cbnz x2, 1b
mov x30, x18
2: ret
endfunc denver_disable_dco
workaround_reset_start denver, CVE(2017, 5715), WORKAROUND_CVE_2017_5715
#if IMAGE_BL31
adr x1, workaround_bpflush_runtime_exceptions
msr vbar_el3, x1
#endif
workaround_reset_end denver, CVE(2017, 5715)
check_erratum_custom_start denver, CVE(2017, 5715)
mov x0, #ERRATA_MISSING
#if WORKAROUND_CVE_2017_5715
/*
* Check if the CPU supports the special instruction
* required to flush the indirect branch predictor and
* RSB. Support for this operation can be determined by
* comparing bits 19:16 of ID_AFR0_EL1 with 0b0001.
*/
mrs x1, id_afr0_el1
mov x2, #0x10000
and x1, x1, x2
cbz x1, 1f
mov x0, #ERRATA_APPLIES
1:
#endif
ret
check_erratum_custom_end denver, CVE(2017, 5715)
workaround_reset_start denver, CVE(2018, 3639), WORKAROUND_CVE_2018_3639
/*
* Denver CPUs with DENVER_MIDR_PN3 or earlier, use different
* bits in the ACTLR_EL3 register to disable speculative
* store buffer and memory disambiguation.
*/
mrs x0, midr_el1
mov_imm x1, DENVER_MIDR_PN4
cmp x0, x1
mrs x0, actlr_el3
mov x1, #(DENVER_CPU_DIS_MD_EL3 | DENVER_CPU_DIS_SSB_EL3)
mov x2, #(DENVER_PN4_CPU_DIS_MD_EL3 | DENVER_PN4_CPU_DIS_SSB_EL3)
csel x3, x1, x2, ne
orr x0, x0, x3
msr actlr_el3, x0
isb
dsb sy
workaround_reset_end denver, CVE(2018, 3639)
check_erratum_chosen denver, CVE(2018, 3639), WORKAROUND_CVE_2018_3639
cpu_reset_func_start denver
/* ----------------------------------------------------
* Reset ACTLR.PMSTATE to C1 state
* ----------------------------------------------------
*/
mrs x0, actlr_el1
bic x0, x0, #DENVER_CPU_PMSTATE_MASK
orr x0, x0, #DENVER_CPU_PMSTATE_C1
msr actlr_el1, x0
/* ----------------------------------------------------
* Enable dynamic code optimizer (DCO)
* ----------------------------------------------------
*/
bl denver_enable_dco
cpu_reset_func_end denver
/* ----------------------------------------------------
* The CPU Ops core power down function for Denver.
* ----------------------------------------------------
*/
func denver_core_pwr_dwn
mov x19, x30
/* ---------------------------------------------
* Force the debug interfaces to be quiescent
* ---------------------------------------------
*/
bl denver_disable_ext_debug
ret x19
endfunc denver_core_pwr_dwn
/* -------------------------------------------------------
* The CPU Ops cluster power down function for Denver.
* -------------------------------------------------------
*/
func denver_cluster_pwr_dwn
ret
endfunc denver_cluster_pwr_dwn
/* ---------------------------------------------
* This function provides Denver specific
* register information for crash reporting.
* It needs to return with x6 pointing to
* a list of register names in ascii and
* x8 - x15 having values of registers to be
* reported.
* ---------------------------------------------
*/
.section .rodata.denver_regs, "aS"
denver_regs: /* The ascii list of register names to be reported */
.asciz "actlr_el1", ""
func denver_cpu_reg_dump
adr x6, denver_regs
mrs x8, ACTLR_EL1
ret
endfunc denver_cpu_reg_dump
/* macro to declare cpu_ops for Denver SKUs */
.macro denver_cpu_ops_wa midr
declare_cpu_ops_wa denver, \midr, \
denver_reset_func, \
check_erratum_denver_5715, \
CPU_NO_EXTRA2_FUNC, \
CPU_NO_EXTRA3_FUNC, \
denver_core_pwr_dwn, \
denver_cluster_pwr_dwn
.endm
denver_cpu_ops_wa DENVER_MIDR_PN0
denver_cpu_ops_wa DENVER_MIDR_PN1
denver_cpu_ops_wa DENVER_MIDR_PN2
denver_cpu_ops_wa DENVER_MIDR_PN3
denver_cpu_ops_wa DENVER_MIDR_PN4
denver_cpu_ops_wa DENVER_MIDR_PN5
denver_cpu_ops_wa DENVER_MIDR_PN6
denver_cpu_ops_wa DENVER_MIDR_PN7
denver_cpu_ops_wa DENVER_MIDR_PN8
denver_cpu_ops_wa DENVER_MIDR_PN9