From 3c251af392f2dfeedfe9c4595a8a33188c1d1d14 Mon Sep 17 00:00:00 2001
From: Jeenu Viswambharan
Date: Wed, 4 Jan 2017 13:51:42 +0000
Subject: [PATCH 1/5] build: Define build option for hardware-assisted coherency

The boolean build option HW_ASSISTED_COHERENCY is introduced to enable
various optimizations in ARM Trusted Firmware when built for systems where
CPU coherency is managed in hardware. It's set to 0 by default.

Change-Id: I638390da6e1718fe024dcf5b402e07084f1eb014
Signed-off-by: Jeenu Viswambharan
---
 Makefile                 | 2 ++
 docs/user-guide.md       | 9 +++++++++
 make_helpers/defaults.mk | 4 ++++
 3 files changed, 15 insertions(+)

diff --git a/Makefile b/Makefile
index 932fb3b54..e00159a30 100644
--- a/Makefile
+++ b/Makefile
@@ -385,6 +385,7 @@ $(eval $(call assert_boolean,ENABLE_PSCI_STAT))
 $(eval $(call assert_boolean,ENABLE_RUNTIME_INSTRUMENTATION))
 $(eval $(call assert_boolean,ERROR_DEPRECATED))
 $(eval $(call assert_boolean,GENERATE_COT))
+$(eval $(call assert_boolean,HW_ASSISTED_COHERENCY))
 $(eval $(call assert_boolean,LOAD_IMAGE_V2))
 $(eval $(call assert_boolean,NS_TIMER_SWITCH))
 $(eval $(call assert_boolean,PL011_GENERIC_UART))
@@ -419,6 +420,7 @@ $(eval $(call add_define,ENABLE_PMF))
 $(eval $(call add_define,ENABLE_PSCI_STAT))
 $(eval $(call add_define,ENABLE_RUNTIME_INSTRUMENTATION))
 $(eval $(call add_define,ERROR_DEPRECATED))
+$(eval $(call add_define,HW_ASSISTED_COHERENCY))
 $(eval $(call add_define,LOAD_IMAGE_V2))
 $(eval $(call add_define,LOG_LEVEL))
 $(eval $(call add_define,NS_TIMER_SWITCH))
diff --git a/docs/user-guide.md b/docs/user-guide.md
index 091aeba04..4ed0e2017 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -328,6 +328,15 @@ performed.
 *   `HANDLE_EA_EL3_FIRST`: When defined External Aborts and SError Interrupts
     will be always trapped in EL3 i.e. in BL31 at runtime.
 
+*   `HW_ASSISTED_COHERENCY`: On most ARM systems to-date, platform-specific
+    software operations are required for CPUs to enter and exit coherency.
+    However, there exist newer systems where CPUs' entry to and exit from
+    coherency is managed in hardware. Such systems require software only to
+    initiate these operations; the rest is managed in hardware, minimizing
+    active software involvement. On such systems, this boolean option enables
+    ARM Trusted Firmware to carry out build- and run-time optimizations
+    during boot and power management operations. This option defaults to 0.
+
 *   `LOAD_IMAGE_V2`: Boolean option to enable support for new version (v2) of
     image loading, which provides more flexibility and scalability around what
     images are loaded and executed during boot. Default is 0.
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index b47ea46e3..de506be5b 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -105,6 +105,10 @@ FWU_FIP_NAME := fwu_fip.bin
 # For Chain of Trust
 GENERATE_COT := 0
 
+# Whether system coherency is managed in hardware, without explicit software
+# operations.
+HW_ASSISTED_COHERENCY := 0
+
 # Flag to enable new version of image loading
 LOAD_IMAGE_V2 := 0

From 25a93f7cd181ca79a631864b7c076fa7106f4365 Mon Sep 17 00:00:00 2001
From: Jeenu Viswambharan
Date: Thu, 5 Jan 2017 10:37:21 +0000
Subject: [PATCH 2/5] Enable data caches early with hardware-assisted coherency

At present, warm-booted CPUs keep their caches disabled when enabling the MMU,
and they remain so until they enter coherency later.

On systems with hardware-assisted coherency, for which the
HW_ASSISTED_COHERENCY build flag would be enabled, warm-booted CPUs can have
both caches and MMU enabled at once.
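As an illustration (not part of this patch), the flag chosen at the warm
entrypoint is consumed by the platform's MMU-enable hook; a typical hook
simply forwards it to the architectural helper, so passing 0 instead of
DISABLE_DCACHE is all that is needed to keep data caches on. A minimal
sketch, assuming a platform that uses the common translation-table library:

    #include <stdint.h>
    #include <xlat_tables.h>

    /* Typical platform hook: whatever the warm entrypoint passes in x0
     * (0 or DISABLE_DCACHE) decides whether the data cache is enabled
     * along with the MMU. */
    void bl31_plat_enable_mmu(uint32_t flags)
    {
    	enable_mmu_el3(flags);
    }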
Change-Id: Icb0adb026e01aecf34beadf49c88faa9dd368327 Signed-off-by: Jeenu Viswambharan --- bl31/aarch64/bl31_entrypoint.S | 39 ++++++++++++++++++-------------- bl32/sp_min/aarch32/entrypoint.S | 37 ++++++++++++++++-------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S index d14a68d06..f6a21dc14 100644 --- a/bl31/aarch64/bl31_entrypoint.S +++ b/bl31/aarch64/bl31_entrypoint.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -180,24 +180,29 @@ func bl31_warm_entrypoint _init_c_runtime=0 \ _exception_vectors=runtime_exceptions - /* -------------------------------------------- - * Enable the MMU with the DCache disabled. It - * is safe to use stacks allocated in normal - * memory as a result. All memory accesses are - * marked nGnRnE when the MMU is disabled. So - * all the stack writes will make it to memory. - * All memory accesses are marked Non-cacheable - * when the MMU is enabled but D$ is disabled. - * So used stack memory is guaranteed to be - * visible immediately after the MMU is enabled - * Enabling the DCache at the same time as the - * MMU can lead to speculatively fetched and - * possibly stale stack memory being read from - * other caches. This can lead to coherency - * issues. - * -------------------------------------------- + /* + * We're about to enable MMU and participate in PSCI state coordination. + * + * The PSCI implementation invokes platform routines that enable CPUs to + * participate in coherency. On a system where CPUs are not + * cache-coherent out of reset, having caches enabled until such time + * might lead to coherency issues (resulting from stale data getting + * speculatively fetched, among others). Therefore we keep data caches + * disabled while enabling the MMU, thereby forcing data accesses to + * have non-cacheable, nGnRnE attributes (these will always be coherent + * with main memory). + * + * On systems with hardware-assisted coherency, where CPUs are expected + * to be cache-coherent out of reset without needing explicit software + * intervention, PSCI need not invoke platform routines to enter + * coherency (as CPUs already are); and there's no reason to have caches + * disabled either. */ +#if HW_ASSISTED_COHERENCY + mov x0, #0 +#else mov x0, #DISABLE_DCACHE +#endif bl bl31_plat_enable_mmu bl psci_warmboot_entrypoint diff --git a/bl32/sp_min/aarch32/entrypoint.S b/bl32/sp_min/aarch32/entrypoint.S index e2ab923d1..d934bb84e 100644 --- a/bl32/sp_min/aarch32/entrypoint.S +++ b/bl32/sp_min/aarch32/entrypoint.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved. + * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -231,24 +231,27 @@ func sp_min_warm_entrypoint _init_c_runtime=0 \ _exception_vectors=sp_min_vector_table - /* -------------------------------------------- - * Enable the MMU with the DCache disabled. It - * is safe to use stacks allocated in normal - * memory as a result. All memory accesses are - * marked nGnRnE when the MMU is disabled. 
So
-	 * all the stack writes will make it to memory.
-	 * All memory accesses are marked Non-cacheable
-	 * when the MMU is enabled but D$ is disabled.
-	 * So used stack memory is guaranteed to be
-	 * visible immediately after the MMU is enabled
-	 * Enabling the DCache at the same time as the
-	 * MMU can lead to speculatively fetched and
-	 * possibly stale stack memory being read from
-	 * other caches. This can lead to coherency
-	 * issues.
-	 * --------------------------------------------
+	/*
+	 * We're about to enable the MMU and participate in PSCI state
+	 * coordination.
+	 *
+	 * The PSCI implementation invokes platform routines that enable CPUs to
+	 * participate in coherency. On a system where CPUs are not
+	 * cache-coherent out of reset, having caches enabled until such time
+	 * might lead to coherency issues (resulting from stale data getting
+	 * speculatively fetched, among others). Therefore we keep data caches
+	 * disabled while enabling the MMU, thereby forcing data accesses to
+	 * have non-cacheable, nGnRnE attributes (these will always be coherent
+	 * with main memory).
+	 *
+	 * On systems where CPUs are cache-coherent out of reset, however, PSCI
+	 * need not invoke platform routines to enter coherency (as CPUs already
+	 * are), and there's no reason to have caches disabled either.
 	 */
+#if HW_ASSISTED_COHERENCY
+	mov	r0, #0
+#else
 	mov	r0, #DISABLE_DCACHE
+#endif
 	bl	bl32_plat_enable_mmu
 
 	bl	sp_min_warm_boot

From d4593e4713617b455929960eb616c9c09e446dc4 Mon Sep 17 00:00:00 2001
From: Jeenu Viswambharan
Date: Fri, 6 Jan 2017 16:14:42 +0000
Subject: [PATCH 3/5] Disallow using coherent memory with hardware-assisted coherency

ARM Trusted Firmware keeps certain data structures in a memory region with
non-cacheable attributes (termed "coherent memory") to keep data coherent
between observers that are cache-coherent and those that are not. These data
structures pertain to power management and mutual exclusion. Using coherent
memory also costs at least an additional page to map memory with special
memory attributes.

On systems with hardware-assisted coherency, all CPUs that participate in
power management and mutual exclusion are cache-coherent, obviating the need
for special memory attributes for such data structures. Instead, they can be
placed in normal memory, along with the rest of the data.

On systems with hardware-assisted coherency, where the build option
HW_ASSISTED_COHERENCY will be set, having USE_COHERENT_MEM enabled only
wastes a page of memory without any benefit. Therefore, with
HW_ASSISTED_COHERENCY set to 1, require that USE_COHERENT_MEM is explicitly
set to 0.

Change-Id: I5101657ae6b1a46278069f23e2d88ee5cbd98efa
Signed-off-by: Jeenu Viswambharan
---
 Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Makefile b/Makefile
index e00159a30..5c798d585 100644
--- a/Makefile
+++ b/Makefile
@@ -305,6 +305,11 @@ ifeq (${ARCH},aarch32)
     endif
 endif
 
+# When building for systems with hardware-assisted coherency, there's no need
+# for coherent memory. Require that USE_COHERENT_MEM be explicitly set to 0.
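+#
+# An illustrative build invocation satisfying this rule (the platform name is
+# a placeholder):
+#   make PLAT=<platform> HW_ASSISTED_COHERENCY=1 USE_COHERENT_MEM=0 all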
+ifeq ($(HW_ASSISTED_COHERENCY)-$(USE_COHERENT_MEM),1-1)
+$(error USE_COHERENT_MEM cannot be enabled with HW_ASSISTED_COHERENCY)
+endif
 
 ################################################################################
 # Process platform overrideable behaviour

From a10d3632acbd1135648f07c2a998cba8c5c77cfd Mon Sep 17 00:00:00 2001
From: Jeenu Viswambharan
Date: Fri, 6 Jan 2017 14:58:11 +0000
Subject: [PATCH 4/5] PSCI: Introduce cache and barrier wrappers

The PSCI implementation performs cache maintenance operations on its data
structures to ensure their visibility to both cache-coherent and
non-cache-coherent participants. These cache maintenance operations can be
skipped if all PSCI participants are cache-coherent. When the
HW_ASSISTED_COHERENCY build option is enabled, we assume PSCI participants
are cache-coherent.

For usage abstraction, this patch introduces wrappers for PSCI cache
maintenance and barrier operations used for state coordination: they are
effectively NOPs when HW_ASSISTED_COHERENCY is enabled, but are applied
otherwise.

Also refactor local state usage and associated cache operations to make them
clearer.

Change-Id: I77f17a90cba41085b7188c1345fe5731c99fad87
Signed-off-by: Jeenu Viswambharan
---
 lib/psci/psci_common.c  | 79 ++++++++++++++++++++++++++---------------
 lib/psci/psci_off.c     | 12 +++----
 lib/psci/psci_private.h | 23 ++++++++++++
 lib/psci/psci_setup.c   |  6 ++--
 lib/psci/psci_suspend.c |  4 +--
 5 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 9fdce4986..026690d25 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -247,6 +247,50 @@ static plat_local_state_t *psci_get_req_local_pwr_states(unsigned int pwrlvl,
 	return &psci_req_local_pwr_states[pwrlvl - 1][cpu_idx];
 }
 
+/*
+ * psci_non_cpu_pd_nodes can be placed either in normal memory or coherent
+ * memory.
+ *
+ * With !USE_COHERENT_MEM, psci_non_cpu_pd_nodes is placed in normal memory,
+ * where it's accessed by both cached and non-cached participants. To serve
+ * the common minimum, perform a cache flush before read and after write so
+ * that non-cached participants operate on the latest data in main memory.
+ *
+ * With USE_COHERENT_MEM, psci_non_cpu_pd_nodes is placed in coherent memory.
+ * With HW_ASSISTED_COHERENCY, all PSCI participants are cache-coherent. In
+ * both cases, no cache operations are required.
+ */

+/*
+ * Retrieve the local state of a non-CPU power domain node from a non-cached
+ * CPU, after any required cache maintenance operation.
+ */
+static plat_local_state_t get_non_cpu_pd_node_local_state(
+		unsigned int parent_idx)
+{
+#if !(USE_COHERENT_MEM || HW_ASSISTED_COHERENCY)
+	flush_dcache_range(
+			(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
+			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
+#endif
+	return psci_non_cpu_pd_nodes[parent_idx].local_state;
+}
+
+/*
+ * Update the local state of a non-CPU power domain node from a cached CPU;
+ * perform any required cache maintenance operation afterwards.
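+ * (As on the read side above, the flush below is compiled out when either
+ * coherent memory is in use or all PSCI participants are cache-coherent.)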
+ */
+static void set_non_cpu_pd_node_local_state(unsigned int parent_idx,
+		plat_local_state_t state)
+{
+	psci_non_cpu_pd_nodes[parent_idx].local_state = state;
+#if !(USE_COHERENT_MEM || HW_ASSISTED_COHERENCY)
+	flush_dcache_range(
+			(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
+			sizeof(psci_non_cpu_pd_nodes[parent_idx]));
+#endif
+}
+
 /******************************************************************************
  * Helper function to return the current local power state of each power domain
  * from the current cpu power domain to its ancestor at the 'end_pwrlvl'. This
@@ -264,18 +308,7 @@ void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
 
 	/* Copy the local power state from node to state_info */
 	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
-#if !USE_COHERENT_MEM
-		/*
-		 * If using normal memory for psci_non_cpu_pd_nodes, we need
-		 * to flush before reading the local power state as another
-		 * cpu in the same power domain could have updated it and this
-		 * code runs before caches are enabled.
-		 */
-		flush_dcache_range(
-				(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
-				sizeof(psci_non_cpu_pd_nodes[parent_idx]));
-#endif
-		pd_state[lvl] = psci_non_cpu_pd_nodes[parent_idx].local_state;
+		pd_state[lvl] = get_non_cpu_pd_node_local_state(parent_idx);
 		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
 	}
 
@@ -299,21 +332,16 @@ static void psci_set_target_local_pwr_states(unsigned int end_pwrlvl,
 	psci_set_cpu_local_state(pd_state[PSCI_CPU_PWR_LVL]);
 
 	/*
-	 * Need to flush as local_state will be accessed with Data Cache
+	 * Need to flush as local_state might be accessed with Data Cache
 	 * disabled during power on
 	 */
-	flush_cpu_data(psci_svc_cpu_data.local_state);
+	psci_flush_cpu_data(psci_svc_cpu_data.local_state);
 
 	parent_idx = psci_cpu_pd_nodes[plat_my_core_pos()].parent_node;
 
 	/* Copy the local_state from state_info */
 	for (lvl = 1; lvl <= end_pwrlvl; lvl++) {
-		psci_non_cpu_pd_nodes[parent_idx].local_state = pd_state[lvl];
-#if !USE_COHERENT_MEM
-		flush_dcache_range(
-				(uintptr_t)&psci_non_cpu_pd_nodes[parent_idx],
-				sizeof(psci_non_cpu_pd_nodes[parent_idx]));
-#endif
+		set_non_cpu_pd_node_local_state(parent_idx, pd_state[lvl]);
 		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
 	}
 }
@@ -347,13 +375,8 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl)
 
 	/* Reset the local_state to RUN for the non cpu power domains. */
 	for (lvl = PSCI_CPU_PWR_LVL + 1; lvl <= end_pwrlvl; lvl++) {
-		psci_non_cpu_pd_nodes[parent_idx].local_state =
-				PSCI_LOCAL_STATE_RUN;
-#if !USE_COHERENT_MEM
-		flush_dcache_range(
-				(uintptr_t) &psci_non_cpu_pd_nodes[parent_idx],
-				sizeof(psci_non_cpu_pd_nodes[parent_idx]));
-#endif
+		set_non_cpu_pd_node_local_state(parent_idx,
+				PSCI_LOCAL_STATE_RUN);
 		psci_set_req_local_pwr_state(lvl,
 					     cpu_idx,
 					     PSCI_LOCAL_STATE_RUN);
@@ -364,7 +387,7 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl)
 
 	psci_set_aff_info_state(AFF_STATE_ON);
 	psci_set_cpu_local_state(PSCI_LOCAL_STATE_RUN);
-	flush_cpu_data(psci_svc_cpu_data);
+	psci_flush_cpu_data(psci_svc_cpu_data);
 }
 
 /******************************************************************************
diff --git a/lib/psci/psci_off.c b/lib/psci/psci_off.c
index 394aaa3b1..94cf2ede1 100644
--- a/lib/psci/psci_off.c
+++ b/lib/psci/psci_off.c
@@ -154,17 +154,17 @@ exit:
 	 */
 	if (rc == PSCI_E_SUCCESS) {
 		/*
-		 * Set the affinity info state to OFF. This writes directly to
-		 * main memory as caches are disabled, so cache maintenance is
+		 * Set the affinity info state to OFF.
When caches are disabled, + * this writes directly to main memory, so cache maintenance is * required to ensure that later cached reads of aff_info_state - * return AFF_STATE_OFF. A dsbish() ensures ordering of the + * return AFF_STATE_OFF. A dsbish() ensures ordering of the * update to the affinity info state prior to cache line * invalidation. */ - flush_cpu_data(psci_svc_cpu_data.aff_info_state); + psci_flush_cpu_data(psci_svc_cpu_data.aff_info_state); psci_set_aff_info_state(AFF_STATE_OFF); - dsbish(); - inv_cpu_data(psci_svc_cpu_data.aff_info_state); + psci_dsbish(); + psci_inv_cpu_data(psci_svc_cpu_data.aff_info_state); #if ENABLE_RUNTIME_INSTRUMENTATION diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h index ca8291e48..7f0204ad2 100644 --- a/lib/psci/psci_private.h +++ b/lib/psci/psci_private.h @@ -38,6 +38,29 @@ #include #include +#if HW_ASSISTED_COHERENCY +/* + * On systems with hardware-assisted coherency, make PSCI cache operations NOP, + * as PSCI participants are cache-coherent, and there's no need for explicit + * cache maintenance operations or barriers to coordinate their state. + */ +#define psci_flush_dcache_range(addr, size) +#define psci_flush_cpu_data(member) +#define psci_inv_cpu_data(member) + +#define psci_dsbish() +#else +/* + * If not all PSCI participants are cache-coherent, perform cache maintenance + * and issue barriers wherever required to coordinate state. + */ +#define psci_flush_dcache_range(addr, size) flush_dcache_range(addr, size) +#define psci_flush_cpu_data(member) flush_cpu_data(member) +#define psci_inv_cpu_data(member) inv_cpu_data(member) + +#define psci_dsbish() dsbish() +#endif + /* * The following helper macros abstract the interface to the Bakery * Lock API. diff --git a/lib/psci/psci_setup.c b/lib/psci/psci_setup.c index 7327b92ed..323dc62cb 100644 --- a/lib/psci/psci_setup.c +++ b/lib/psci/psci_setup.c @@ -86,7 +86,7 @@ static void psci_init_pwr_domain_node(unsigned int node_idx, /* Set the power state to OFF state */ svc_cpu_data->local_state = PLAT_MAX_OFF_STATE; - flush_dcache_range((uintptr_t)svc_cpu_data, + psci_flush_dcache_range((uintptr_t)svc_cpu_data, sizeof(*svc_cpu_data)); cm_set_context_by_index(node_idx, @@ -242,9 +242,9 @@ int psci_setup(const psci_lib_args_t *lib_args) /* * Flush `psci_plat_pm_ops` as it will be accessed by secondary CPUs - * during warm boot before data cache is enabled. + * during warm boot, possibly before data cache is enabled. */ - flush_dcache_range((uintptr_t)&psci_plat_pm_ops, + psci_flush_dcache_range((uintptr_t)&psci_plat_pm_ops, sizeof(psci_plat_pm_ops)); /* Initialize the psci capability */ diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c index 302116bd4..23e5adab3 100644 --- a/lib/psci/psci_suspend.c +++ b/lib/psci/psci_suspend.c @@ -91,10 +91,10 @@ static void psci_suspend_to_pwrdown_start(unsigned int end_pwrlvl, psci_set_suspend_pwrlvl(end_pwrlvl); /* - * Flush the target power level as it will be accessed on power up with + * Flush the target power level as it might be accessed on power up with * Data cache disabled. 
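+	 * (With HW_ASSISTED_COHERENCY, the psci_flush_cpu_data() wrapper
+	 * below compiles to nothing.)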
 	 */
-	flush_cpu_data(psci_svc_cpu_data.target_pwrlvl);
+	psci_flush_cpu_data(psci_svc_cpu_data.target_pwrlvl);
 
 	/*
 	 * Call the cpu suspend handler registered by the Secure Payload

From b0408e87f7dfbdfe3e00cd3c1421b2939dd209ca Mon Sep 17 00:00:00 2001
From: Jeenu Viswambharan
Date: Thu, 5 Jan 2017 11:01:02 +0000
Subject: [PATCH 5/5] PSCI: Optimize call paths if all participants are cache-coherent

The current PSCI implementation can apply certain optimizations upon the
assumption that all PSCI participants are cache-coherent.

  - Skip performing cache maintenance during power-up.

  - Skip performing cache maintenance during power-down:

    At present, on the power-down path, the CPU driver disables caches and
    MMU, and performs cache maintenance in preparation for powering down the
    CPU. This means that PSCI must perform additional cache maintenance on
    the extant stack for correct functioning.

    If all participating CPUs are cache-coherent, the CPU driver would
    neither disable the MMU nor perform cache maintenance. The CPU being
    powered down therefore remains cache-coherent throughout all PSCI call
    paths. This in turn means that PSCI cache maintenance operations are not
    required during power down.

  - Choose spin locks instead of bakery locks:

    The current PSCI implementation must synchronize both cache-coherent and
    non-cache-coherent participants. Mutual exclusion primitives are not
    guaranteed to function on non-coherent memory. For this reason, the
    current PSCI implementation had to resort to bakery locks.

    If all participants are cache-coherent, the implementation can enable the
    MMU and data caches early, and substitute spin locks for bakery locks.
    Spin locks make use of architectural mutual exclusion primitives, and are
    lighter and faster.

The optimizations are applied when the HW_ASSISTED_COHERENCY build option is
enabled, as all PSCI participants are expected to be cache-coherent on those
systems.

Change-Id: Iac51c3ed318ea7e2120f6b6a46fd2db2eae46ede
Signed-off-by: Jeenu Viswambharan
---
 docs/psci-lib-integration-guide.md | 14 +++++-----
 lib/psci/psci_common.c             | 33 ++++++++++++++++++++-
 lib/psci/psci_off.c                |  5 ++--
 lib/psci/psci_on.c                 |  4 ++-
 lib/psci/psci_private.h            | 42 +++++++++++++++++++++++-----
 lib/psci/psci_suspend.c            | 14 ++++------
 6 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/docs/psci-lib-integration-guide.md b/docs/psci-lib-integration-guide.md
index f290966ba..d81b3286c 100644
--- a/docs/psci-lib-integration-guide.md
+++ b/docs/psci-lib-integration-guide.md
@@ -176,7 +176,9 @@ interfaces are:
 *   The page tables must be setup and the MMU enabled
 *   The C runtime environment must be setup and stack initialized
 *   The Data cache must be enabled prior to invoking any of the PSCI library
-    interfaces except for `psci_warmboot_entrypoint()`.
+    interfaces except for `psci_warmboot_entrypoint()`. However, when the
+    build option `HW_ASSISTED_COHERENCY` is enabled, data caches are expected
+    to be enabled even for `psci_warmboot_entrypoint()`.
 
 Further requirements for each interface can be found in the interface
 description.
@@ -270,11 +272,11 @@ wakes up, it will start execution from the warm reset address.
     Return   : void
 
 This function performs the warm boot initialization/restoration as mandated by
-[PSCI spec]. For AArch32, on wakeup from power down the CPU resets to secure
-SVC mode and the EL3 Runtime Software must perform the prerequisite
-initializations mentioned at top of this section. This function must be called
-with Data cache disabled but with MMU initialized and enabled. The major
-actions performed by this function are:
+[PSCI spec]. For AArch32, on wakeup from power down the CPU resets to secure SVC
+mode and the EL3 Runtime Software must perform the prerequisite initializations
+mentioned at the top of this section. This function must be called with Data
+cache disabled (unless the build option `HW_ASSISTED_COHERENCY` is enabled) but
+with MMU initialized and enabled. The major actions performed by this function are:
 
 *   Invalidates the stack and enables the data cache.
 *   Initializes architecture and PSCI state coordination.
diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 026690d25..1be37c090 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -79,7 +79,8 @@ __section("tzfw_coherent_mem")
 #endif
 ;
 
-DEFINE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
+/* Lock for PSCI state coordination */
+DEFINE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
 
 cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
 
@@ -992,3 +993,33 @@ int psci_get_suspend_afflvl(void)
 }
 
 #endif
+
+/*******************************************************************************
+ * Initiate the power down sequence, by calling the power down operations
+ * registered for this CPU.
+ ******************************************************************************/
+void psci_do_pwrdown_sequence(unsigned int power_level)
+{
+#if HW_ASSISTED_COHERENCY
+	/*
+	 * With hardware-assisted coherency, the CPU drivers only initiate the
+	 * power down sequence, without performing cache-maintenance operations
+	 * in software. Data caches and MMU remain enabled both before and
+	 * after this call.
+	 */
+	prepare_cpu_pwr_dwn(power_level);
+#else
+	/*
+	 * Without hardware-assisted coherency, the CPU drivers disable data
+	 * caches and MMU, then perform cache-maintenance operations in
+	 * software.
+	 *
+	 * We still need prepare_cpu_pwr_dwn() to initiate the power down
+	 * sequence, but it is invoked through the assembly wrapper below: we
+	 * currently have data caches and MMU enabled, and the CPU driver will
+	 * return with both disabled, so the stack memory must be flushed out
+	 * to main memory before we start popping from it again.
+	 */
+	psci_do_pwrdown_cache_maintenance(power_level);
+#endif
+}
diff --git a/lib/psci/psci_off.c b/lib/psci/psci_off.c
index 94cf2ede1..4ba786565 100644
--- a/lib/psci/psci_off.c
+++ b/lib/psci/psci_off.c
@@ -119,10 +119,9 @@ int psci_do_cpu_off(unsigned int end_pwrlvl)
 #endif
 
 	/*
-	 * Arch. management. Perform the necessary steps to flush all
-	 * cpu caches.
+	 * Arch. management. Initiate power down sequence.
 	 */
-	psci_do_pwrdown_cache_maintenance(psci_find_max_off_lvl(&state_info));
+	psci_do_pwrdown_sequence(psci_find_max_off_lvl(&state_info));
 
 #if ENABLE_RUNTIME_INSTRUMENTATION
 	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
diff --git a/lib/psci/psci_on.c b/lib/psci/psci_on.c
index f4bb7978b..675ed6681 100644
--- a/lib/psci/psci_on.c
+++ b/lib/psci/psci_on.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -165,10 +165,12 @@ void psci_cpu_on_finish(unsigned int cpu_idx,
 	 */
 	psci_plat_pm_ops->pwr_domain_on_finish(state_info);
 
+#if !HW_ASSISTED_COHERENCY
 	/*
 	 * Arch.
management: Enable data cache and manage stack memory */ psci_do_pwrup_cache_maintenance(); +#endif /* * All the platform specific actions for turning this cpu diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h index 7f0204ad2..a27e215c8 100644 --- a/lib/psci/psci_private.h +++ b/lib/psci/psci_private.h @@ -39,6 +39,7 @@ #include #if HW_ASSISTED_COHERENCY + /* * On systems with hardware-assisted coherency, make PSCI cache operations NOP, * as PSCI participants are cache-coherent, and there's no need for explicit @@ -49,7 +50,21 @@ #define psci_inv_cpu_data(member) #define psci_dsbish() + +/* + * On systems where participant CPUs are cache-coherent, we can use spinlocks + * instead of bakery locks. + */ +#define DEFINE_PSCI_LOCK(_name) spinlock_t _name +#define DECLARE_PSCI_LOCK(_name) extern DEFINE_PSCI_LOCK(_name) + +#define psci_lock_get(non_cpu_pd_node) \ + spin_lock(&psci_locks[(non_cpu_pd_node)->lock_index]) +#define psci_lock_release(non_cpu_pd_node) \ + spin_unlock(&psci_locks[(non_cpu_pd_node)->lock_index]) + #else + /* * If not all PSCI participants are cache-coherent, perform cache maintenance * and issue barriers wherever required to coordinate state. @@ -59,19 +74,24 @@ #define psci_inv_cpu_data(member) inv_cpu_data(member) #define psci_dsbish() dsbish() -#endif /* - * The following helper macros abstract the interface to the Bakery - * Lock API. + * Use bakery locks for state coordination as not all PSCI participants are + * cache coherent. */ -#define psci_lock_init(non_cpu_pd_node, idx) \ - ((non_cpu_pd_node)[(idx)].lock_index = (idx)) +#define DEFINE_PSCI_LOCK(_name) DEFINE_BAKERY_LOCK(_name) +#define DECLARE_PSCI_LOCK(_name) DECLARE_BAKERY_LOCK(_name) + #define psci_lock_get(non_cpu_pd_node) \ bakery_lock_get(&psci_locks[(non_cpu_pd_node)->lock_index]) #define psci_lock_release(non_cpu_pd_node) \ bakery_lock_release(&psci_locks[(non_cpu_pd_node)->lock_index]) +#endif + +#define psci_lock_init(non_cpu_pd_node, idx) \ + ((non_cpu_pd_node)[(idx)].lock_index = (idx)) + /* * The PSCI capability which are provided by the generic code but does not * depend on the platform or spd capabilities. @@ -189,8 +209,8 @@ extern non_cpu_pd_node_t psci_non_cpu_pd_nodes[PSCI_NUM_NON_CPU_PWR_DOMAINS]; extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT]; extern unsigned int psci_caps; -/* One bakery lock is required for each non-cpu power domain */ -DECLARE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]); +/* One lock is required per non-CPU power domain node */ +DECLARE_PSCI_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]); /******************************************************************************* * SPD's power management hooks registered with PSCI @@ -227,6 +247,14 @@ void psci_set_pwr_domains_to_run(unsigned int end_pwrlvl); void psci_print_power_domain_map(void); unsigned int psci_is_last_on_cpu(void); int psci_spd_migrate_info(u_register_t *mpidr); +void psci_do_pwrdown_sequence(unsigned int power_level); + +/* + * CPU power down is directly called only when HW_ASSISTED_COHERENCY is + * available. Otherwise, this needs post-call stack maintenance, which is + * handled in assembly. 
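+ * (When HW_ASSISTED_COHERENCY is 0, callers reach this function through the
+ * assembly wrapper psci_do_pwrdown_cache_maintenance(), which performs the
+ * stack flush around it; see psci_do_pwrdown_sequence() in psci_common.c.)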
+ */ +void prepare_cpu_pwr_dwn(unsigned int power_level); /* Private exported functions from psci_on.c */ int psci_cpu_on_start(u_register_t target_cpu, diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c index 23e5adab3..08c8fd6a6 100644 --- a/lib/psci/psci_suspend.c +++ b/lib/psci/psci_suspend.c @@ -121,13 +121,11 @@ static void psci_suspend_to_pwrdown_start(unsigned int end_pwrlvl, #endif /* - * Arch. management. Perform the necessary steps to flush all - * cpu caches. Currently we assume that the power level correspond - * the cache level. + * Arch. management. Initiate power down sequence. * TODO : Introduce a mechanism to query the cache level to flush * and the cpu-ops power down to perform from the platform. */ - psci_do_pwrdown_cache_maintenance(max_off_lvl); + psci_do_pwrdown_sequence(max_off_lvl); #if ENABLE_RUNTIME_INSTRUMENTATION PMF_CAPTURE_TIMESTAMP(rt_instr_svc, @@ -304,12 +302,10 @@ void psci_cpu_suspend_finish(unsigned int cpu_idx, */ psci_plat_pm_ops->pwr_domain_suspend_finish(state_info); - /* - * Arch. management: Enable the data cache, manage stack memory and - * restore the stashed EL3 architectural context from the 'cpu_context' - * structure for this cpu. - */ +#if !HW_ASSISTED_COHERENCY + /* Arch. management: Enable the data cache, stack memory maintenance. */ psci_do_pwrup_cache_maintenance(); +#endif /* Re-init the cntfrq_el0 register */ counter_freq = plat_get_syscnt_freq2();
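As a usage illustration to close the series (a hypothetical caller, not code
from these patches; the real call sites live in lib/psci/psci_common.c), the
lock wrappers introduced above keep call sites identical whichever lock type
is selected:

    #include "psci_private.h"

    /* Hypothetical helper: with HW_ASSISTED_COHERENCY=1, psci_lock_get() and
     * psci_lock_release() expand to spin_lock()/spin_unlock() on the entry of
     * psci_locks indexed by the node's lock_index; otherwise they expand to
     * the bakery lock API operating on the same array. */
    static void coordinate_node_state(non_cpu_pd_node_t *node)
    {
    	psci_lock_get(node);
    	/* ... read and update the node's local_state under mutual exclusion ... */
    	psci_lock_release(node);
    }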