From ee7b35c4e1169ec68df016bfb244a5281440119d Mon Sep 17 00:00:00 2001 From: Andrew Thoelke Date: Thu, 10 Sep 2015 11:39:36 +0100 Subject: [PATCH 1/3] Re-design bakery lock memory allocation and algorithm This patch unifies the bakery lock api's across coherent and normal memory implementation of locks by using same data type `bakery_lock_t` and similar arguments to functions. A separate section `bakery_lock` has been created and used to allocate memory for bakery locks using `DEFINE_BAKERY_LOCK`. When locks are allocated in normal memory, each lock for a core has to spread across multiple cache lines. By using the total size allocated in a separate cache line for a single core at compile time, the memory for other core locks is allocated at link time by multiplying the single core locks size with (PLATFORM_CORE_COUNT - 1). The normal memory lock algorithm now uses lock address instead of the `id` in the per_cpu_data. For locks allocated in coherent memory, it moves locks from tzfw_coherent_memory to bakery_lock section. The bakery locks are allocated as part of bss or in coherent memory depending on usage of coherent memory. Both these regions are initialised to zero as part of run_time_init before locks are used. Hence, bakery_lock_init() is made an empty function as the lock memory is already initialised to zero. The above design lead to the removal of psci bakery locks from non_cpu_power_pd_node to psci_locks. NOTE: THE BAKERY LOCK API WHEN USE_COHERENT_MEM IS NOT SET HAS CHANGED. THIS IS A BREAKING CHANGE FOR ALL PLATFORM PORTS THAT ALLOCATE BAKERY LOCKS IN NORMAL MEMORY. Change-Id: Ic3751c0066b8032dcbf9d88f1d4dc73d15f61d8b --- bl31/bl31.ld.S | 29 +++++- docs/firmware-design.md | 131 +++++++++++++----------- docs/porting-guide.md | 27 +++-- include/bl31/services/psci.h | 3 - include/lib/bakery_lock.h | 32 ++++-- lib/locks/bakery/bakery_lock_coherent.c | 10 -- lib/locks/bakery/bakery_lock_normal.c | 43 +++++--- services/std_svc/psci/psci_common.c | 2 + services/std_svc/psci/psci_private.h | 25 ++--- services/std_svc/psci/psci_setup.c | 6 -- 10 files changed, 184 insertions(+), 124 deletions(-) diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S index 3327f3165..0639d8170 100644 --- a/bl31/bl31.ld.S +++ b/bl31/bl31.ld.S @@ -101,10 +101,31 @@ SECTIONS * The .bss section gets initialised to 0 at runtime. * Its base address must be 16-byte aligned. */ - .bss : ALIGN(16) { + .bss (NOLOAD) : ALIGN(16) { __BSS_START__ = .; *(.bss*) *(COMMON) +#if !USE_COHERENT_MEM + /* + * Bakery locks are stored in normal .bss memory + * + * Each lock's data is spread across multiple cache lines, one per CPU, + * but multiple locks can share the same cache line. + * The compiler will allocate enough memory for one CPU's bakery locks, + * the remaining cache lines are allocated by the linker script + */ + . = ALIGN(CACHE_WRITEBACK_GRANULE); + __BAKERY_LOCK_START__ = .; + *(bakery_lock) + . = ALIGN(CACHE_WRITEBACK_GRANULE); + __PERCPU_BAKERY_LOCK_SIZE__ = . - __BAKERY_LOCK_START__; + . = . + (__PERCPU_BAKERY_LOCK_SIZE__ * (PLATFORM_CORE_COUNT - 1)); + __BAKERY_LOCK_END__ = .; +#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE + ASSERT(__PERCPU_BAKERY_LOCK_SIZE__ == PLAT_PERCPU_BAKERY_LOCK_SIZE, + "PLAT_PERCPU_BAKERY_LOCK_SIZE does not match bakery lock requirements"); +#endif +#endif __BSS_END__ = .; } >RAM @@ -126,6 +147,12 @@ SECTIONS */ coherent_ram (NOLOAD) : ALIGN(4096) { __COHERENT_RAM_START__ = .; + /* + * Bakery locks are stored in coherent memory + * + * Each lock's data is contiguous and fully allocated by the compiler + */ + *(bakery_lock) *(tzfw_coherent_mem) __COHERENT_RAM_END_UNALIGNED__ = .; /* diff --git a/docs/firmware-design.md b/docs/firmware-design.md index 18f634f4b..41fb7c0d8 100644 --- a/docs/firmware-design.md +++ b/docs/firmware-design.md @@ -1523,38 +1523,52 @@ approach described above. The below sections analyze the data structures allocated in the coherent memory region and the changes required to allocate them in normal memory. -### PSCI Affinity map nodes +### Coherent memory usage in PSCI implementation -The `psci_aff_map` data structure stores the hierarchial node information for -each affinity level in the system including the PSCI states associated with them. -By default, this data structure is allocated in the coherent memory region in -the Trusted Firmware because it can be accessed by multiple CPUs, either with -their caches enabled or disabled. +The `psci_non_cpu_pd_nodes` data structure stores the platform's power domain +tree information for state management of power domains. By default, this data +structure is allocated in the coherent memory region in the Trusted Firmware +because it can be accessed by multple CPUs, either with caches enabled or +disabled. - typedef struct aff_map_node { - unsigned long mpidr; - unsigned char ref_count; - unsigned char state; - unsigned char level; - #if USE_COHERENT_MEM - bakery_lock_t lock; - #else - unsigned char aff_map_index; - #endif - } aff_map_node_t; +typedef struct non_cpu_pwr_domain_node { + /* + * Index of the first CPU power domain node level 0 which has this node + * as its parent. + */ + unsigned int cpu_start_idx; + + /* + * Number of CPU power domains which are siblings of the domain indexed + * by 'cpu_start_idx' i.e. all the domains in the range 'cpu_start_idx + * -> cpu_start_idx + ncpus' have this node as their parent. + */ + unsigned int ncpus; + + /* + * Index of the parent power domain node. + * TODO: Figure out whether to whether using pointer is more efficient. + */ + unsigned int parent_node; + + plat_local_state_t local_state; + + unsigned char level; + + /* For indexing the psci_lock array*/ + unsigned char lock_index; +} non_cpu_pd_node_t; In order to move this data structure to normal memory, the use of each of its -fields must be analyzed. Fields like `mpidr` and `level` are only written once -during cold boot. Hence removing them from coherent memory involves only doing -a clean and invalidate of the cache lines after these fields are written. +fields must be analyzed. Fields like `cpu_start_idx`, `ncpus`, `parent_node` +`level` and `lock_index` are only written once during cold boot. Hence removing +them from coherent memory involves only doing a clean and invalidate of the +cache lines after these fields are written. -The fields `state` and `ref_count` can be concurrently accessed by multiple -CPUs in different cache states. A Lamport's Bakery lock is used to ensure mutual -exlusion to these fields. As a result, it is possible to move these fields out -of coherent memory by performing software cache maintenance on them. The field -`lock` is the bakery lock data structure when `USE_COHERENT_MEM` is enabled. -The `aff_map_index` is used to identify the bakery lock when `USE_COHERENT_MEM` -is disabled. +The field `local_state` can be concurrently accessed by multiple CPUs in +different cache states. A Lamport's Bakery lock `psci_locks` is used to ensure +mutual exlusion to this field and a clean and invalidate is needed after it +is written. ### Bakery lock data @@ -1563,9 +1577,13 @@ and is accessed by multiple CPUs with mismatched attributes. `bakery_lock_t` is defined as follows: typedef struct bakery_lock { - int owner; - volatile char entering[BAKERY_LOCK_MAX_CPUS]; - volatile unsigned number[BAKERY_LOCK_MAX_CPUS]; + /* + * The lock_data is a bit-field of 2 members: + * Bit[0] : choosing. This field is set when the CPU is + * choosing its bakery number. + * Bits[1 - 15] : number. This is the bakery number allocated. + */ + volatile uint16_t lock_data[BAKERY_LOCK_MAX_CPUS]; } bakery_lock_t; It is a characteristic of Lamport's Bakery algorithm that the volatile per-CPU @@ -1589,17 +1607,14 @@ the update made by CPU0 as well. To use bakery locks when `USE_COHERENT_MEM` is disabled, the lock data structure has been redesigned. The changes utilise the characteristic of Lamport's Bakery -algorithm mentioned earlier. The per-CPU fields of the new lock structure are -aligned such that they are allocated on separate cache lines. The per-CPU data -framework in Trusted Firmware is used to achieve this. This enables software to +algorithm mentioned earlier. The bakery_lock structure only allocates the memory +for a single CPU. The macro `DEFINE_BAKERY_LOCK` allocates all the bakery locks +needed for a CPU into a section `bakery_lock`. The linker allocates the memory +for other cores by using the total size allocated for the bakery_lock section +and multiplying it with (PLATFORM_CORE_COUNT - 1). This enables software to perform software cache maintenance on the lock data structure without running into coherency issues associated with mismatched attributes. -The per-CPU data framework enables consolidation of data structures on the -fewest cache lines possible. This saves memory as compared to the scenario where -each data structure is separately aligned to the cache line boundary to achieve -the same effect. - The bakery lock data structure `bakery_info_t` is defined for use when `USE_COHERENT_MEM` is disabled as follows: @@ -1615,12 +1630,10 @@ The bakery lock data structure `bakery_info_t` is defined for use when The `bakery_info_t` represents a single per-CPU field of one lock and the combination of corresponding `bakery_info_t` structures for all CPUs in the -system represents the complete bakery lock. It is embedded in the per-CPU -data framework `cpu_data` as shown below: +system represents the complete bakery lock. The view in memory for a system +with n bakery locks are: - CPU0 cpu_data - ------------------ - | .... | + bakery_lock section start |----------------| | `bakery_info_t`| <-- Lock_0 per-CPU field | Lock_0 | for CPU0 @@ -1633,12 +1646,11 @@ data framework `cpu_data` as shown below: | `bakery_info_t`| <-- Lock_N per-CPU field | Lock_N | for CPU0 ------------------ - - - CPU1 cpu_data + | XXXXX | + | Padding to | + | next Cache WB | <--- Calculate PERCPU_BAKERY_LOCK_SIZE, allocate + | Granule | continuous memory for remaining CPUs. ------------------ - | .... | - |----------------| | `bakery_info_t`| <-- Lock_0 per-CPU field | Lock_0 | for CPU1 |----------------| @@ -1650,14 +1662,20 @@ data framework `cpu_data` as shown below: | `bakery_info_t`| <-- Lock_N per-CPU field | Lock_N | for CPU1 ------------------ + | XXXXX | + | Padding to | + | next Cache WB | + | Granule | + ------------------ -Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an +Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an operation on Lock_N, the corresponding `bakery_info_t` in both CPU0 and CPU1 -`cpu_data` need to be fetched and appropriate cache operations need to be -performed for each access. +`bakery_lock` section need to be fetched and appropriate cache operations need +to be performed for each access. + +On ARM Platforms, bakery locks are used in psci (`psci_locks`) and power controller +driver (`arm_lock`). -For multiple bakery locks, an array of `bakery_info_t` is declared in `cpu_data` -and each lock is given an `id` to identify it in the array. ### Non Functional Impact of removing coherent memory @@ -1680,10 +1698,9 @@ Juno ARM development platform. As mentioned earlier, almost a page of memory can be saved by disabling `USE_COHERENT_MEM`. Each platform needs to consider these trade-offs to decide whether coherent memory should be used. If a platform disables -`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it should -reserve memory in `cpu_data` by defining the macro `PLAT_PCPU_DATA_SIZE` (see -the [Porting Guide]). Refer to the reference platform code for examples. - +`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it can +optionally define macro `PLAT_PERCPU_BAKERY_LOCK_SIZE` (see the [Porting +Guide]). Refer to the reference platform code for examples. 12. Code Structure ------------------- diff --git a/docs/porting-guide.md b/docs/porting-guide.md index 6846ddfe2..04793a2df 100644 --- a/docs/porting-guide.md +++ b/docs/porting-guide.md @@ -76,21 +76,24 @@ mapped page tables, and enable both the instruction and data caches for each BL stage. In ARM standard platforms, each BL stage configures the MMU in the platform-specific architecture setup function, `blX_plat_arch_setup()`. -If the build option `USE_COHERENT_MEM` is enabled, each platform must allocate a +If the build option `USE_COHERENT_MEM` is enabled, each platform can allocate a block of identity mapped secure memory with Device-nGnRE attributes aligned to -page boundary (4K) for each BL stage. This memory is identified by the section -name `tzfw_coherent_mem` so that its possible for the firmware to place -variables in it using the following C code directive: +page boundary (4K) for each BL stage. All sections which allocate coherent +memory are grouped under `coherent_ram`. For ex: Bakery locks are placed in a +section identified by name `bakery_lock` inside `coherent_ram` so that its +possible for the firmware to place variables in it using the following C code +directive: - __attribute__ ((section("tzfw_coherent_mem"))) + __attribute__ ((section("bakery_lock"))) Or alternatively the following assembler code directive: - .section tzfw_coherent_mem + .section bakery_lock -The `tzfw_coherent_mem` section is used to allocate any data structures that are -accessed both when a CPU is executing with its MMU and caches enabled, and when -it's running with its MMU and caches disabled. Examples are given below. +The `coherent_ram` section is a sum of all sections like `bakery_lock` which are +used to allocate any data structures that are accessed both when a CPU is +executing with its MMU and caches enabled, and when it's running with its MMU +and caches disabled. Examples are given below. The following variables, functions and constants must be defined by the platform for the firmware to work correctly. @@ -1150,6 +1153,12 @@ of the system counter, which is retrieved from the first entry in the frequency modes table. +* **#define : PLAT_PERCPU_BAKERY_LOCK_SIZE** [optional] + + It is used if the bakery locks are using normal memory. It defines the memory + (in bytes) to be allocated for the bakery locks and needs to be a multiple of + cache line size. + 3.3 Power State Coordination Interface (in BL3-1) ------------------------------------------------ diff --git a/include/bl31/services/psci.h b/include/bl31/services/psci.h index 004dd6146..871e2907f 100644 --- a/include/bl31/services/psci.h +++ b/include/bl31/services/psci.h @@ -251,9 +251,6 @@ typedef struct psci_cpu_data { /* The local power state of this CPU */ plat_local_state_t local_state; -#if !USE_COHERENT_MEM - bakery_info_t pcpu_bakery_info[PSCI_NUM_NON_CPU_PWR_DOMAINS]; -#endif } psci_cpu_data_t; /******************************************************************************* diff --git a/include/lib/bakery_lock.h b/include/lib/bakery_lock.h index 2e1afa21f..86adb9cb1 100644 --- a/include/lib/bakery_lock.h +++ b/include/lib/bakery_lock.h @@ -56,6 +56,11 @@ * External bakery lock interface. ****************************************************************************/ #if USE_COHERENT_MEM +/* + * Bakery locks are stored in coherent memory + * + * Each lock's data is contiguous and fully allocated by the compiler + */ typedef struct bakery_lock { /* @@ -67,12 +72,15 @@ typedef struct bakery_lock { volatile uint16_t lock_data[BAKERY_LOCK_MAX_CPUS]; } bakery_lock_t; -void bakery_lock_init(bakery_lock_t *bakery); -void bakery_lock_get(bakery_lock_t *bakery); -void bakery_lock_release(bakery_lock_t *bakery); -int bakery_lock_try(bakery_lock_t *bakery); - #else +/* + * Bakery locks are stored in normal .bss memory + * + * Each lock's data is spread across multiple cache lines, one per CPU, + * but multiple locks can share the same cache line. + * The compiler will allocate enough memory for one CPU's bakery locks, + * the remaining cache lines are allocated by the linker script + */ typedef struct bakery_info { /* @@ -84,9 +92,19 @@ typedef struct bakery_info { volatile uint16_t lock_data; } bakery_info_t; -void bakery_lock_get(unsigned int id, unsigned int offset); -void bakery_lock_release(unsigned int id, unsigned int offset); +typedef bakery_info_t bakery_lock_t; #endif /* __USE_COHERENT_MEM__ */ + +inline void bakery_lock_init(bakery_lock_t *bakery) {} +void bakery_lock_get(bakery_lock_t *bakery); +void bakery_lock_release(bakery_lock_t *bakery); + +#define DEFINE_BAKERY_LOCK(_name) bakery_lock_t _name \ + __attribute__ ((section("bakery_lock"))) + +#define DECLARE_BAKERY_LOCK(_name) extern bakery_lock_t _name + + #endif /* __ASSEMBLY__ */ #endif /* __BAKERY_LOCK_H__ */ diff --git a/lib/locks/bakery/bakery_lock_coherent.c b/lib/locks/bakery/bakery_lock_coherent.c index 1c60dba78..f2212228b 100644 --- a/lib/locks/bakery/bakery_lock_coherent.c +++ b/lib/locks/bakery/bakery_lock_coherent.c @@ -63,16 +63,6 @@ assert(entry < BAKERY_LOCK_MAX_CPUS); \ } while (0) -/* Initialize Bakery Lock to reset all ticket values */ -void bakery_lock_init(bakery_lock_t *bakery) -{ - assert(bakery); - - /* All ticket values need to be 0 */ - memset(bakery, 0, sizeof(*bakery)); -} - - /* Obtain a ticket for a given CPU */ static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me) { diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c index 3ca76e0d9..45b870b9d 100644 --- a/lib/locks/bakery/bakery_lock_normal.c +++ b/lib/locks/bakery/bakery_lock_normal.c @@ -56,12 +56,29 @@ * accesses regardless of status of address translation. */ -/* This macro assumes that the bakery_info array is located at the offset specified */ -#define get_my_bakery_info(offset, id) \ - (((bakery_info_t *) (((uint8_t *)_cpu_data()) + offset)) + id) +#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE +/* + * Verify that the platform defined value for the per-cpu space for bakery locks is + * a multiple of the cache line size, to prevent multiple CPUs writing to the same + * bakery lock cache line + * + * Using this value, if provided, rather than the linker generated value results in + * more efficient code + */ +CASSERT((PLAT_PERCPU_BAKERY_LOCK_SIZE & (CACHE_WRITEBACK_GRANULE - 1)) == 0, \ + PLAT_PERCPU_BAKERY_LOCK_SIZE_not_cacheline_multiple); +#define PERCPU_BAKERY_LOCK_SIZE (PLAT_PERCPU_BAKERY_LOCK_SIZE) +#else +/* + * Use the linker defined symbol which has evaluated the size reqiurement. + * This is not as efficient as using a platform defined constant + */ +extern void *__PERCPU_BAKERY_LOCK_SIZE__; +#define PERCPU_BAKERY_LOCK_SIZE ((uintptr_t)&__PERCPU_BAKERY_LOCK_SIZE__) +#endif -#define get_bakery_info_by_index(offset, id, ix) \ - (((bakery_info_t *) (((uint8_t *)_cpu_data_by_index(ix)) + offset)) + id) +#define get_bakery_info(cpu_ix, lock) \ + (bakery_info_t *)((uintptr_t)lock + cpu_ix * PERCPU_BAKERY_LOCK_SIZE) #define write_cache_op(addr, cached) \ do { \ @@ -73,7 +90,7 @@ #define read_cache_op(addr, cached) if (cached) \ dccivac((uint64_t)addr) -static unsigned int bakery_get_ticket(int id, unsigned int offset, +static unsigned int bakery_get_ticket(bakery_lock_t *lock, unsigned int me, int is_cached) { unsigned int my_ticket, their_ticket; @@ -84,7 +101,7 @@ static unsigned int bakery_get_ticket(int id, unsigned int offset, * Obtain a reference to the bakery information for this cpu and ensure * it is not NULL. */ - my_bakery_info = get_my_bakery_info(offset, id); + my_bakery_info = get_bakery_info(me, lock); assert(my_bakery_info); /* @@ -115,7 +132,7 @@ static unsigned int bakery_get_ticket(int id, unsigned int offset, * Get a reference to the other contender's bakery info and * ensure that a stale copy is not read. */ - their_bakery_info = get_bakery_info_by_index(offset, id, they); + their_bakery_info = get_bakery_info(they, lock); assert(their_bakery_info); read_cache_op(their_bakery_info, is_cached); @@ -141,7 +158,7 @@ static unsigned int bakery_get_ticket(int id, unsigned int offset, return my_ticket; } -void bakery_lock_get(unsigned int id, unsigned int offset) +void bakery_lock_get(bakery_lock_t *lock) { unsigned int they, me, is_cached; unsigned int my_ticket, my_prio, their_ticket; @@ -153,7 +170,7 @@ void bakery_lock_get(unsigned int id, unsigned int offset) is_cached = read_sctlr_el3() & SCTLR_C_BIT; /* Get a ticket */ - my_ticket = bakery_get_ticket(id, offset, me, is_cached); + my_ticket = bakery_get_ticket(lock, me, is_cached); /* * Now that we got our ticket, compute our priority value, then compare @@ -168,7 +185,7 @@ void bakery_lock_get(unsigned int id, unsigned int offset) * Get a reference to the other contender's bakery info and * ensure that a stale copy is not read. */ - their_bakery_info = get_bakery_info_by_index(offset, id, they); + their_bakery_info = get_bakery_info(they, lock); assert(their_bakery_info); /* Wait for the contender to get their ticket */ @@ -199,12 +216,12 @@ void bakery_lock_get(unsigned int id, unsigned int offset) /* Lock acquired */ } -void bakery_lock_release(unsigned int id, unsigned int offset) +void bakery_lock_release(bakery_lock_t *lock) { bakery_info_t *my_bakery_info; unsigned int is_cached = read_sctlr_el3() & SCTLR_C_BIT; - my_bakery_info = get_my_bakery_info(offset, id); + my_bakery_info = get_bakery_info(plat_my_core_pos(), lock); assert(bakery_ticket_number(my_bakery_info->lock_data)); my_bakery_info->lock_data = 0; diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c index e12df04b4..733269527 100644 --- a/services/std_svc/psci/psci_common.c +++ b/services/std_svc/psci/psci_common.c @@ -78,6 +78,8 @@ __attribute__ ((section("tzfw_coherent_mem"))) #endif ; +DEFINE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]); + cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT]; /******************************************************************************* diff --git a/services/std_svc/psci/psci_private.h b/services/std_svc/psci/psci_private.h index 9b55d9f39..8c028a735 100644 --- a/services/std_svc/psci/psci_private.h +++ b/services/std_svc/psci/psci_private.h @@ -42,23 +42,12 @@ * The following helper macros abstract the interface to the Bakery * Lock API. */ -#if USE_COHERENT_MEM -#define psci_lock_init(non_cpu_pd_node, idx) \ - bakery_lock_init(&(non_cpu_pd_node)[(idx)].lock) -#define psci_lock_get(non_cpu_pd_node) \ - bakery_lock_get(&((non_cpu_pd_node)->lock)) -#define psci_lock_release(non_cpu_pd_node) \ - bakery_lock_release(&((non_cpu_pd_node)->lock)) -#else #define psci_lock_init(non_cpu_pd_node, idx) \ ((non_cpu_pd_node)[(idx)].lock_index = (idx)) #define psci_lock_get(non_cpu_pd_node) \ - bakery_lock_get((non_cpu_pd_node)->lock_index, \ - CPU_DATA_PSCI_LOCK_OFFSET) + bakery_lock_get(&psci_locks[(non_cpu_pd_node)->lock_index]) #define psci_lock_release(non_cpu_pd_node) \ - bakery_lock_release((non_cpu_pd_node)->lock_index, \ - CPU_DATA_PSCI_LOCK_OFFSET) -#endif + bakery_lock_release(&psci_locks[(non_cpu_pd_node)->lock_index]) /* * The PSCI capability which are provided by the generic code but does not @@ -140,12 +129,9 @@ typedef struct non_cpu_pwr_domain_node { plat_local_state_t local_state; unsigned char level; -#if USE_COHERENT_MEM - bakery_lock_t lock; -#else - /* For indexing the bakery_info array in per CPU data */ + + /* For indexing the psci_lock array*/ unsigned char lock_index; -#endif } non_cpu_pd_node_t; typedef struct cpu_pwr_domain_node { @@ -174,6 +160,9 @@ extern non_cpu_pd_node_t psci_non_cpu_pd_nodes[PSCI_NUM_NON_CPU_PWR_DOMAINS]; extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT]; extern unsigned int psci_caps; +/* One bakery lock is required for each non-cpu power domain */ +DECLARE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]); + /******************************************************************************* * SPD's power management hooks registered with PSCI ******************************************************************************/ diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c index 94fe630c9..7a8018739 100644 --- a/services/std_svc/psci/psci_setup.c +++ b/services/std_svc/psci/psci_setup.c @@ -181,12 +181,6 @@ static void populate_power_domain_tree(const unsigned char *topology) /* Validate the sanity of array exported by the platform */ assert(j == PLATFORM_CORE_COUNT); - -#if !USE_COHERENT_MEM - /* Flush the non CPU power domain data to memory */ - flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes, - sizeof(psci_non_cpu_pd_nodes)); -#endif } /******************************************************************************* From e25e6f41f75531f3500d5484b16365bf980e913f Mon Sep 17 00:00:00 2001 From: Vikram Kanigiri Date: Wed, 9 Sep 2015 10:52:13 +0100 Subject: [PATCH 2/3] Update ARM platform ports to use new bakery lock apis. This patch updates ARM platform ports to use the new unified bakery locks API. The caller does not have to use a different bakery lock API depending upon the value of the USE_COHERENT_MEM build option. NOTE: THIS PATCH CAN BE USED AS A REFERENCE TO UPDATE OTHER PLATFORM PORTS. Change-Id: I1b26afc7c9a9808a6040eb22f603d30192251da7 --- include/plat/arm/common/arm_def.h | 14 ++++---- include/plat/arm/common/plat_arm.h | 56 ++---------------------------- 2 files changed, 8 insertions(+), 62 deletions(-) diff --git a/include/plat/arm/common/arm_def.h b/include/plat/arm/common/arm_def.h index 377bfaa22..98705ec63 100644 --- a/include/plat/arm/common/arm_def.h +++ b/include/plat/arm/common/arm_def.h @@ -210,14 +210,6 @@ */ #define CACHE_WRITEBACK_GRANULE (1 << ARM_CACHE_WRITEBACK_SHIFT) -#if !USE_COHERENT_MEM -/* - * Size of the per-cpu data in bytes that should be reserved in the generic - * per-cpu data structure for the ARM platform port. - */ -#define PLAT_PCPU_DATA_SIZE 2 -#endif - /******************************************************************************* * BL1 specific defines. @@ -305,4 +297,10 @@ #define TSP_IRQ_SEC_PHY_TIMER ARM_IRQ_SEC_PHY_TIMER +/* + * One cache line needed for bakery locks on ARM platforms + */ +#define PLAT_PERCPU_BAKERY_LOCK_SIZE (1 * CACHE_WRITEBACK_GRANULE) + + #endif /* __ARM_DEF_H__ */ diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h index 823212cb3..ad41f4f0a 100644 --- a/include/plat/arm/common/plat_arm.h +++ b/include/plat/arm/common/plat_arm.h @@ -71,14 +71,11 @@ void arm_configure_mmu_el3(unsigned long total_base, ); #if IMAGE_BL31 -#if USE_COHERENT_MEM - /* * Use this macro to instantiate lock before it is used in below * arm_lock_xxx() macros */ -#define ARM_INSTANTIATE_LOCK bakery_lock_t arm_lock \ - __attribute__ ((section("tzfw_coherent_mem"))); +#define ARM_INSTANTIATE_LOCK DEFINE_BAKERY_LOCK(arm_lock); /* * These are wrapper macros to the Coherent Memory Bakery Lock API. @@ -89,58 +86,9 @@ void arm_configure_mmu_el3(unsigned long total_base, #else -/******************************************************************************* - * Constants to specify how many bakery locks this platform implements. These - * are used if the platform chooses not to use coherent memory for bakery lock - * data structures. - ******************************************************************************/ -#define ARM_MAX_BAKERIES 1 -#define ARM_PWRC_BAKERY_ID 0 - -/* Empty definition */ -#define ARM_INSTANTIATE_LOCK - -/******************************************************************************* - * Definition of structure which holds platform specific per-cpu data. Currently - * it holds only the bakery lock information for each cpu. - ******************************************************************************/ -typedef struct arm_cpu_data { - bakery_info_t pcpu_bakery_info[ARM_MAX_BAKERIES]; -} arm_cpu_data_t; - -/* Macro to define the offset of bakery_info_t in arm_cpu_data_t */ -#define ARM_CPU_DATA_LOCK_OFFSET __builtin_offsetof\ - (arm_cpu_data_t, pcpu_bakery_info) - - -/******************************************************************************* - * Helper macros for bakery lock api when using the above arm_cpu_data_t for - * bakery lock data structures. It assumes that the bakery_info is at the - * beginning of the platform specific per-cpu data. - ******************************************************************************/ -#define arm_lock_init() /* No init required */ -#define arm_lock_get() bakery_lock_get(ARM_PWRC_BAKERY_ID, \ - CPU_DATA_PLAT_PCPU_OFFSET + \ - ARM_CPU_DATA_LOCK_OFFSET) -#define arm_lock_release() bakery_lock_release(ARM_PWRC_BAKERY_ID, \ - CPU_DATA_PLAT_PCPU_OFFSET + \ - ARM_CPU_DATA_LOCK_OFFSET) - /* - * Ensure that the size of the platform specific per-cpu data structure and - * the size of the memory allocated in generic per-cpu data for the platform - * are the same. + * Empty macros for all other BL stages other than BL3-1 */ -CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(arm_cpu_data_t), - arm_pcpu_data_size_mismatch); - -#endif /* USE_COHERENT_MEM */ - -#else - -/* -* Dummy macros for all other BL stages other than BL3-1 -*/ #define ARM_INSTANTIATE_LOCK #define arm_lock_init() #define arm_lock_get() From c3ec0b9ea4274120c6e82d86ccc427f13f65fa59 Mon Sep 17 00:00:00 2001 From: Vikram Kanigiri Date: Wed, 9 Sep 2015 10:53:05 +0100 Subject: [PATCH 3/3] Use unified bakery locks API in Mediatek port This patch update Mediatek port to use the `DEFINE_BAKERY_LOCK` macro instead of specifying the exact data structure to use for a bakery lock and the input linker section that it should be allocated to. Change-Id: I2116dbe27010bb46d7cc64fafef55c7240c4c721 --- plat/mediatek/mt8173/drivers/spm/spm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plat/mediatek/mt8173/drivers/spm/spm.c b/plat/mediatek/mt8173/drivers/spm/spm.c index f67daea03..7c6d72bec 100644 --- a/plat/mediatek/mt8173/drivers/spm/spm.c +++ b/plat/mediatek/mt8173/drivers/spm/spm.c @@ -53,7 +53,8 @@ static int spm_dormant_sta = CPU_DORMANT_RESET; #endif -static bakery_lock_t spm_lock __attribute__ ((section("tzfw_coherent_mem"))); +DEFINE_BAKERY_LOCK(spm_lock); + static int spm_hotplug_ready __attribute__ ((section("tzfw_coherent_mem"))); static int spm_mcdi_ready __attribute__ ((section("tzfw_coherent_mem"))); static int spm_suspend_ready __attribute__ ((section("tzfw_coherent_mem")));