Merge changes I710d1780,Ia9a59bde into integration

* changes:
  feat(gpt): configure memory size protected by bitlock
  feat(gpt): add support for large GPT mappings
This commit is contained in:
Soby Mathew 2024-05-28 12:26:37 +02:00 committed by TrustedFirmware Code Review
commit 261edb6a0f
9 changed files with 1024 additions and 241 deletions

View file

@ -1342,6 +1342,8 @@ $(eval $(call add_defines,\
PSCI_EXTENDED_STATE_ID \
PSCI_OS_INIT_MODE \
RESET_TO_BL31 \
RME_GPT_BITLOCK_BLOCK \
RME_GPT_MAX_BLOCK \
SEPARATE_CODE_AND_RODATA \
SEPARATE_BL2_NOLOAD_REGION \
SEPARATE_NOBITS_REGION \

View file

@ -805,6 +805,21 @@ Common build options
instead of the BL1 entrypoint. It can take the value 0 (CPU reset to BL1
entrypoint) or 1 (CPU reset to SP_MIN entrypoint). The default value is 0.
- ``RME_GPT_BITLOCK_BLOCK``: This defines the block size (in number of 512MB
- blocks) covered by a single bit of the bitlock structure during RME GPT
- operations. The lower the block size, the better opportunity for
- parallelising GPT operations but at the cost of more bits being needed
- for the bitlock structure. This numeric parameter can take the values
- from 0 to 512 and must be a power of 2. The value of 0 is special
- and it chooses a single spinlock for all GPT L1 table entries. Default
- value is 1 which corresponds to block size of 512MB per bit of bitlock
- structure.
- ``RME_GPT_MAX_BLOCK``: Numeric value in MB to define the maximum size of
supported contiguous blocks in GPT Library. This parameter can take the
values 0, 2, 32 and 512. Setting this value to 0 disables use of Contiguous
descriptors. Default value is 2.
- ``ROT_KEY``: This option is used when ``GENERATE_COT=1``. It specifies a
file that contains the ROT private key in PEM format or a PKCS11 URI and
enforces public key hash generation. If ``SAVE_KEYS=1``, only a file is

View file

@ -807,15 +807,6 @@ static inline void tlbirpalos_512m(uintptr_t addr)
TLBIRPALOS(addr, TLBI_SZ_512M);
}
/*
* Invalidate TLBs of GPT entries by Physical address, last level.
*
* @pa: the starting address for the range
* of invalidation
* @size: size of the range of invalidation
*/
void gpt_tlbi_by_pa_ll(uint64_t pa, size_t size);
/* Previously defined accessor functions with incomplete register names */
#define read_current_el() read_CurrentEl()

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
* Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@ -15,7 +15,6 @@
.globl zero_normalmem
.globl zeromem
.globl memcpy16
.globl gpt_tlbi_by_pa_ll
.globl disable_mmu_el1
.globl disable_mmu_el3
@ -594,20 +593,3 @@ func fixup_gdt_reloc
b.lo 1b
ret
endfunc fixup_gdt_reloc
/*
* TODO: Currently only supports size of 4KB,
* support other sizes as well.
*/
func gpt_tlbi_by_pa_ll
#if ENABLE_ASSERTIONS
cmp x1, #PAGE_SIZE_4KB
ASM_ASSERT(eq)
tst x0, #(PAGE_SIZE_MASK)
ASM_ASSERT(eq)
#endif
lsr x0, x0, #FOUR_KB_SHIFT /* 4KB size encoding is zero */
sys #6, c8, c4, #7, x0 /* TLBI RPALOS, <Xt> */
dsb sy
ret
endfunc gpt_tlbi_by_pa_ll

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,22 @@
#
# Copyright (c) 2021, Arm Limited. All rights reserved.
# Copyright (c) 2021-2024, Arm Limited. All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#
# Process RME_GPT_BITLOCK_BLOCK value
ifeq ($(filter 0 1 2 4 8 16 32 64 128 256 512, ${RME_GPT_BITLOCK_BLOCK}),)
$(error "Invalid value for RME_GPT_BITLOCK_BLOCK: ${RME_GPT_BITLOCK_BLOCK}")
endif
ifeq (${RME_GPT_BITLOCK_BLOCK},0)
$(warning "GPT library uses global spinlock")
endif
# Process RME_GPT_MAX_BLOCK value
ifeq ($(filter 0 2 32 512, ${RME_GPT_MAX_BLOCK}),)
$(error "Invalid value for RME_GPT_MAX_BLOCK: ${RME_GPT_MAX_BLOCK}")
endif
GPT_LIB_SRCS := $(addprefix lib/gpt_rme/, \
gpt_rme.c)

View file

@ -9,6 +9,7 @@
#include <arch.h>
#include <lib/gpt_rme/gpt_rme.h>
#include <lib/spinlock.h>
#include <lib/utils_def.h>
/******************************************************************************/
@ -19,7 +20,7 @@
#define GPT_L0_TYPE_MASK UL(0xF)
#define GPT_L0_TYPE_SHIFT U(0)
/* For now, we don't support contiguous descriptors, only table and block */
/* GPT level 0 table and block descriptors */
#define GPT_L0_TYPE_TBL_DESC UL(3)
#define GPT_L0_TYPE_BLK_DESC UL(1)
@ -29,29 +30,63 @@
#define GPT_L0_BLK_DESC_GPI_MASK UL(0xF)
#define GPT_L0_BLK_DESC_GPI_SHIFT U(4)
/* GPT level 1 descriptor bit definitions */
/* GPT level 1 Contiguous descriptor */
#define GPT_L1_TYPE_CONT_DESC_MASK UL(0xF)
#define GPT_L1_TYPE_CONT_DESC UL(1)
/* GPT level 1 Contiguous descriptor definitions */
#define GPT_L1_CONTIG_2MB UL(1)
#define GPT_L1_CONTIG_32MB UL(2)
#define GPT_L1_CONTIG_512MB UL(3)
#define GPT_L1_CONT_DESC_GPI_SHIFT U(4)
#define GPT_L1_CONT_DESC_GPI_MASK UL(0xF)
#define GPT_L1_CONT_DESC_CONTIG_SHIFT U(8)
#define GPT_L1_CONT_DESC_CONTIG_MASK UL(3)
/* GPT level 1 Granules descriptor bit definitions */
#define GPT_L1_GRAN_DESC_GPI_MASK UL(0xF)
/* L1 Contiguous descriptors templates */
#define GPT_L1_CONT_DESC_2MB \
(GPT_L1_TYPE_CONT_DESC | \
(GPT_L1_CONTIG_2MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
#define GPT_L1_CONT_DESC_32MB \
(GPT_L1_TYPE_CONT_DESC | \
(GPT_L1_CONTIG_32MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
#define GPT_L1_CONT_DESC_512MB \
(GPT_L1_TYPE_CONT_DESC | \
(GPT_L1_CONTIG_512MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
/* Create L1 Contiguous descriptor from GPI and template */
#define GPT_L1_GPI_CONT_DESC(_gpi, _desc) \
((_desc) | ((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT))
/* Create L1 Contiguous descriptor from Granules descriptor and size */
#define GPT_L1_CONT_DESC(_desc, _size) \
(GPT_L1_CONT_DESC_##_size | \
(((_desc) & GPT_L1_GRAN_DESC_GPI_MASK) << \
GPT_L1_CONT_DESC_GPI_SHIFT))
/* Create L1 Contiguous descriptor from GPI and size */
#define GPT_L1_CONT_DESC_SIZE(_gpi, _size) \
(GPT_L1_CONT_DESC_##_size | \
(((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT)))
#define GPT_L1_GPI_BYTE(_gpi) (uint64_t)((_gpi) | ((_gpi) << 4))
#define GPT_L1_GPI_HALF(_gpi) (GPT_L1_GPI_BYTE(_gpi) | (GPT_L1_GPI_BYTE(_gpi) << 8))
#define GPT_L1_GPI_WORD(_gpi) (GPT_L1_GPI_HALF(_gpi) | (GPT_L1_GPI_HALF(_gpi) << 16))
/*
* This macro fills out every GPI entry in a granules descriptor to the same
* value.
* This macro generates a Granules descriptor
* with the same value for every GPI entry.
*/
#define GPT_BUILD_L1_DESC(_gpi) (((uint64_t)(_gpi) << 4*0) | \
((uint64_t)(_gpi) << 4*1) | \
((uint64_t)(_gpi) << 4*2) | \
((uint64_t)(_gpi) << 4*3) | \
((uint64_t)(_gpi) << 4*4) | \
((uint64_t)(_gpi) << 4*5) | \
((uint64_t)(_gpi) << 4*6) | \
((uint64_t)(_gpi) << 4*7) | \
((uint64_t)(_gpi) << 4*8) | \
((uint64_t)(_gpi) << 4*9) | \
((uint64_t)(_gpi) << 4*10) | \
((uint64_t)(_gpi) << 4*11) | \
((uint64_t)(_gpi) << 4*12) | \
((uint64_t)(_gpi) << 4*13) | \
((uint64_t)(_gpi) << 4*14) | \
((uint64_t)(_gpi) << 4*15))
#define GPT_BUILD_L1_DESC(_gpi) (GPT_L1_GPI_WORD(_gpi) | (GPT_L1_GPI_WORD(_gpi) << 32))
#define GPT_L1_SECURE_DESC GPT_BUILD_L1_DESC(GPT_GPI_SECURE)
#define GPT_L1_NS_DESC GPT_BUILD_L1_DESC(GPT_GPI_NS)
#define GPT_L1_REALM_DESC GPT_BUILD_L1_DESC(GPT_GPI_REALM)
#define GPT_L1_ANY_DESC GPT_BUILD_L1_DESC(GPT_GPI_ANY)
/******************************************************************************/
/* GPT platform configuration */
@ -106,17 +141,46 @@ typedef enum {
PGS_64KB_P = 16U
} gpt_p_val_e;
#define LOCK_SIZE sizeof(((bitlock_t *)NULL)->lock)
#define LOCK_TYPE typeof(((bitlock_t *)NULL)->lock)
#define LOCK_BITS (LOCK_SIZE * 8U)
/*
* Internal structure to retrieve the values from get_gpi_info();
* Internal structure to retrieve the values from get_gpi_params();
*/
typedef struct gpi_info {
typedef struct {
uint64_t gpt_l1_desc;
uint64_t *gpt_l1_addr;
unsigned int idx;
unsigned int gpi_shift;
unsigned int gpi;
#if (RME_GPT_BITLOCK_BLOCK != 0)
bitlock_t *lock;
LOCK_TYPE mask;
#endif
} gpi_info_t;
/*
* Look up structure for contiguous blocks and descriptors
*/
typedef struct {
size_t size;
unsigned int desc;
} gpt_fill_lookup_t;
typedef void (*gpt_shatter_func)(uintptr_t base, const gpi_info_t *gpi_info,
uint64_t l1_desc);
typedef void (*gpt_tlbi_func)(uintptr_t base);
/*
* Look-up structure for
* invalidating TLBs of GPT entries by Physical address, last level.
*/
typedef struct {
gpt_tlbi_func function;
size_t mask;
} gpt_tlbi_lookup_t;
/* Max valid value for PGS */
#define GPT_PGS_MAX (2U)
@ -136,8 +200,8 @@ typedef struct gpi_info {
* special case we'll get a negative width value which does not make sense and
* would cause problems.
*/
#define GPT_L0_IDX_WIDTH(_t) (((_t) > GPT_S_VAL) ? \
((_t) - GPT_S_VAL) : (0U))
#define GPT_L0_IDX_WIDTH(_t) (((unsigned int)(_t) > GPT_S_VAL) ? \
((unsigned int)(_t) - GPT_S_VAL) : (0U))
/* Bit shift for the L0 index field in a PA */
#define GPT_L0_IDX_SHIFT (GPT_S_VAL)
@ -173,10 +237,11 @@ typedef struct gpi_info {
* the L0 index field above since all valid combinations of PGS (p) and L0GPTSZ
* (s) will result in a positive width value.
*/
#define GPT_L1_IDX_WIDTH(_p) ((GPT_S_VAL - 1U) - ((_p) + 3U))
#define GPT_L1_IDX_WIDTH(_p) ((GPT_S_VAL - 1U) - \
((unsigned int)(_p) + 3U))
/* Bit shift for the L1 index field */
#define GPT_L1_IDX_SHIFT(_p) ((_p) + 4U)
#define GPT_L1_IDX_SHIFT(_p) ((unsigned int)(_p) + 4U)
/*
* Mask for the L1 index field, must be shifted.
@ -196,7 +261,10 @@ typedef struct gpi_info {
#define GPT_L1_GPI_IDX_MASK (0xF)
/* Total number of entries in each L1 table */
#define GPT_L1_ENTRY_COUNT(_p) ((GPT_L1_IDX_MASK(_p)) + 1U)
#define GPT_L1_ENTRY_COUNT(_p) ((GPT_L1_IDX_MASK(_p)) + 1UL)
/* Number of L1 entries in 2MB block */
#define GPT_L1_ENTRY_COUNT_2MB(_p) (SZ_2M >> GPT_L1_IDX_SHIFT(_p))
/* Total size in bytes of each L1 table */
#define GPT_L1_TABLE_SIZE(_p) ((GPT_L1_ENTRY_COUNT(_p)) << 3U)
@ -206,10 +274,13 @@ typedef struct gpi_info {
/******************************************************************************/
/* Protected space actual size in bytes */
#define GPT_PPS_ACTUAL_SIZE(_t) (1UL << (_t))
#define GPT_PPS_ACTUAL_SIZE(_t) (1UL << (unsigned int)(_t))
/* Granule actual size in bytes */
#define GPT_PGS_ACTUAL_SIZE(_p) (1UL << (_p))
#define GPT_PGS_ACTUAL_SIZE(_p) (1UL << (unsigned int)(_p))
/* Number of granules in 2MB block */
#define GPT_PGS_COUNT_2MB(_p) (1UL << (21U - (unsigned int)(_p)))
/* L0 GPT region size in bytes */
#define GPT_L0GPTSZ_ACTUAL_SIZE (1UL << GPT_S_VAL)
@ -221,7 +292,8 @@ typedef struct gpi_info {
* This definition is used to determine if a physical address lies on an L0
* region boundary.
*/
#define GPT_IS_L0_ALIGNED(_pa) (((_pa) & (GPT_L0_REGION_SIZE - U(1))) == U(0))
#define GPT_IS_L0_ALIGNED(_pa) \
(((_pa) & (GPT_L0_REGION_SIZE - UL(1))) == UL(0))
/* Get the type field from an L0 descriptor */
#define GPT_L0_TYPE(_desc) (((_desc) >> GPT_L0_TYPE_SHIFT) & \
@ -246,16 +318,43 @@ typedef struct gpi_info {
(GPT_L0_TBL_DESC_L1ADDR_MASK << \
GPT_L0_TBL_DESC_L1ADDR_SHIFT))))
/* Get the GPI from L1 Contiguous descriptor */
#define GPT_L1_CONT_GPI(_desc) \
(((_desc) >> GPT_L1_CONT_DESC_GPI_SHIFT) & GPT_L1_CONT_DESC_GPI_MASK)
/* Get the GPI from L1 Granules descriptor */
#define GPT_L1_GRAN_GPI(_desc) ((_desc) & GPT_L1_GRAN_DESC_GPI_MASK)
/* Get the Contig from L1 Contiguous descriptor */
#define GPT_L1_CONT_CONTIG(_desc) \
(((_desc) >> GPT_L1_CONT_DESC_CONTIG_SHIFT) & \
GPT_L1_CONT_DESC_CONTIG_MASK)
/* Get the index into the L1 table from a physical address */
#define GPT_L1_IDX(_p, _pa) (((_pa) >> GPT_L1_IDX_SHIFT(_p)) & \
GPT_L1_IDX_MASK(_p))
#define GPT_L1_IDX(_p, _pa) \
(((_pa) >> GPT_L1_IDX_SHIFT(_p)) & GPT_L1_IDX_MASK(_p))
/* Get the index of the GPI within an L1 table entry from a physical address */
#define GPT_L1_GPI_IDX(_p, _pa) (((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & \
GPT_L1_GPI_IDX_MASK)
#define GPT_L1_GPI_IDX(_p, _pa) \
(((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & GPT_L1_GPI_IDX_MASK)
/* Determine if an address is granule-aligned */
#define GPT_IS_L1_ALIGNED(_p, _pa) (((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - U(1))) \
== U(0))
#define GPT_IS_L1_ALIGNED(_p, _pa) \
(((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - UL(1))) == UL(0))
/* Get aligned addresses */
#define ALIGN_2MB(_addr) ((_addr) & ~(SZ_2M - 1UL))
#define ALIGN_32MB(_addr) ((_addr) & ~(SZ_32M - 1UL))
#define ALIGN_512MB(_addr) ((_addr) & ~(SZ_512M - 1UL))
/* Determine if region is contiguous */
#define GPT_REGION_IS_CONT(_len, _addr, _size) \
(((_len) >= (_size)) && (((_addr) & ((_size) - UL(1))) == UL(0)))
/* Get 32MB block number in 512MB block: 0-15 */
#define GET_32MB_NUM(_addr) ((_addr >> 25) & 0xF)
/* Get 2MB block number in 32MB block: 0-15 */
#define GET_2MB_NUM(_addr) ((_addr >> 21) & 0xF)
#endif /* GPT_RME_PRIVATE_H */

View file

@ -139,6 +139,12 @@ FW_ENC_STATUS := 0
# For Chain of Trust
GENERATE_COT := 0
# Default number of 512MB blocks per bit of bitlock
RME_GPT_BITLOCK_BLOCK := 1
# Default maximum size of GPT contiguous block
RME_GPT_MAX_BLOCK := 2
# Hint platform interrupt control layer that Group 0 interrupts are for EL3. By
# default, they are for Secure EL1.
GICV2_G0_FOR_EL3 := 0

View file

@ -24,7 +24,7 @@ FVP_GICR_REGION_PROTECTION := 0
FVP_DT_PREFIX := fvp-base-gicv3-psci
# Size (in kilobytes) of the Trusted SRAM region to utilize when building for
# the FVP platform. This option defaults to 256.
FVP_TRUSTED_SRAM_SIZE := 256