From ec0088bbab9335c5273e57a84b81adf2201a51db Mon Sep 17 00:00:00 2001 From: AlexeiFedorov Date: Wed, 13 Mar 2024 17:07:03 +0000 Subject: [PATCH 1/2] feat(gpt): add support for large GPT mappings This patch adds support for large GPT mappings using Contiguous descriptors. The maximum size of supported contiguous block in MB is defined in RME_GPT_MAX_BLOCK build parameter and takes values 0, 2, 32 and 512 and by default set to 2 in make_helpers/defaults.mk. Setting RME_GPT_MAX_BLOCK value to 0 disables use of Contiguous descriptors. Function gpt_tlbi_by_pa_ll() and its declaration are removed from lib/aarch64/misc_helpers.S and include/arch/aarch64/arch_helpers.h, because the GPT library now uses tlbirpalos_xxx() functions. Change-Id: Ia9a59bde1741c5666b4ca1de9324e6dfd6f734eb Signed-off-by: AlexeiFedorov --- Makefile | 1 + docs/getting_started/build-options.rst | 5 + include/arch/aarch64/arch_helpers.h | 9 - lib/aarch64/misc_helpers.S | 20 +- lib/gpt_rme/gpt_rme.c | 982 ++++++++++++++++++++----- lib/gpt_rme/gpt_rme.mk | 7 +- lib/gpt_rme/gpt_rme_private.h | 169 ++++- make_helpers/defaults.mk | 3 + plat/arm/board/fvp/platform.mk | 2 +- 9 files changed, 960 insertions(+), 238 deletions(-) diff --git a/Makefile b/Makefile index 2ba4fa759..b6093e768 100644 --- a/Makefile +++ b/Makefile @@ -1331,6 +1331,7 @@ $(eval $(call add_defines,\ PSCI_EXTENDED_STATE_ID \ PSCI_OS_INIT_MODE \ RESET_TO_BL31 \ + RME_GPT_MAX_BLOCK \ SEPARATE_CODE_AND_RODATA \ SEPARATE_BL2_NOLOAD_REGION \ SEPARATE_NOBITS_REGION \ diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst index 6ab95f9b6..0d4261d93 100644 --- a/docs/getting_started/build-options.rst +++ b/docs/getting_started/build-options.rst @@ -812,6 +812,11 @@ Common build options instead of the BL1 entrypoint. It can take the value 0 (CPU reset to BL1 entrypoint) or 1 (CPU reset to SP_MIN entrypoint). The default value is 0. +- ``RME_GPT_MAX_BLOCK``: Numeric value in MB to define maximum size of + supported contiguous blocks in GPT Library. This parameter can take the + values 0, 2, 32 and 512. Setting this value to 0 disables use of Contigious + descriptors. Default value is 2. + - ``ROT_KEY``: This option is used when ``GENERATE_COT=1``. It specifies a file that contains the ROT private key in PEM format or a PKCS11 URI and enforces public key hash generation. If ``SAVE_KEYS=1``, only a file is diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h index 59adc7c90..1e2f84b15 100644 --- a/include/arch/aarch64/arch_helpers.h +++ b/include/arch/aarch64/arch_helpers.h @@ -807,15 +807,6 @@ static inline void tlbirpalos_512m(uintptr_t addr) TLBIRPALOS(addr, TLBI_SZ_512M); } -/* - * Invalidate TLBs of GPT entries by Physical address, last level. - * - * @pa: the starting address for the range - * of invalidation - * @size: size of the range of invalidation - */ -void gpt_tlbi_by_pa_ll(uint64_t pa, size_t size); - /* Previously defined accessor functions with incomplete register names */ #define read_current_el() read_CurrentEl() diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S index f9c4bafd5..93771df34 100644 --- a/lib/aarch64/misc_helpers.S +++ b/lib/aarch64/misc_helpers.S @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved. + * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved. 
* * SPDX-License-Identifier: BSD-3-Clause */ @@ -15,7 +15,6 @@ .globl zero_normalmem .globl zeromem .globl memcpy16 - .globl gpt_tlbi_by_pa_ll .globl disable_mmu_el1 .globl disable_mmu_el3 @@ -594,20 +593,3 @@ func fixup_gdt_reloc b.lo 1b ret endfunc fixup_gdt_reloc - -/* - * TODO: Currently only supports size of 4KB, - * support other sizes as well. - */ -func gpt_tlbi_by_pa_ll -#if ENABLE_ASSERTIONS - cmp x1, #PAGE_SIZE_4KB - ASM_ASSERT(eq) - tst x0, #(PAGE_SIZE_MASK) - ASM_ASSERT(eq) -#endif - lsr x0, x0, #FOUR_KB_SHIFT /* 4KB size encoding is zero */ - sys #6, c8, c4, #7, x0 /* TLBI RPALOS, */ - dsb sy - ret -endfunc gpt_tlbi_by_pa_ll diff --git a/lib/gpt_rme/gpt_rme.c b/lib/gpt_rme/gpt_rme.c index 72e905e18..7dec54893 100644 --- a/lib/gpt_rme/gpt_rme.c +++ b/lib/gpt_rme/gpt_rme.c @@ -57,6 +57,13 @@ static const gpt_t_val_e gpt_t_lookup[] = {PPS_4GB_T, PPS_64GB_T, */ static const gpt_p_val_e gpt_p_lookup[] = {PGS_4KB_P, PGS_64KB_P, PGS_16KB_P}; +static void shatter_2mb(uintptr_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc); +static void shatter_32mb(uintptr_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc); +static void shatter_512mb(uintptr_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc); + /* * This structure contains GPT configuration data */ @@ -70,10 +77,161 @@ typedef struct { static gpt_config_t gpt_config; +/* + * Number of L1 entries in 2MB, depending on GPCCR_EL3.PGS: + * +-------+------------+ + * | PGS | L1 entries | + * +-------+------------+ + * | 4KB | 32 | + * +-------+------------+ + * | 16KB | 8 | + * +-------+------------+ + * | 64KB | 2 | + * +-------+------------+ + */ +static unsigned int gpt_l1_cnt_2mb; + +/* + * Mask for the L1 index field, depending on + * GPCCR_EL3.L0GPTSZ and GPCCR_EL3.PGS: + * +---------+-------------------------------+ + * | | PGS | + * +---------+----------+----------+---------+ + * | L0GPTSZ | 4KB | 16KB | 64KB | + * +---------+----------+----------+---------+ + * | 1GB | 0x3FFF | 0xFFF | 0x3FF | + * +---------+----------+----------+---------+ + * | 16GB | 0x3FFFF | 0xFFFF | 0x3FFF | + * +---------+----------+----------+---------+ + * | 64GB | 0xFFFFF | 0x3FFFF | 0xFFFF | + * +---------+----------+----------+---------+ + * | 512GB | 0x7FFFFF | 0x1FFFFF | 0x7FFFF | + * +---------+----------+----------+---------+ + */ +static uint64_t gpt_l1_index_mask; + +/* Number of 128-bit L1 entries in 2MB, 32MB and 512MB */ +#define L1_QWORDS_2MB (gpt_l1_cnt_2mb / 2U) +#define L1_QWORDS_32MB (L1_QWORDS_2MB * 16U) +#define L1_QWORDS_512MB (L1_QWORDS_32MB * 16U) + +/* Size in bytes of L1 entries in 2MB, 32MB */ +#define L1_BYTES_2MB (gpt_l1_cnt_2mb * sizeof(uint64_t)) +#define L1_BYTES_32MB (L1_BYTES_2MB * 16U) + +/* Get the index into the L1 table from a physical address */ +#define GPT_L1_INDEX(_pa) \ + (((_pa) >> (unsigned int)GPT_L1_IDX_SHIFT(gpt_config.p)) & gpt_l1_index_mask) + /* These variables are used during initialization of the L1 tables */ -static unsigned int gpt_next_l1_tbl_idx; static uintptr_t gpt_l1_tbl; +/* These variable is used during runtime */ + +/* Bitlock base address for each 512 MB block of PPS */ +static bitlock_t *gpt_bitlock_base; + +static void tlbi_page_dsbosh(uintptr_t base) +{ + /* Look-up table for invalidation TLBs for 4KB, 16KB and 64KB pages */ + static const gpt_tlbi_lookup_t tlbi_page_lookup[] = { + { tlbirpalos_4k, ~(SZ_4K - 1UL) }, + { tlbirpalos_64k, ~(SZ_64K - 1UL) }, + { tlbirpalos_16k, ~(SZ_16K - 1UL) } + }; + + tlbi_page_lookup[gpt_config.pgs].function( + base & 
tlbi_page_lookup[gpt_config.pgs].mask); + dsbosh(); +} + +/* + * Helper function to fill out GPI entries in a single L1 table + * with Granules or Contiguous descriptor. + * + * Parameters + * l1 Pointer to 2MB, 32MB or 512MB aligned L1 table entry to fill out + * l1_desc GPT Granules or Contiguous descriptor set this range to + * cnt Number of double 128-bit L1 entries to fill + * + */ +static void fill_desc(uint64_t *l1, uint64_t l1_desc, unsigned int cnt) +{ + uint128_t *l1_quad = (uint128_t *)l1; + uint128_t l1_quad_desc = (uint128_t)l1_desc | ((uint128_t)l1_desc << 64); + + VERBOSE("GPT: %s(%p 0x%"PRIx64" %u)\n", __func__, l1, l1_desc, cnt); + + for (unsigned int i = 0U; i < cnt; i++) { + *l1_quad++ = l1_quad_desc; + } +} + +static void shatter_2mb(uintptr_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + unsigned long idx = GPT_L1_INDEX(ALIGN_2MB(base)); + + VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", + __func__, base, l1_desc); + + /* Convert 2MB Contiguous block to Granules */ + fill_desc(&gpi_info->gpt_l1_addr[idx], l1_desc, L1_QWORDS_2MB); +} + +static void shatter_32mb(uintptr_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + unsigned long idx = GPT_L1_INDEX(ALIGN_2MB(base)); + const uint64_t *l1_gran = &gpi_info->gpt_l1_addr[idx]; + uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 2MB); + uint64_t *l1; + + VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", + __func__, base, l1_desc); + + /* Get index corresponding to 32MB aligned address */ + idx = GPT_L1_INDEX(ALIGN_32MB(base)); + l1 = &gpi_info->gpt_l1_addr[idx]; + + /* 16 x 2MB blocks in 32MB */ + for (unsigned int i = 0U; i < 16U; i++) { + /* Fill with Granules or Contiguous descriptors */ + fill_desc(l1, (l1 == l1_gran) ? l1_desc : l1_cont_desc, + L1_QWORDS_2MB); + l1 = (uint64_t *)((uintptr_t)l1 + L1_BYTES_2MB); + } +} + +static void shatter_512mb(uintptr_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + unsigned long idx = GPT_L1_INDEX(ALIGN_32MB(base)); + const uint64_t *l1_32mb = &gpi_info->gpt_l1_addr[idx]; + uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 32MB); + uint64_t *l1; + + VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", + __func__, base, l1_desc); + + /* Get index corresponding to 512MB aligned address */ + idx = GPT_L1_INDEX(ALIGN_512MB(base)); + l1 = &gpi_info->gpt_l1_addr[idx]; + + /* 16 x 32MB blocks in 512MB */ + for (unsigned int i = 0U; i < 16U; i++) { + if (l1 == l1_32mb) { + /* Shatter this 32MB block */ + shatter_32mb(base, gpi_info, l1_desc); + } else { + /* Fill 32MB with Contiguous descriptors */ + fill_desc(l1, l1_cont_desc, L1_QWORDS_32MB); + } + + l1 = (uint64_t *)((uintptr_t)l1 + L1_BYTES_32MB); + } +} + /* * This function checks to see if a GPI value is valid. * @@ -213,10 +371,11 @@ static int validate_pas_mappings(pas_region_t *pas_regions, * to see if this PAS would fall into one that has already been * initialized. 
*/ - for (unsigned int i = GPT_L0_IDX(pas_regions[idx].base_pa); - i <= GPT_L0_IDX(pas_regions[idx].base_pa + - pas_regions[idx].size - 1UL); - i++) { + for (unsigned int i = + (unsigned int)GPT_L0_IDX(pas_regions[idx].base_pa); + i <= GPT_L0_IDX(pas_regions[idx].base_pa + + pas_regions[idx].size - 1UL); + i++) { if ((GPT_L0_TYPE(l0_desc[i]) == GPT_L0_TYPE_BLK_DESC) && (GPT_L0_BLKD_GPI(l0_desc[i]) == GPT_GPI_ANY)) { /* This descriptor is unused so continue */ @@ -227,7 +386,7 @@ static int validate_pas_mappings(pas_region_t *pas_regions, * This descriptor has been initialized in a previous * call to this function so cannot be initialized again. */ - ERROR("GPT: PAS[%u] overlaps with previous L0[%d]!\n", + ERROR("GPT: PAS[%u] overlaps with previous L0[%u]!\n", idx, i); return -EFAULT; } @@ -318,7 +477,7 @@ static int validate_pas_mappings(pas_region_t *pas_regions, static int validate_l0_params(gpccr_pps_e pps, uintptr_t l0_mem_base, size_t l0_mem_size) { - size_t l0_alignment; + size_t l0_alignment, locks_size; /* * Make sure PPS is valid and then store it since macros need this value @@ -346,10 +505,26 @@ static int validate_l0_params(gpccr_pps_e pps, uintptr_t l0_mem_base, /* Check size */ if (l0_mem_size < GPT_L0_TABLE_SIZE(gpt_config.t)) { - ERROR("%sL0%s\n", "GPT: Inadequate ", " memory\n"); + ERROR("%sL0%s\n", (const char *)"GPT: Inadequate ", + (const char *)" memory\n"); ERROR(" Expected 0x%lx bytes, got 0x%lx bytes\n", - GPT_L0_TABLE_SIZE(gpt_config.t), - l0_mem_size); + GPT_L0_TABLE_SIZE(gpt_config.t), l0_mem_size); + return -ENOMEM; + } + + /* + * Size of bitlocks in bytes for the protected address space + * with 512MB per bitlock. + */ + locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) / (SZ_512M * 8U); + + /* Check space for bitlocks */ + if (locks_size > (l0_mem_size - GPT_L0_TABLE_SIZE(gpt_config.t))) { + ERROR("%sbitlock%s", (const char *)"GPT: Inadequate ", + (const char *)" memory\n"); + ERROR(" Expected 0x%lx bytes, got 0x%lx bytes\n", + locks_size, + l0_mem_size - GPT_L0_TABLE_SIZE(gpt_config.t)); return -ENOMEM; } @@ -397,9 +572,10 @@ static int validate_l1_params(uintptr_t l1_mem_base, size_t l1_mem_size, /* Make sure enough space was supplied */ if (l1_mem_size < l1_gpt_mem_sz) { - ERROR("%sL1 GPTs%s", "GPT: Inadequate ", " memory\n"); + ERROR("%sL1 GPTs%s", (const char *)"GPT: Inadequate ", + (const char *)" memory\n"); ERROR(" Expected 0x%lx bytes, got 0x%lx bytes\n", - l1_gpt_mem_sz, l1_mem_size); + l1_gpt_mem_sz, l1_mem_size); return -ENOMEM; } @@ -418,8 +594,7 @@ static int validate_l1_params(uintptr_t l1_mem_base, size_t l1_mem_size, static void generate_l0_blk_desc(pas_region_t *pas) { uint64_t gpt_desc; - unsigned int end_idx; - unsigned int idx; + unsigned long idx, end_idx; uint64_t *l0_gpt_arr; assert(gpt_config.plat_gpt_l0_base != 0U); @@ -448,7 +623,7 @@ static void generate_l0_blk_desc(pas_region_t *pas) /* Generate the needed block descriptors */ for (; idx < end_idx; idx++) { l0_gpt_arr[idx] = gpt_desc; - VERBOSE("GPT: L0 entry (BLOCK) index %u [%p]: GPI = 0x%"PRIx64" (0x%"PRIx64")\n", + VERBOSE("GPT: L0 entry (BLOCK) index %lu [%p]: GPI = 0x%"PRIx64" (0x%"PRIx64")\n", idx, &l0_gpt_arr[idx], (gpt_desc >> GPT_L0_BLK_DESC_GPI_SHIFT) & GPT_L0_BLK_DESC_GPI_MASK, l0_gpt_arr[idx]); @@ -482,51 +657,199 @@ static uintptr_t get_l1_end_pa(uintptr_t cur_pa, uintptr_t end_pa) return end_pa; } - return (cur_idx + 1U) << GPT_L0_IDX_SHIFT; + return (cur_idx + 1UL) << GPT_L0_IDX_SHIFT; } /* - * Helper function to fill out GPI entries in a single L1 table. 
This function - * fills out entire L1 descriptors at a time to save memory writes. + * Helper function to fill out GPI entries from 'first' granule address of + * the specified 'length' in a single L1 table with 'l1_desc' Contiguous + * descriptor. * * Parameters - * gpi GPI to set this range to * l1 Pointer to L1 table to fill out - * first Address of first granule in range. - * last Address of last granule in range (inclusive). + * first Address of first granule in range + * length Length of the range in bytes + * gpi GPI set this range to + * + * Return + * Address of next granule in range. */ -static void fill_l1_tbl(uint64_t gpi, uint64_t *l1, uintptr_t first, - uintptr_t last) +static uintptr_t fill_l1_cont_desc(uint64_t *l1, uintptr_t first, + size_t length, unsigned int gpi) { - uint64_t gpi_field = GPT_BUILD_L1_DESC(gpi); - uint64_t gpi_mask = ULONG_MAX; + /* + * Look up table for contiguous blocks and descriptors. + * Entries should be defined in descending block sizes: + * 512MB, 32MB and 2MB. + */ + static const gpt_fill_lookup_t gpt_fill_lookup[] = { +#if (RME_GPT_MAX_BLOCK == 512) + { SZ_512M, GPT_L1_CONT_DESC_512MB }, +#endif +#if (RME_GPT_MAX_BLOCK >= 32) + { SZ_32M, GPT_L1_CONT_DESC_32MB }, +#endif +#if (RME_GPT_MAX_BLOCK != 0) + { SZ_2M, GPT_L1_CONT_DESC_2MB } +#endif + }; - assert(first <= last); - assert((first & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1)) == 0U); - assert((last & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1)) == 0U); - assert(GPT_L0_IDX(first) == GPT_L0_IDX(last)); - assert(l1 != NULL); + /* + * Iterate through all block sizes (512MB, 32MB and 2MB) + * starting with maximum supported. + */ + for (unsigned long i = 0UL; i < ARRAY_SIZE(gpt_fill_lookup); i++) { + /* Calculate index */ + unsigned long idx = GPT_L1_INDEX(first); + + /* Contiguous block size */ + size_t cont_size = gpt_fill_lookup[i].size; + + if (GPT_REGION_IS_CONT(length, first, cont_size)) { + + /* Generate Contiguous descriptor */ + uint64_t l1_desc = GPT_L1_GPI_CONT_DESC(gpi, + gpt_fill_lookup[i].desc); + + /* Number of 128-bit L1 entries in block */ + unsigned int cnt; + + switch (cont_size) { + case SZ_512M: + cnt = L1_QWORDS_512MB; + break; + case SZ_32M: + cnt = L1_QWORDS_32MB; + break; + default: /* SZ_2MB */ + cnt = L1_QWORDS_2MB; + } + + VERBOSE("GPT: Contiguous descriptor 0x%"PRIxPTR" %luMB\n", + first, cont_size / SZ_1M); + + /* Fill Contiguous descriptors */ + fill_desc(&l1[idx], l1_desc, cnt); + first += cont_size; + length -= cont_size; + + if (length == 0UL) { + break; + } + } + } + + return first; +} + +/* Build Granules descriptor with the same 'gpi' for every GPI entry */ +static uint64_t build_l1_desc(unsigned int gpi) +{ + uint64_t l1_desc = (uint64_t)gpi | ((uint64_t)gpi << 4); + + l1_desc |= (l1_desc << 8); + l1_desc |= (l1_desc << 16); + return (l1_desc | (l1_desc << 32)); +} + +/* + * Helper function to fill out GPI entries from 'first' to 'last' granule + * address in a single L1 table with 'l1_desc' Granules descriptor. + * + * Parameters + * l1 Pointer to L1 table to fill out + * first Address of first granule in range + * last Address of last granule in range (inclusive) + * gpi GPI set this range to + * + * Return + * Address of next granule in range. 
+ */ +static uintptr_t fill_l1_gran_desc(uint64_t *l1, uintptr_t first, + uintptr_t last, unsigned int gpi) +{ + uint64_t gpi_mask; + unsigned long i; + + /* Generate Granules descriptor */ + uint64_t l1_desc = build_l1_desc(gpi); /* Shift the mask if we're starting in the middle of an L1 entry */ - gpi_mask = gpi_mask << (GPT_L1_GPI_IDX(gpt_config.p, first) << 2); + gpi_mask = ULONG_MAX << (GPT_L1_GPI_IDX(gpt_config.p, first) << 2); /* Fill out each L1 entry for this region */ - for (unsigned int i = GPT_L1_IDX(gpt_config.p, first); - i <= GPT_L1_IDX(gpt_config.p, last); i++) { + for (i = GPT_L1_INDEX(first); i <= GPT_L1_INDEX(last); i++) { + /* Account for stopping in the middle of an L1 entry */ - if (i == GPT_L1_IDX(gpt_config.p, last)) { + if (i == GPT_L1_INDEX(last)) { gpi_mask &= (gpi_mask >> ((15U - GPT_L1_GPI_IDX(gpt_config.p, last)) << 2)); } + assert((l1[i] & gpi_mask) == (GPT_L1_ANY_DESC & gpi_mask)); + /* Write GPI values */ - assert((l1[i] & gpi_mask) == - (GPT_BUILD_L1_DESC(GPT_GPI_ANY) & gpi_mask)); - l1[i] = (l1[i] & ~gpi_mask) | (gpi_mask & gpi_field); + l1[i] = (l1[i] & ~gpi_mask) | (l1_desc & gpi_mask); /* Reset mask */ gpi_mask = ULONG_MAX; } + + return last + GPT_PGS_ACTUAL_SIZE(gpt_config.p); +} + +/* + * Helper function to fill out GPI entries in a single L1 table. + * This function fills out an entire L1 table with either Contiguous + * or Granules descriptors depending on region length and alignment. + * + * Parameters + * l1 Pointer to L1 table to fill out + * first Address of first granule in range + * last Address of last granule in range (inclusive) + * gpi GPI set this range to + */ +static void fill_l1_tbl(uint64_t *l1, uintptr_t first, uintptr_t last, + unsigned int gpi) +{ + assert(l1 != NULL); + assert(first <= last); + assert((first & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1UL)) == 0UL); + assert((last & (GPT_PGS_ACTUAL_SIZE(gpt_config.p) - 1UL)) == 0UL); + assert(GPT_L0_IDX(first) == GPT_L0_IDX(last)); + + while (first < last) { + /* Region length */ + size_t length = last - first + GPT_PGS_ACTUAL_SIZE(gpt_config.p); + + if (length < SZ_2M) { + /* + * Fill with Granule descriptor in case of + * region length < 2MB. + */ + first = fill_l1_gran_desc(l1, first, last, gpi); + + } else if ((first & (SZ_2M - UL(1))) == UL(0)) { + /* + * For region length >= 2MB and at least 2MB aligned + * call to fill_l1_cont_desc will iterate through + * all block sizes (512MB, 32MB and 2MB) supported and + * fill corresponding Contiguous descriptors. + */ + first = fill_l1_cont_desc(l1, first, length, gpi); + } else { + /* + * For not aligned region >= 2MB fill with Granules + * descriptors up to the next 2MB aligned address. 
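+			 *
+			 * For example, with 4KB granules a region starting at
+			 * 0x1F0000 is filled with Granules descriptors up to
+			 * 0x1FF000, and the next loop iteration continues
+			 * from the 2MB aligned address 0x200000.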
+ */ + uintptr_t new_last = ALIGN_2MB(first + SZ_2M) - + GPT_PGS_ACTUAL_SIZE(gpt_config.p); + + first = fill_l1_gran_desc(l1, first, new_last, gpi); + } + } + + assert(first == (last + GPT_PGS_ACTUAL_SIZE(gpt_config.p))); } /* @@ -543,16 +866,14 @@ static void fill_l1_tbl(uint64_t gpi, uint64_t *l1, uintptr_t first, static uint64_t *get_new_l1_tbl(void) { /* Retrieve the next L1 table */ - uint64_t *l1 = (uint64_t *)((uint64_t)(gpt_l1_tbl) + - (GPT_L1_TABLE_SIZE(gpt_config.p) * - gpt_next_l1_tbl_idx)); + uint64_t *l1 = (uint64_t *)gpt_l1_tbl; - /* Increment L1 counter */ - gpt_next_l1_tbl_idx++; + /* Increment L1 GPT address */ + gpt_l1_tbl += GPT_L1_TABLE_SIZE(gpt_config.p); /* Initialize all GPIs to GPT_GPI_ANY */ for (unsigned int i = 0U; i < GPT_L1_ENTRY_COUNT(gpt_config.p); i++) { - l1[i] = GPT_BUILD_L1_DESC(GPT_GPI_ANY); + l1[i] = GPT_L1_ANY_DESC; } return l1; @@ -573,7 +894,7 @@ static void generate_l0_tbl_desc(pas_region_t *pas) uintptr_t last_gran_pa; uint64_t *l0_gpt_base; uint64_t *l1_gpt_arr; - unsigned int l0_idx; + unsigned int l0_idx, gpi; assert(gpt_config.plat_gpt_l0_base != 0U); assert(pas != NULL); @@ -582,18 +903,19 @@ static void generate_l0_tbl_desc(pas_region_t *pas) * Checking of PAS parameters has already been done in * validate_pas_mappings so no need to check the same things again. */ - end_pa = pas->base_pa + pas->size; l0_gpt_base = (uint64_t *)gpt_config.plat_gpt_l0_base; /* We start working from the granule at base PA */ cur_pa = pas->base_pa; - /* Iterate over each L0 region in this memory range */ - for (l0_idx = GPT_L0_IDX(pas->base_pa); - l0_idx <= GPT_L0_IDX(end_pa - 1U); - l0_idx++) { + /* Get GPI */ + gpi = GPT_PAS_ATTR_GPI(pas->attrs); + /* Iterate over each L0 region in this memory range */ + for (l0_idx = (unsigned int)GPT_L0_IDX(pas->base_pa); + l0_idx <= (unsigned int)GPT_L0_IDX(end_pa - 1UL); + l0_idx++) { /* * See if the L0 entry is already a table descriptor or if we * need to create one. @@ -623,8 +945,7 @@ static void generate_l0_tbl_desc(pas_region_t *pas) * function needs the addresses of the first granule and last * granule in the range. */ - fill_l1_tbl(GPT_PAS_ATTR_GPI(pas->attrs), l1_gpt_arr, - cur_pa, last_gran_pa); + fill_l1_tbl(l1_gpt_arr, cur_pa, last_gran_pa, gpi); /* Advance cur_pa to first granule in next L0 region */ cur_pa = get_l1_end_pa(cur_pa, end_pa); @@ -644,9 +965,9 @@ static void generate_l0_tbl_desc(pas_region_t *pas) */ static void flush_l0_for_pas_array(pas_region_t *pas, unsigned int pas_count) { - unsigned int idx; - unsigned int start_idx; - unsigned int end_idx; + unsigned long idx; + unsigned long start_idx; + unsigned long end_idx; uint64_t *l0 = (uint64_t *)gpt_config.plat_gpt_l0_base; assert(pas != NULL); @@ -657,7 +978,7 @@ static void flush_l0_for_pas_array(pas_region_t *pas, unsigned int pas_count) end_idx = GPT_L0_IDX(pas[0].base_pa + pas[0].size - 1UL); /* Find lowest and highest L0 indices used in this PAS array */ - for (idx = 1U; idx < pas_count; idx++) { + for (idx = 1UL; idx < pas_count; idx++) { if (GPT_L0_IDX(pas[idx].base_pa) < start_idx) { start_idx = GPT_L0_IDX(pas[idx].base_pa); } @@ -671,7 +992,7 @@ static void flush_l0_for_pas_array(pas_region_t *pas, unsigned int pas_count) * the end index value. 
*/ flush_dcache_range((uintptr_t)&l0[start_idx], - ((end_idx + 1U) - start_idx) * sizeof(uint64_t)); + ((end_idx + 1UL) - start_idx) * sizeof(uint64_t)); } /* @@ -767,8 +1088,10 @@ void gpt_disable(void) int gpt_init_l0_tables(gpccr_pps_e pps, uintptr_t l0_mem_base, size_t l0_mem_size) { - int ret; uint64_t gpt_desc; + size_t locks_size; + bitlock_t *bit_locks; + int ret; /* Ensure that MMU and Data caches are enabled */ assert((read_sctlr_el3() & SCTLR_C_BIT) != 0U); @@ -787,9 +1110,20 @@ int gpt_init_l0_tables(gpccr_pps_e pps, uintptr_t l0_mem_base, ((uint64_t *)l0_mem_base)[i] = gpt_desc; } - /* Flush updated L0 tables to memory */ + /* Initialise bitlocks at the end of L0 table */ + bit_locks = (bitlock_t *)(l0_mem_base + + GPT_L0_TABLE_SIZE(gpt_config.t)); + + /* Size of bitlocks in bytes */ + locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) / (SZ_512M * 8U); + + for (size_t i = 0UL; i < (locks_size/LOCK_SIZE); i++) { + bit_locks[i].lock = 0U; + } + + /* Flush updated L0 tables and bitlocks to memory */ flush_dcache_range((uintptr_t)l0_mem_base, - (size_t)GPT_L0_TABLE_SIZE(gpt_config.t)); + GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size); /* Stash the L0 base address once initial setup is complete */ gpt_config.plat_gpt_l0_base = l0_mem_base; @@ -806,7 +1140,7 @@ int gpt_init_l0_tables(gpccr_pps_e pps, uintptr_t l0_mem_base, * This function can be called multiple times with different L1 memory ranges * and PAS regions if it is desirable to place L1 tables in different locations * in memory. (ex: you have multiple DDR banks and want to place the L1 tables - * in the DDR bank that they control) + * in the DDR bank that they control). * * Parameters * pgs PGS value to use for table generation. @@ -822,8 +1156,7 @@ int gpt_init_pas_l1_tables(gpccr_pgs_e pgs, uintptr_t l1_mem_base, size_t l1_mem_size, pas_region_t *pas_regions, unsigned int pas_count) { - int ret; - int l1_gpt_cnt; + int l1_gpt_cnt, ret; /* Ensure that MMU and Data caches are enabled */ assert((read_sctlr_el3() & SCTLR_C_BIT) != 0U); @@ -860,9 +1193,14 @@ int gpt_init_pas_l1_tables(gpccr_pgs_e pgs, uintptr_t l1_mem_base, /* Set up parameters for L1 table generation */ gpt_l1_tbl = l1_mem_base; - gpt_next_l1_tbl_idx = 0U; } + /* Number of L1 entries in 2MB depends on GPCCR_EL3.PGS value */ + gpt_l1_cnt_2mb = (unsigned int)GPT_L1_ENTRY_COUNT_2MB(gpt_config.p); + + /* Mask for the L1 index field */ + gpt_l1_index_mask = GPT_L1_IDX_MASK(gpt_config.p); + INFO("GPT: Boot Configuration\n"); INFO(" PPS/T: 0x%x/%u\n", gpt_config.pps, gpt_config.t); INFO(" PGS/P: 0x%x/%u\n", gpt_config.pgs, gpt_config.p); @@ -894,7 +1232,7 @@ int gpt_init_pas_l1_tables(gpccr_pgs_e pgs, uintptr_t l1_mem_base, if (l1_gpt_cnt > 0) { flush_dcache_range(l1_mem_base, GPT_L1_TABLE_SIZE(gpt_config.p) * - l1_gpt_cnt); + (size_t)l1_gpt_cnt); } /* Make sure that all the entries are written to the memory */ @@ -946,22 +1284,26 @@ int gpt_runtime_init(void) gpt_config.pgs = (reg >> GPCCR_PGS_SHIFT) & GPCCR_PGS_MASK; gpt_config.p = gpt_p_lookup[gpt_config.pgs]; + /* Number of L1 entries in 2MB depends on GPCCR_EL3.PGS value */ + gpt_l1_cnt_2mb = (unsigned int)GPT_L1_ENTRY_COUNT_2MB(gpt_config.p); + + /* Mask for the L1 index field */ + gpt_l1_index_mask = GPT_L1_IDX_MASK(gpt_config.p); + + /* Bitlocks at the end of L0 table */ + gpt_bitlock_base = (bitlock_t *)(gpt_config.plat_gpt_l0_base + + GPT_L0_TABLE_SIZE(gpt_config.t)); + VERBOSE("GPT: Runtime Configuration\n"); VERBOSE(" PPS/T: 0x%x/%u\n", gpt_config.pps, gpt_config.t); VERBOSE(" PGS/P: 0x%x/%u\n", 
gpt_config.pgs, gpt_config.p); VERBOSE(" L0GPTSZ/S: 0x%x/%u\n", GPT_L0GPTSZ, GPT_S_VAL); VERBOSE(" L0 base: 0x%"PRIxPTR"\n", gpt_config.plat_gpt_l0_base); + VERBOSE(" Bitlocks: 0x%"PRIxPTR"\n", (uintptr_t)gpt_bitlock_base); return 0; } -/* - * The L1 descriptors are protected by a spinlock to ensure that multiple - * CPUs do not attempt to change the descriptors at once. In the future it - * would be better to have separate spinlocks for each L1 descriptor. - */ -static spinlock_t gpt_lock; - /* * A helper to write the value (target_pas << gpi_shift) to the index of * the gpt_l1_addr. @@ -973,6 +1315,8 @@ static inline void write_gpt(uint64_t *gpt_l1_desc, uint64_t *gpt_l1_addr, *gpt_l1_desc &= ~(GPT_L1_GRAN_DESC_GPI_MASK << gpi_shift); *gpt_l1_desc |= ((uint64_t)target_pas << gpi_shift); gpt_l1_addr[idx] = *gpt_l1_desc; + + dsboshst(); } /* @@ -982,6 +1326,7 @@ static inline void write_gpt(uint64_t *gpt_l1_desc, uint64_t *gpt_l1_addr, static int get_gpi_params(uint64_t base, gpi_info_t *gpi_info) { uint64_t gpt_l0_desc, *gpt_l0_base; + unsigned int idx_512; gpt_l0_base = (uint64_t *)gpt_config.plat_gpt_l0_base; gpt_l0_desc = gpt_l0_base[GPT_L0_IDX(base)]; @@ -993,19 +1338,310 @@ static int get_gpi_params(uint64_t base, gpi_info_t *gpi_info) /* Get the table index and GPI shift from PA */ gpi_info->gpt_l1_addr = GPT_L0_TBLD_ADDR(gpt_l0_desc); - gpi_info->idx = GPT_L1_IDX(gpt_config.p, base); + gpi_info->idx = (unsigned int)GPT_L1_INDEX(base); gpi_info->gpi_shift = GPT_L1_GPI_IDX(gpt_config.p, base) << 2; - gpi_info->gpt_l1_desc = (gpi_info->gpt_l1_addr)[gpi_info->idx]; - gpi_info->gpi = (gpi_info->gpt_l1_desc >> gpi_info->gpi_shift) & - GPT_L1_GRAN_DESC_GPI_MASK; + /* 512MB block index */ + idx_512 = (unsigned int)(base / SZ_512M); + + /* Bitlock address and mask */ + gpi_info->lock = &gpt_bitlock_base[idx_512 / LOCK_BITS]; + gpi_info->mask = 1U << (idx_512 & (LOCK_BITS - 1U)); + return 0; } +/* + * Helper to retrieve the gpt_l1_desc and GPI information from gpi_info. + * This function is called with bitlock acquired. + */ +static void read_gpi(gpi_info_t *gpi_info) +{ + gpi_info->gpt_l1_desc = (gpi_info->gpt_l1_addr)[gpi_info->idx]; + + if ((gpi_info->gpt_l1_desc & GPT_L1_TYPE_CONT_DESC_MASK) == + GPT_L1_TYPE_CONT_DESC) { + /* Read GPI from Contiguous descriptor */ + gpi_info->gpi = (unsigned int)GPT_L1_CONT_GPI(gpi_info->gpt_l1_desc); + } else { + /* Read GPI from Granules descriptor */ + gpi_info->gpi = (unsigned int)((gpi_info->gpt_l1_desc >> gpi_info->gpi_shift) & + GPT_L1_GRAN_DESC_GPI_MASK); + } +} + +static void flush_page_to_popa(uintptr_t addr) +{ + size_t size = GPT_PGS_ACTUAL_SIZE(gpt_config.p); + + if (is_feat_mte2_supported()) { + flush_dcache_to_popa_range_mte2(addr, size); + } else { + flush_dcache_to_popa_range(addr, size); + } +} + +/* + * Helper function to check if all L1 entries in 2MB block have + * the same Granules descriptor value. + * + * Parameters + * base Base address of the region to be checked + * gpi_info Pointer to 'gpt_config_t' structure + * l1_desc GPT Granules descriptor with all entries + * set to the same GPI. + * + * Return + * true if L1 all entries have the same descriptor value, false otherwise. 
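+ *
+ * For example, with 4KB granules a 2MB block spans gpt_l1_cnt_2mb = 32
+ * consecutive L1 entries; all 32 must hold the same Granules descriptor
+ * 'l1_desc' for the block to be eligible for fusing.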
+ */ +__unused static bool check_fuse_2mb(uint64_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* Last L1 entry index in 2MB block */ + unsigned int long idx = GPT_L1_INDEX(ALIGN_2MB(base)) + + gpt_l1_cnt_2mb - 1UL; + + /* Number of L1 entries in 2MB block */ + unsigned int cnt = gpt_l1_cnt_2mb; + + /* + * Start check from the last L1 entry and continue until the first + * non-matching to the passed Granules descriptor value is found. + */ + while (cnt-- != 0U) { + if (gpi_info->gpt_l1_addr[idx--] != l1_desc) { + /* Non-matching L1 entry found */ + return false; + } + } + + return true; +} + +__unused static void fuse_2mb(uint64_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* L1 entry index of the start of 2MB block */ + unsigned long idx_2 = GPT_L1_INDEX(ALIGN_2MB(base)); + + /* 2MB Contiguous descriptor */ + uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 2MB); + + VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", __func__, base, l1_desc); + + fill_desc(&gpi_info->gpt_l1_addr[idx_2], l1_cont_desc, L1_QWORDS_2MB); +} + +/* + * Helper function to check if all 1st L1 entries of 2MB blocks + * in 32MB have the same 2MB Contiguous descriptor value. + * + * Parameters + * base Base address of the region to be checked + * gpi_info Pointer to 'gpt_config_t' structure + * l1_desc GPT Granules descriptor. + * + * Return + * true if all L1 entries have the same descriptor value, false otherwise. + */ +__unused static bool check_fuse_32mb(uint64_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* The 1st L1 entry index of the last 2MB block in 32MB */ + unsigned long idx = GPT_L1_INDEX(ALIGN_32MB(base)) + + (15UL * gpt_l1_cnt_2mb); + + /* 2MB Contiguous descriptor */ + uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 2MB); + + /* Number of 2MB blocks in 32MB */ + unsigned int cnt = 16U; + + /* Set the first L1 entry to 2MB Contiguous descriptor */ + gpi_info->gpt_l1_addr[GPT_L1_INDEX(ALIGN_2MB(base))] = l1_cont_desc; + + /* + * Start check from the 1st L1 entry of the last 2MB block and + * continue until the first non-matching to 2MB Contiguous descriptor + * value is found. + */ + while (cnt-- != 0U) { + if (gpi_info->gpt_l1_addr[idx] != l1_cont_desc) { + /* Non-matching L1 entry found */ + return false; + } + idx -= gpt_l1_cnt_2mb; + } + + return true; +} + +__unused static void fuse_32mb(uint64_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* L1 entry index of the start of 32MB block */ + unsigned long idx_32 = GPT_L1_INDEX(ALIGN_32MB(base)); + + /* 32MB Contiguous descriptor */ + uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 32MB); + + VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", __func__, base, l1_desc); + + fill_desc(&gpi_info->gpt_l1_addr[idx_32], l1_cont_desc, L1_QWORDS_32MB); +} + +/* + * Helper function to check if all 1st L1 entries of 32MB blocks + * in 512MB have the same 32MB Contiguous descriptor value. + * + * Parameters + * base Base address of the region to be checked + * gpi_info Pointer to 'gpt_config_t' structure + * l1_desc GPT Granules descriptor. + * + * Return + * true if all L1 entries have the same descriptor value, false otherwise. 
+ */ +__unused static bool check_fuse_512mb(uint64_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* The 1st L1 entry index of the last 32MB block in 512MB */ + unsigned long idx = GPT_L1_INDEX(ALIGN_512MB(base)) + + (15UL * 16UL * gpt_l1_cnt_2mb); + + /* 32MB Contiguous descriptor */ + uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 32MB); + + /* Number of 32MB blocks in 512MB */ + unsigned int cnt = 16U; + + /* Set the first L1 entry to 2MB Contiguous descriptor */ + gpi_info->gpt_l1_addr[GPT_L1_INDEX(ALIGN_32MB(base))] = l1_cont_desc; + + /* + * Start check from the 1st L1 entry of the last 32MB block and + * continue until the first non-matching to 32MB Contiguous descriptor + * value is found. + */ + while (cnt-- != 0U) { + if (gpi_info->gpt_l1_addr[idx] != l1_cont_desc) { + /* Non-matching L1 entry found */ + return false; + } + idx -= 16UL * gpt_l1_cnt_2mb; + } + + return true; +} + +__unused static void fuse_512mb(uint64_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* L1 entry index of the start of 512MB block */ + unsigned long idx_512 = GPT_L1_INDEX(ALIGN_512MB(base)); + + /* 512MB Contiguous descriptor */ + uint64_t l1_cont_desc = GPT_L1_CONT_DESC(l1_desc, 512MB); + + VERBOSE("GPT: %s(0x%"PRIxPTR" 0x%"PRIx64")\n", __func__, base, l1_desc); + + fill_desc(&gpi_info->gpt_l1_addr[idx_512], l1_cont_desc, L1_QWORDS_512MB); +} + +/* + * Helper function to convert GPI entries in a single L1 table + * from Granules to Contiguous descriptor. + * + * Parameters + * base Base address of the region to be written + * gpi_info Pointer to 'gpt_config_t' structure + * l1_desc GPT Granules descriptor with all entries + * set to the same GPI. + */ +__unused static void fuse_block(uint64_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* Start with check for 2MB block */ + if (!check_fuse_2mb(base, gpi_info, l1_desc)) { + /* Check for 2MB fusing failed */ + return; + } + +#if (RME_GPT_MAX_BLOCK == 2) + fuse_2mb(base, gpi_info, l1_desc); +#else + /* Check for 32MB block */ + if (!check_fuse_32mb(base, gpi_info, l1_desc)) { + /* Check for 32MB fusing failed, fuse to 2MB */ + fuse_2mb(base, gpi_info, l1_desc); + return; + } + +#if (RME_GPT_MAX_BLOCK == 32) + fuse_32mb(base, gpi_info, l1_desc); +#else + /* Check for 512MB block */ + if (!check_fuse_512mb(base, gpi_info, l1_desc)) { + /* Check for 512MB fusing failed, fuse to 32MB */ + fuse_32mb(base, gpi_info, l1_desc); + return; + } + + /* Fuse to 512MB */ + fuse_512mb(base, gpi_info, l1_desc); + +#endif /* RME_GPT_MAX_BLOCK == 32 */ +#endif /* RME_GPT_MAX_BLOCK == 2 */ +} + +/* + * Helper function to convert GPI entries in a single L1 table + * from Contiguous to Granules descriptor. This function updates + * descriptor to Granules in passed 'gpt_config_t' structure as + * the result of shuttering. + * + * Parameters + * base Base address of the region to be written + * gpi_info Pointer to 'gpt_config_t' structure + * l1_desc GPT Granules descriptor set this range to. 
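+ *
+ * The Contig field of the current Contiguous descriptor (1, 2 or 3)
+ * selects the shattering and TLB invalidation routines for a 2MB, 32MB
+ * or 512MB block respectively.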
+ */ +__unused static void shatter_block(uint64_t base, gpi_info_t *gpi_info, + uint64_t l1_desc) +{ + /* Look-up table for 2MB, 32MB and 512MB locks shattering */ + static const gpt_shatter_func gpt_shatter_lookup[] = { + shatter_2mb, + shatter_32mb, + shatter_512mb + }; + + /* Look-up table for invalidation TLBs for 2MB, 32MB and 512MB blocks */ + static const gpt_tlbi_lookup_t tlbi_lookup[] = { + { tlbirpalos_2m, ~(SZ_2M - 1UL) }, + { tlbirpalos_32m, ~(SZ_32M - 1UL) }, + { tlbirpalos_512m, ~(SZ_512M - 1UL) } + }; + + /* Get shattering level from Contig field of Contiguous descriptor */ + unsigned long level = GPT_L1_CONT_CONTIG(gpi_info->gpt_l1_desc) - 1UL; + + /* Shatter contiguous block */ + gpt_shatter_lookup[level](base, gpi_info, l1_desc); + + tlbi_lookup[level].function(base & tlbi_lookup[level].mask); + dsbosh(); + + /* + * Update 'gpt_config_t' structure's descriptor to Granules to reflect + * the shattered GPI back to caller. + */ + gpi_info->gpt_l1_desc = l1_desc; +} + /* * This function is the granule transition delegate service. When a granule * transition request occurs it is routed to this function to have the request, - * if valid, fulfilled following A1.1.1 Delegate of RME supplement + * if valid, fulfilled following A1.1.1 Delegate of RME supplement. * * TODO: implement support for transitioning multiple granules at once. * @@ -1022,9 +1658,9 @@ static int get_gpi_params(uint64_t base, gpi_info_t *gpi_info) int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state) { gpi_info_t gpi_info; - uint64_t nse; - int res; + uint64_t nse, __unused l1_desc; unsigned int target_pas; + int res; /* Ensure that the tables have been set up before taking requests */ assert(gpt_config.plat_gpt_l0_base != 0UL); @@ -1032,10 +1668,6 @@ int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state) /* Ensure that caches are enabled */ assert((read_sctlr_el3() & SCTLR_C_BIT) != 0UL); - /* Delegate request can only come from REALM or SECURE */ - assert(src_sec_state == SMC_FROM_REALM || - src_sec_state == SMC_FROM_SECURE); - /* See if this is a single or a range of granule transition */ if (size != GPT_PGS_ACTUAL_SIZE(gpt_config.p)) { return -EINVAL; @@ -1060,70 +1692,82 @@ int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state) return -EINVAL; } - target_pas = GPT_GPI_REALM; - if (src_sec_state == SMC_FROM_SECURE) { + /* Delegate request can only come from REALM or SECURE */ + if ((src_sec_state != SMC_FROM_REALM) && + (src_sec_state != SMC_FROM_SECURE)) { + VERBOSE("GPT: Invalid caller security state 0x%x\n", + src_sec_state); + return -EINVAL; + } + + if (src_sec_state == SMC_FROM_REALM) { + target_pas = GPT_GPI_REALM; + nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT; + l1_desc = GPT_L1_REALM_DESC; + } else { target_pas = GPT_GPI_SECURE; + nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT; + l1_desc = GPT_L1_SECURE_DESC; + } + + res = get_gpi_params(base, &gpi_info); + if (res != 0) { + return res; } /* - * Access to L1 tables is controlled by a global lock to ensure - * that no more than one CPU is allowed to make changes at any - * given time. + * Access to each 512MB block in L1 tables is controlled by a bitlock + * to ensure that no more than one CPU is allowed to make changes at + * any given time. 
*/ - spin_lock(&gpt_lock); - res = get_gpi_params(base, &gpi_info); - if (res != 0) { - spin_unlock(&gpt_lock); - return res; - } + bit_lock(gpi_info.lock, gpi_info.mask); + + read_gpi(&gpi_info); /* Check that the current address is in NS state */ if (gpi_info.gpi != GPT_GPI_NS) { VERBOSE("GPT: Only Granule in NS state can be delegated.\n"); VERBOSE(" Caller: %u, Current GPI: %u\n", src_sec_state, gpi_info.gpi); - spin_unlock(&gpt_lock); + bit_unlock(gpi_info.lock, gpi_info.mask); return -EPERM; } - if (src_sec_state == SMC_FROM_SECURE) { - nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT; - } else { - nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT; +#if (RME_GPT_MAX_BLOCK != 0) + /* Check for Contiguous descriptor */ + if ((gpi_info.gpt_l1_desc & GPT_L1_TYPE_CONT_DESC_MASK) == + GPT_L1_TYPE_CONT_DESC) { + shatter_block(base, &gpi_info, GPT_L1_NS_DESC); } - +#endif /* * In order to maintain mutual distrust between Realm and Secure * states, remove any data speculatively fetched into the target - * physical address space. Issue DC CIPAPA over address range. + * physical address space. + * Issue DC CIPAPA or DC_CIGDPAPA on implementations with FEAT_MTE2. */ - if (is_feat_mte2_supported()) { - flush_dcache_to_popa_range_mte2(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } else { - flush_dcache_to_popa_range(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } + flush_page_to_popa(base | nse); write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr, gpi_info.gpi_shift, gpi_info.idx, target_pas); - dsboshst(); - gpt_tlbi_by_pa_ll(base, GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - dsbosh(); + /* Ensure that all agents observe the new configuration */ + tlbi_page_dsbosh(base); nse = (uint64_t)GPT_NSE_NS << GPT_NSE_SHIFT; - if (is_feat_mte2_supported()) { - flush_dcache_to_popa_range_mte2(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } else { - flush_dcache_to_popa_range(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } + /* Ensure that the scrubbed data have made it past the PoPA */ + flush_page_to_popa(base | nse); - /* Unlock access to the L1 tables */ - spin_unlock(&gpt_lock); +#if (RME_GPT_MAX_BLOCK != 0) + if (gpi_info.gpt_l1_desc == l1_desc) { + /* Try to fuse */ + fuse_block(base, &gpi_info, l1_desc); + } +#endif + + /* Unlock access to 512MB block */ + bit_unlock(gpi_info.lock, gpi_info.mask); /* * The isb() will be done as part of context @@ -1155,7 +1799,7 @@ int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state) int gpt_undelegate_pas(uint64_t base, size_t size, unsigned int src_sec_state) { gpi_info_t gpi_info; - uint64_t nse; + uint64_t nse, __unused l1_desc; int res; /* Ensure that the tables have been set up before taking requests */ @@ -1164,10 +1808,6 @@ int gpt_undelegate_pas(uint64_t base, size_t size, unsigned int src_sec_state) /* Ensure that MMU and caches are enabled */ assert((read_sctlr_el3() & SCTLR_C_BIT) != 0UL); - /* Delegate request can only come from REALM or SECURE */ - assert(src_sec_state == SMC_FROM_REALM || - src_sec_state == SMC_FROM_SECURE); - /* See if this is a single or a range of granule transition */ if (size != GPT_PGS_ACTUAL_SIZE(gpt_config.p)) { return -EINVAL; @@ -1192,84 +1832,82 @@ int gpt_undelegate_pas(uint64_t base, size_t size, unsigned int src_sec_state) return -EINVAL; } - /* - * Access to L1 tables is controlled by a global lock to ensure - * that no more than one CPU is allowed to make changes at any - * given time. 
- */ - spin_lock(&gpt_lock); - res = get_gpi_params(base, &gpi_info); if (res != 0) { - spin_unlock(&gpt_lock); return res; } + /* + * Access to each 512MB block in L1 tables is controlled by a bitlock + * to ensure that no more than one CPU is allowed to make changes at + * any given time. + */ + bit_lock(gpi_info.lock, gpi_info.mask); + + read_gpi(&gpi_info); + /* Check that the current address is in the delegated state */ - if ((src_sec_state == SMC_FROM_REALM && - gpi_info.gpi != GPT_GPI_REALM) || - (src_sec_state == SMC_FROM_SECURE && - gpi_info.gpi != GPT_GPI_SECURE)) { - VERBOSE("GPT: Only Granule in REALM or SECURE state can be undelegated.\n"); + if ((src_sec_state == SMC_FROM_REALM) && + (gpi_info.gpi == GPT_GPI_REALM)) { + l1_desc = GPT_L1_REALM_DESC; + nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT; + } else if ((src_sec_state == SMC_FROM_SECURE) && + (gpi_info.gpi == GPT_GPI_SECURE)) { + l1_desc = GPT_L1_SECURE_DESC; + nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT; + } else { + VERBOSE("GPT: Only Granule in REALM or SECURE state can be undelegated\n"); VERBOSE(" Caller: %u Current GPI: %u\n", src_sec_state, gpi_info.gpi); - spin_unlock(&gpt_lock); + bit_unlock(gpi_info.lock, gpi_info.mask); return -EPERM; } - - /* In order to maintain mutual distrust between Realm and Secure +#if (RME_GPT_MAX_BLOCK != 0) + /* Check for Contiguous descriptor */ + if ((gpi_info.gpt_l1_desc & GPT_L1_TYPE_CONT_DESC_MASK) == + GPT_L1_TYPE_CONT_DESC) { + shatter_block(base, &gpi_info, l1_desc); + } +#endif + /* + * In order to maintain mutual distrust between Realm and Secure * states, remove access now, in order to guarantee that writes * to the currently-accessible physical address space will not * later become observable. */ write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr, gpi_info.gpi_shift, gpi_info.idx, GPT_GPI_NO_ACCESS); - dsboshst(); - gpt_tlbi_by_pa_ll(base, GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - dsbosh(); + /* Ensure that all agents observe the new NO_ACCESS configuration */ + tlbi_page_dsbosh(base); - if (src_sec_state == SMC_FROM_SECURE) { - nse = (uint64_t)GPT_NSE_SECURE << GPT_NSE_SHIFT; - } else { - nse = (uint64_t)GPT_NSE_REALM << GPT_NSE_SHIFT; - } - - /* Ensure that the scrubbed data has made it past the PoPA */ - if (is_feat_mte2_supported()) { - flush_dcache_to_popa_range_mte2(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } else { - flush_dcache_to_popa_range(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } + /* Ensure that the scrubbed data have made it past the PoPA */ + flush_page_to_popa(base | nse); /* - * Remove any data loaded speculatively - * in NS space from before the scrubbing + * Remove any data loaded speculatively in NS space from before + * the scrubbing. */ nse = (uint64_t)GPT_NSE_NS << GPT_NSE_SHIFT; - if (is_feat_mte2_supported()) { - flush_dcache_to_popa_range_mte2(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } else { - flush_dcache_to_popa_range(nse | base, - GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - } + flush_page_to_popa(base | nse); - /* Clear existing GPI encoding and transition granule. */ + /* Clear existing GPI encoding and transition granule */ write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr, gpi_info.gpi_shift, gpi_info.idx, GPT_GPI_NS); - dsboshst(); /* Ensure that all agents observe the new NS configuration */ - gpt_tlbi_by_pa_ll(base, GPT_PGS_ACTUAL_SIZE(gpt_config.p)); - dsbosh(); + tlbi_page_dsbosh(base); - /* Unlock access to the L1 tables. 
*/ - spin_unlock(&gpt_lock); +#if (RME_GPT_MAX_BLOCK != 0) + if (gpi_info.gpt_l1_desc == GPT_L1_NS_DESC) { + /* Try to fuse */ + fuse_block(base, &gpi_info, GPT_L1_NS_DESC); + } +#endif + /* Unlock access to 512MB block */ + bit_unlock(gpi_info.lock, gpi_info.mask); /* * The isb() will be done as part of context diff --git a/lib/gpt_rme/gpt_rme.mk b/lib/gpt_rme/gpt_rme.mk index 60176f4e1..52b38d2ac 100644 --- a/lib/gpt_rme/gpt_rme.mk +++ b/lib/gpt_rme/gpt_rme.mk @@ -1,8 +1,13 @@ # -# Copyright (c) 2021, Arm Limited. All rights reserved. +# Copyright (c) 2021-2024, Arm Limited. All rights reserved. # # SPDX-License-Identifier: BSD-3-Clause # +# Process RME_GPT_MAX_BLOCK value +ifeq ($(filter 0 2 32 512, ${RME_GPT_MAX_BLOCK}),) + $(error "Invalid value for RME_GPT_MAX_BLOCK: ${RME_GPT_MAX_BLOCK}") +endif + GPT_LIB_SRCS := $(addprefix lib/gpt_rme/, \ gpt_rme.c) diff --git a/lib/gpt_rme/gpt_rme_private.h b/lib/gpt_rme/gpt_rme_private.h index b2a5dae4a..4d2ab591a 100644 --- a/lib/gpt_rme/gpt_rme_private.h +++ b/lib/gpt_rme/gpt_rme_private.h @@ -9,6 +9,7 @@ #include #include +#include #include /******************************************************************************/ @@ -19,7 +20,7 @@ #define GPT_L0_TYPE_MASK UL(0xF) #define GPT_L0_TYPE_SHIFT U(0) -/* For now, we don't support contiguous descriptors, only table and block */ +/* GPT level 0 table and block descriptors */ #define GPT_L0_TYPE_TBL_DESC UL(3) #define GPT_L0_TYPE_BLK_DESC UL(1) @@ -29,29 +30,63 @@ #define GPT_L0_BLK_DESC_GPI_MASK UL(0xF) #define GPT_L0_BLK_DESC_GPI_SHIFT U(4) -/* GPT level 1 descriptor bit definitions */ +/* GPT level 1 Contiguous descriptor */ +#define GPT_L1_TYPE_CONT_DESC_MASK UL(0xF) +#define GPT_L1_TYPE_CONT_DESC UL(1) + +/* GPT level 1 Contiguous descriptor definitions */ +#define GPT_L1_CONTIG_2MB UL(1) +#define GPT_L1_CONTIG_32MB UL(2) +#define GPT_L1_CONTIG_512MB UL(3) + +#define GPT_L1_CONT_DESC_GPI_SHIFT U(4) +#define GPT_L1_CONT_DESC_GPI_MASK UL(0xF) +#define GPT_L1_CONT_DESC_CONTIG_SHIFT U(8) +#define GPT_L1_CONT_DESC_CONTIG_MASK UL(3) + +/* GPT level 1 Granules descriptor bit definitions */ #define GPT_L1_GRAN_DESC_GPI_MASK UL(0xF) +/* L1 Contiguous descriptors templates */ +#define GPT_L1_CONT_DESC_2MB \ + (GPT_L1_TYPE_CONT_DESC | \ + (GPT_L1_CONTIG_2MB << GPT_L1_CONT_DESC_CONTIG_SHIFT)) +#define GPT_L1_CONT_DESC_32MB \ + (GPT_L1_TYPE_CONT_DESC | \ + (GPT_L1_CONTIG_32MB << GPT_L1_CONT_DESC_CONTIG_SHIFT)) +#define GPT_L1_CONT_DESC_512MB \ + (GPT_L1_TYPE_CONT_DESC | \ + (GPT_L1_CONTIG_512MB << GPT_L1_CONT_DESC_CONTIG_SHIFT)) + +/* Create L1 Contiguous descriptor from GPI and template */ +#define GPT_L1_GPI_CONT_DESC(_gpi, _desc) \ + ((_desc) | ((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT)) + +/* Create L1 Contiguous descriptor from Granules descriptor and size */ +#define GPT_L1_CONT_DESC(_desc, _size) \ + (GPT_L1_CONT_DESC_##_size | \ + (((_desc) & GPT_L1_GRAN_DESC_GPI_MASK) << \ + GPT_L1_CONT_DESC_GPI_SHIFT)) + +/* Create L1 Contiguous descriptor from GPI and size */ +#define GPT_L1_CONT_DESC_SIZE(_gpi, _size) \ + (GPT_L1_CONT_DESC_##_size | \ + (((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT)) + +#define GPT_L1_GPI_BYTE(_gpi) (uint64_t)((_gpi) | ((_gpi) << 4)) +#define GPT_L1_GPI_HALF(_gpi) (GPT_L1_GPI_BYTE(_gpi) | (GPT_L1_GPI_BYTE(_gpi) << 8)) +#define GPT_L1_GPI_WORD(_gpi) (GPT_L1_GPI_HALF(_gpi) | (GPT_L1_GPI_HALF(_gpi) << 16)) + /* - * This macro fills out every GPI entry in a granules descriptor to the same - * value. 
+ * This macro generates a Granules descriptor + * with the same value for every GPI entry. */ -#define GPT_BUILD_L1_DESC(_gpi) (((uint64_t)(_gpi) << 4*0) | \ - ((uint64_t)(_gpi) << 4*1) | \ - ((uint64_t)(_gpi) << 4*2) | \ - ((uint64_t)(_gpi) << 4*3) | \ - ((uint64_t)(_gpi) << 4*4) | \ - ((uint64_t)(_gpi) << 4*5) | \ - ((uint64_t)(_gpi) << 4*6) | \ - ((uint64_t)(_gpi) << 4*7) | \ - ((uint64_t)(_gpi) << 4*8) | \ - ((uint64_t)(_gpi) << 4*9) | \ - ((uint64_t)(_gpi) << 4*10) | \ - ((uint64_t)(_gpi) << 4*11) | \ - ((uint64_t)(_gpi) << 4*12) | \ - ((uint64_t)(_gpi) << 4*13) | \ - ((uint64_t)(_gpi) << 4*14) | \ - ((uint64_t)(_gpi) << 4*15)) +#define GPT_BUILD_L1_DESC(_gpi) (GPT_L1_GPI_WORD(_gpi) | (GPT_L1_GPI_WORD(_gpi) << 32)) + +#define GPT_L1_SECURE_DESC GPT_BUILD_L1_DESC(GPT_GPI_SECURE) +#define GPT_L1_NS_DESC GPT_BUILD_L1_DESC(GPT_GPI_NS) +#define GPT_L1_REALM_DESC GPT_BUILD_L1_DESC(GPT_GPI_REALM) +#define GPT_L1_ANY_DESC GPT_BUILD_L1_DESC(GPT_GPI_ANY) /******************************************************************************/ /* GPT platform configuration */ @@ -106,17 +141,44 @@ typedef enum { PGS_64KB_P = 16U } gpt_p_val_e; +#define LOCK_SIZE sizeof(((bitlock_t *)NULL)->lock) +#define LOCK_TYPE typeof(((bitlock_t *)NULL)->lock) +#define LOCK_BITS (LOCK_SIZE * 8U) + /* - * Internal structure to retrieve the values from get_gpi_info(); + * Internal structure to retrieve the values from get_gpi_params(); */ -typedef struct gpi_info { +typedef struct { uint64_t gpt_l1_desc; uint64_t *gpt_l1_addr; unsigned int idx; unsigned int gpi_shift; unsigned int gpi; + bitlock_t *lock; + LOCK_TYPE mask; } gpi_info_t; +/* + * Look up structure for contiguous blocks and descriptors + */ +typedef struct { + size_t size; + unsigned int desc; +} gpt_fill_lookup_t; + +typedef void (*gpt_shatter_func)(uintptr_t base, const gpi_info_t *gpi_info, + uint64_t l1_desc); +typedef void (*gpt_tlbi_func)(uintptr_t base); + +/* + * Look-up structure for + * invalidating TLBs of GPT entries by Physical address, last level. + */ +typedef struct { + gpt_tlbi_func function; + size_t mask; +} gpt_tlbi_lookup_t; + /* Max valid value for PGS */ #define GPT_PGS_MAX (2U) @@ -136,8 +198,8 @@ typedef struct gpi_info { * special case we'll get a negative width value which does not make sense and * would cause problems. */ -#define GPT_L0_IDX_WIDTH(_t) (((_t) > GPT_S_VAL) ? \ - ((_t) - GPT_S_VAL) : (0U)) +#define GPT_L0_IDX_WIDTH(_t) (((unsigned int)(_t) > GPT_S_VAL) ? \ + ((unsigned int)(_t) - GPT_S_VAL) : (0U)) /* Bit shift for the L0 index field in a PA */ #define GPT_L0_IDX_SHIFT (GPT_S_VAL) @@ -173,10 +235,11 @@ typedef struct gpi_info { * the L0 index field above since all valid combinations of PGS (p) and L0GPTSZ * (s) will result in a positive width value. */ -#define GPT_L1_IDX_WIDTH(_p) ((GPT_S_VAL - 1U) - ((_p) + 3U)) +#define GPT_L1_IDX_WIDTH(_p) ((GPT_S_VAL - 1U) - \ + ((unsigned int)(_p) + 3U)) /* Bit shift for the L1 index field */ -#define GPT_L1_IDX_SHIFT(_p) ((_p) + 4U) +#define GPT_L1_IDX_SHIFT(_p) ((unsigned int)(_p) + 4U) /* * Mask for the L1 index field, must be shifted. 
@@ -196,7 +259,10 @@ typedef struct gpi_info { #define GPT_L1_GPI_IDX_MASK (0xF) /* Total number of entries in each L1 table */ -#define GPT_L1_ENTRY_COUNT(_p) ((GPT_L1_IDX_MASK(_p)) + 1U) +#define GPT_L1_ENTRY_COUNT(_p) ((GPT_L1_IDX_MASK(_p)) + 1UL) + +/* Number of L1 entries in 2MB block */ +#define GPT_L1_ENTRY_COUNT_2MB(_p) (SZ_2M >> GPT_L1_IDX_SHIFT(_p)) /* Total size in bytes of each L1 table */ #define GPT_L1_TABLE_SIZE(_p) ((GPT_L1_ENTRY_COUNT(_p)) << 3U) @@ -206,10 +272,13 @@ typedef struct gpi_info { /******************************************************************************/ /* Protected space actual size in bytes */ -#define GPT_PPS_ACTUAL_SIZE(_t) (1UL << (_t)) +#define GPT_PPS_ACTUAL_SIZE(_t) (1UL << (unsigned int)(_t)) /* Granule actual size in bytes */ -#define GPT_PGS_ACTUAL_SIZE(_p) (1UL << (_p)) +#define GPT_PGS_ACTUAL_SIZE(_p) (1UL << (unsigned int)(_p)) + +/* Number of granules in 2MB block */ +#define GPT_PGS_COUNT_2MB(_p) (1UL << (21U - (unsigned int)(_p))) /* L0 GPT region size in bytes */ #define GPT_L0GPTSZ_ACTUAL_SIZE (1UL << GPT_S_VAL) @@ -221,7 +290,8 @@ typedef struct gpi_info { * This definition is used to determine if a physical address lies on an L0 * region boundary. */ -#define GPT_IS_L0_ALIGNED(_pa) (((_pa) & (GPT_L0_REGION_SIZE - U(1))) == U(0)) +#define GPT_IS_L0_ALIGNED(_pa) \ + (((_pa) & (GPT_L0_REGION_SIZE - UL(1))) == UL(0)) /* Get the type field from an L0 descriptor */ #define GPT_L0_TYPE(_desc) (((_desc) >> GPT_L0_TYPE_SHIFT) & \ @@ -246,16 +316,43 @@ typedef struct gpi_info { (GPT_L0_TBL_DESC_L1ADDR_MASK << \ GPT_L0_TBL_DESC_L1ADDR_SHIFT)))) +/* Get the GPI from L1 Contiguous descriptor */ +#define GPT_L1_CONT_GPI(_desc) \ + (((_desc) >> GPT_L1_CONT_DESC_GPI_SHIFT) & GPT_L1_CONT_DESC_GPI_MASK) + +/* Get the GPI from L1 Granules descriptor */ +#define GPT_L1_GRAN_GPI(_desc) ((_desc) & GPT_L1_GRAN_DESC_GPI_MASK) + +/* Get the Contig from L1 Contiguous descriptor */ +#define GPT_L1_CONT_CONTIG(_desc) \ + (((_desc) >> GPT_L1_CONT_DESC_CONTIG_SHIFT) & \ + GPT_L1_CONT_DESC_CONTIG_MASK) + /* Get the index into the L1 table from a physical address */ -#define GPT_L1_IDX(_p, _pa) (((_pa) >> GPT_L1_IDX_SHIFT(_p)) & \ - GPT_L1_IDX_MASK(_p)) +#define GPT_L1_IDX(_p, _pa) \ + (((_pa) >> GPT_L1_IDX_SHIFT(_p)) & GPT_L1_IDX_MASK(_p)) /* Get the index of the GPI within an L1 table entry from a physical address */ -#define GPT_L1_GPI_IDX(_p, _pa) (((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & \ - GPT_L1_GPI_IDX_MASK) +#define GPT_L1_GPI_IDX(_p, _pa) \ + (((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & GPT_L1_GPI_IDX_MASK) /* Determine if an address is granule-aligned */ -#define GPT_IS_L1_ALIGNED(_p, _pa) (((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - U(1))) \ - == U(0)) +#define GPT_IS_L1_ALIGNED(_p, _pa) \ + (((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - UL(1))) == UL(0)) + +/* Get aligned addresses */ +#define ALIGN_2MB(_addr) ((_addr) & ~(SZ_2M - 1UL)) +#define ALIGN_32MB(_addr) ((_addr) & ~(SZ_32M - 1UL)) +#define ALIGN_512MB(_addr) ((_addr) & ~(SZ_512M - 1UL)) + +/* Determine if region is contiguous */ +#define GPT_REGION_IS_CONT(_len, _addr, _size) \ + (((_len) >= (_size)) && (((_addr) & ((_size) - UL(1))) == UL(0))) + +/* Get 32MB block number in 512MB block: 0-15 */ +#define GET_32MB_NUM(_addr) ((_addr >> 25) & 0xF) + +/* Get 2MB block number in 32MB block: 0-15 */ +#define GET_2MB_NUM(_addr) ((_addr >> 21) & 0xF) #endif /* GPT_RME_PRIVATE_H */ diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk index 2685195ff..e742d6c16 100644 --- a/make_helpers/defaults.mk +++ 
b/make_helpers/defaults.mk @@ -139,6 +139,9 @@ FW_ENC_STATUS := 0 # For Chain of Trust GENERATE_COT := 0 +# Default maximum size of GPT contiguous block +RME_GPT_MAX_BLOCK := 2 + # Hint platform interrupt control layer that Group 0 interrupts are for EL3. By # default, they are for Secure EL1. GICV2_G0_FOR_EL3 := 0 diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk index c0bba3008..033eb7c31 100644 --- a/plat/arm/board/fvp/platform.mk +++ b/plat/arm/board/fvp/platform.mk @@ -24,7 +24,7 @@ FVP_GICR_REGION_PROTECTION := 0 FVP_DT_PREFIX := fvp-base-gicv3-psci -# Size (in kilobytes) of the Trusted SRAM region to utilize when building for +# Size (in kilobytes) of the Trusted SRAM region to utilize when building for # the FVP platform. This option defaults to 256. FVP_TRUSTED_SRAM_SIZE := 256 From d766084fc48ed83890c63a7ef773b8fff9e4ea86 Mon Sep 17 00:00:00 2001 From: AlexeiFedorov Date: Mon, 13 May 2024 15:35:54 +0100 Subject: [PATCH 2/2] feat(gpt): configure memory size protected by bitlock This patch adds support in GPT library for configuration of the memory block size protected by one bit of 'bitlock' structure. Build option 'RME_GPT_BITLOCK_BLOCK' defines the number of 512MB blocks covered by each bit. This numeric parameter must be a power of 2 and can take the values from 0 to 512. Setting this value to 0 chooses a single spinlock for all GPT L1 table entries. The default value is set to 1 which corresponds to 512MB per bit. Change-Id: I710d178072894a3ef40daebea701f74d19e8a3d7 Signed-off-by: AlexeiFedorov --- Makefile | 1 + docs/getting_started/build-options.rst | 12 ++- lib/gpt_rme/gpt_rme.c | 130 ++++++++++++++++--------- lib/gpt_rme/gpt_rme.mk | 9 ++ lib/gpt_rme/gpt_rme_private.h | 2 + make_helpers/defaults.mk | 3 + 6 files changed, 109 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index b6093e768..f7229f529 100644 --- a/Makefile +++ b/Makefile @@ -1331,6 +1331,7 @@ $(eval $(call add_defines,\ PSCI_EXTENDED_STATE_ID \ PSCI_OS_INIT_MODE \ RESET_TO_BL31 \ + RME_GPT_BITLOCK_BLOCK \ RME_GPT_MAX_BLOCK \ SEPARATE_CODE_AND_RODATA \ SEPARATE_BL2_NOLOAD_REGION \ diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst index 0d4261d93..82772d679 100644 --- a/docs/getting_started/build-options.rst +++ b/docs/getting_started/build-options.rst @@ -812,7 +812,17 @@ Common build options instead of the BL1 entrypoint. It can take the value 0 (CPU reset to BL1 entrypoint) or 1 (CPU reset to SP_MIN entrypoint). The default value is 0. -- ``RME_GPT_MAX_BLOCK``: Numeric value in MB to define maximum size of +- ``RME_GPT_BITLOCK_BLOCK``: This defines the block size (in number of 512MB +- blocks) covered by a single bit of the bitlock structure during RME GPT +- operations. The lower the block size, the better opportunity for +- parallelising GPT operations but at the cost of more bits being needed +- for the bitlock structure. This numeric parameter can take the values +- from 0 to 512 and must be a power of 2. The value of 0 is special and +- and it chooses a single spinlock for all GPT L1 table entries. Default +- value is 1 which corresponds to block size of 512MB per bit of bitlock +- structure. + +- ``RME_GPT_MAX_BLOCK``: Numeric value in MB to define the maximum size of supported contiguous blocks in GPT Library. This parameter can take the values 0, 2, 32 and 512. Setting this value to 0 disables use of Contigious descriptors. Default value is 2. 
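For illustration, the following minimal standalone C sketch mirrors the bitlock
sizing arithmetic introduced in the lib/gpt_rme/gpt_rme.c changes below
(validate_l0_params() and gpt_init_l0_tables()). It is not part of the patch:
the helper name bitlock_bytes() is illustrative only, and the 8-bit lock word
is an assumption, since the definition of bitlock_t is not shown here.

    #include <stddef.h>
    #include <stdint.h>

    #define SZ_512M		(512UL * 1024UL * 1024UL)
    #define LOCK_SIZE		sizeof(uint8_t)	/* assumed 8-bit lock word */

    /*
     * Bytes of bitlock storage for a protected space of 'pps_bytes', with
     * one lock bit covering 'bitlock_block' x 512MB. 'bitlock_block' must
     * be non-zero; the value 0 selects a single spinlock instead.
     */
    static size_t bitlock_bytes(size_t pps_bytes, unsigned int bitlock_block)
    {
    	size_t locks_size = pps_bytes / (bitlock_block * SZ_512M * 8U);

    	/* Round up to at least one lock word, as the patch does */
    	return (locks_size < LOCK_SIZE) ? LOCK_SIZE : locks_size;
    }

    /* e.g. bitlock_bytes(64UL << 30, 1U) == 16: 128 blocks -> 128 lock bits */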
diff --git a/lib/gpt_rme/gpt_rme.c b/lib/gpt_rme/gpt_rme.c
index 7dec54893..4d80373ab 100644
--- a/lib/gpt_rme/gpt_rme.c
+++ b/lib/gpt_rme/gpt_rme.c
@@ -127,9 +127,35 @@ static uint64_t gpt_l1_index_mask;
 static uintptr_t gpt_l1_tbl;
 
 /* These variable is used during runtime */
+#if (RME_GPT_BITLOCK_BLOCK == 0)
+/*
+ * The GPTs are protected by a global spinlock to ensure
+ * that multiple CPUs do not attempt to change the descriptors at once.
+ */
+static spinlock_t gpt_lock;
+#else
-/* Bitlock base address for each 512 MB block of PPS */
+/* Bitlocks base address */
 static bitlock_t *gpt_bitlock_base;
+#endif
+
+/* Lock/unlock macros for GPT entries */
+#if (RME_GPT_BITLOCK_BLOCK == 0)
+/*
+ * Access to GPT is controlled by a global lock to ensure
+ * that no more than one CPU is allowed to make changes at any
+ * given time.
+ */
+#define GPT_LOCK        spin_lock(&gpt_lock)
+#define GPT_UNLOCK      spin_unlock(&gpt_lock)
+#else
+/*
+ * Access to a block of memory is controlled by a bitlock.
+ * Size of block = RME_GPT_BITLOCK_BLOCK * 512MB.
+ */
+#define GPT_LOCK        bit_lock(gpi_info.lock, gpi_info.mask)
+#define GPT_UNLOCK      bit_unlock(gpi_info.lock, gpi_info.mask)
+#endif
 
 static void tlbi_page_dsbosh(uintptr_t base)
 {
@@ -477,7 +503,7 @@ static int validate_pas_mappings(pas_region_t *pas_regions,
 static int validate_l0_params(gpccr_pps_e pps, uintptr_t l0_mem_base,
                               size_t l0_mem_size)
 {
-        size_t l0_alignment, locks_size;
+        size_t l0_alignment, locks_size = 0;
 
         /*
          * Make sure PPS is valid and then store it since macros need this value
@@ -503,28 +529,28 @@ static int validate_l0_params(gpccr_pps_e pps, uintptr_t l0_mem_base,
                 return -EFAULT;
         }
 
-        /* Check size */
-        if (l0_mem_size < GPT_L0_TABLE_SIZE(gpt_config.t)) {
-                ERROR("%sL0%s\n", (const char *)"GPT: Inadequate ",
-                      (const char *)" memory\n");
-                ERROR(" Expected 0x%lx bytes, got 0x%lx bytes\n",
-                      GPT_L0_TABLE_SIZE(gpt_config.t), l0_mem_size);
-                return -ENOMEM;
-        }
-
+#if (RME_GPT_BITLOCK_BLOCK != 0)
         /*
          * Size of bitlocks in bytes for the protected address space
-         * with 512MB per bitlock.
+         * with RME_GPT_BITLOCK_BLOCK * 512MB per bitlock.
          */
-        locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) / (SZ_512M * 8U);
+        locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) /
+                     (RME_GPT_BITLOCK_BLOCK * SZ_512M * 8U);
 
-        /* Check space for bitlocks */
-        if (locks_size > (l0_mem_size - GPT_L0_TABLE_SIZE(gpt_config.t))) {
-                ERROR("%sbitlock%s", (const char *)"GPT: Inadequate ",
-                      (const char *)" memory\n");
+        /*
+         * If protected space size is less than the size covered
+         * by 'bitlock' structure, check for a single bitlock.
+         */
+        if (locks_size < LOCK_SIZE) {
+                locks_size = LOCK_SIZE;
+        }
+#endif
+        /* Check size for L0 tables and bitlocks */
+        if (l0_mem_size < (GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size)) {
+                ERROR("GPT: Inadequate L0 memory\n");
                 ERROR(" Expected 0x%lx bytes, got 0x%lx bytes\n",
-                      locks_size,
-                      l0_mem_size - GPT_L0_TABLE_SIZE(gpt_config.t));
+                      GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size,
+                      l0_mem_size);
                 return -ENOMEM;
         }
 
@@ -1089,8 +1115,8 @@ int gpt_init_l0_tables(gpccr_pps_e pps, uintptr_t l0_mem_base,
                        size_t l0_mem_size)
 {
         uint64_t gpt_desc;
-        size_t locks_size;
-        bitlock_t *bit_locks;
+        size_t locks_size = 0;
+        __unused bitlock_t *bit_locks;
         int ret;
 
         /* Ensure that MMU and Data caches are enabled */
@@ -1110,16 +1136,27 @@ int gpt_init_l0_tables(gpccr_pps_e pps, uintptr_t l0_mem_base,
                 ((uint64_t *)l0_mem_base)[i] = gpt_desc;
         }
 
+#if (RME_GPT_BITLOCK_BLOCK != 0)
         /* Initialise bitlocks at the end of L0 table */
         bit_locks = (bitlock_t *)(l0_mem_base +
                                   GPT_L0_TABLE_SIZE(gpt_config.t));
 
         /* Size of bitlocks in bytes */
-        locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) / (SZ_512M * 8U);
+        locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) /
+                     (RME_GPT_BITLOCK_BLOCK * SZ_512M * 8U);
+
+        /*
+         * If protected space size is less than the size covered
+         * by 'bitlock' structure, initialise a single bitlock.
+         */
+        if (locks_size < LOCK_SIZE) {
+                locks_size = LOCK_SIZE;
+        }
 
         for (size_t i = 0UL; i < (locks_size/LOCK_SIZE); i++) {
                 bit_locks[i].lock = 0U;
         }
+#endif
 
         /* Flush updated L0 tables and bitlocks to memory */
         flush_dcache_range((uintptr_t)l0_mem_base,
@@ -1290,17 +1327,19 @@ int gpt_runtime_init(void)
         /* Mask for the L1 index field */
         gpt_l1_index_mask = GPT_L1_IDX_MASK(gpt_config.p);
 
+#if (RME_GPT_BITLOCK_BLOCK != 0)
         /* Bitlocks at the end of L0 table */
         gpt_bitlock_base = (bitlock_t *)(gpt_config.plat_gpt_l0_base +
                                          GPT_L0_TABLE_SIZE(gpt_config.t));
-
+#endif
         VERBOSE("GPT: Runtime Configuration\n");
         VERBOSE(" PPS/T: 0x%x/%u\n", gpt_config.pps, gpt_config.t);
         VERBOSE(" PGS/P: 0x%x/%u\n", gpt_config.pgs, gpt_config.p);
         VERBOSE(" L0GPTSZ/S: 0x%x/%u\n", GPT_L0GPTSZ, GPT_S_VAL);
         VERBOSE(" L0 base: 0x%"PRIxPTR"\n", gpt_config.plat_gpt_l0_base);
+#if (RME_GPT_BITLOCK_BLOCK != 0)
         VERBOSE(" Bitlocks: 0x%"PRIxPTR"\n", (uintptr_t)gpt_bitlock_base);
-
+#endif
         return 0;
 }
 
@@ -1326,7 +1365,7 @@ static inline void write_gpt(uint64_t *gpt_l1_desc, uint64_t *gpt_l1_addr,
 static int get_gpi_params(uint64_t base, gpi_info_t *gpi_info)
 {
         uint64_t gpt_l0_desc, *gpt_l0_base;
-        unsigned int idx_512;
+        __unused unsigned int block_idx;
 
         gpt_l0_base = (uint64_t *)gpt_config.plat_gpt_l0_base;
         gpt_l0_desc = gpt_l0_base[GPT_L0_IDX(base)];
@@ -1341,19 +1380,20 @@ static int get_gpi_params(uint64_t base, gpi_info_t *gpi_info)
         gpi_info->idx = (unsigned int)GPT_L1_INDEX(base);
         gpi_info->gpi_shift = GPT_L1_GPI_IDX(gpt_config.p, base) << 2;
 
-        /* 512MB block index */
-        idx_512 = (unsigned int)(base / SZ_512M);
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+        /* Block index */
+        block_idx = (unsigned int)(base / (RME_GPT_BITLOCK_BLOCK * SZ_512M));
 
         /* Bitlock address and mask */
-        gpi_info->lock = &gpt_bitlock_base[idx_512 / LOCK_BITS];
-        gpi_info->mask = 1U << (idx_512 & (LOCK_BITS - 1U));
-
+        gpi_info->lock = &gpt_bitlock_base[block_idx / LOCK_BITS];
+        gpi_info->mask = 1U << (block_idx & (LOCK_BITS - 1U));
+#endif
         return 0;
 }
 
 /*
  * Helper to retrieve the gpt_l1_desc and GPI information from gpi_info.
- * This function is called with bitlock acquired.
+ * This function is called with bitlock or spinlock acquired.
  */
 static void read_gpi(gpi_info_t *gpi_info)
 {
@@ -1716,12 +1756,10 @@ int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
         }
 
         /*
-         * Access to each 512MB block in L1 tables is controlled by a bitlock
-         * to ensure that no more than one CPU is allowed to make changes at
-         * any given time.
+         * Access to GPT is controlled by a lock to ensure that no more
+         * than one CPU is allowed to make changes at any given time.
          */
-        bit_lock(gpi_info.lock, gpi_info.mask);
-
+        GPT_LOCK;
         read_gpi(&gpi_info);
 
         /* Check that the current address is in NS state */
@@ -1729,7 +1767,7 @@ int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
                 VERBOSE("GPT: Only Granule in NS state can be delegated.\n");
                 VERBOSE(" Caller: %u, Current GPI: %u\n", src_sec_state, gpi_info.gpi);
-                bit_unlock(gpi_info.lock, gpi_info.mask);
+                GPT_UNLOCK;
                 return -EPERM;
         }
 
@@ -1766,8 +1804,8 @@ int gpt_delegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
         }
 #endif
 
-        /* Unlock access to 512MB block */
-        bit_unlock(gpi_info.lock, gpi_info.mask);
+        /* Unlock the lock to GPT */
+        GPT_UNLOCK;
 
         /*
          * The isb() will be done as part of context
@@ -1838,12 +1876,10 @@ int gpt_undelegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
         }
 
         /*
-         * Access to each 512MB block in L1 tables is controlled by a bitlock
-         * to ensure that no more than one CPU is allowed to make changes at
-         * any given time.
+         * Access to GPT is controlled by a lock to ensure that no more
+         * than one CPU is allowed to make changes at any given time.
          */
-        bit_lock(gpi_info.lock, gpi_info.mask);
-
+        GPT_LOCK;
         read_gpi(&gpi_info);
 
         /* Check that the current address is in the delegated state */
@@ -1859,7 +1895,7 @@ int gpt_undelegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
                 VERBOSE("GPT: Only Granule in REALM or SECURE state can be undelegated\n");
                 VERBOSE(" Caller: %u Current GPI: %u\n", src_sec_state, gpi_info.gpi);
-                bit_unlock(gpi_info.lock, gpi_info.mask);
+                GPT_UNLOCK;
                 return -EPERM;
         }
 
@@ -1906,8 +1942,8 @@ int gpt_undelegate_pas(uint64_t base, size_t size, unsigned int src_sec_state)
                 fuse_block(base, &gpi_info, GPT_L1_NS_DESC);
         }
 #endif
-        /* Unlock access to 512MB block */
-        bit_unlock(gpi_info.lock, gpi_info.mask);
+        /* Unlock the lock to GPT */
+        GPT_UNLOCK;
 
         /*
          * The isb() will be done as part of context
diff --git a/lib/gpt_rme/gpt_rme.mk b/lib/gpt_rme/gpt_rme.mk
index 52b38d2ac..7d6b61f94 100644
--- a/lib/gpt_rme/gpt_rme.mk
+++ b/lib/gpt_rme/gpt_rme.mk
@@ -4,6 +4,15 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
+# Process RME_GPT_BITLOCK_BLOCK value
+ifeq ($(filter 0 1 2 4 8 16 32 64 128 256 512, ${RME_GPT_BITLOCK_BLOCK}),)
+    $(error "Invalid value for RME_GPT_BITLOCK_BLOCK: ${RME_GPT_BITLOCK_BLOCK}")
+endif
+
+ifeq (${RME_GPT_BITLOCK_BLOCK},0)
+    $(warning "GPT library uses global spinlock")
+endif
+
 # Process RME_GPT_MAX_BLOCK value
 ifeq ($(filter 0 2 32 512, ${RME_GPT_MAX_BLOCK}),)
     $(error "Invalid value for RME_GPT_MAX_BLOCK: ${RME_GPT_MAX_BLOCK}")
diff --git a/lib/gpt_rme/gpt_rme_private.h b/lib/gpt_rme/gpt_rme_private.h
index 4d2ab591a..31dad20c7 100644
--- a/lib/gpt_rme/gpt_rme_private.h
+++ b/lib/gpt_rme/gpt_rme_private.h
@@ -154,8 +154,10 @@ typedef struct {
         unsigned int idx;
         unsigned int gpi_shift;
         unsigned int gpi;
+#if (RME_GPT_BITLOCK_BLOCK != 0)
         bitlock_t *lock;
         LOCK_TYPE mask;
+#endif
 } gpi_info_t;
 
 /*
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index e742d6c16..a5c78ae42 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -139,6 +139,9 @@ FW_ENC_STATUS := 0
 # For Chain of Trust
 GENERATE_COT := 0
 
+# Default number of 512MB blocks per bit of bitlock
+RME_GPT_BITLOCK_BLOCK := 1
+
 # Default maximum size of GPT contiguous block
 RME_GPT_MAX_BLOCK := 2
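As a usage illustration (not part of the patch; the exact set of options
needed for an RME-enabled FVP build may vary), both of the new defaults can
be overridden on the build command line, for example:

    make PLAT=fvp ENABLE_RME=1 RME_GPT_BITLOCK_BLOCK=2 RME_GPT_MAX_BLOCK=32 all fip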