diff --git a/Makefile b/Makefile
index 1e32ddec3..a3aad1207 100644
--- a/Makefile
+++ b/Makefile
@@ -1185,7 +1185,6 @@ $(eval $(call assert_booleans,\
 	DYN_DISABLE_AUTH \
 	EL3_EXCEPTION_HANDLING \
 	ENABLE_AMU_AUXILIARY_COUNTERS \
-	ENABLE_AMU_FCONF \
 	AMU_RESTRICT_COUNTERS \
 	ENABLE_ASSERTIONS \
 	ENABLE_PIE \
@@ -1360,7 +1359,6 @@ $(eval $(call add_defines,\
 	DISABLE_MTPMU \
 	ENABLE_FEAT_AMU \
 	ENABLE_AMU_AUXILIARY_COUNTERS \
-	ENABLE_AMU_FCONF \
 	AMU_RESTRICT_COUNTERS \
 	ENABLE_ASSERTIONS \
 	ENABLE_BTI \
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index 83be0f6f4..db0ea6cb1 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -127,7 +127,7 @@ void bl31_setup(u_register_t arg0, u_register_t arg1, u_register_t arg2,
 void bl31_main(void)
 {
 	/* Init registers that never change for the lifetime of TF-A */
-	cm_manage_extensions_el3();
+	cm_manage_extensions_el3(plat_my_core_pos());
 
 	/* Init per-world context registers for non-secure world */
 	manage_extensions_nonsecure_per_world();
diff --git a/docs/components/activity-monitors.rst b/docs/components/activity-monitors.rst
index 5c1c2c2c7..4c33d42f3 100644
--- a/docs/components/activity-monitors.rst
+++ b/docs/components/activity-monitors.rst
@@ -20,15 +20,9 @@ known as group 1 counters), controlled by the ``ENABLE_AMU_AUXILIARY_COUNTERS``
 build option.
 
 As a security precaution, Trusted Firmware-A does not enable these by default.
-Instead, platforms may configure their auxiliary counters through one of two
-possible mechanisms:
-
-- |FCONF|, controlled by the ``ENABLE_AMU_FCONF`` build option.
-- A platform implementation of the ``plat_amu_topology`` function (the default).
-
-See :ref:`Activity Monitor Unit (AMU) Bindings` for documentation on the |FCONF|
-device tree bindings.
+Instead, platforms must configure their auxiliary counters through the
+``plat_amu_aux_enables`` platform hook.
 
 --------------
 
-*Copyright (c) 2021, Arm Limited. All rights reserved.*
+*Copyright (c) 2021-2025, Arm Limited. All rights reserved.*
diff --git a/docs/components/fconf/amu-bindings.rst b/docs/components/fconf/amu-bindings.rst
deleted file mode 100644
index 047f75ef8..000000000
--- a/docs/components/fconf/amu-bindings.rst
+++ /dev/null
@@ -1,142 +0,0 @@
-Activity Monitor Unit (AMU) Bindings
-====================================
-
-To support platform-defined Activity Monitor Unit (|AMU|) auxiliary counters
-through FCONF, the ``HW_CONFIG`` device tree accepts several |AMU|-specific
-nodes and properties.
-
-Bindings
-^^^^^^^^
-
-.. contents::
-    :local:
-
-``/cpus/cpus/cpu*`` node properties
-"""""""""""""""""""""""""""""""""""
-
-The ``cpu`` node has been augmented to support a handle to an associated |AMU|
-view, which should describe the counters offered by the core.
-
-+---------------+-------+---------------+-------------------------------------+
-| Property name | Usage | Value type    | Description                         |
-+===============+=======+===============+=====================================+
-| ``amu``       | O     | ``<phandle>`` | If present, indicates that an |AMU| |
-|               |       |               | is available and its counters are   |
-|               |       |               | described by the node provided.     |
-+---------------+-------+---------------+-------------------------------------+
-
-``/cpus/amus`` node properties
-""""""""""""""""""""""""""""""
-
-The ``amus`` node describes the |AMUs| implemented by the cores in the system.
-This node does not have any properties.
-
-``/cpus/amus/amu*`` node properties
-"""""""""""""""""""""""""""""""""""
-
-An ``amu`` node describes the layout and meaning of the auxiliary counter
-registers of one or more |AMUs|, and may be shared by multiple cores.
-
-+--------------------+-------+------------+------------------------------------+
-| Property name      | Usage | Value type | Description                        |
-+====================+=======+============+====================================+
-| ``#address-cells`` | R     | ``<u32>``  | Value shall be 1. Specifies that   |
-|                    |       |            | the ``reg`` property array of      |
-|                    |       |            | children of this node uses a       |
-|                    |       |            | single cell.                       |
-+--------------------+-------+------------+------------------------------------+
-| ``#size-cells``    | R     | ``<u32>``  | Value shall be 0. Specifies that   |
-|                    |       |            | no size is required in the ``reg`` |
-|                    |       |            | property in children of this node. |
-+--------------------+-------+------------+------------------------------------+
-
-``/cpus/amus/amu*/counter*`` node properties
-""""""""""""""""""""""""""""""""""""""""""""
-
-A ``counter`` node describes an auxiliary counter belonging to the parent |AMU|
-view.
-
-+-------------------+-------+-------------+------------------------------------+
-| Property name     | Usage | Value type  | Description                        |
-+===================+=======+=============+====================================+
-| ``reg``           | R     | array       | Represents the counter register    |
-|                   |       |             | index, and must be a single cell.  |
-+-------------------+-------+-------------+------------------------------------+
-| ``enable-at-el3`` | O     | ``<empty>`` | The presence of this property      |
-|                   |       |             | indicates that this counter should |
-|                   |       |             | be enabled prior to EL3 exit.      |
-+-------------------+-------+-------------+------------------------------------+
-
-Example
-^^^^^^^
-
-An example system offering four cores made up of two clusters, where the cores
-of each cluster share different |AMUs|, may use something like the following:
-
-.. code-block::
-
-    cpus {
-        #address-cells = <2>;
-        #size-cells = <0>;
-
-        amus {
-            amu0: amu-0 {
-                #address-cells = <1>;
-                #size-cells = <0>;
-
-                counterX: counter@0 {
-                    reg = <0>;
-
-                    enable-at-el3;
-                };
-
-                counterY: counter@1 {
-                    reg = <1>;
-
-                    enable-at-el3;
-                };
-            };
-
-            amu1: amu-1 {
-                #address-cells = <1>;
-                #size-cells = <0>;
-
-                counterZ: counter@0 {
-                    reg = <0>;
-
-                    enable-at-el3;
-                };
-            };
-        };
-
-        cpu0@00000 {
-            ...
-
-            amu = <&amu0>;
-        };
-
-        cpu1@00100 {
-            ...
-
-            amu = <&amu0>;
-        };
-
-        cpu2@10000 {
-            ...
-
-            amu = <&amu1>;
-        };
-
-        cpu3@10100 {
-            ...
-
-            amu = <&amu1>;
-        };
-    }
-
-In this situation, ``cpu0`` and ``cpu1`` (the two cores in the first cluster),
-share the view of their AMUs defined by ``amu0``. Likewise, ``cpu2`` and
-``cpu3`` (the two cores in the second cluster), share the view of their |AMUs|
-defined by ``amu1``. This will cause ``counterX`` and ``counterY`` to be enabled
-for both ``cpu0`` and ``cpu1``, and ``counterZ`` to be enabled for both ``cpu2``
-and ``cpu3``.
diff --git a/docs/components/fconf/index.rst b/docs/components/fconf/index.rst
index c10f1ea67..6cb6774aa 100644
--- a/docs/components/fconf/index.rst
+++ b/docs/components/fconf/index.rst
@@ -145,5 +145,4 @@ Properties binding information
   :maxdepth: 1
 
   fconf_properties
-  amu-bindings
   tb_fw_bindings
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index 4846d328a..2b36fda0a 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -258,10 +258,6 @@ Common build options
    (also known as group 1 counters). These are implementation-defined counters,
    and as such require additional platform configuration. Default is 0.
 
--  ``ENABLE_AMU_FCONF``: Enables configuration of the AMU through FCONF, which
-   allows platforms with auxiliary counters to describe them via the
-   ``HW_CONFIG`` device tree blob. Default is 0.
-
 -  ``ENABLE_ASSERTIONS``: This option controls whether or not calls to ``assert()``
    are compiled out. For debug builds, this option defaults to 1, and calls to
    ``assert()`` are left in place. For release builds, this option defaults to 0
diff --git a/fdts/tc-base.dtsi b/fdts/tc-base.dtsi
index 942cf75ef..ac08e0b18 100644
--- a/fdts/tc-base.dtsi
+++ b/fdts/tc-base.dtsi
@@ -104,28 +104,6 @@
 			};
 		};
 
-		amus {
-			amu: amu-0 {
-				#address-cells = <1>;
-				#size-cells = <0>;
-
-				mpmm_gear0: counter@0 {
-					reg = <0>;
-					enable-at-el3;
-				};
-
-				mpmm_gear1: counter@1 {
-					reg = <1>;
-					enable-at-el3;
-				};
-
-				mpmm_gear2: counter@2 {
-					reg = <2>;
-					enable-at-el3;
-				};
-			};
-		};
-
 		CPU0:cpu@0 {
 			device_type = "cpu";
 			compatible = "arm,armv8";
@@ -134,7 +112,6 @@
 			clocks = <&scmi_dvfs 0>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <LIT_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU1:cpu@100 {
@@ -145,7 +122,6 @@
 			clocks = <&scmi_dvfs 0>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <LIT_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU2:cpu@200 {
@@ -154,7 +130,6 @@
 			reg = <0x200>;
 			enable-method = "psci";
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			amu = <&amu>;
 		};
 
 		CPU3:cpu@300 {
@@ -163,7 +138,6 @@
 			reg = <0x300>;
 			enable-method = "psci";
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			amu = <&amu>;
 		};
 
 		CPU4:cpu@400 {
@@ -174,7 +148,6 @@
 			clocks = <&scmi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <MID_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU5:cpu@500 {
@@ -185,7 +158,6 @@
 			clocks = <&scmi_dvfs 1>;
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
 			capacity-dmips-mhz = <MID_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU6:cpu@600 {
@@ -194,7 +166,6 @@
 			reg = <0x600>;
 			enable-method = "psci";
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			amu = <&amu>;
 		};
 
 		CPU7:cpu@700 {
@@ -203,7 +174,6 @@
 			reg = <0x700>;
 			enable-method = "psci";
 			cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
-			amu = <&amu>;
 		};
 	};
 
diff --git a/fdts/tc2.dts b/fdts/tc2.dts
index 0f92294f7..fa16dcd2e 100644
--- a/fdts/tc2.dts
+++ b/fdts/tc2.dts
@@ -123,7 +123,6 @@
 			enable-method = "psci";
 			clocks = <&scmi_dvfs 1>;
 			capacity-dmips-mhz = <MID_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU9:cpu@900 {
@@ -133,7 +132,6 @@
 			enable-method = "psci";
 			clocks = <&scmi_dvfs 2>;
 			capacity-dmips-mhz = <BIG2_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU10:cpu@A00 {
@@ -143,7 +141,6 @@
 			enable-method = "psci";
 			clocks = <&scmi_dvfs 2>;
 			capacity-dmips-mhz = <BIG2_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU11:cpu@B00 {
@@ -153,7 +150,6 @@
 			enable-method = "psci";
 			clocks = <&scmi_dvfs 2>;
 			capacity-dmips-mhz = <BIG2_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU12:cpu@C00 {
@@ -163,7 +159,6 @@
 			enable-method = "psci";
 			clocks = <&scmi_dvfs 3>;
 			capacity-dmips-mhz = <BIG_CAPACITY>;
-			amu = <&amu>;
 		};
 
 		CPU13:cpu@D00 {
@@ -173,7 +168,6 @@
 			enable-method = "psci";
 			clocks = <&scmi_dvfs 3>;
 			capacity-dmips-mhz = <BIG_CAPACITY>;
-			amu = <&amu>;
 		};
 #endif
 	};
diff --git a/include/arch/aarch32/arch.h b/include/arch/aarch32/arch.h
index d2591ddb0..41be1a153 100644
--- a/include/arch/aarch32/arch.h
+++ b/include/arch/aarch32/arch.h
@@ -761,7 +761,7 @@
 
 /* AMCNTENSET0 definitions */
 #define AMCNTENSET0_Pn_SHIFT	U(0)
-#define AMCNTENSET0_Pn_MASK	U(0xffff)
+#define AMCNTENSET0_Pn_MASK	U(0xf)
 
 /* AMCNTENSET1 definitions */
 #define AMCNTENSET1_Pn_SHIFT	U(0)
@@ -769,7 +769,7 @@
 
 /* AMCNTENCLR0 definitions */
 #define AMCNTENCLR0_Pn_SHIFT	U(0)
-#define AMCNTENCLR0_Pn_MASK	U(0xffff)
+#define AMCNTENCLR0_Pn_MASK	U(0xf)
 
 /* AMCNTENCLR1 definitions */
 #define AMCNTENCLR1_Pn_SHIFT	U(0)
diff --git a/include/arch/aarch32/arch_features.h b/include/arch/aarch32/arch_features.h
index e34724010..e80faf2ce 100644
--- a/include/arch/aarch32/arch_features.h
+++ b/include/arch/aarch32/arch_features.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2025, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -85,6 +85,10 @@ CREATE_FEATURE_PRESENT(feat_ttcnp, id_mmfr4, ID_MMFR4_CNP_SHIFT,
 CREATE_FEATURE_FUNCS(feat_amu, id_pfr0, ID_PFR0_AMU_SHIFT,
 		    ID_PFR0_AMU_MASK, ID_PFR0_AMU_V1, ENABLE_FEAT_AMU)
 
+/* Auxiliary counters for FEAT_AMU */
+CREATE_FEATURE_FUNCS(feat_amu_aux, amcfgr, AMCFGR_NCG_SHIFT,
+		    AMCFGR_NCG_MASK, 1U, ENABLE_AMU_AUXILIARY_COUNTERS)
+
 /* FEAT_AMUV1P1: AMU Extension v1.1 */
 CREATE_FEATURE_FUNCS(feat_amuv1p1, id_pfr0, ID_PFR0_AMU_SHIFT,
 		    ID_PFR0_AMU_MASK, ID_PFR0_AMU_V1P1, ENABLE_FEAT_AMUv1p1)
diff --git a/include/arch/aarch32/arch_helpers.h b/include/arch/aarch32/arch_helpers.h
index adc96ae0f..4f80e3abd 100644
--- a/include/arch/aarch32/arch_helpers.h
+++ b/include/arch/aarch32/arch_helpers.h
@@ -324,6 +324,23 @@ DEFINE_COPROCR_RW_FUNCS_64(amevcntr01, AMEVCNTR01)
 DEFINE_COPROCR_RW_FUNCS_64(amevcntr02, AMEVCNTR02)
 DEFINE_COPROCR_RW_FUNCS_64(amevcntr03, AMEVCNTR03)
 
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr10, AMEVCNTR10);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr11, AMEVCNTR11);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr12, AMEVCNTR12);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr13, AMEVCNTR13);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr14, AMEVCNTR14);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr15, AMEVCNTR15);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr16, AMEVCNTR16);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr17, AMEVCNTR17);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr18, AMEVCNTR18);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr19, AMEVCNTR19);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr1a, AMEVCNTR1A);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr1b, AMEVCNTR1B);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr1c, AMEVCNTR1C);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr1d, AMEVCNTR1D);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr1e, AMEVCNTR1E);
+DEFINE_COPROCR_RW_FUNCS_64(amevcntr1f, AMEVCNTR1F);
+
 /*
  * TLBI operation prototypes
  */
diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h
index 1d0a2e0e6..a580213ee 100644
--- a/include/arch/aarch64/arch_features.h
+++ b/include/arch/aarch64/arch_features.h
@@ -311,6 +311,10 @@ CREATE_FEATURE_FUNCS(feat_gcs, id_aa64pfr1_el1, ID_AA64PFR1_EL1_GCS_SHIFT,
 CREATE_FEATURE_FUNCS(feat_amu, id_aa64pfr0_el1, ID_AA64PFR0_AMU_SHIFT,
 		     ID_AA64PFR0_AMU_MASK, 1U, ENABLE_FEAT_AMU)
 
+/* Auxiliary counters for FEAT_AMU */
+CREATE_FEATURE_FUNCS(feat_amu_aux, amcfgr_el0, AMCFGR_EL0_NCG_SHIFT,
+		     AMCFGR_EL0_NCG_MASK, 1U, ENABLE_AMU_AUXILIARY_COUNTERS)
+
 /* FEAT_AMUV1P1: AMU Extension v1.1 */
 CREATE_FEATURE_FUNCS(feat_amuv1p1, id_aa64pfr0_el1, ID_AA64PFR0_AMU_SHIFT,
 		     ID_AA64PFR0_AMU_MASK, ID_AA64PFR0_AMU_V1P1, ENABLE_FEAT_AMUv1p1)
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index abe379dbe..f85da972b 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -564,6 +564,27 @@ DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenclr0_el0, AMCNTENCLR0_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenset0_el0, AMCNTENSET0_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenclr1_el0, AMCNTENCLR1_EL0)
 DEFINE_RENAME_SYSREG_RW_FUNCS(amcntenset1_el0, AMCNTENSET1_EL0)
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr00_el0, AMEVCNTR00_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr01_el0, AMEVCNTR01_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr02_el0, AMEVCNTR02_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr03_el0, AMEVCNTR03_EL0);
+
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr10_el0, AMEVCNTR10_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr11_el0, AMEVCNTR11_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr12_el0, AMEVCNTR12_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr13_el0, AMEVCNTR13_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr14_el0, AMEVCNTR14_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr15_el0, AMEVCNTR15_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr16_el0, AMEVCNTR16_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr17_el0, AMEVCNTR17_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr18_el0, AMEVCNTR18_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr19_el0, AMEVCNTR19_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr1a_el0, AMEVCNTR1A_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr1b_el0, AMEVCNTR1B_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr1c_el0, AMEVCNTR1C_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr1d_el0, AMEVCNTR1D_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr1e_el0, AMEVCNTR1E_EL0);
+DEFINE_RENAME_SYSREG_RW_FUNCS(amevcntr1f_el0, AMEVCNTR1F_EL0);
 
 DEFINE_RENAME_SYSREG_RW_FUNCS(pmblimitr_el1, PMBLIMITR_EL1)
 
diff --git a/include/lib/el3_runtime/context_mgmt.h b/include/lib/el3_runtime/context_mgmt.h
index 70dbd463e..a48ed96b0 100644
--- a/include/lib/el3_runtime/context_mgmt.h
+++ b/include/lib/el3_runtime/context_mgmt.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -41,7 +41,7 @@ void cm_init_context_by_index(unsigned int cpu_idx,
 
 #ifdef __aarch64__
 #if IMAGE_BL31
-void cm_manage_extensions_el3(void);
+void cm_manage_extensions_el3(unsigned int my_idx);
 void manage_extensions_nonsecure_per_world(void);
 void cm_el3_arch_init_per_world(per_world_context_t *per_world_ctx);
 void cm_handle_asymmetric_features(void);
@@ -95,7 +95,7 @@ static inline void cm_set_next_context(void *context)
 #else
 void *cm_get_next_context(void);
 void cm_set_next_context(void *context);
-static inline void cm_manage_extensions_el3(void) {}
+static inline void cm_manage_extensions_el3(unsigned int cpu_idx) {}
 static inline void manage_extensions_nonsecure_per_world(void) {}
 static inline void cm_handle_asymmetric_features(void) {}
 #endif /* __aarch64__ */
diff --git a/include/lib/extensions/amu.h b/include/lib/extensions/amu.h
index a396b9918..402033161 100644
--- a/include/lib/extensions/amu.h
+++ b/include/lib/extensions/amu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -17,7 +17,7 @@
 #if ENABLE_FEAT_AMU
 #if __aarch64__
 void amu_enable(cpu_context_t *ctx);
-void amu_init_el3(void);
+void amu_init_el3(unsigned int core_pos);
 void amu_init_el2_unused(void);
 void amu_enable_per_world(per_world_context_t *per_world_ctx);
 #else
@@ -29,7 +29,7 @@ void amu_enable(bool el2_unused);
 void amu_enable(cpu_context_t *ctx)
 {
 }
-void amu_init_el3(void)
+void amu_init_el3(unsigned int core_pos)
 {
 }
 void amu_init_el2_unused(void)
@@ -45,28 +45,57 @@ static inline void amu_enable(bool el2_unused)
 #endif /*__aarch64__ */
 #endif /* ENABLE_FEAT_AMU */
 
+/*
+ * Per-core list of the counters to be enabled. Value will be written into
+ * AMCNTENSET1_EL0 verbatim.
+ */
 #if ENABLE_AMU_AUXILIARY_COUNTERS
-/*
- * AMU data for a single core.
- */
-struct amu_core {
-	uint16_t enable; /* Mask of auxiliary counters to enable */
-};
+extern uint16_t plat_amu_aux_enables[PLATFORM_CORE_COUNT];
+#endif
 
-/*
- * Topological platform data specific to the AMU.
- */
-struct amu_topology {
-	struct amu_core cores[PLATFORM_CORE_COUNT]; /* Per-core data */
-};
+#define CTX_AMU_GRP0_ALL		U(4)
+#define CTX_AMU_GRP1_ALL		U(16)
 
-#if !ENABLE_AMU_FCONF
-/*
- * Retrieve the platform's AMU topology. A `NULL` return value is treated as a
- * non-fatal error, in which case no auxiliary counters will be enabled.
- */
-const struct amu_topology *plat_amu_topology(void);
-#endif /* ENABLE_AMU_FCONF */
-#endif /* ENABLE_AMU_AUXILIARY_COUNTERS */
+typedef struct amu_regs {
+	u_register_t grp0[CTX_AMU_GRP0_ALL];
+#if ENABLE_AMU_AUXILIARY_COUNTERS
+	u_register_t grp1[CTX_AMU_GRP1_ALL];
+#endif
+} amu_regs_t;
+
+static inline u_register_t read_amu_grp0_ctx_reg(amu_regs_t *ctx, size_t index)
+{
+	return ctx->grp0[index];
+}
+
+static inline void write_amu_grp0_ctx_reg(amu_regs_t *ctx, size_t index, u_register_t val)
+{
+	ctx->grp0[index] = val;
+}
+
+static inline uint16_t get_amu_aux_enables(size_t index)
+{
+#if ENABLE_AMU_AUXILIARY_COUNTERS
+	return plat_amu_aux_enables[index];
+#else
+	return 0;
+#endif
+}
+
+static inline u_register_t read_amu_grp1_ctx_reg(amu_regs_t *ctx, size_t index)
+{
+#if ENABLE_AMU_AUXILIARY_COUNTERS
+	return ctx->grp1[index];
+#else
+	return 0;
+#endif
+}
+
+static inline void write_amu_grp1_ctx_reg(amu_regs_t *ctx, size_t index, u_register_t val)
+{
+#if ENABLE_AMU_AUXILIARY_COUNTERS
+	ctx->grp1[index] = val;
+#endif
+}
 
 #endif /* AMU_H */
diff --git a/include/lib/fconf/fconf_amu_getter.h b/include/lib/fconf/fconf_amu_getter.h
deleted file mode 100644
index 2faee73b4..000000000
--- a/include/lib/fconf/fconf_amu_getter.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (c) 2021, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef FCONF_AMU_GETTER_H
-#define FCONF_AMU_GETTER_H
-
-#include <lib/extensions/amu.h>
-
-#define amu__config_getter(id)	fconf_amu_config.id
-
-struct fconf_amu_config {
-	const struct amu_topology *topology;
-};
-
-extern struct fconf_amu_config fconf_amu_config;
-
-#endif /* FCONF_AMU_GETTER_H */
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index dfd14b69e..5bc933636 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -661,13 +661,13 @@ void cm_setup_context(cpu_context_t *ctx, const entry_point_info_t *ep)
 /*******************************************************************************
  * Enable architecture extensions for EL3 execution. This function only updates
  * registers in-place which are expected to either never change or be
- * overwritten by el3_exit.
+ * overwritten by el3_exit. Expects the core_pos of the current core as argument.
  ******************************************************************************/
 #if IMAGE_BL31
-void cm_manage_extensions_el3(void)
+void cm_manage_extensions_el3(unsigned int my_idx)
 {
 	if (is_feat_amu_supported()) {
-		amu_init_el3();
+		amu_init_el3(my_idx);
 	}
 
 	if (is_feat_sme_supported()) {
@@ -803,6 +803,7 @@ static void manage_extensions_secure_per_world(void)
 static void manage_extensions_nonsecure(cpu_context_t *ctx)
 {
 #if IMAGE_BL31
+	/* NOTE: registers are not context switched */
 	if (is_feat_amu_supported()) {
 		amu_enable(ctx);
 	}
diff --git a/lib/extensions/amu/aarch32/amu.c b/lib/extensions/amu/aarch32/amu.c
index 351a552dc..6faef879f 100644
--- a/lib/extensions/amu/aarch32/amu.c
+++ b/lib/extensions/amu/aarch32/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -8,7 +8,6 @@
 #include <cdefs.h>
 #include <stdbool.h>
 
-#include "../amu_private.h"
 #include <arch.h>
 #include <arch_features.h>
 #include <arch_helpers.h>
@@ -18,51 +17,7 @@
 
 #include <plat/common/platform.h>
 
-struct amu_ctx {
-	uint64_t group0_cnts[AMU_GROUP0_MAX_COUNTERS];
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint64_t group1_cnts[AMU_GROUP1_MAX_COUNTERS];
-#endif
-
-	uint16_t group0_enable;
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint16_t group1_enable;
-#endif
-};
-
-static struct amu_ctx amu_ctxs_[PLATFORM_CORE_COUNT];
-
-CASSERT((sizeof(amu_ctxs_[0].group0_enable) * CHAR_BIT) <= AMU_GROUP0_MAX_COUNTERS,
-	amu_ctx_group0_enable_cannot_represent_all_group0_counters);
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-CASSERT((sizeof(amu_ctxs_[0].group1_enable) * CHAR_BIT) <= AMU_GROUP1_MAX_COUNTERS,
-	amu_ctx_group1_enable_cannot_represent_all_group1_counters);
-#endif
-
-static inline __unused void write_hcptr_tam(uint32_t value)
-{
-	write_hcptr((read_hcptr() & ~TAM_BIT) |
-		((value << TAM_SHIFT) & TAM_BIT));
-}
-
-static inline __unused void write_amcr_cg1rz(uint32_t value)
-{
-	write_amcr((read_amcr() & ~AMCR_CG1RZ_BIT) |
-		((value << AMCR_CG1RZ_SHIFT) & AMCR_CG1RZ_BIT));
-}
-
-static inline __unused uint32_t read_amcfgr_ncg(void)
-{
-	return (read_amcfgr() >> AMCFGR_NCG_SHIFT) &
-		AMCFGR_NCG_MASK;
-}
-
-static inline __unused uint32_t read_amcgcr_cg0nc(void)
-{
-	return (read_amcgcr() >> AMCGCR_CG0NC_SHIFT) &
-		AMCGCR_CG0NC_MASK;
-}
+amu_regs_t amu_ctx[PLATFORM_CORE_COUNT];
 
 static inline __unused uint32_t read_amcgcr_cg1nc(void)
 {
@@ -70,134 +25,31 @@ static inline __unused uint32_t read_amcgcr_cg1nc(void)
 		AMCGCR_CG1NC_MASK;
 }
 
-static inline __unused uint32_t read_amcntenset0_px(void)
-{
-	return (read_amcntenset0() >> AMCNTENSET0_Pn_SHIFT) &
-		AMCNTENSET0_Pn_MASK;
-}
-
-static inline __unused uint32_t read_amcntenset1_px(void)
-{
-	return (read_amcntenset1() >> AMCNTENSET1_Pn_SHIFT) &
-		AMCNTENSET1_Pn_MASK;
-}
-
-static inline __unused void write_amcntenset0_px(uint32_t px)
-{
-	uint32_t value = read_amcntenset0();
-
-	value &= ~AMCNTENSET0_Pn_MASK;
-	value |= (px << AMCNTENSET0_Pn_SHIFT) &
-		AMCNTENSET0_Pn_MASK;
-
-	write_amcntenset0(value);
-}
-
-static inline __unused void write_amcntenset1_px(uint32_t px)
-{
-	uint32_t value = read_amcntenset1();
-
-	value &= ~AMCNTENSET1_Pn_MASK;
-	value |= (px << AMCNTENSET1_Pn_SHIFT) &
-		AMCNTENSET1_Pn_MASK;
-
-	write_amcntenset1(value);
-}
-
-static inline __unused void write_amcntenclr0_px(uint32_t px)
-{
-	uint32_t value = read_amcntenclr0();
-
-	value &= ~AMCNTENCLR0_Pn_MASK;
-	value |= (px << AMCNTENCLR0_Pn_SHIFT) & AMCNTENCLR0_Pn_MASK;
-
-	write_amcntenclr0(value);
-}
-
-static inline __unused void write_amcntenclr1_px(uint32_t px)
-{
-	uint32_t value = read_amcntenclr1();
-
-	value &= ~AMCNTENCLR1_Pn_MASK;
-	value |= (px << AMCNTENCLR1_Pn_SHIFT) & AMCNTENCLR1_Pn_MASK;
-
-	write_amcntenclr1(value);
-}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-static __unused bool amu_group1_supported(void)
-{
-	return read_amcfgr_ncg() > 0U;
-}
-#endif
-
 /*
  * Enable counters. This function is meant to be invoked by the context
  * management library before exiting from EL3.
  */
 void amu_enable(bool el2_unused)
 {
-	uint32_t amcfgr_ncg;		/* Number of counter groups */
-	uint32_t amcgcr_cg0nc;		/* Number of group 0 counters */
-
-	uint32_t amcntenset0_px = 0x0;	/* Group 0 enable mask */
-	uint32_t amcntenset1_px = 0x0;	/* Group 1 enable mask */
-
 	if (el2_unused) {
 		/*
 		 * HCPTR.TAM: Set to zero so any accesses to the Activity
 		 * Monitor registers do not trap to EL2.
 		 */
-		write_hcptr_tam(0U);
+		write_hcptr(read_hcptr() & ~TAM_BIT);
 	}
 
-	/*
-	 * Retrieve the number of architected counters. All of these counters
-	 * are enabled by default.
-	 */
+	/* Architecture is currently pinned to 4 */
+	assert((read_amcgcr() & AMCGCR_CG0NC_MASK) == CTX_AMU_GRP0_ALL);
 
-	amcgcr_cg0nc = read_amcgcr_cg0nc();
-	amcntenset0_px = (UINT32_C(1) << (amcgcr_cg0nc)) - 1U;
-
-	assert(amcgcr_cg0nc <= AMU_AMCGCR_CG0NC_MAX);
-
-	/*
-	 * The platform may opt to enable specific auxiliary counters. This can
-	 * be done via the common FCONF getter, or via the platform-implemented
-	 * function.
-	 */
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	const struct amu_topology *topology;
-
-#if ENABLE_AMU_FCONF
-	topology = FCONF_GET_PROPERTY(amu, config, topology);
-#else
-	topology = plat_amu_topology();
-#endif /* ENABLE_AMU_FCONF */
-
-	if (topology != NULL) {
+	/* Enable all architected counters by default */
+	write_amcntenset0(AMCNTENSET0_Pn_MASK);
+	if (is_feat_amu_aux_supported()) {
 		unsigned int core_pos = plat_my_core_pos();
 
-		amcntenset1_el0_px = topology->cores[core_pos].enable;
-	} else {
-		ERROR("AMU: failed to generate AMU topology\n");
-	}
-#endif /* ENABLE_AMU_AUXILIARY_COUNTERS */
-
-	/*
-	 * Enable the requested counters.
-	 */
-
-	write_amcntenset0_px(amcntenset0_px);
-
-	amcfgr_ncg = read_amcfgr_ncg();
-	if (amcfgr_ncg > 0U) {
-		write_amcntenset1_px(amcntenset1_px);
-
-#if !ENABLE_AMU_AUXILIARY_COUNTERS
-		VERBOSE("AMU: auxiliary counters detected but support is disabled\n");
-#endif
+		/* Something went wrong if we're trying to write higher bits */
+		assert((get_amu_aux_enables(core_pos) & ~AMCNTENSET1_Pn_MASK) == 0);
+		write_amcntenset1(get_amu_aux_enables(core_pos));
 	}
 
 	/* Bail out if FEAT_AMUv1p1 features are not present. */
@@ -214,180 +66,177 @@ void amu_enable(bool el2_unused)
 	 * mapped view are unaffected.
 	 */
 	VERBOSE("AMU group 1 counter access restricted.\n");
-	write_amcr_cg1rz(1U);
+	write_amcr(read_amcr() | 1U);
 #else
-	write_amcr_cg1rz(0U);
+	write_amcr(0);
 #endif
 }
 
-/* Read the group 0 counter identified by the given `idx`. */
-static uint64_t amu_group0_cnt_read(unsigned int idx)
-{
-	assert(is_feat_amu_supported());
-	assert(idx < read_amcgcr_cg0nc());
-
-	return amu_group0_cnt_read_internal(idx);
-}
-
-/* Write the group 0 counter identified by the given `idx` with `val` */
-static void amu_group0_cnt_write(unsigned  int idx, uint64_t val)
-{
-	assert(is_feat_amu_supported());
-	assert(idx < read_amcgcr_cg0nc());
-
-	amu_group0_cnt_write_internal(idx, val);
-	isb();
-}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-/* Read the group 1 counter identified by the given `idx` */
-static uint64_t amu_group1_cnt_read(unsigned  int idx)
-{
-	assert(is_feat_amu_supported());
-	assert(amu_group1_supported());
-	assert(idx < read_amcgcr_cg1nc());
-
-	return amu_group1_cnt_read_internal(idx);
-}
-
-/* Write the group 1 counter identified by the given `idx` with `val` */
-static void amu_group1_cnt_write(unsigned  int idx, uint64_t val)
-{
-	assert(is_feat_amu_supported());
-	assert(amu_group1_supported());
-	assert(idx < read_amcgcr_cg1nc());
-
-	amu_group1_cnt_write_internal(idx, val);
-	isb();
-}
-#endif
-
 static void *amu_context_save(const void *arg)
 {
-	uint32_t i;
-
-	unsigned int core_pos;
-	struct amu_ctx *ctx;
-
-	uint32_t amcgcr_cg0nc;	/* Number of group 0 counters */
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint32_t amcfgr_ncg;	/* Number of counter groups */
-	uint32_t amcgcr_cg1nc;	/* Number of group 1 counters */
-#endif
-
 	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
-	core_pos = plat_my_core_pos();
-	ctx = &amu_ctxs_[core_pos];
+	unsigned int core_pos = *(unsigned int *)arg;
+	amu_regs_t *ctx = &amu_ctx[core_pos];
 
-	amcgcr_cg0nc = read_amcgcr_cg0nc();
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	amcfgr_ncg = read_amcfgr_ncg();
-	amcgcr_cg1nc = (amcfgr_ncg > 0U) ? read_amcgcr_cg1nc() : 0U;
-#endif
-
-	/*
-	 * Disable all AMU counters.
-	 */
-
-	ctx->group0_enable = read_amcntenset0_px();
-	write_amcntenclr0_px(ctx->group0_enable);
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	if (amcfgr_ncg > 0U) {
-		ctx->group1_enable = read_amcntenset1_px();
-		write_amcntenclr1_px(ctx->group1_enable);
+	/* Disable all counters so we can write to them safely later */
+	write_amcntenclr0(AMCNTENCLR0_Pn_MASK);
+	if (is_feat_amu_aux_supported()) {
+		write_amcntenclr1(get_amu_aux_enables(core_pos));
 	}
-#endif
-
-	/*
-	 * Save the counters to the local context.
-	 */
 
 	isb(); /* Ensure counters have been stopped */
 
-	for (i = 0U; i < amcgcr_cg0nc; i++) {
-		ctx->group0_cnts[i] = amu_group0_cnt_read(i);
-	}
+	write_amu_grp0_ctx_reg(ctx, 0, read64_amevcntr00());
+	write_amu_grp0_ctx_reg(ctx, 1, read64_amevcntr01());
+	write_amu_grp0_ctx_reg(ctx, 2, read64_amevcntr02());
+	write_amu_grp0_ctx_reg(ctx, 3, read64_amevcntr03());
 
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	for (i = 0U; i < amcgcr_cg1nc; i++) {
-		ctx->group1_cnts[i] = amu_group1_cnt_read(i);
+	if (is_feat_amu_aux_supported()) {
+		uint8_t num_counters = read_amcgcr_cg1nc();
+
+		switch (num_counters) {
+		case 0x10:
+			write_amu_grp1_ctx_reg(ctx, 0xf, read64_amevcntr1f());
+			__fallthrough;
+		case 0x0f:
+			write_amu_grp1_ctx_reg(ctx, 0xe, read64_amevcntr1e());
+			__fallthrough;
+		case 0x0e:
+			write_amu_grp1_ctx_reg(ctx, 0xd, read64_amevcntr1d());
+			__fallthrough;
+		case 0x0d:
+			write_amu_grp1_ctx_reg(ctx, 0xc, read64_amevcntr1c());
+			__fallthrough;
+		case 0x0c:
+			write_amu_grp1_ctx_reg(ctx, 0xb, read64_amevcntr1b());
+			__fallthrough;
+		case 0x0b:
+			write_amu_grp1_ctx_reg(ctx, 0xa, read64_amevcntr1a());
+			__fallthrough;
+		case 0x0a:
+			write_amu_grp1_ctx_reg(ctx, 0x9, read64_amevcntr19());
+			__fallthrough;
+		case 0x09:
+			write_amu_grp1_ctx_reg(ctx, 0x8, read64_amevcntr18());
+			__fallthrough;
+		case 0x08:
+			write_amu_grp1_ctx_reg(ctx, 0x7, read64_amevcntr17());
+			__fallthrough;
+		case 0x07:
+			write_amu_grp1_ctx_reg(ctx, 0x6, read64_amevcntr16());
+			__fallthrough;
+		case 0x06:
+			write_amu_grp1_ctx_reg(ctx, 0x5, read64_amevcntr15());
+			__fallthrough;
+		case 0x05:
+			write_amu_grp1_ctx_reg(ctx, 0x4, read64_amevcntr14());
+			__fallthrough;
+		case 0x04:
+			write_amu_grp1_ctx_reg(ctx, 0x3, read64_amevcntr13());
+			__fallthrough;
+		case 0x03:
+			write_amu_grp1_ctx_reg(ctx, 0x2, read64_amevcntr12());
+			__fallthrough;
+		case 0x02:
+			write_amu_grp1_ctx_reg(ctx, 0x1, read64_amevcntr11());
+			__fallthrough;
+		case 0x01:
+			write_amu_grp1_ctx_reg(ctx, 0x0, read64_amevcntr10());
+			__fallthrough;
+		case 0x00:
+			break;
+		default:
+			assert(0); /* something is wrong */
+		}
 	}
-#endif
 
 	return (void *)0;
 }
 
 static void *amu_context_restore(const void *arg)
 {
-	uint32_t i;
-
-	unsigned int core_pos;
-	struct amu_ctx *ctx;
-
-	uint32_t amcfgr_ncg;	/* Number of counter groups */
-	uint32_t amcgcr_cg0nc;	/* Number of group 0 counters */
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint32_t amcgcr_cg1nc;	/* Number of group 1 counters */
-#endif
-
 	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
-	core_pos = plat_my_core_pos();
-	ctx = &amu_ctxs_[core_pos];
+	unsigned int core_pos = *(unsigned int *)arg;
+	amu_regs_t *ctx = &amu_ctx[core_pos];
 
-	amcfgr_ncg = read_amcfgr_ncg();
-	amcgcr_cg0nc = read_amcgcr_cg0nc();
+	write64_amevcntr00(read_amu_grp0_ctx_reg(ctx, 0));
+	write64_amevcntr01(read_amu_grp0_ctx_reg(ctx, 1));
+	write64_amevcntr02(read_amu_grp0_ctx_reg(ctx, 2));
+	write64_amevcntr03(read_amu_grp0_ctx_reg(ctx, 3));
 
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	amcgcr_cg1nc = (amcfgr_ncg > 0U) ? read_amcgcr_cg1nc() : 0U;
-#endif
+	if (is_feat_amu_aux_supported()) {
+		uint8_t num_counters = read_amcgcr_cg1nc();
 
-	/*
-	 * Sanity check that all counters were disabled when the context was
-	 * previously saved.
-	 */
-
-	assert(read_amcntenset0_px() == 0U);
-
-	if (amcfgr_ncg > 0U) {
-		assert(read_amcntenset1_px() == 0U);
+		switch (num_counters) {
+		case 0x10:
+			write64_amevcntr1f(read_amu_grp1_ctx_reg(ctx, 0xf));
+			__fallthrough;
+		case 0x0f:
+			write64_amevcntr1e(read_amu_grp1_ctx_reg(ctx, 0xe));
+			__fallthrough;
+		case 0x0e:
+			write64_amevcntr1d(read_amu_grp1_ctx_reg(ctx, 0xd));
+			__fallthrough;
+		case 0x0d:
+			write64_amevcntr1c(read_amu_grp1_ctx_reg(ctx, 0xc));
+			__fallthrough;
+		case 0x0c:
+			write64_amevcntr1b(read_amu_grp1_ctx_reg(ctx, 0xb));
+			__fallthrough;
+		case 0x0b:
+			write64_amevcntr1a(read_amu_grp1_ctx_reg(ctx, 0xa));
+			__fallthrough;
+		case 0x0a:
+			write64_amevcntr19(read_amu_grp1_ctx_reg(ctx, 0x9));
+			__fallthrough;
+		case 0x09:
+			write64_amevcntr18(read_amu_grp1_ctx_reg(ctx, 0x8));
+			__fallthrough;
+		case 0x08:
+			write64_amevcntr17(read_amu_grp1_ctx_reg(ctx, 0x7));
+			__fallthrough;
+		case 0x07:
+			write64_amevcntr16(read_amu_grp1_ctx_reg(ctx, 0x6));
+			__fallthrough;
+		case 0x06:
+			write64_amevcntr15(read_amu_grp1_ctx_reg(ctx, 0x5));
+			__fallthrough;
+		case 0x05:
+			write64_amevcntr14(read_amu_grp1_ctx_reg(ctx, 0x4));
+			__fallthrough;
+		case 0x04:
+			write64_amevcntr13(read_amu_grp1_ctx_reg(ctx, 0x3));
+			__fallthrough;
+		case 0x03:
+			write64_amevcntr12(read_amu_grp1_ctx_reg(ctx, 0x2));
+			__fallthrough;
+		case 0x02:
+			write64_amevcntr11(read_amu_grp1_ctx_reg(ctx, 0x1));
+			__fallthrough;
+		case 0x01:
+			write64_amevcntr10(read_amu_grp1_ctx_reg(ctx, 0x0));
+			__fallthrough;
+		case 0x00:
+			break;
+		default:
+			assert(0); /* something is wrong */
+		}
 	}
 
-	/*
-	 * Restore the counter values from the local context.
-	 */
 
-	for (i = 0U; i < amcgcr_cg0nc; i++) {
-		amu_group0_cnt_write(i, ctx->group0_cnts[i]);
+	/* now enable them again */
+	write_amcntenset0(AMCNTENSET0_Pn_MASK);
+	if (is_feat_amu_aux_supported()) {
+		write_amcntenset1(get_amu_aux_enables(core_pos));
 	}
 
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	for (i = 0U; i < amcgcr_cg1nc; i++) {
-		amu_group1_cnt_write(i, ctx->group1_cnts[i]);
-	}
-#endif
-
-	/*
-	 * Re-enable counters that were disabled during context save.
-	 */
-
-	write_amcntenset0_px(ctx->group0_enable);
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	if (amcfgr_ncg > 0U) {
-		write_amcntenset1_px(ctx->group1_enable);
-	}
-#endif
-
+	isb();
 	return (void *)0;
 }
 
diff --git a/lib/extensions/amu/aarch32/amu_helpers.S b/lib/extensions/amu/aarch32/amu_helpers.S
deleted file mode 100644
index 7090b2d50..000000000
--- a/lib/extensions/amu/aarch32/amu_helpers.S
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (c) 2021, Arm Limited and Contributors. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <arch.h>
-#include <assert_macros.S>
-#include <asm_macros.S>
-
-	.globl	amu_group0_cnt_read_internal
-	.globl	amu_group0_cnt_write_internal
-	.globl	amu_group1_cnt_read_internal
-	.globl	amu_group1_cnt_write_internal
-	.globl	amu_group1_set_evtype_internal
-
-/*
- * uint64_t amu_group0_cnt_read_internal(int idx);
- *
- * Given `idx`, read the corresponding AMU counter
- * and return it in `r0` and `r1`.
- */
-func amu_group0_cnt_read_internal
-#if ENABLE_ASSERTIONS
-	/* `idx` should be between [0, 3] */
-	mov	r1, r0
-	lsr	r1, r1, #2
-	cmp	r1, #0
-	ASM_ASSERT(eq)
-#endif
-
-	/*
-	 * Given `idx` calculate address of ldcopr16/bx lr instruction pair
-	 * in the table below.
-	 */
-	adr	r1, 1f
-	lsl	r0, r0, #3	/* each ldcopr16/bx lr sequence is 8 bytes */
-	add	r1, r1, r0
-	bx	r1
-1:
-	ldcopr16	r0, r1, AMEVCNTR00	/* index 0 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR01	/* index 1 */
-	bx 		lr
-	ldcopr16	r0, r1, AMEVCNTR02	/* index 2 */
-	bx 		lr
-	ldcopr16	r0, r1, AMEVCNTR03	/* index 3 */
-	bx 		lr
-endfunc amu_group0_cnt_read_internal
-
-/*
- * void amu_group0_cnt_write_internal(int idx, uint64_t val);
- *
- * Given `idx`, write `val` to the corresponding AMU counter.
- * `idx` is passed in `r0` and `val` is passed in `r2` and `r3`.
- * `r1` is used as a scratch register.
- */
-func amu_group0_cnt_write_internal
-#if ENABLE_ASSERTIONS
-	/* `idx` should be between [0, 3] */
-	mov	r1, r0
-	lsr	r1, r1, #2
-	cmp	r1, #0
-	ASM_ASSERT(eq)
-#endif
-
-	/*
-	 * Given `idx` calculate address of stcopr16/bx lr instruction pair
-	 * in the table below.
-	 */
-	adr	r1, 1f
-	lsl	r0, r0, #3	/* each stcopr16/bx lr sequence is 8 bytes */
-	add	r1, r1, r0
-	bx	r1
-
-1:
-	stcopr16	r2, r3, AMEVCNTR00	/* index 0 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR01	/* index 1 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR02	/* index 2 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR03	/* index 3 */
-	bx		lr
-endfunc amu_group0_cnt_write_internal
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-/*
- * uint64_t amu_group1_cnt_read_internal(int idx);
- *
- * Given `idx`, read the corresponding AMU counter
- * and return it in `r0` and `r1`.
- */
-func amu_group1_cnt_read_internal
-#if ENABLE_ASSERTIONS
-	/* `idx` should be between [0, 15] */
-	mov	r1, r0
-	lsr	r1, r1, #4
-	cmp	r1, #0
-	ASM_ASSERT(eq)
-#endif
-
-	/*
-	 * Given `idx` calculate address of ldcopr16/bx lr instruction pair
-	 * in the table below.
-	 */
-	adr	r1, 1f
-	lsl	r0, r0, #3	/* each ldcopr16/bx lr sequence is 8 bytes */
-	add	r1, r1, r0
-	bx	r1
-
-1:
-	ldcopr16	r0, r1, AMEVCNTR10	/* index 0 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR11	/* index 1 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR12	/* index 2 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR13	/* index 3 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR14	/* index 4 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR15	/* index 5 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR16	/* index 6 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR17	/* index 7 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR18	/* index 8 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR19	/* index 9 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR1A	/* index 10 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR1B	/* index 11 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR1C	/* index 12 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR1D	/* index 13 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR1E	/* index 14 */
-	bx		lr
-	ldcopr16	r0, r1, AMEVCNTR1F	/* index 15 */
-	bx		lr
-endfunc amu_group1_cnt_read_internal
-
-/*
- * void amu_group1_cnt_write_internal(int idx, uint64_t val);
- *
- * Given `idx`, write `val` to the corresponding AMU counter.
- * `idx` is passed in `r0` and `val` is passed in `r2` and `r3`.
- * `r1` is used as a scratch register.
- */
-func amu_group1_cnt_write_internal
-#if ENABLE_ASSERTIONS
-	/* `idx` should be between [0, 15] */
-	mov	r1, r0
-	lsr	r1, r1, #4
-	cmp	r1, #0
-	ASM_ASSERT(eq)
-#endif
-
-	/*
-	 * Given `idx` calculate address of ldcopr16/bx lr instruction pair
-	 * in the table below.
-	 */
-	adr	r1, 1f
-	lsl	r0, r0, #3	/* each stcopr16/bx lr sequence is 8 bytes */
-	add	r1, r1, r0
-	bx	r1
-
-1:
-	stcopr16	r2, r3, AMEVCNTR10	/* index 0 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR11	/* index 1 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR12	/* index 2 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR13	/* index 3 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR14	/* index 4 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR15	/* index 5 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR16	/* index 6 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR17	/* index 7 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR18	/* index 8 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR19	/* index 9 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR1A	/* index 10 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR1B	/* index 11 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR1C	/* index 12 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR1D	/* index 13 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR1E	/* index 14 */
-	bx		lr
-	stcopr16	r2, r3, AMEVCNTR1F	/* index 15 */
-	bx		lr
-endfunc amu_group1_cnt_write_internal
-
-/*
- * void amu_group1_set_evtype_internal(int idx, unsigned int val);
- *
- * Program the AMU event type register indexed by `idx`
- * with the value `val`.
- */
-func amu_group1_set_evtype_internal
-#if ENABLE_ASSERTIONS
-	/* `idx` should be between [0, 15] */
-	mov	r2, r0
-	lsr	r2, r2, #4
-	cmp	r2, #0
-	ASM_ASSERT(eq)
-
-	/* val should be between [0, 65535] */
-	mov	r2, r1
-	lsr	r2, r2, #16
-	cmp	r2, #0
-	ASM_ASSERT(eq)
-#endif
-
-	/*
-	 * Given `idx` calculate address of stcopr/bx lr instruction pair
-	 * in the table below.
-	 */
-	adr	r2, 1f
-	lsl	r0, r0, #3	/* each stcopr/bx lr sequence is 8 bytes */
-	add	r2, r2, r0
-	bx	r2
-
-1:
-	stcopr	r1, AMEVTYPER10 /* index 0 */
-	bx	lr
-	stcopr	r1, AMEVTYPER11 /* index 1 */
-	bx	lr
-	stcopr	r1, AMEVTYPER12 /* index 2 */
-	bx	lr
-	stcopr	r1, AMEVTYPER13 /* index 3 */
-	bx	lr
-	stcopr	r1, AMEVTYPER14 /* index 4 */
-	bx	lr
-	stcopr	r1, AMEVTYPER15 /* index 5 */
-	bx	lr
-	stcopr	r1, AMEVTYPER16 /* index 6 */
-	bx	lr
-	stcopr	r1, AMEVTYPER17 /* index 7 */
-	bx	lr
-	stcopr	r1, AMEVTYPER18 /* index 8 */
-	bx	lr
-	stcopr	r1, AMEVTYPER19 /* index 9 */
-	bx	lr
-	stcopr	r1, AMEVTYPER1A /* index 10 */
-	bx	lr
-	stcopr	r1, AMEVTYPER1B /* index 11 */
-	bx	lr
-	stcopr	r1, AMEVTYPER1C /* index 12 */
-	bx	lr
-	stcopr	r1, AMEVTYPER1D /* index 13 */
-	bx	lr
-	stcopr	r1, AMEVTYPER1E /* index 14 */
-	bx	lr
-	stcopr	r1, AMEVTYPER1F /* index 15 */
-	bx	lr
-endfunc amu_group1_set_evtype_internal
-#endif
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index a7898eff0..5f23c07df 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,179 +10,37 @@
 #include <stdbool.h>
 #include <stdint.h>
 
-#include "../amu_private.h"
 #include <arch.h>
 #include <arch_features.h>
 #include <arch_helpers.h>
 #include <common/debug.h>
 #include <lib/el3_runtime/pubsub_events.h>
 #include <lib/extensions/amu.h>
+#include <lib/utils_def.h>
+#include <platform_def.h>
 
-#include <plat/common/platform.h>
+amu_regs_t amu_ctx[PLATFORM_CORE_COUNT];
 
-#if ENABLE_AMU_FCONF
-#	include <lib/fconf/fconf.h>
-#	include <lib/fconf/fconf_amu_getter.h>
-#endif
-
-struct amu_ctx {
-	uint64_t group0_cnts[AMU_GROUP0_MAX_COUNTERS];
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint64_t group1_cnts[AMU_GROUP1_MAX_COUNTERS];
-#endif
-
-	/* Architected event counter 1 does not have an offset register */
-	uint64_t group0_voffsets[AMU_GROUP0_MAX_COUNTERS - 1U];
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint64_t group1_voffsets[AMU_GROUP1_MAX_COUNTERS];
-#endif
-
-	uint16_t group0_enable;
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint16_t group1_enable;
-#endif
-};
-
-static struct amu_ctx amu_ctxs_[PLATFORM_CORE_COUNT];
-
-CASSERT((sizeof(amu_ctxs_[0].group0_enable) * CHAR_BIT) <= AMU_GROUP0_MAX_COUNTERS,
-	amu_ctx_group0_enable_cannot_represent_all_group0_counters);
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-CASSERT((sizeof(amu_ctxs_[0].group1_enable) * CHAR_BIT) <= AMU_GROUP1_MAX_COUNTERS,
-	amu_ctx_group1_enable_cannot_represent_all_group1_counters);
-#endif
-
-static inline __unused uint64_t read_hcr_el2_amvoffen(void)
-{
-	return (read_hcr_el2() & HCR_AMVOFFEN_BIT) >>
-		HCR_AMVOFFEN_SHIFT;
-}
-
-static inline __unused void write_cptr_el2_tam(uint64_t value)
-{
-	write_cptr_el2((read_cptr_el2() & ~CPTR_EL2_TAM_BIT) |
-		((value << CPTR_EL2_TAM_SHIFT) & CPTR_EL2_TAM_BIT));
-}
-
-static inline __unused void ctx_write_scr_el3_amvoffen(cpu_context_t *ctx, uint64_t amvoffen)
-{
-	uint64_t value = read_ctx_reg(get_el3state_ctx(ctx), CTX_SCR_EL3);
-
-	value &= ~SCR_AMVOFFEN_BIT;
-	value |= (amvoffen << SCR_AMVOFFEN_SHIFT) & SCR_AMVOFFEN_BIT;
-
-	write_ctx_reg(get_el3state_ctx(ctx), CTX_SCR_EL3, value);
-}
-
-static inline __unused void write_hcr_el2_amvoffen(uint64_t value)
-{
-	write_hcr_el2((read_hcr_el2() & ~HCR_AMVOFFEN_BIT) |
-		((value << HCR_AMVOFFEN_SHIFT) & HCR_AMVOFFEN_BIT));
-}
-
-static inline __unused void write_amcr_el0_cg1rz(uint64_t value)
-{
-	write_amcr_el0((read_amcr_el0() & ~AMCR_CG1RZ_BIT) |
-		((value << AMCR_CG1RZ_SHIFT) & AMCR_CG1RZ_BIT));
-}
-
-static inline __unused uint64_t read_amcfgr_el0_ncg(void)
-{
-	return (read_amcfgr_el0() >> AMCFGR_EL0_NCG_SHIFT) &
-		AMCFGR_EL0_NCG_MASK;
-}
-
-static inline __unused uint64_t read_amcgcr_el0_cg0nc(void)
-{
-	return (read_amcgcr_el0() >> AMCGCR_EL0_CG0NC_SHIFT) &
-		AMCGCR_EL0_CG0NC_MASK;
-}
-
-static inline __unused uint64_t read_amcg1idr_el0_voff(void)
-{
-	return (read_amcg1idr_el0() >> AMCG1IDR_VOFF_SHIFT) &
-		AMCG1IDR_VOFF_MASK;
-}
-
-static inline __unused uint64_t read_amcgcr_el0_cg1nc(void)
+static inline uint8_t read_amcgcr_el0_cg1nc(void)
 {
 	return (read_amcgcr_el0() >> AMCGCR_EL0_CG1NC_SHIFT) &
 		AMCGCR_EL0_CG1NC_MASK;
 }
 
-static inline __unused uint64_t read_amcntenset0_el0_px(void)
-{
-	return (read_amcntenset0_el0() >> AMCNTENSET0_EL0_Pn_SHIFT) &
-		AMCNTENSET0_EL0_Pn_MASK;
-}
-
-static inline __unused uint64_t read_amcntenset1_el0_px(void)
-{
-	return (read_amcntenset1_el0() >> AMCNTENSET1_EL0_Pn_SHIFT) &
-		AMCNTENSET1_EL0_Pn_MASK;
-}
-
-static inline __unused void write_amcntenset0_el0_px(uint64_t px)
-{
-	uint64_t value = read_amcntenset0_el0();
-
-	value &= ~AMCNTENSET0_EL0_Pn_MASK;
-	value |= (px << AMCNTENSET0_EL0_Pn_SHIFT) & AMCNTENSET0_EL0_Pn_MASK;
-
-	write_amcntenset0_el0(value);
-}
-
-static inline __unused void write_amcntenset1_el0_px(uint64_t px)
-{
-	uint64_t value = read_amcntenset1_el0();
-
-	value &= ~AMCNTENSET1_EL0_Pn_MASK;
-	value |= (px << AMCNTENSET1_EL0_Pn_SHIFT) & AMCNTENSET1_EL0_Pn_MASK;
-
-	write_amcntenset1_el0(value);
-}
-
-static inline __unused void write_amcntenclr0_el0_px(uint64_t px)
-{
-	uint64_t value = read_amcntenclr0_el0();
-
-	value &= ~AMCNTENCLR0_EL0_Pn_MASK;
-	value |= (px << AMCNTENCLR0_EL0_Pn_SHIFT) & AMCNTENCLR0_EL0_Pn_MASK;
-
-	write_amcntenclr0_el0(value);
-}
-
-static inline __unused void write_amcntenclr1_el0_px(uint64_t px)
-{
-	uint64_t value = read_amcntenclr1_el0();
-
-	value &= ~AMCNTENCLR1_EL0_Pn_MASK;
-	value |= (px << AMCNTENCLR1_EL0_Pn_SHIFT) & AMCNTENCLR1_EL0_Pn_MASK;
-
-	write_amcntenclr1_el0(value);
-}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-static __unused bool amu_group1_supported(void)
-{
-	return read_amcfgr_el0_ncg() > 0U;
-}
-#endif
-
-/*
- * Enable counters. This function is meant to be invoked by the context
- * management library before exiting from EL3.
- */
 void amu_enable(cpu_context_t *ctx)
 {
 	/* Initialize FEAT_AMUv1p1 features if present. */
 	if (is_feat_amuv1p1_supported()) {
+		el3_state_t *state = get_el3state_ctx(ctx);
+		u_register_t reg;
+
 		/*
 		 * Set SCR_EL3.AMVOFFEN to one so that accesses to virtual
 		 * offset registers at EL2 do not trap to EL3
 		 */
-		ctx_write_scr_el3_amvoffen(ctx, 1U);
+		reg = read_ctx_reg(state, CTX_SCR_EL3);
+		reg |= SCR_AMVOFFEN_BIT;
+		write_ctx_reg(state, CTX_SCR_EL3, reg);
 	}
 }
 
@@ -198,46 +56,18 @@ void amu_enable_per_world(per_world_context_t *per_world_ctx)
 	per_world_ctx->ctx_cptr_el3 = cptr_el3;
 }
 
-void amu_init_el3(void)
+void amu_init_el3(unsigned int core_pos)
 {
-	uint64_t group0_impl_ctr = read_amcgcr_el0_cg0nc();
-	uint64_t group0_en_mask = (1 << (group0_impl_ctr)) - 1U;
-	uint64_t num_ctr_groups = read_amcfgr_el0_ncg();
+	/* architecture is currently pinned to 4 */
+	assert((read_amcgcr_el0() & AMCGCR_EL0_CG0NC_MASK) == CTX_AMU_GRP0_ALL);
 
 	/* Enable all architected counters by default */
-	write_amcntenset0_el0_px(group0_en_mask);
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	if (num_ctr_groups > 0U) {
-		uint64_t amcntenset1_el0_px = 0x0; /* Group 1 enable mask */
-		const struct amu_topology *topology;
-
-		/*
-		 * The platform may opt to enable specific auxiliary counters.
-		 * This can be done via the common FCONF getter, or via the
-		 * platform-implemented function.
-		 */
-#if ENABLE_AMU_FCONF
-		topology = FCONF_GET_PROPERTY(amu, config, topology);
-#else
-		topology = plat_amu_topology();
-#endif /* ENABLE_AMU_FCONF */
-
-		if (topology != NULL) {
-			unsigned int core_pos = plat_my_core_pos();
-
-			amcntenset1_el0_px = topology->cores[core_pos].enable;
-		} else {
-			ERROR("AMU: failed to generate AMU topology\n");
-		}
-
-		write_amcntenset1_el0_px(amcntenset1_el0_px);
+	write_amcntenset0_el0(AMCNTENSET0_EL0_Pn_MASK);
+	if (is_feat_amu_aux_supported()) {
+		/* something went wrong if we're trying to write higher bits */
+		assert((get_amu_aux_enables(core_pos) & ~AMCNTENSET1_EL0_Pn_MASK) == 0);
+		write_amcntenset1_el0(get_amu_aux_enables(core_pos));
 	}
-#else /* ENABLE_AMU_AUXILIARY_COUNTERS */
-	if (num_ctr_groups > 0U) {
-		VERBOSE("AMU: auxiliary counters detected but support is disabled\n");
-	}
-#endif /* ENABLE_AMU_AUXILIARY_COUNTERS */
 
 	if (is_feat_amuv1p1_supported()) {
 #if AMU_RESTRICT_COUNTERS
@@ -249,9 +79,10 @@ void amu_init_el3(void)
 		 * zero. Reads from the memory mapped view are unaffected.
 		 */
 		VERBOSE("AMU group 1 counter access restricted.\n");
-		write_amcr_el0_cg1rz(1U);
+		write_amcr_el0(AMCR_CG1RZ_BIT);
 #else
-		write_amcr_el0_cg1rz(0U);
+		/* HDBG = 0 in both cases */
+		write_amcr_el0(0);
 #endif
 	}
 }
@@ -262,230 +93,93 @@ void amu_init_el2_unused(void)
 	 * CPTR_EL2.TAM: Set to zero so any accesses to the Activity Monitor
 	 *  registers do not trap to EL2.
 	 */
-	write_cptr_el2_tam(0U);
+	write_cptr_el2(read_cptr_el2() & ~CPTR_EL2_TAM_BIT);
 
-	/* Initialize FEAT_AMUv1p1 features if present. */
 	if (is_feat_amuv1p1_supported()) {
-		/* Make sure virtual offsets are disabled if EL2 not used. */
-		write_hcr_el2_amvoffen(0U);
+		/* Make sure virtual offsets are disabled */
+		write_hcr_el2(read_hcr_el2() & ~HCR_AMVOFFEN_BIT);
 	}
 }
 
-/* Read the group 0 counter identified by the given `idx`. */
-static uint64_t amu_group0_cnt_read(unsigned int idx)
-{
-	assert(is_feat_amu_supported());
-	assert(idx < read_amcgcr_el0_cg0nc());
-
-	return amu_group0_cnt_read_internal(idx);
-}
-
-/* Write the group 0 counter identified by the given `idx` with `val` */
-static void amu_group0_cnt_write(unsigned  int idx, uint64_t val)
-{
-	assert(is_feat_amu_supported());
-	assert(idx < read_amcgcr_el0_cg0nc());
-
-	amu_group0_cnt_write_internal(idx, val);
-	isb();
-}
-
-/*
- * Unlike with auxiliary counters, we cannot detect at runtime whether an
- * architected counter supports a virtual offset. These are instead fixed
- * according to FEAT_AMUv1p1, but this switch will need to be updated if later
- * revisions of FEAT_AMU add additional architected counters.
- */
-static bool amu_group0_voffset_supported(uint64_t idx)
-{
-	switch (idx) {
-	case 0U:
-	case 2U:
-	case 3U:
-		return true;
-
-	case 1U:
-		return false;
-
-	default:
-		ERROR("AMU: can't set up virtual offset for unknown "
-		      "architected counter %" PRIu64 "!\n", idx);
-
-		panic();
-	}
-}
-
-/*
- * Read the group 0 offset register for a given index. Index must be 0, 2,
- * or 3, the register for 1 does not exist.
- *
- * Using this function requires FEAT_AMUv1p1 support.
- */
-static uint64_t amu_group0_voffset_read(unsigned int idx)
-{
-	assert(is_feat_amuv1p1_supported());
-	assert(idx < read_amcgcr_el0_cg0nc());
-	assert(idx != 1U);
-
-	return amu_group0_voffset_read_internal(idx);
-}
-
-/*
- * Write the group 0 offset register for a given index. Index must be 0, 2, or
- * 3, the register for 1 does not exist.
- *
- * Using this function requires FEAT_AMUv1p1 support.
- */
-static void amu_group0_voffset_write(unsigned int idx, uint64_t val)
-{
-	assert(is_feat_amuv1p1_supported());
-	assert(idx < read_amcgcr_el0_cg0nc());
-	assert(idx != 1U);
-
-	amu_group0_voffset_write_internal(idx, val);
-	isb();
-}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-/* Read the group 1 counter identified by the given `idx` */
-static uint64_t amu_group1_cnt_read(unsigned int idx)
-{
-	assert(is_feat_amu_supported());
-	assert(amu_group1_supported());
-	assert(idx < read_amcgcr_el0_cg1nc());
-
-	return amu_group1_cnt_read_internal(idx);
-}
-
-/* Write the group 1 counter identified by the given `idx` with `val` */
-static void amu_group1_cnt_write(unsigned int idx, uint64_t val)
-{
-	assert(is_feat_amu_supported());
-	assert(amu_group1_supported());
-	assert(idx < read_amcgcr_el0_cg1nc());
-
-	amu_group1_cnt_write_internal(idx, val);
-	isb();
-}
-
-/*
- * Read the group 1 offset register for a given index.
- *
- * Using this function requires FEAT_AMUv1p1 support.
- */
-static uint64_t amu_group1_voffset_read(unsigned int idx)
-{
-	assert(is_feat_amuv1p1_supported());
-	assert(amu_group1_supported());
-	assert(idx < read_amcgcr_el0_cg1nc());
-	assert((read_amcg1idr_el0_voff() & (UINT64_C(1) << idx)) != 0U);
-
-	return amu_group1_voffset_read_internal(idx);
-}
-
-/*
- * Write the group 1 offset register for a given index.
- *
- * Using this function requires FEAT_AMUv1p1 support.
- */
-static void amu_group1_voffset_write(unsigned int idx, uint64_t val)
-{
-	assert(is_feat_amuv1p1_supported());
-	assert(amu_group1_supported());
-	assert(idx < read_amcgcr_el0_cg1nc());
-	assert((read_amcg1idr_el0_voff() & (UINT64_C(1) << idx)) != 0U);
-
-	amu_group1_voffset_write_internal(idx, val);
-	isb();
-}
-#endif
-
 static void *amu_context_save(const void *arg)
 {
-	uint64_t i, j;
-
-	unsigned int core_pos;
-	struct amu_ctx *ctx;
-
-	uint64_t hcr_el2_amvoffen = 0;	/* AMU virtual offsets enabled */
-	uint64_t amcgcr_el0_cg0nc;	/* Number of group 0 counters */
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint64_t amcg1idr_el0_voff;	/* Auxiliary counters with virtual offsets */
-	uint64_t amcfgr_el0_ncg;	/* Number of counter groups */
-	uint64_t amcgcr_el0_cg1nc;	/* Number of group 1 counters */
-#endif
-
 	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
-	core_pos = plat_my_core_pos();
-	ctx = &amu_ctxs_[core_pos];
+	unsigned int core_pos = *(unsigned int *)arg;
+	amu_regs_t *ctx = &amu_ctx[core_pos];
 
-	amcgcr_el0_cg0nc = read_amcgcr_el0_cg0nc();
-	if (is_feat_amuv1p1_supported()) {
-		hcr_el2_amvoffen = read_hcr_el2_amvoffen();
+	/* disable all counters so we can write them safely later */
+	write_amcntenclr0_el0(AMCNTENCLR0_EL0_Pn_MASK);
+	if (is_feat_amu_aux_supported()) {
+		write_amcntenclr1_el0(get_amu_aux_enables(core_pos));
 	}
 
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	amcfgr_el0_ncg = read_amcfgr_el0_ncg();
-	amcgcr_el0_cg1nc = (amcfgr_el0_ncg > 0U) ? read_amcgcr_el0_cg1nc() : 0U;
-	amcg1idr_el0_voff = (hcr_el2_amvoffen != 0U) ? read_amcg1idr_el0_voff() : 0U;
-#endif
+	isb();
 
-	/*
-	 * Disable all AMU counters.
-	 */
+	write_amu_grp0_ctx_reg(ctx, 0, read_amevcntr00_el0());
+	write_amu_grp0_ctx_reg(ctx, 1, read_amevcntr01_el0());
+	write_amu_grp0_ctx_reg(ctx, 2, read_amevcntr02_el0());
+	write_amu_grp0_ctx_reg(ctx, 3, read_amevcntr03_el0());
 
-	ctx->group0_enable = read_amcntenset0_el0_px();
-	write_amcntenclr0_el0_px(ctx->group0_enable);
+	if (is_feat_amu_aux_supported()) {
+		uint8_t num_counters = read_amcgcr_el0_cg1nc();
 
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	if (amcfgr_el0_ncg > 0U) {
-		ctx->group1_enable = read_amcntenset1_el0_px();
-		write_amcntenclr1_el0_px(ctx->group1_enable);
-	}
-#endif
-
-	/*
-	 * Save the counters to the local context.
-	 */
-
-	isb(); /* Ensure counters have been stopped */
-
-	for (i = 0U; i < amcgcr_el0_cg0nc; i++) {
-		ctx->group0_cnts[i] = amu_group0_cnt_read(i);
-	}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	for (i = 0U; i < amcgcr_el0_cg1nc; i++) {
-		ctx->group1_cnts[i] = amu_group1_cnt_read(i);
-	}
-#endif
-
-	/*
-	 * Save virtual offsets for counters that offer them.
-	 */
-
-	if (hcr_el2_amvoffen != 0U) {
-		for (i = 0U, j = 0U; i < amcgcr_el0_cg0nc; i++) {
-			if (!amu_group0_voffset_supported(i)) {
-				continue; /* No virtual offset */
-			}
-
-			ctx->group0_voffsets[j++] = amu_group0_voffset_read(i);
+		switch (num_counters) {
+		case 0x10:
+			write_amu_grp1_ctx_reg(ctx, 0xf, read_amevcntr1f_el0());
+			__fallthrough;
+		case 0x0f:
+			write_amu_grp1_ctx_reg(ctx, 0xe, read_amevcntr1e_el0());
+			__fallthrough;
+		case 0x0e:
+			write_amu_grp1_ctx_reg(ctx, 0xd, read_amevcntr1d_el0());
+			__fallthrough;
+		case 0x0d:
+			write_amu_grp1_ctx_reg(ctx, 0xc, read_amevcntr1c_el0());
+			__fallthrough;
+		case 0x0c:
+			write_amu_grp1_ctx_reg(ctx, 0xb, read_amevcntr1b_el0());
+			__fallthrough;
+		case 0x0b:
+			write_amu_grp1_ctx_reg(ctx, 0xa, read_amevcntr1a_el0());
+			__fallthrough;
+		case 0x0a:
+			write_amu_grp1_ctx_reg(ctx, 0x9, read_amevcntr19_el0());
+			__fallthrough;
+		case 0x09:
+			write_amu_grp1_ctx_reg(ctx, 0x8, read_amevcntr18_el0());
+			__fallthrough;
+		case 0x08:
+			write_amu_grp1_ctx_reg(ctx, 0x7, read_amevcntr17_el0());
+			__fallthrough;
+		case 0x07:
+			write_amu_grp1_ctx_reg(ctx, 0x6, read_amevcntr16_el0());
+			__fallthrough;
+		case 0x06:
+			write_amu_grp1_ctx_reg(ctx, 0x5, read_amevcntr15_el0());
+			__fallthrough;
+		case 0x05:
+			write_amu_grp1_ctx_reg(ctx, 0x4, read_amevcntr14_el0());
+			__fallthrough;
+		case 0x04:
+			write_amu_grp1_ctx_reg(ctx, 0x3, read_amevcntr13_el0());
+			__fallthrough;
+		case 0x03:
+			write_amu_grp1_ctx_reg(ctx, 0x2, read_amevcntr12_el0());
+			__fallthrough;
+		case 0x02:
+			write_amu_grp1_ctx_reg(ctx, 0x1, read_amevcntr11_el0());
+			__fallthrough;
+		case 0x01:
+			write_amu_grp1_ctx_reg(ctx, 0x0, read_amevcntr10_el0());
+			__fallthrough;
+		case 0x00:
+			break;
+		default:
+			assert(0); /* something is wrong */
 		}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-		for (i = 0U, j = 0U; i < amcgcr_el0_cg1nc; i++) {
-			if ((amcg1idr_el0_voff >> i) & 1U) {
-				continue; /* No virtual offset */
-			}
-
-			ctx->group1_voffsets[j++] = amu_group1_voffset_read(i);
-		}
-#endif
 	}
 
 	return (void *)0;
@@ -493,94 +187,85 @@ static void *amu_context_save(const void *arg)
 
 static void *amu_context_restore(const void *arg)
 {
-	uint64_t i, j;
-
-	unsigned int core_pos;
-	struct amu_ctx *ctx;
-
-	uint64_t hcr_el2_amvoffen = 0;	/* AMU virtual offsets enabled */
-
-	uint64_t amcgcr_el0_cg0nc;	/* Number of group 0 counters */
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	uint64_t amcfgr_el0_ncg;	/* Number of counter groups */
-	uint64_t amcgcr_el0_cg1nc;	/* Number of group 1 counters */
-	uint64_t amcg1idr_el0_voff;	/* Auxiliary counters with virtual offsets */
-#endif
-
 	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
-	core_pos = plat_my_core_pos();
-	ctx = &amu_ctxs_[core_pos];
+	unsigned int core_pos = *(unsigned int *)arg;
+	amu_regs_t *ctx = &amu_ctx[core_pos];
 
-	amcgcr_el0_cg0nc = read_amcgcr_el0_cg0nc();
+	write_amevcntr00_el0(read_amu_grp0_ctx_reg(ctx, 0));
+	write_amevcntr01_el0(read_amu_grp0_ctx_reg(ctx, 1));
+	write_amevcntr02_el0(read_amu_grp0_ctx_reg(ctx, 2));
+	write_amevcntr03_el0(read_amu_grp0_ctx_reg(ctx, 3));
 
-	if (is_feat_amuv1p1_supported()) {
-		hcr_el2_amvoffen = read_hcr_el2_amvoffen();
-	}
+	if (is_feat_amu_aux_supported()) {
+		uint8_t num_counters = read_amcgcr_el0_cg1nc();
 
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	amcfgr_el0_ncg = read_amcfgr_el0_ncg();
-	amcgcr_el0_cg1nc = (amcfgr_el0_ncg > 0U) ? read_amcgcr_el0_cg1nc() : 0U;
-	amcg1idr_el0_voff = (hcr_el2_amvoffen != 0U) ? read_amcg1idr_el0_voff() : 0U;
-#endif
-
-	/*
-	 * Restore the counter values from the local context.
-	 */
-
-	for (i = 0U; i < amcgcr_el0_cg0nc; i++) {
-		amu_group0_cnt_write(i, ctx->group0_cnts[i]);
-	}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	for (i = 0U; i < amcgcr_el0_cg1nc; i++) {
-		amu_group1_cnt_write(i, ctx->group1_cnts[i]);
-	}
-#endif
-
-	/*
-	 * Restore virtual offsets for counters that offer them.
-	 */
-
-	if (hcr_el2_amvoffen != 0U) {
-		for (i = 0U, j = 0U; i < amcgcr_el0_cg0nc; i++) {
-			if (!amu_group0_voffset_supported(i)) {
-				continue; /* No virtual offset */
-			}
-
-			amu_group0_voffset_write(i, ctx->group0_voffsets[j++]);
+		switch (num_counters) {
+		case 0x10:
+			write_amevcntr1f_el0(read_amu_grp1_ctx_reg(ctx, 0xf));
+			__fallthrough;
+		case 0x0f:
+			write_amevcntr1e_el0(read_amu_grp1_ctx_reg(ctx, 0xe));
+			__fallthrough;
+		case 0x0e:
+			write_amevcntr1d_el0(read_amu_grp1_ctx_reg(ctx, 0xd));
+			__fallthrough;
+		case 0x0d:
+			write_amevcntr1c_el0(read_amu_grp1_ctx_reg(ctx, 0xc));
+			__fallthrough;
+		case 0x0c:
+			write_amevcntr1b_el0(read_amu_grp1_ctx_reg(ctx, 0xb));
+			__fallthrough;
+		case 0x0b:
+			write_amevcntr1a_el0(read_amu_grp1_ctx_reg(ctx, 0xa));
+			__fallthrough;
+		case 0x0a:
+			write_amevcntr19_el0(read_amu_grp1_ctx_reg(ctx, 0x9));
+			__fallthrough;
+		case 0x09:
+			write_amevcntr18_el0(read_amu_grp1_ctx_reg(ctx, 0x8));
+			__fallthrough;
+		case 0x08:
+			write_amevcntr17_el0(read_amu_grp1_ctx_reg(ctx, 0x7));
+			__fallthrough;
+		case 0x07:
+			write_amevcntr16_el0(read_amu_grp1_ctx_reg(ctx, 0x6));
+			__fallthrough;
+		case 0x06:
+			write_amevcntr15_el0(read_amu_grp1_ctx_reg(ctx, 0x5));
+			__fallthrough;
+		case 0x05:
+			write_amevcntr14_el0(read_amu_grp1_ctx_reg(ctx, 0x4));
+			__fallthrough;
+		case 0x04:
+			write_amevcntr13_el0(read_amu_grp1_ctx_reg(ctx, 0x3));
+			__fallthrough;
+		case 0x03:
+			write_amevcntr12_el0(read_amu_grp1_ctx_reg(ctx, 0x2));
+			__fallthrough;
+		case 0x02:
+			write_amevcntr11_el0(read_amu_grp1_ctx_reg(ctx, 0x1));
+			__fallthrough;
+		case 0x01:
+			write_amevcntr10_el0(read_amu_grp1_ctx_reg(ctx, 0x0));
+			__fallthrough;
+		case 0x00:
+			break;
+		default:
+			assert(0); /* something is wrong */
 		}
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-		for (i = 0U, j = 0U; i < amcgcr_el0_cg1nc; i++) {
-			if ((amcg1idr_el0_voff >> i) & 1U) {
-				continue; /* No virtual offset */
-			}
-
-			amu_group1_voffset_write(i, ctx->group1_voffsets[j++]);
-		}
-#endif
 	}
 
-	/*
-	 * Re-enable counters that were disabled during context save.
-	 */
 
-	write_amcntenset0_el0_px(ctx->group0_enable);
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-	if (amcfgr_el0_ncg > 0) {
-		write_amcntenset1_el0_px(ctx->group1_enable);
+	/* now enable them again */
+	write_amcntenset0_el0(AMCNTENSET0_EL0_Pn_MASK);
+	if (is_feat_amu_aux_supported()) {
+		write_amcntenset1_el0(get_amu_aux_enables(core_pos));
 	}
-#endif
-
-#if ENABLE_MPMM
-	mpmm_enable();
-#endif
 
+	isb();
 	return (void *)0;
 }
 
diff --git a/lib/extensions/amu/aarch64/amu_helpers.S b/lib/extensions/amu/aarch64/amu_helpers.S
deleted file mode 100644
index 95d4ad61f..000000000
--- a/lib/extensions/amu/aarch64/amu_helpers.S
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright (c) 2017-2021, Arm Limited and Contributors. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <arch.h>
-#include <assert_macros.S>
-#include <asm_macros.S>
-
-	.globl	amu_group0_cnt_read_internal
-	.globl	amu_group0_cnt_write_internal
-	.globl	amu_group1_cnt_read_internal
-	.globl	amu_group1_cnt_write_internal
-	.globl	amu_group1_set_evtype_internal
-
-	/* FEAT_AMUv1p1 virtualisation offset register functions */
-	.globl	amu_group0_voffset_read_internal
-	.globl	amu_group0_voffset_write_internal
-	.globl	amu_group1_voffset_read_internal
-	.globl	amu_group1_voffset_write_internal
-
-/*
- * uint64_t amu_group0_cnt_read_internal(int idx);
- *
- * Given `idx`, read the corresponding AMU counter
- * and return it in `x0`.
- */
-func amu_group0_cnt_read_internal
-	adr	x1, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~3
-	ASM_ASSERT(eq)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x1
-
-1:	read	AMEVCNTR00_EL0		/* index 0 */
-	read	AMEVCNTR01_EL0		/* index 1 */
-	read	AMEVCNTR02_EL0		/* index 2 */
-	read	AMEVCNTR03_EL0		/* index 3 */
-endfunc amu_group0_cnt_read_internal
-
-/*
- * void amu_group0_cnt_write_internal(int idx, uint64_t val);
- *
- * Given `idx`, write `val` to the corresponding AMU counter.
- */
-func amu_group0_cnt_write_internal
-	adr	x2, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~3
-	ASM_ASSERT(eq)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x2
-
-1:	write	AMEVCNTR00_EL0		/* index 0 */
-	write	AMEVCNTR01_EL0		/* index 1 */
-	write	AMEVCNTR02_EL0		/* index 2 */
-	write	AMEVCNTR03_EL0		/* index 3 */
-endfunc amu_group0_cnt_write_internal
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-/*
- * uint64_t amu_group1_cnt_read_internal(int idx);
- *
- * Given `idx`, read the corresponding AMU counter
- * and return it in `x0`.
- */
-func amu_group1_cnt_read_internal
-	adr	x1, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~0xF
-	ASM_ASSERT(eq)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x1
-
-1:	read	AMEVCNTR10_EL0		/* index 0 */
-	read	AMEVCNTR11_EL0		/* index 1 */
-	read	AMEVCNTR12_EL0		/* index 2 */
-	read	AMEVCNTR13_EL0		/* index 3 */
-	read	AMEVCNTR14_EL0		/* index 4 */
-	read	AMEVCNTR15_EL0		/* index 5 */
-	read	AMEVCNTR16_EL0		/* index 6 */
-	read	AMEVCNTR17_EL0		/* index 7 */
-	read	AMEVCNTR18_EL0		/* index 8 */
-	read	AMEVCNTR19_EL0		/* index 9 */
-	read	AMEVCNTR1A_EL0		/* index 10 */
-	read	AMEVCNTR1B_EL0		/* index 11 */
-	read	AMEVCNTR1C_EL0		/* index 12 */
-	read	AMEVCNTR1D_EL0		/* index 13 */
-	read	AMEVCNTR1E_EL0		/* index 14 */
-	read	AMEVCNTR1F_EL0		/* index 15 */
-endfunc amu_group1_cnt_read_internal
-
-/*
- * void amu_group1_cnt_write_internal(int idx, uint64_t val);
- *
- * Given `idx`, write `val` to the corresponding AMU counter.
- */
-func amu_group1_cnt_write_internal
-	adr	x2, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~0xF
-	ASM_ASSERT(eq)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x2
-
-1:	write	AMEVCNTR10_EL0		/* index 0 */
-	write	AMEVCNTR11_EL0		/* index 1 */
-	write	AMEVCNTR12_EL0		/* index 2 */
-	write	AMEVCNTR13_EL0		/* index 3 */
-	write	AMEVCNTR14_EL0		/* index 4 */
-	write	AMEVCNTR15_EL0		/* index 5 */
-	write	AMEVCNTR16_EL0		/* index 6 */
-	write	AMEVCNTR17_EL0		/* index 7 */
-	write	AMEVCNTR18_EL0		/* index 8 */
-	write	AMEVCNTR19_EL0		/* index 9 */
-	write	AMEVCNTR1A_EL0		/* index 10 */
-	write	AMEVCNTR1B_EL0		/* index 11 */
-	write	AMEVCNTR1C_EL0		/* index 12 */
-	write	AMEVCNTR1D_EL0		/* index 13 */
-	write	AMEVCNTR1E_EL0		/* index 14 */
-	write	AMEVCNTR1F_EL0		/* index 15 */
-endfunc amu_group1_cnt_write_internal
-
-/*
- * void amu_group1_set_evtype_internal(int idx, unsigned int val);
- *
- * Program the AMU event type register indexed by `idx`
- * with the value `val`.
- */
-func amu_group1_set_evtype_internal
-	adr	x2, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~0xF
-	ASM_ASSERT(eq)
-
-	/* val should be between [0, 65535] */
-	tst	x1, #~0xFFFF
-	ASM_ASSERT(eq)
-#endif
-	/*
-	 * Given `idx` calculate address of msr/ret instruction pair
-	 * in the table below.
-	 */
-	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x2
-
-1:	write	AMEVTYPER10_EL0		/* index 0 */
-	write	AMEVTYPER11_EL0		/* index 1 */
-	write	AMEVTYPER12_EL0		/* index 2 */
-	write	AMEVTYPER13_EL0		/* index 3 */
-	write	AMEVTYPER14_EL0		/* index 4 */
-	write	AMEVTYPER15_EL0		/* index 5 */
-	write	AMEVTYPER16_EL0		/* index 6 */
-	write	AMEVTYPER17_EL0		/* index 7 */
-	write	AMEVTYPER18_EL0		/* index 8 */
-	write	AMEVTYPER19_EL0		/* index 9 */
-	write	AMEVTYPER1A_EL0		/* index 10 */
-	write	AMEVTYPER1B_EL0		/* index 11 */
-	write	AMEVTYPER1C_EL0		/* index 12 */
-	write	AMEVTYPER1D_EL0		/* index 13 */
-	write	AMEVTYPER1E_EL0		/* index 14 */
-	write	AMEVTYPER1F_EL0		/* index 15 */
-endfunc amu_group1_set_evtype_internal
-#endif
-
-/*
- * Accessor functions for virtual offset registers added with FEAT_AMUv1p1
- */
-
-/*
- * uint64_t amu_group0_voffset_read_internal(int idx);
- *
- * Given `idx`, read the corresponding AMU virtual offset register
- * and return it in `x0`.
- */
-func amu_group0_voffset_read_internal
-	adr	x1, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~3
-	ASM_ASSERT(eq)
-	/* Make sure idx != 1 since AMEVCNTVOFF01_EL2 does not exist */
-	cmp	x0, #1
-	ASM_ASSERT(ne)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x1
-
-1:	read	AMEVCNTVOFF00_EL2	/* index 0 */
-	.skip	8			/* AMEVCNTVOFF01_EL2 does not exist */
-#if ENABLE_BTI
-	.skip	4
-#endif
-	read	AMEVCNTVOFF02_EL2	/* index 2 */
-	read	AMEVCNTVOFF03_EL2	/* index 3 */
-endfunc amu_group0_voffset_read_internal
-
-/*
- * void amu_group0_voffset_write_internal(int idx, uint64_t val);
- *
- * Given `idx`, write `val` to the corresponding AMU virtual offset register.
- */
-func amu_group0_voffset_write_internal
-	adr	x2, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~3
-	ASM_ASSERT(eq)
-	/* Make sure idx != 1 since AMEVCNTVOFF01_EL2 does not exist */
-	cmp	x0, #1
-	ASM_ASSERT(ne)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x2
-
-1:	write	AMEVCNTVOFF00_EL2	/* index 0 */
-	.skip	8			/* AMEVCNTVOFF01_EL2 does not exist */
-#if ENABLE_BTI
-	.skip	4
-#endif
-	write	AMEVCNTVOFF02_EL2	/* index 2 */
-	write	AMEVCNTVOFF03_EL2	/* index 3 */
-endfunc amu_group0_voffset_write_internal
-
-#if ENABLE_AMU_AUXILIARY_COUNTERS
-/*
- * uint64_t amu_group1_voffset_read_internal(int idx);
- *
- * Given `idx`, read the corresponding AMU virtual offset register
- * and return it in `x0`.
- */
-func amu_group1_voffset_read_internal
-	adr	x1, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~0xF
-	ASM_ASSERT(eq)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x1
-
-1:	read	AMEVCNTVOFF10_EL2	/* index 0 */
-	read	AMEVCNTVOFF11_EL2	/* index 1 */
-	read	AMEVCNTVOFF12_EL2	/* index 2 */
-	read	AMEVCNTVOFF13_EL2	/* index 3 */
-	read	AMEVCNTVOFF14_EL2	/* index 4 */
-	read	AMEVCNTVOFF15_EL2	/* index 5 */
-	read	AMEVCNTVOFF16_EL2	/* index 6 */
-	read	AMEVCNTVOFF17_EL2	/* index 7 */
-	read	AMEVCNTVOFF18_EL2	/* index 8 */
-	read	AMEVCNTVOFF19_EL2	/* index 9 */
-	read	AMEVCNTVOFF1A_EL2	/* index 10 */
-	read	AMEVCNTVOFF1B_EL2	/* index 11 */
-	read	AMEVCNTVOFF1C_EL2	/* index 12 */
-	read	AMEVCNTVOFF1D_EL2	/* index 13 */
-	read	AMEVCNTVOFF1E_EL2	/* index 14 */
-	read	AMEVCNTVOFF1F_EL2	/* index 15 */
-endfunc amu_group1_voffset_read_internal
-
-/*
- * void amu_group1_voffset_write_internal(int idx, uint64_t val);
- *
- * Given `idx`, write `val` to the corresponding AMU virtual offset register.
- */
-func amu_group1_voffset_write_internal
-	adr	x2, 1f
-#if ENABLE_ASSERTIONS
-	/*
-	 * It can be dangerous to call this function with an
-	 * out of bounds index.  Ensure `idx` is valid.
-	 */
-	tst	x0, #~0xF
-	ASM_ASSERT(eq)
-#endif
-	/*
-	 * Given `idx` calculate address of mrs/ret instruction pair
-	 * in the table below.
-	 */
-	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
-#if ENABLE_BTI
-	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
-#endif
-	br	x2
-
-1:	write	AMEVCNTVOFF10_EL2	/* index 0 */
-	write	AMEVCNTVOFF11_EL2	/* index 1 */
-	write	AMEVCNTVOFF12_EL2	/* index 2 */
-	write	AMEVCNTVOFF13_EL2	/* index 3 */
-	write	AMEVCNTVOFF14_EL2	/* index 4 */
-	write	AMEVCNTVOFF15_EL2	/* index 5 */
-	write	AMEVCNTVOFF16_EL2	/* index 6 */
-	write	AMEVCNTVOFF17_EL2	/* index 7 */
-	write	AMEVCNTVOFF18_EL2	/* index 8 */
-	write	AMEVCNTVOFF19_EL2	/* index 9 */
-	write	AMEVCNTVOFF1A_EL2	/* index 10 */
-	write	AMEVCNTVOFF1B_EL2	/* index 11 */
-	write	AMEVCNTVOFF1C_EL2	/* index 12 */
-	write	AMEVCNTVOFF1D_EL2	/* index 13 */
-	write	AMEVCNTVOFF1E_EL2	/* index 14 */
-	write	AMEVCNTVOFF1F_EL2	/* index 15 */
-endfunc amu_group1_voffset_write_internal
-#endif
diff --git a/lib/extensions/amu/amu.mk b/lib/extensions/amu/amu.mk
index 868ab1254..fba2c78d3 100644
--- a/lib/extensions/amu/amu.mk
+++ b/lib/extensions/amu/amu.mk
@@ -1,24 +1,13 @@
 #
-# Copyright (c) 2021, Arm Limited. All rights reserved.
+# Copyright (c) 2021-2025, Arm Limited. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
-include lib/fconf/fconf.mk
-
-AMU_SOURCES	:=	lib/extensions/amu/${ARCH}/amu.c \
-			lib/extensions/amu/${ARCH}/amu_helpers.S
+AMU_SOURCES	:=	lib/extensions/amu/${ARCH}/amu.c
 
 ifneq (${ENABLE_AMU_AUXILIARY_COUNTERS},0)
         ifeq (${ENABLE_FEAT_AMU},0)
-                $(error AMU auxiliary counter support (`ENABLE_AMU_AUXILIARY_COUNTERS`) requires AMU support (`ENABLE_FEAT_AMU`))
+                $(error "ENABLE_AMU_AUXILIARY_COUNTERS requires ENABLE_FEAT_AMU")
         endif
 endif
-
-ifneq (${ENABLE_AMU_FCONF},0)
-        ifeq (${ENABLE_AMU_AUXILIARY_COUNTERS},0)
-                $(error AMU FCONF support (`ENABLE_AMU_FCONF`) is not necessary when auxiliary counter support (`ENABLE_AMU_AUXILIARY_COUNTERS`) is disabled)
-        endif
-
-        AMU_SOURCES	+=	${FCONF_AMU_SOURCES}
-endif
diff --git a/lib/extensions/amu/amu_private.h b/lib/extensions/amu/amu_private.h
deleted file mode 100644
index a3b684510..000000000
--- a/lib/extensions/amu/amu_private.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2017-2021, Arm Limited and Contributors. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef AMU_PRIVATE_H
-#define AMU_PRIVATE_H
-
-#include <stdint.h>
-
-#include <lib/cassert.h>
-#include <lib/extensions/amu.h>
-#include <lib/utils_def.h>
-
-#include <platform_def.h>
-
-#define AMU_GROUP0_MAX_COUNTERS		U(16)
-#define AMU_GROUP1_MAX_COUNTERS		U(16)
-
-#define AMU_AMCGCR_CG0NC_MAX		U(16)
-
-uint64_t amu_group0_cnt_read_internal(unsigned int idx);
-void amu_group0_cnt_write_internal(unsigned int idx, uint64_t val);
-
-uint64_t amu_group1_cnt_read_internal(unsigned int idx);
-void amu_group1_cnt_write_internal(unsigned int idx, uint64_t val);
-void amu_group1_set_evtype_internal(unsigned int idx, unsigned int val);
-
-#if __aarch64__
-uint64_t amu_group0_voffset_read_internal(unsigned int idx);
-void amu_group0_voffset_write_internal(unsigned int idx, uint64_t val);
-
-uint64_t amu_group1_voffset_read_internal(unsigned int idx);
-void amu_group1_voffset_write_internal(unsigned int idx, uint64_t val);
-#endif
-
-#endif /* AMU_PRIVATE_H */
diff --git a/lib/fconf/fconf.mk b/lib/fconf/fconf.mk
index 311bee4e8..d24f86b3a 100644
--- a/lib/fconf/fconf.mk
+++ b/lib/fconf/fconf.mk
@@ -11,6 +11,3 @@ FCONF_SOURCES		+=	${FDT_WRAPPERS_SOURCES}
 
 FCONF_DYN_SOURCES	:=	lib/fconf/fconf_dyn_cfg_getter.c
 FCONF_DYN_SOURCES	+=	${FDT_WRAPPERS_SOURCES}
-
-FCONF_AMU_SOURCES	:=	lib/fconf/fconf_amu_getter.c
-FCONF_AMU_SOURCES	+=	${FDT_WRAPPERS_SOURCES}
diff --git a/lib/fconf/fconf_amu_getter.c b/lib/fconf/fconf_amu_getter.c
deleted file mode 100644
index eff309cf9..000000000
--- a/lib/fconf/fconf_amu_getter.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2021, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include <common/debug.h>
-#include <common/fdt_wrappers.h>
-#include <lib/fconf/fconf.h>
-#include <lib/fconf/fconf_amu_getter.h>
-#include <libfdt.h>
-
-#include <plat/common/platform.h>
-
-struct fconf_amu_config fconf_amu_config;
-static struct amu_topology fconf_amu_topology_;
-
-/*
- * Populate the core-specific AMU structure with information retrieved from a
- * device tree.
- *
- * Returns `0` on success, or a negative integer representing an error code.
- */
-static int fconf_populate_amu_cpu_amu(const void *fdt, int parent,
-				      struct amu_core *amu)
-{
-	int ret = 0;
-	int node = 0;
-
-	fdt_for_each_subnode(node, fdt, parent) {
-		const char *name;
-		const char *value;
-		int len;
-
-		uintptr_t idx = 0U;
-
-		name = fdt_get_name(fdt, node, &len);
-		if (strncmp(name, "counter@", 8) != 0) {
-			continue;
-		}
-
-		ret = fdt_get_reg_props_by_index(fdt, node, 0, &idx, NULL);
-		if (ret < 0) {
-			break;
-		}
-
-		value = fdt_getprop(fdt, node, "enable-at-el3", &len);
-		if ((value == NULL) && (len != -FDT_ERR_NOTFOUND)) {
-			break;
-		}
-
-		if (len != -FDT_ERR_NOTFOUND) {
-			amu->enable |= (1 << idx);
-		}
-	}
-
-	if ((node < 0) && (node != -FDT_ERR_NOTFOUND)) {
-		return node;
-	}
-
-	return ret;
-}
-
-/*
- * Within a `cpu` node, attempt to dereference the `amu` property, and populate
- * the AMU information for the core.
- *
- * Returns `0` on success, or a negative integer representing an error code.
- */
-static int fconf_populate_amu_cpu(const void *fdt, int node, uintptr_t mpidr)
-{
-	int ret;
-	int idx;
-
-	uint32_t amu_phandle;
-	struct amu_core *amu;
-
-	ret = fdt_read_uint32(fdt, node, "amu", &amu_phandle);
-	if (ret < 0) {
-		if (ret == -FDT_ERR_NOTFOUND) {
-			ret = 0;
-		}
-
-		return ret;
-	}
-
-	node = fdt_node_offset_by_phandle(fdt, amu_phandle);
-	if (node < 0) {
-		return node;
-	}
-
-	idx = plat_core_pos_by_mpidr(mpidr);
-	if (idx < 0) {
-		return -FDT_ERR_BADVALUE;
-	}
-
-	amu = &fconf_amu_topology_.cores[idx];
-
-	return fconf_populate_amu_cpu_amu(fdt, node, amu);
-}
-
-/*
- * Populates the global `amu_topology` structure based on what's described by
- * the hardware configuration device tree blob.
- *
- * The device tree is expected to provide an `amu` property for each `cpu` node,
- * like so:
- *
- *     cpu@0 {
- *         amu = <&cpu0_amu>;
- *     };
- *
- *     amus {
- *         cpu0_amu: amu-0 {
- *             counters {
- *                 #address-cells = <2>;
- *                 #size-cells = <0>;
- *
- *                 counter@x,y {
- *                     reg = <x y>; // Group x, counter y
- *                 };
- *             };
- *         };
- *     };
- */
-static int fconf_populate_amu(uintptr_t config)
-{
-	int ret = fdtw_for_each_cpu(
-		(const void *)config, fconf_populate_amu_cpu);
-	if (ret == 0) {
-		fconf_amu_config.topology = &fconf_amu_topology_;
-	} else {
-		ERROR("FCONF: failed to parse AMU information: %d\n", ret);
-	}
-
-	return ret;
-}
-
-FCONF_REGISTER_POPULATOR(HW_CONFIG, amu, fconf_populate_amu);
diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 4bb23af32..4c2601eca 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -972,7 +972,7 @@ void psci_warmboot_entrypoint(void)
 	psci_power_state_t state_info = { {PSCI_LOCAL_STATE_RUN} };
 
 	/* Init registers that never change for the lifetime of TF-A */
-	cm_manage_extensions_el3();
+	cm_manage_extensions_el3(cpu_idx);
 
 	/*
 	 * Verify that we have been explicitly turned ON or resumed from
diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c
index 05bbe39cb..0fb1ed309 100644
--- a/lib/psci/psci_suspend.c
+++ b/lib/psci/psci_suspend.c
@@ -42,12 +42,13 @@ static void psci_cpu_suspend_to_standby_finish(unsigned int end_pwrlvl,
  * This function does generic and platform specific suspend to power down
  * operations.
  ******************************************************************************/
-static void psci_suspend_to_pwrdown_start(unsigned int end_pwrlvl,
+static void psci_suspend_to_pwrdown_start(unsigned int idx,
+					  unsigned int end_pwrlvl,
 					  unsigned int max_off_lvl,
 					  const entry_point_info_t *ep,
 					  const psci_power_state_t *state_info)
 {
-	PUBLISH_EVENT(psci_suspend_pwrdown_start);
+	PUBLISH_EVENT_ARG(psci_suspend_pwrdown_start, &idx);
 
 #if PSCI_OS_INIT_MODE
 #ifdef PLAT_MAX_CPU_SUSPEND_PWR_LVL
@@ -223,7 +224,7 @@ int psci_cpu_suspend_start(unsigned int idx,
 #endif
 #endif
 		max_off_lvl = psci_find_max_off_lvl(state_info);
-		psci_suspend_to_pwrdown_start(end_pwrlvl, max_off_lvl, ep, state_info);
+		psci_suspend_to_pwrdown_start(idx, end_pwrlvl, end_pwrlvl, ep, state_info);
 	}
 
 	/*
@@ -382,5 +383,5 @@ void psci_cpu_suspend_to_powerdown_finish(unsigned int cpu_idx, unsigned int max
 	/* This loses its meaning when not suspending, reset so it's correct for OFF */
 	psci_set_suspend_pwrlvl(PLAT_MAX_PWR_LVL);
 
-	PUBLISH_EVENT(psci_suspend_pwrdown_finish);
+	PUBLISH_EVENT_ARG(psci_suspend_pwrdown_finish, &cpu_idx);
 }
diff --git a/make_helpers/arch_features.mk b/make_helpers/arch_features.mk
index 3c9e1368b..8dec522eb 100644
--- a/make_helpers/arch_features.mk
+++ b/make_helpers/arch_features.mk
@@ -291,7 +291,6 @@ endif
 # Feature flags for supporting Activity monitor extensions.
 ENABLE_FEAT_AMU				?=	0
 ENABLE_AMU_AUXILIARY_COUNTERS		?=	0
-ENABLE_AMU_FCONF			?=	0
 AMU_RESTRICT_COUNTERS			?=	1
 
 # Build option to enable MPAM for lower ELs.
diff --git a/plat/arm/board/tc/platform.mk b/plat/arm/board/tc/platform.mk
index 49f9eea27..a056bc290 100644
--- a/plat/arm/board/tc/platform.mk
+++ b/plat/arm/board/tc/platform.mk
@@ -32,7 +32,6 @@ ENABLE_SME_FOR_SWD		:=	1
 ENABLE_TRBE_FOR_NS		:=	1
 ENABLE_SYS_REG_TRACE_FOR_NS	:=	1
 ENABLE_FEAT_AMU			:=	1
-ENABLE_AMU_FCONF		:=	1
 ENABLE_AMU_AUXILIARY_COUNTERS	:=	1
 ENABLE_MPMM			:=	1
 ENABLE_FEAT_MTE2		:=	2
diff --git a/plat/arm/board/tc/tc_bl31_setup.c b/plat/arm/board/tc/tc_bl31_setup.c
index c5ebfde64..5d19aeb9f 100644
--- a/plat/arm/board/tc/tc_bl31_setup.c
+++ b/plat/arm/board/tc/tc_bl31_setup.c
@@ -72,6 +72,18 @@ static scmi_channel_plat_info_t tc_scmi_plat_info = {
 };
 #endif
 
+/* the bottom 3 AMU group 1 counters */
+#define MPMM_GEARS ((1 << 0) | (1 << 1) | (1 << 2))
+
+uint16_t plat_amu_aux_enables[PLATFORM_CORE_COUNT] = {
+	MPMM_GEARS, MPMM_GEARS, MPMM_GEARS, MPMM_GEARS,
+	MPMM_GEARS, MPMM_GEARS, MPMM_GEARS, MPMM_GEARS,
+#if PLATFORM_CORE_COUNT == 14
+	MPMM_GEARS, MPMM_GEARS, MPMM_GEARS, MPMM_GEARS,
+	MPMM_GEARS, MPMM_GEARS
+#endif
+};
+
 #if (TARGET_PLATFORM == 3) || (TARGET_PLATFORM == 4)
 static void enable_ns_mcn_pmu(void)
 {