From 864299fc67522831e2d64bc468549c42d5c06073 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:31 +0000 Subject: [PATCH 01/74] ACPI / PPTT: Add a helper to fill a cpumask from a processor container The ACPI MPAM table uses the UID of a processor container specified in the PPTT to indicate the subset of CPUs and cache topology that can access each MPAM System Component (MSC). This information is not directly useful to the kernel. The equivalent cpumask is needed instead. Add a helper to find the processor container by its id, then walk the possible CPUs to fill a cpumask with the CPUs that have this processor container as a parent. CC: Dave Martin Reviewed-by: Sudeep Holla Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Hanjun Guo Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 796e29b857aed89f83f70f2c199585c45db5dc0f) Signed-off-by: Lee Trager --- drivers/acpi/pptt.c | 84 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 3 ++ 2 files changed, 87 insertions(+) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 54676e3d82dd5..b8248c0092fe8 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -817,3 +817,87 @@ int find_acpi_cpu_topology_hetero_id(unsigned int cpu) return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, ACPI_PPTT_ACPI_IDENTICAL); } + +/** + * acpi_pptt_get_child_cpus() - Find all the CPUs below a PPTT + * processor hierarchy node + * + * @table_hdr: A reference to the PPTT table + * @parent_node: A pointer to the processor hierarchy node in the + * table_hdr + * @cpus: A cpumask to fill with the CPUs below @parent_node + * + * Walks up the PPTT from every possible CPU to find if the provided + * @parent_node is a parent of this CPU. + */ +static void acpi_pptt_get_child_cpus(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *parent_node, + cpumask_t *cpus) +{ + struct acpi_pptt_processor *cpu_node; + u32 acpi_id; + int cpu; + + cpumask_clear(cpus); + + for_each_possible_cpu(cpu) { + acpi_id = get_acpi_id_for_cpu(cpu); + cpu_node = acpi_find_processor_node(table_hdr, acpi_id); + + while (cpu_node) { + if (cpu_node == parent_node) { + cpumask_set_cpu(cpu, cpus); + break; + } + cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); + } + } +} + +/** + * acpi_pptt_get_cpus_from_container() - Populate a cpumask with all CPUs in a + * processor container + * @acpi_cpu_id: The UID of the processor container + * @cpus: The resulting CPU mask + * + * Find the specified Processor Container, and fill @cpus with all the cpus + * below it. + * + * Not all 'Processor Hierarchy' entries in the PPTT are either a CPU + * or a Processor Container, they may exist purely to describe a + * Private resource. CPUs have to be leaves, so a Processor Container + * is a non-leaf that has the 'ACPI Processor ID valid' flag set. + */ +void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) +{ + struct acpi_table_header *table_hdr; + struct acpi_subtable_header *entry; + unsigned long table_end; + u32 proc_sz; + + cpumask_clear(cpus); + + table_hdr = acpi_get_pptt(); + if (!table_hdr) + return; + + table_end = (unsigned long)table_hdr + table_hdr->length; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, + sizeof(struct acpi_table_pptt)); + proc_sz = sizeof(struct acpi_pptt_processor); + while ((unsigned long)entry + proc_sz <= table_end) { + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR) { + struct acpi_pptt_processor *cpu_node; + + cpu_node = (struct acpi_pptt_processor *)entry; + if (cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID && + !acpi_pptt_leaf_node(table_hdr, cpu_node) && + cpu_node->acpi_processor_id == acpi_cpu_id) { + acpi_pptt_get_child_cpus(table_hdr, cpu_node, cpus); + break; + } + } + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, + entry->length); + } +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5ff5d99f6ead5..4752ebd481326 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1541,6 +1541,7 @@ int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); +void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1562,6 +1563,8 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) { return -EINVAL; } +static inline void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, + cpumask_t *cpus) { } #endif void acpi_arch_init(void); From 97ddf21356c677563ab43ba86c9ba30efb12f58c Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:32 +0000 Subject: [PATCH 02/74] ACPI / PPTT: Stop acpi_count_levels() expecting callers to clear levels In acpi_count_levels(), the initial value of *levels passed by the caller is really an implementation detail of acpi_count_levels(), so it is unreasonable to expect the callers of this function to know what to pass in for this parameter. The only sensible initial value is 0, which is what the only upstream caller (acpi_get_cache_info()) passes. Use a local variable for the starting cache level in acpi_count_levels(), and pass the result back to the caller via the function return value. Get rid of the levels parameter, which has no remaining purpose. Fix acpi_get_cache_info() to match. Suggested-by: Jonathan Cameron Signed-off-by: James Morse Reviewed-by: Lorenzo Pieralisi Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Hanjun Guo Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit eeec7845e966f9278973c02573e3587e6733a4dd) Signed-off-by: Lee Trager --- drivers/acpi/pptt.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index b8248c0092fe8..2856254e29d73 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -177,14 +177,14 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr, } /** - * acpi_count_levels() - Given a PPTT table, and a CPU node, count the cache - * levels and split cache levels (data/instruction). + * acpi_count_levels() - Given a PPTT table, and a CPU node, count the + * total number of levels and split cache levels (data/instruction). * @table_hdr: Pointer to the head of the PPTT table * @cpu_node: processor node we wish to count caches for - * @levels: Number of levels if success. * @split_levels: Number of split cache levels (data/instruction) if * success. Can by NULL. * + * Return: number of levels. * Given a processor node containing a processing unit, walk into it and count * how many levels exist solely for it, and then walk up each level until we hit * the root node (ignore the package level because it may be possible to have @@ -192,14 +192,18 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr, * split cache levels (data/instruction) that exist at each level on the way * up. */ -static void acpi_count_levels(struct acpi_table_header *table_hdr, - struct acpi_pptt_processor *cpu_node, - unsigned int *levels, unsigned int *split_levels) +static int acpi_count_levels(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu_node, + unsigned int *split_levels) { + int current_level = 0; + do { - acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0); + acpi_find_cache_level(table_hdr, cpu_node, ¤t_level, split_levels, 0, 0); cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); } while (cpu_node); + + return current_level; } /** @@ -645,7 +649,7 @@ int acpi_get_cache_info(unsigned int cpu, unsigned int *levels, if (!cpu_node) return -ENOENT; - acpi_count_levels(table, cpu_node, levels, split_levels); + *levels = acpi_count_levels(table, cpu_node, split_levels); pr_debug("Cache Setup: last_level=%d split_levels=%d\n", *levels, split_levels ? *split_levels : -1); From db707605c377164fa00b00782f2aad77325fa83e Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:33 +0000 Subject: [PATCH 03/74] ACPI / PPTT: Add acpi_pptt_cache_v1_full to use pptt cache as one structure In actbl2.h, acpi_pptt_cache describes the fields in the original Cache Type Structure. In PPTT table version 3 a new field was added at the end, cache_id. This is described in acpi_pptt_cache_v1 but rather than including all v1 fields it just includes this one. In lieu of this being fixed in acpica, introduce acpi_pptt_cache_v1_full to contain all the fields of the Cache Type Structure . Update the existing code to use this new struct. This simplifies the code and removes a non-standard use of ACPI_ADD_PTR. Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Hanjun Guo Reviewed-by: Jeremy Linton Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit cfc085af8398479e855b86236a21e1d870d51184) Signed-off-by: Lee Trager --- drivers/acpi/pptt.c | 45 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index 2856254e29d73..ef39b176dc00c 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -21,6 +21,25 @@ #include #include +/* + * The acpi_pptt_cache_v1 in actbl2.h, which is imported from acpica, + * only contains the cache_id field rather than all the fields of the + * Cache Type Structure. Use this alternative structure until it is + * resolved in acpica. + */ +struct acpi_pptt_cache_v1_full { + struct acpi_subtable_header header; + u16 reserved; + u32 flags; + u32 next_level_of_cache; + u32 size; + u32 number_of_sets; + u8 associativity; + u8 attributes; + u16 line_size; + u32 cache_id; +} __packed; + static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr, u32 pptt_ref) { @@ -56,6 +75,18 @@ static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header *table_ return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, pptt_ref); } +static struct acpi_pptt_cache_v1_full *upgrade_pptt_cache(struct acpi_pptt_cache *cache) +{ + if (cache->header.length < sizeof(struct acpi_pptt_cache_v1_full)) + return NULL; + + /* No use for v1 if the only additional field is invalid */ + if (!(cache->flags & ACPI_PPTT_CACHE_ID_VALID)) + return NULL; + + return (struct acpi_pptt_cache_v1_full *)cache; +} + static struct acpi_subtable_header *acpi_get_pptt_resource(struct acpi_table_header *table_hdr, struct acpi_pptt_processor *node, int resource) @@ -355,7 +386,6 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta * @this_leaf: Kernel cache info structure being updated * @found_cache: The PPTT node describing this cache instance * @cpu_node: A unique reference to describe this cache instance - * @revision: The revision of the PPTT table * * The ACPI spec implies that the fields in the cache structures are used to * extend and correct the information probed from the hardware. Lets only @@ -365,10 +395,9 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta */ static void update_cache_properties(struct cacheinfo *this_leaf, struct acpi_pptt_cache *found_cache, - struct acpi_pptt_processor *cpu_node, - u8 revision) + struct acpi_pptt_processor *cpu_node) { - struct acpi_pptt_cache_v1* found_cache_v1; + struct acpi_pptt_cache_v1_full *found_cache_v1; this_leaf->fw_token = cpu_node; if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) @@ -418,9 +447,8 @@ static void update_cache_properties(struct cacheinfo *this_leaf, found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) this_leaf->type = CACHE_TYPE_UNIFIED; - if (revision >= 3 && (found_cache->flags & ACPI_PPTT_CACHE_ID_VALID)) { - found_cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1, - found_cache, sizeof(struct acpi_pptt_cache)); + found_cache_v1 = upgrade_pptt_cache(found_cache); + if (found_cache_v1) { this_leaf->id = found_cache_v1->cache_id; this_leaf->attributes |= CACHE_ID; } @@ -445,8 +473,7 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table, pr_debug("found = %p %p\n", found_cache, cpu_node); if (found_cache) update_cache_properties(this_leaf, found_cache, - ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)), - table->revision); + ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table))); index++; } From 2254b293267681c35835b2a159c5814e12ff48d8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:34 +0000 Subject: [PATCH 04/74] ACPI / PPTT: Find cache level by cache-id The MPAM table identifies caches by id. The MPAM driver also wants to know the cache level to determine if the platform is of the shape that can be managed via resctrl. Cacheinfo has this information, but only for CPUs that are online. Waiting for all CPUs to come online is a problem for platforms where CPUs are brought online late by user-space. Add a helper that walks every possible cache, until it finds the one identified by cache-id, then return the level. Signed-off-by: James Morse Signed-off-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Catalin Marinas (cherry picked from commit 41a7bb39fede8ecc053c261b86cdfadea45b7b10) Signed-off-by: Lee Trager --- drivers/acpi/pptt.c | 66 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 5 ++++ 2 files changed, 71 insertions(+) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index ef39b176dc00c..da49b56a1ef22 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -932,3 +932,69 @@ void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) entry->length); } } + +/** + * find_acpi_cache_level_from_id() - Get the level of the specified cache + * @cache_id: The id field of the cache + * + * Determine the level relative to any CPU for the cache identified by + * cache_id. This allows the property to be found even if the CPUs are offline. + * + * The returned level can be used to group caches that are peers. + * + * The PPTT table must be rev 3 or later. + * + * If one CPU's L2 is shared with another CPU as L3, this function will return + * an unpredictable value. + * + * Return: -ENOENT if the PPTT doesn't exist, the revision isn't supported or + * the cache cannot be found. + * Otherwise returns a value which represents the level of the specified cache. + */ +int find_acpi_cache_level_from_id(u32 cache_id) +{ + int cpu; + struct acpi_table_header *table; + + table = acpi_get_pptt(); + if (!table) + return -ENOENT; + + if (table->revision < 3) + return -ENOENT; + + for_each_possible_cpu(cpu) { + bool empty; + int level = 1; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct acpi_pptt_cache *cache; + struct acpi_pptt_processor *cpu_node; + + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (!cpu_node) + continue; + + do { + int cache_type[] = {CACHE_TYPE_INST, CACHE_TYPE_DATA, CACHE_TYPE_UNIFIED}; + + empty = true; + for (int i = 0; i < ARRAY_SIZE(cache_type); i++) { + struct acpi_pptt_cache_v1_full *cache_v1; + + cache = acpi_find_cache_node(table, acpi_cpu_id, cache_type[i], + level, &cpu_node); + if (!cache) + continue; + + empty = false; + + cache_v1 = upgrade_pptt_cache(cache); + if (cache_v1 && cache_v1->cache_id == cache_id) + return level; + } + level++; + } while (!empty); + } + + return -ENOENT; +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4752ebd481326..be074bdfd4d17 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1542,6 +1542,7 @@ int find_acpi_cpu_topology_cluster(unsigned int cpu); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); +int find_acpi_cache_level_from_id(u32 cache_id); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1565,6 +1566,10 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) } static inline void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus) { } +static inline int find_acpi_cache_level_from_id(u32 cache_id) +{ + return -ENOENT; +} #endif void acpi_arch_init(void); From f59c8c8f2789b9c23f9acd5ede8edca92e36d487 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:35 +0000 Subject: [PATCH 05/74] ACPI / PPTT: Add a helper to fill a cpumask from a cache_id MPAM identifies CPUs by the cache_id in the PPTT cache structure. The driver needs to know which CPUs are associated with the cache. The CPUs may not all be online, so cacheinfo does not have the information. Add a helper to pull this information out of the PPTT. CC: Rohit Mathew Reviewed-by: Gavin Shan Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Jeremy Linton Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit a39a723a6f1ed9a1602ccf8dd56392402afa7339) Signed-off-by: Lee Trager --- drivers/acpi/pptt.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 6 ++++ 2 files changed, 71 insertions(+) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index da49b56a1ef22..de5f8c018333d 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -998,3 +998,68 @@ int find_acpi_cache_level_from_id(u32 cache_id) return -ENOENT; } + +/** + * acpi_pptt_get_cpumask_from_cache_id() - Get the cpus associated with the + * specified cache + * @cache_id: The id field of the cache + * @cpus: Where to build the cpumask + * + * Determine which CPUs are below this cache in the PPTT. This allows the property + * to be found even if the CPUs are offline. + * + * The PPTT table must be rev 3 or later, + * + * Return: -ENOENT if the PPTT doesn't exist, or the cache cannot be found. + * Otherwise returns 0 and sets the cpus in the provided cpumask. + */ +int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus) +{ + int cpu; + struct acpi_table_header *table; + + cpumask_clear(cpus); + + table = acpi_get_pptt(); + if (!table) + return -ENOENT; + + if (table->revision < 3) + return -ENOENT; + + for_each_possible_cpu(cpu) { + bool empty; + int level = 1; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct acpi_pptt_cache *cache; + struct acpi_pptt_processor *cpu_node; + + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (!cpu_node) + continue; + + do { + int cache_type[] = {CACHE_TYPE_INST, CACHE_TYPE_DATA, CACHE_TYPE_UNIFIED}; + + empty = true; + for (int i = 0; i < ARRAY_SIZE(cache_type); i++) { + struct acpi_pptt_cache_v1_full *cache_v1; + + cache = acpi_find_cache_node(table, acpi_cpu_id, cache_type[i], + level, &cpu_node); + + if (!cache) + continue; + + empty = false; + + cache_v1 = upgrade_pptt_cache(cache); + if (cache_v1 && cache_v1->cache_id == cache_id) + cpumask_set_cpu(cpu, cpus); + } + level++; + } while (!empty); + } + + return 0; +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index be074bdfd4d17..a9dbacabdf891 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1543,6 +1543,7 @@ int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_topology_hetero_id(unsigned int cpu); void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus); int find_acpi_cache_level_from_id(u32 cache_id); +int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus); #else static inline int acpi_pptt_cpu_is_thread(unsigned int cpu) { @@ -1570,6 +1571,11 @@ static inline int find_acpi_cache_level_from_id(u32 cache_id) { return -ENOENT; } +static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, + cpumask_t *cpus) +{ + return -ENOENT; +} #endif void acpi_arch_init(void); From 554d8d9172e37abe8e2de1c4f62328e161d3f3bc Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:36 +0000 Subject: [PATCH 06/74] arm64: kconfig: Add Kconfig entry for MPAM The bulk of the MPAM driver lives outside the arch code because it largely manages MMIO devices that generate interrupts. The driver needs a Kconfig symbol to enable it. As MPAM is only found on arm64 platforms, the arm64 tree is the most natural home for the Kconfig option. This Kconfig option will later be used by the arch code to enable or disable the MPAM context-switch code, and to register properties of CPUs with the MPAM driver. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo CC: Dave Martin Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit d8bf01d80919e81a06dca77556dcfb351fa99b0c) Signed-off-by: Lee Trager --- arch/arm64/Kconfig | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cdcf3a0fafd1a..95fa46d24e10a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2065,6 +2065,29 @@ config ARM64_TLB_RANGE ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a range of input addresses. +config ARM64_MPAM + bool "Enable support for MPAM" + help + Memory System Resource Partitioning and Monitoring (MPAM) is an + optional extension to the Arm architecture that allows each + transaction issued to the memory system to be labelled with a + Partition identifier (PARTID) and Performance Monitoring Group + identifier (PMG). + + Memory system components, such as the caches, can be configured with + policies to control how much of various physical resources (such as + memory bandwidth or cache memory) the transactions labelled with each + PARTID can consume. Depending on the capabilities of the hardware, + the PARTID and PMG can also be used as filtering criteria to measure + the memory system resource consumption of different parts of a + workload. + + Use of this extension requires CPU support, support in the + Memory System Components (MSC), and a description from firmware + of where the MSCs are in the address space. + + MPAM is exposed to user-space via the resctrl pseudo filesystem. + endmenu # "ARMv8.4 architectural features" menu "ARMv8.5 architectural features" From 87da89a50207d54c81c475989854eff5e58c6ff5 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:37 +0000 Subject: [PATCH 07/74] platform: Define platform_device_put cleanup handler Define a cleanup helper for use with __free to destroy platform devices automatically when the pointer goes out of scope. This is only intended to be used in error cases and so should be used with return_ptr() or no_free_ptr() directly to avoid the automatic destruction on success. A first use of this is introduced in a subsequent commit. Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit f5915600cc4ca0338a37d5a8a4032e25d939156b) Signed-off-by: Lee Trager --- include/linux/platform_device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bc843d76066a5..8299c0541dd2e 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -227,6 +227,7 @@ extern int platform_device_add_data(struct platform_device *pdev, extern int platform_device_add(struct platform_device *pdev); extern void platform_device_del(struct platform_device *pdev); extern void platform_device_put(struct platform_device *pdev); +DEFINE_FREE(platform_device_put, struct platform_device *, if (_T) platform_device_put(_T)) struct platform_driver { int (*probe)(struct platform_device *); From 98d1bab7d7f2193b80b6ebed0cf8595a118b1370 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:38 +0000 Subject: [PATCH 08/74] ACPI: Define acpi_put_table cleanup handler and acpi_get_table_pointer() helper Define a cleanup helper for use with __free to release the acpi table when the pointer goes out of scope. Also, introduce the helper acpi_get_table_pointer() to simplify a commonly used pattern involving acpi_get_table(). These are first used in a subsequent commit. Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 96f4a4d53e6660d9b62e8d739388267fbb660e9f) Signed-off-by: Lee Trager --- include/linux/acpi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a9dbacabdf891..ac8797f952365 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -8,6 +8,7 @@ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H +#include #include #include /* for struct resource */ #include @@ -221,6 +222,17 @@ void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); +static inline struct acpi_table_header *acpi_get_table_pointer(char *signature, u32 instance) +{ + struct acpi_table_header *table; + int status = acpi_get_table(signature, instance, &table); + + if (ACPI_FAILURE(status)) + return ERR_PTR(-ENOENT); + return table; +} +DEFINE_FREE(acpi_put_table, struct acpi_table_header *, if (!IS_ERR_OR_NULL(_T)) acpi_put_table(_T)) + int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, From 8474158961c45dd7ce004ac5eaecbd4073f5b4db Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:39 +0000 Subject: [PATCH 09/74] ACPI / MPAM: Parse the MPAM table Add code to parse the arm64 specific MPAM table, looking up the cache level from the PPTT and feeding the end result into the MPAM driver. This happens in two stages. Platform devices are created first for the MSC devices. Once the driver probes it calls acpi_mpam_parse_resources() to discover the RIS entries the MSC contains. For now the MPAM hook mpam_ris_create() is stubbed out, but will update the MPAM driver with optional discovered data about the RIS entries. CC: Carl Worth Link: https://developer.arm.com/documentation/den0065/3-0bet/?lang=en Reviewed-by: Lorenzo Pieralisi Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 115c5325beae7199219ab7c12ec2a2af8dea6c3c) Signed-off-by: Lee Trager --- arch/arm64/Kconfig | 1 + drivers/acpi/arm64/Kconfig | 3 + drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/mpam.c | 411 ++++++++++++++++++++++++++++++++++++ drivers/acpi/tables.c | 2 +- include/linux/arm_mpam.h | 47 +++++ 6 files changed, 464 insertions(+), 1 deletion(-) create mode 100644 drivers/acpi/arm64/mpam.c create mode 100644 include/linux/arm_mpam.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 95fa46d24e10a..d25f964ab08b0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2067,6 +2067,7 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" + select ACPI_MPAM if ACPI help Memory System Resource Partitioning and Monitoring (MPAM) is an optional extension to the Arm architecture that allows each diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig index b3ed6212244c1..f2fd79f22e7d8 100644 --- a/drivers/acpi/arm64/Kconfig +++ b/drivers/acpi/arm64/Kconfig @@ -21,3 +21,6 @@ config ACPI_AGDI config ACPI_APMT bool + +config ACPI_MPAM + bool diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 05ecde9eaabe9..9390b57cb5648 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI_APMT) += apmt.o obj-$(CONFIG_ACPI_FFH) += ffh.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_IORT) += iort.o +obj-$(CONFIG_ACPI_MPAM) += mpam.o obj-$(CONFIG_ACPI_PROCESSOR_IDLE) += cpuidle.o obj-$(CONFIG_ARM_AMBA) += amba.o obj-y += dma.o init.o diff --git a/drivers/acpi/arm64/mpam.c b/drivers/acpi/arm64/mpam.c new file mode 100644 index 0000000000000..84963a20c3e78 --- /dev/null +++ b/drivers/acpi/arm64/mpam.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. + +/* Parse the MPAM ACPI table feeding the discovered nodes into the driver */ + +#define pr_fmt(fmt) "ACPI MPAM: " fmt + +#include +#include +#include +#include +#include +#include + +#include + +/* + * Flags for acpi_table_mpam_msc.*_interrupt_flags. + * See 2.1.1 Interrupt Flags, Table 5, of DEN0065B_MPAM_ACPI_3.0-bet. + */ +#define ACPI_MPAM_MSC_IRQ_MODE BIT(0) +#define ACPI_MPAM_MSC_IRQ_TYPE_MASK GENMASK(2, 1) +#define ACPI_MPAM_MSC_IRQ_TYPE_WIRED 0 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK BIT(3) +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR 0 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER 1 +#define ACPI_MPAM_MSC_IRQ_AFFINITY_VALID BIT(4) + +/* + * Encodings for the MSC node body interface type field. + * See 2.1 MPAM MSC node, Table 4 of DEN0065B_MPAM_ACPI_3.0-bet. + */ +#define ACPI_MPAM_MSC_IFACE_MMIO 0x00 +#define ACPI_MPAM_MSC_IFACE_PCC 0x0a + +static bool _is_ppi_partition(u32 flags) +{ + u32 aff_type, is_ppi; + bool ret; + + is_ppi = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_VALID, flags); + if (!is_ppi) + return false; + + aff_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_MASK, flags); + ret = (aff_type == ACPI_MPAM_MSC_IRQ_AFFINITY_TYPE_PROCESSOR_CONTAINER); + if (ret) + pr_err_once("Partitioned interrupts not supported\n"); + + return ret; +} + +static int acpi_mpam_register_irq(struct platform_device *pdev, + u32 intid, u32 flags) +{ + int irq; + u32 int_type; + int trigger; + + if (!intid) + return -EINVAL; + + if (_is_ppi_partition(flags)) + return -EINVAL; + + trigger = FIELD_GET(ACPI_MPAM_MSC_IRQ_MODE, flags); + int_type = FIELD_GET(ACPI_MPAM_MSC_IRQ_TYPE_MASK, flags); + if (int_type != ACPI_MPAM_MSC_IRQ_TYPE_WIRED) + return -EINVAL; + + irq = acpi_register_gsi(&pdev->dev, intid, trigger, ACPI_ACTIVE_HIGH); + if (irq < 0) + pr_err_once("Failed to register interrupt 0x%x with ACPI\n", intid); + + return irq; +} + +static void acpi_mpam_parse_irqs(struct platform_device *pdev, + struct acpi_mpam_msc_node *tbl_msc, + struct resource *res, int *res_idx) +{ + u32 flags, intid; + int irq; + + intid = tbl_msc->overflow_interrupt; + flags = tbl_msc->overflow_interrupt_flags; + irq = acpi_mpam_register_irq(pdev, intid, flags); + if (irq > 0) + res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "overflow"); + + intid = tbl_msc->error_interrupt; + flags = tbl_msc->error_interrupt_flags; + irq = acpi_mpam_register_irq(pdev, intid, flags); + if (irq > 0) + res[(*res_idx)++] = DEFINE_RES_IRQ_NAMED(irq, "error"); +} + +static int acpi_mpam_parse_resource(struct mpam_msc *msc, + struct acpi_mpam_resource_node *res) +{ + int level, nid; + u32 cache_id; + + switch (res->locator_type) { + case ACPI_MPAM_LOCATION_TYPE_PROCESSOR_CACHE: + cache_id = res->locator.cache_locator.cache_reference; + level = find_acpi_cache_level_from_id(cache_id); + if (level <= 0) { + pr_err_once("Bad level (%d) for cache with id %u\n", level, cache_id); + return -EINVAL; + } + return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_CACHE, + level, cache_id); + case ACPI_MPAM_LOCATION_TYPE_MEMORY: + nid = pxm_to_node(res->locator.memory_locator.proximity_domain); + if (nid == NUMA_NO_NODE) { + pr_debug("Bad proximity domain %lld, using node 0 instead\n", + res->locator.memory_locator.proximity_domain); + nid = 0; + } + return mpam_ris_create(msc, res->ris_index, MPAM_CLASS_MEMORY, + MPAM_CLASS_ID_DEFAULT, nid); + default: + /* These get discovered later and are treated as unknown */ + return 0; + } +} + +int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc) +{ + int i, err; + char *ptr, *table_end; + struct acpi_mpam_resource_node *resource; + + table_end = (char *)tbl_msc + tbl_msc->length; + ptr = (char *)(tbl_msc + 1); + for (i = 0; i < tbl_msc->num_resource_nodes; i++) { + u64 max_deps, remaining_table; + + if (ptr + sizeof(*resource) > table_end) + return -EINVAL; + + resource = (struct acpi_mpam_resource_node *)ptr; + + remaining_table = table_end - ptr; + max_deps = remaining_table / sizeof(struct acpi_mpam_func_deps); + if (resource->num_functional_deps > max_deps) { + pr_debug("MSC has impossible number of functional dependencies\n"); + return -EINVAL; + } + + err = acpi_mpam_parse_resource(msc, resource); + if (err) + return err; + + ptr += sizeof(*resource); + ptr += resource->num_functional_deps * sizeof(struct acpi_mpam_func_deps); + } + + return 0; +} + +/* + * Creates the device power management link and returns true if the + * acpi id is valid and usable for cpu affinity. This is the case + * when the linked device is a processor or a processor container. + */ +static bool __init parse_msc_pm_link(struct acpi_mpam_msc_node *tbl_msc, + struct platform_device *pdev, + u32 *acpi_id) +{ + char hid[sizeof(tbl_msc->hardware_id_linked_device) + 1] = { 0 }; + bool acpi_id_valid = false; + struct acpi_device *buddy; + char uid[11]; + int len; + + memcpy(hid, &tbl_msc->hardware_id_linked_device, + sizeof(tbl_msc->hardware_id_linked_device)); + + if (!strcmp(hid, ACPI_PROCESSOR_CONTAINER_HID)) { + *acpi_id = tbl_msc->instance_id_linked_device; + acpi_id_valid = true; + } + + len = snprintf(uid, sizeof(uid), "%u", + tbl_msc->instance_id_linked_device); + if (len >= sizeof(uid)) { + pr_debug("Failed to convert uid of device for power management."); + return acpi_id_valid; + } + + buddy = acpi_dev_get_first_match_dev(hid, uid, -1); + if (buddy) { + device_link_add(&pdev->dev, &buddy->dev, DL_FLAG_STATELESS); + acpi_dev_put(buddy); + } + + return acpi_id_valid; +} + +static int decode_interface_type(struct acpi_mpam_msc_node *tbl_msc, + enum mpam_msc_iface *iface) +{ + switch (tbl_msc->interface_type) { + case ACPI_MPAM_MSC_IFACE_MMIO: + *iface = MPAM_IFACE_MMIO; + return 0; + case ACPI_MPAM_MSC_IFACE_PCC: + *iface = MPAM_IFACE_PCC; + return 0; + default: + return -EINVAL; + } +} + +static struct platform_device * __init acpi_mpam_parse_msc(struct acpi_mpam_msc_node *tbl_msc) +{ + struct platform_device *pdev __free(platform_device_put) = + platform_device_alloc("mpam_msc", tbl_msc->identifier); + int next_res = 0, next_prop = 0, err; + /* pcc, nrdy, affinity and a sentinel */ + struct property_entry props[4] = { 0 }; + /* mmio, 2xirq, no sentinel. */ + struct resource res[3] = { 0 }; + struct acpi_device *companion; + enum mpam_msc_iface iface; + char uid[16]; + u32 acpi_id; + + if (!pdev) + return ERR_PTR(-ENOMEM); + + /* Some power management is described in the namespace: */ + err = snprintf(uid, sizeof(uid), "%u", tbl_msc->identifier); + if (err > 0 && err < sizeof(uid)) { + companion = acpi_dev_get_first_match_dev("ARMHAA5C", uid, -1); + if (companion) { + ACPI_COMPANION_SET(&pdev->dev, companion); + acpi_dev_put(companion); + } else { + pr_debug("MSC.%u: missing namespace entry\n", tbl_msc->identifier); + } + } + + if (decode_interface_type(tbl_msc, &iface)) { + pr_debug("MSC.%u: unknown interface type\n", tbl_msc->identifier); + return ERR_PTR(-EINVAL); + } + + if (iface == MPAM_IFACE_MMIO) { + res[next_res++] = DEFINE_RES_MEM_NAMED(tbl_msc->base_address, + tbl_msc->mmio_size, + "MPAM:MSC"); + } else if (iface == MPAM_IFACE_PCC) { + props[next_prop++] = PROPERTY_ENTRY_U32("pcc-channel", + tbl_msc->base_address); + } + + acpi_mpam_parse_irqs(pdev, tbl_msc, res, &next_res); + + WARN_ON_ONCE(next_res > ARRAY_SIZE(res)); + err = platform_device_add_resources(pdev, res, next_res); + if (err) + return ERR_PTR(err); + + props[next_prop++] = PROPERTY_ENTRY_U32("arm,not-ready-us", + tbl_msc->max_nrdy_usec); + + /* + * The MSC's CPU affinity is described via its linked power + * management device, but only if it points at a Processor or + * Processor Container. + */ + if (parse_msc_pm_link(tbl_msc, pdev, &acpi_id)) + props[next_prop++] = PROPERTY_ENTRY_U32("cpu_affinity", acpi_id); + + WARN_ON_ONCE(next_prop > ARRAY_SIZE(props) - 1); + err = device_create_managed_software_node(&pdev->dev, props, NULL); + if (err) + return ERR_PTR(err); + + /* + * Stash the table entry for acpi_mpam_parse_resources() to discover + * what this MSC controls. + */ + err = platform_device_add_data(pdev, tbl_msc, tbl_msc->length); + if (err) + return ERR_PTR(err); + + err = platform_device_add(pdev); + if (err) + return ERR_PTR(err); + + return_ptr(pdev); +} + +static int __init acpi_mpam_parse(void) +{ + char *table_end, *table_offset; + struct acpi_mpam_msc_node *tbl_msc; + struct platform_device *pdev; + + if (acpi_disabled || !system_supports_mpam()) + return 0; + + struct acpi_table_header *table __free(acpi_put_table) = + acpi_get_table_pointer(ACPI_SIG_MPAM, 0); + + if (IS_ERR(table)) + return 0; + + if (table->revision < 1) { + pr_debug("MPAM ACPI table revision %d not supported\n", table->revision); + return 0; + } + + table_offset = (char *)(table + 1); + table_end = (char *)table + table->length; + + while (table_offset < table_end) { + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; + if (table_offset + sizeof(*tbl_msc) > table_end || + table_offset + tbl_msc->length > table_end) { + pr_err("MSC entry overlaps end of ACPI table\n"); + return -EINVAL; + } + table_offset += tbl_msc->length; + + /* + * If any of the reserved fields are set, make no attempt to + * parse the MSC structure. This MSC will still be counted by + * acpi_mpam_count_msc(), meaning the MPAM driver can't probe + * against all MSC, and will never be enabled. There is no way + * to enable it safely, because we cannot determine safe + * system-wide partid and pmg ranges in this situation. + */ + if (tbl_msc->reserved || tbl_msc->reserved1 || tbl_msc->reserved2) { + pr_err_once("Unrecognised MSC, MPAM not usable\n"); + pr_debug("MSC.%u: reserved field set\n", tbl_msc->identifier); + continue; + } + + if (!tbl_msc->mmio_size) { + pr_debug("MSC.%u: marked as disabled\n", tbl_msc->identifier); + continue; + } + + pdev = acpi_mpam_parse_msc(tbl_msc); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + } + + return 0; +} + +/** + * acpi_mpam_count_msc() - Count the number of MSC described by firmware. + * + * Returns the number of MSCs, or zero for an error. + * + * This can be called before or in parallel with acpi_mpam_parse(). + */ +int acpi_mpam_count_msc(void) +{ + char *table_end, *table_offset; + struct acpi_mpam_msc_node *tbl_msc; + int count = 0; + + if (acpi_disabled || !system_supports_mpam()) + return 0; + + struct acpi_table_header *table __free(acpi_put_table) = + acpi_get_table_pointer(ACPI_SIG_MPAM, 0); + + if (IS_ERR(table)) + return 0; + + if (table->revision < 1) + return 0; + + table_offset = (char *)(table + 1); + table_end = (char *)table + table->length; + + while (table_offset < table_end) { + tbl_msc = (struct acpi_mpam_msc_node *)table_offset; + + if (table_offset + sizeof(*tbl_msc) > table_end) + return -EINVAL; + if (tbl_msc->length < sizeof(*tbl_msc)) + return -EINVAL; + if (tbl_msc->length > table_end - table_offset) + return -EINVAL; + table_offset += tbl_msc->length; + + if (!tbl_msc->mmio_size) + continue; + + count++; + } + + return count; +} + +/* + * Call after ACPI devices have been created, which happens behind acpi_scan_init() + * called from subsys_initcall(). PCC requires the mailbox driver, which is + * initialised from postcore_initcall(). + */ +subsys_initcall_sync(acpi_mpam_parse); diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 57fc8bc56166b..4286e4af10924 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -408,7 +408,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __nonstring_array __initconst ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, ACPI_SIG_NHLT, ACPI_SIG_AEST, ACPI_SIG_CEDT, ACPI_SIG_AGDI, - ACPI_SIG_NBFT, ACPI_SIG_SWFT}; + ACPI_SIG_NBFT, ACPI_SIG_SWFT, ACPI_SIG_MPAM}; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h new file mode 100644 index 0000000000000..4b7f335181e0a --- /dev/null +++ b/include/linux/arm_mpam.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 Arm Ltd. */ + +#ifndef __LINUX_ARM_MPAM_H +#define __LINUX_ARM_MPAM_H + +#include +#include + +struct mpam_msc; + +enum mpam_msc_iface { + MPAM_IFACE_MMIO, /* a real MPAM MSC */ + MPAM_IFACE_PCC, /* a fake MPAM MSC */ +}; + +enum mpam_class_types { + MPAM_CLASS_CACHE, /* Caches, e.g. L2, L3 */ + MPAM_CLASS_MEMORY, /* Main memory */ + MPAM_CLASS_UNKNOWN, /* Everything else, e.g. SMMU */ +}; + +#define MPAM_CLASS_ID_DEFAULT 255 + +#ifdef CONFIG_ACPI_MPAM +int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc); + +int acpi_mpam_count_msc(void); +#else +static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, + struct acpi_mpam_msc_node *tbl_msc) +{ + return -EINVAL; +} + +static inline int acpi_mpam_count_msc(void) { return -EINVAL; } +#endif + +static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, + int component_id) +{ + return -EINVAL; +} + +#endif /* __LINUX_ARM_MPAM_H */ From bbffd0a14483048ce1cda8fab314d26dcbe5c3ad Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:40 +0000 Subject: [PATCH 10/74] arm_mpam: Add probe/remove for mpam msc driver and kbuild boiler plate Probing MPAM is convoluted. MSCs that are integrated with a CPU may only be accessible from those CPUs, and they may not be online. Touching the hardware early is pointless as MPAM can't be used until the system-wide common values for num_partid and num_pmg have been discovered. Start with driver probe/remove and mapping the MSC. Cc: Carl Worth Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit f04046f2577a5c76167333ca99d3903ee5331ba0) Signed-off-by: Lee Trager --- arch/arm64/Kconfig | 1 + drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/resctrl/Kconfig | 15 +++ drivers/resctrl/Makefile | 4 + drivers/resctrl/mpam_devices.c | 190 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 49 ++++++++ 7 files changed, 262 insertions(+) create mode 100644 drivers/resctrl/Kconfig create mode 100644 drivers/resctrl/Makefile create mode 100644 drivers/resctrl/mpam_devices.c create mode 100644 drivers/resctrl/mpam_internal.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d25f964ab08b0..d17fc19e5eecc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2067,6 +2067,7 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" + select ARM64_MPAM_DRIVER if EXPERT # does nothing yet select ACPI_MPAM if ACPI help Memory System Resource Partitioning and Monitoring (MPAM) is an diff --git a/drivers/Kconfig b/drivers/Kconfig index 4915a63866b01..3054b50a2f4cb 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -251,4 +251,6 @@ source "drivers/hte/Kconfig" source "drivers/cdx/Kconfig" +source "drivers/resctrl/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 8e1ffa4358d5f..20eb17596b899 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -194,6 +194,7 @@ obj-$(CONFIG_HTE) += hte/ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ obj-$(CONFIG_DPLL) += dpll/ +obj-y += resctrl/ obj-$(CONFIG_DIBS) += dibs/ obj-$(CONFIG_S390) += s390/ diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig new file mode 100644 index 0000000000000..5f7f748e611e1 --- /dev/null +++ b/drivers/resctrl/Kconfig @@ -0,0 +1,15 @@ +menuconfig ARM64_MPAM_DRIVER + bool "MPAM driver" + depends on ARM64 && ARM64_MPAM && EXPERT + help + Memory System Resource Partitioning and Monitoring (MPAM) driver for + System IP, e.g. caches and memory controllers. + +if ARM64_MPAM_DRIVER + +config ARM64_MPAM_DRIVER_DEBUG + bool "Enable debug messages from the MPAM driver" + help + Say yes here to enable debug messages from the MPAM driver. + +endif diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile new file mode 100644 index 0000000000000..898199dcf80d5 --- /dev/null +++ b/drivers/resctrl/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o +mpam-y += mpam_devices.o + +ccflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c new file mode 100644 index 0000000000000..e097e852f9c3a --- /dev/null +++ b/drivers/resctrl/mpam_devices.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. + +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpam_internal.h" + +/* + * mpam_list_lock protects the SRCU lists when writing. Once the + * mpam_enabled key is enabled these lists are read-only, + * unless the error interrupt disables the driver. + */ +static DEFINE_MUTEX(mpam_list_lock); +static LIST_HEAD(mpam_all_msc); + +struct srcu_struct mpam_srcu; + +/* + * Number of MSCs that have been probed. Once all MSCs have been probed MPAM + * can be enabled. + */ +static atomic_t mpam_num_msc; + +/* + * An MSC can control traffic from a set of CPUs, but may only be accessible + * from a (hopefully wider) set of CPUs. The common reason for this is power + * management. If all the CPUs in a cluster are in PSCI:CPU_SUSPEND, the + * corresponding cache may also be powered off. By making accesses from + * one of those CPUs, we ensure we don't access a cache that's powered off. + */ +static void update_msc_accessibility(struct mpam_msc *msc) +{ + u32 affinity_id; + int err; + + err = device_property_read_u32(&msc->pdev->dev, "cpu_affinity", + &affinity_id); + if (err) + cpumask_copy(&msc->accessibility, cpu_possible_mask); + else + acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility); +} + +static void mpam_msc_destroy(struct mpam_msc *msc) +{ + struct platform_device *pdev = msc->pdev; + + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&msc->all_msc_list); + platform_set_drvdata(pdev, NULL); +} + +static void mpam_msc_drv_remove(struct platform_device *pdev) +{ + struct mpam_msc *msc = platform_get_drvdata(pdev); + + mutex_lock(&mpam_list_lock); + mpam_msc_destroy(msc); + mutex_unlock(&mpam_list_lock); + + synchronize_srcu(&mpam_srcu); +} + +static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) +{ + int err; + u32 tmp; + struct mpam_msc *msc; + struct resource *msc_res; + struct device *dev = &pdev->dev; + + lockdep_assert_held(&mpam_list_lock); + + msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); + if (!msc) + return ERR_PTR(-ENOMEM); + + err = devm_mutex_init(dev, &msc->probe_lock); + if (err) + return ERR_PTR(err); + + err = devm_mutex_init(dev, &msc->part_sel_lock); + if (err) + return ERR_PTR(err); + + msc->id = pdev->id; + msc->pdev = pdev; + INIT_LIST_HEAD_RCU(&msc->all_msc_list); + INIT_LIST_HEAD_RCU(&msc->ris); + + update_msc_accessibility(msc); + if (cpumask_empty(&msc->accessibility)) { + dev_err_once(dev, "MSC is not accessible from any CPU!"); + return ERR_PTR(-EINVAL); + } + + if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp)) + msc->iface = MPAM_IFACE_MMIO; + else + msc->iface = MPAM_IFACE_PCC; + + if (msc->iface == MPAM_IFACE_MMIO) { + void __iomem *io; + + io = devm_platform_get_and_ioremap_resource(pdev, 0, + &msc_res); + if (IS_ERR(io)) { + dev_err_once(dev, "Failed to map MSC base address\n"); + return ERR_CAST(io); + } + msc->mapped_hwpage_sz = msc_res->end - msc_res->start; + msc->mapped_hwpage = io; + } else { + return ERR_PTR(-EINVAL); + } + + list_add_rcu(&msc->all_msc_list, &mpam_all_msc); + platform_set_drvdata(pdev, msc); + + return msc; +} + +static int fw_num_msc; + +static int mpam_msc_drv_probe(struct platform_device *pdev) +{ + int err; + struct mpam_msc *msc = NULL; + void *plat_data = pdev->dev.platform_data; + + mutex_lock(&mpam_list_lock); + msc = do_mpam_msc_drv_probe(pdev); + mutex_unlock(&mpam_list_lock); + + if (IS_ERR(msc)) + return PTR_ERR(msc); + + /* Create RIS entries described by firmware */ + err = acpi_mpam_parse_resources(msc, plat_data); + if (err) { + mpam_msc_drv_remove(pdev); + return err; + } + + if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc) + pr_info("Discovered all MSCs\n"); + + return 0; +} + +static struct platform_driver mpam_msc_driver = { + .driver = { + .name = "mpam_msc", + }, + .probe = mpam_msc_drv_probe, + .remove = mpam_msc_drv_remove, +}; + +static int __init mpam_msc_driver_init(void) +{ + if (!system_supports_mpam()) + return -EOPNOTSUPP; + + init_srcu_struct(&mpam_srcu); + + fw_num_msc = acpi_mpam_count_msc(); + if (fw_num_msc <= 0) { + pr_err("No MSC devices found in firmware\n"); + return -EINVAL; + } + + return platform_driver_register(&mpam_msc_driver); +} +subsys_initcall(mpam_msc_driver_init); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h new file mode 100644 index 0000000000000..540066903eca7 --- /dev/null +++ b/drivers/resctrl/mpam_internal.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2025 Arm Ltd. + +#ifndef MPAM_INTERNAL_H +#define MPAM_INTERNAL_H + +#include +#include +#include +#include +#include + +struct platform_device; + +struct mpam_msc { + /* member of mpam_all_msc */ + struct list_head all_msc_list; + + int id; + struct platform_device *pdev; + + /* Not modified after mpam_is_enabled() becomes true */ + enum mpam_msc_iface iface; + u32 nrdy_usec; + cpumask_t accessibility; + + /* + * probe_lock is only taken during discovery. After discovery these + * properties become read-only and the lists are protected by SRCU. + */ + struct mutex probe_lock; + unsigned long ris_idxs; + u32 ris_max; + + /* mpam_msc_ris of this component */ + struct list_head ris; + + /* + * part_sel_lock protects access to the MSC hardware registers that are + * affected by MPAMCFG_PART_SEL. (including the ID registers that vary + * by RIS). + * If needed, take msc->probe_lock first. + */ + struct mutex part_sel_lock; + + void __iomem *mapped_hwpage; + size_t mapped_hwpage_sz; +}; +#endif /* MPAM_INTERNAL_H */ From 253d035a97bed2aaafe3c4dac65ca87b5296c0eb Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:41 +0000 Subject: [PATCH 11/74] arm_mpam: Add the class and component structures for firmware described ris An MSC is a container of resources, each identified by their RIS index. Some RIS are described by firmware to provide their position in the system. Others are discovered when the driver probes the hardware. To configure a resource it needs to be found by its class, e.g. 'L2'. There are two kinds of grouping, a class is a set of components, which are visible to user-space as there are likely to be multiple instances of the L2 cache. (e.g. one per cluster or package) Add support for creating and destroying structures to allow a hierarchy of resources to be created. Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 01fb4b8224726aa0f2170b63e4685cf0eec85d8d) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 392 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 94 ++++++++ include/linux/arm_mpam.h | 5 + 3 files changed, 490 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index e097e852f9c3a..f1dcf9bb14f25 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -36,6 +36,383 @@ struct srcu_struct mpam_srcu; */ static atomic_t mpam_num_msc; +/* + * An MSC is a physical container for controls and monitors, each identified by + * their RIS index. These share a base-address, interrupts and some MMIO + * registers. A vMSC is a virtual container for RIS in an MSC that control or + * monitor the same thing. Members of a vMSC are all RIS in the same MSC, but + * not all RIS in an MSC share a vMSC. + * + * Components are a group of vMSC that control or monitor the same thing but + * are from different MSC, so have different base-address, interrupts etc. + * Classes are the set components of the same type. + * + * The features of a vMSC is the union of the RIS it contains. + * The features of a Class and Component are the common subset of the vMSC + * they contain. + * + * e.g. The system cache may have bandwidth controls on multiple interfaces, + * for regulating traffic from devices independently of traffic from CPUs. + * If these are two RIS in one MSC, they will be treated as controlling + * different things, and will not share a vMSC/component/class. + * + * e.g. The L2 may have one MSC and two RIS, one for cache-controls another + * for bandwidth. These two RIS are members of the same vMSC. + * + * e.g. The set of RIS that make up the L2 are grouped as a component. These + * are sometimes termed slices. They should be configured the same, as if there + * were only one. + * + * e.g. The SoC probably has more than one L2, each attached to a distinct set + * of CPUs. All the L2 components are grouped as a class. + * + * When creating an MSC, struct mpam_msc is added to the all mpam_all_msc list, + * then linked via struct mpam_ris to a vmsc, component and class. + * The same MSC may exist under different class->component->vmsc paths, but the + * RIS index will be unique. + */ +LIST_HEAD(mpam_classes); + +/* List of all objects that can be free()d after synchronise_srcu() */ +static LLIST_HEAD(mpam_garbage); + +static inline void init_garbage(struct mpam_garbage *garbage) +{ + init_llist_node(&garbage->llist); +} + +#define add_to_garbage(x) \ +do { \ + __typeof__(x) _x = (x); \ + _x->garbage.to_free = _x; \ + llist_add(&_x->garbage.llist, &mpam_garbage); \ +} while (0) + +static void mpam_free_garbage(void) +{ + struct mpam_garbage *iter, *tmp; + struct llist_node *to_free = llist_del_all(&mpam_garbage); + + if (!to_free) + return; + + synchronize_srcu(&mpam_srcu); + + llist_for_each_entry_safe(iter, tmp, to_free, llist) { + if (iter->pdev) + devm_kfree(&iter->pdev->dev, iter->to_free); + else + kfree(iter->to_free); + } +} + +static struct mpam_class * +mpam_class_alloc(u8 level_idx, enum mpam_class_types type) +{ + struct mpam_class *class; + + lockdep_assert_held(&mpam_list_lock); + + class = kzalloc(sizeof(*class), GFP_KERNEL); + if (!class) + return ERR_PTR(-ENOMEM); + init_garbage(&class->garbage); + + INIT_LIST_HEAD_RCU(&class->components); + /* Affinity is updated when ris are added */ + class->level = level_idx; + class->type = type; + INIT_LIST_HEAD_RCU(&class->classes_list); + + list_add_rcu(&class->classes_list, &mpam_classes); + + return class; +} + +static void mpam_class_destroy(struct mpam_class *class) +{ + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&class->classes_list); + add_to_garbage(class); +} + +static struct mpam_class * +mpam_class_find(u8 level_idx, enum mpam_class_types type) +{ + struct mpam_class *class; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, &mpam_classes, classes_list) { + if (class->type == type && class->level == level_idx) + return class; + } + + return mpam_class_alloc(level_idx, type); +} + +static struct mpam_component * +mpam_component_alloc(struct mpam_class *class, int id) +{ + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + comp = kzalloc(sizeof(*comp), GFP_KERNEL); + if (!comp) + return ERR_PTR(-ENOMEM); + init_garbage(&comp->garbage); + + comp->comp_id = id; + INIT_LIST_HEAD_RCU(&comp->vmsc); + /* Affinity is updated when RIS are added */ + INIT_LIST_HEAD_RCU(&comp->class_list); + comp->class = class; + + list_add_rcu(&comp->class_list, &class->components); + + return comp; +} + +static void mpam_component_destroy(struct mpam_component *comp) +{ + struct mpam_class *class = comp->class; + + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&comp->class_list); + add_to_garbage(comp); + + if (list_empty(&class->components)) + mpam_class_destroy(class); +} + +static struct mpam_component * +mpam_component_find(struct mpam_class *class, int id) +{ + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(comp, &class->components, class_list) { + if (comp->comp_id == id) + return comp; + } + + return mpam_component_alloc(class, id); +} + +static struct mpam_vmsc * +mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_held(&mpam_list_lock); + + vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL); + if (!vmsc) + return ERR_PTR(-ENOMEM); + init_garbage(&vmsc->garbage); + + INIT_LIST_HEAD_RCU(&vmsc->ris); + INIT_LIST_HEAD_RCU(&vmsc->comp_list); + vmsc->comp = comp; + vmsc->msc = msc; + + list_add_rcu(&vmsc->comp_list, &comp->vmsc); + + return vmsc; +} + +static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc) +{ + struct mpam_component *comp = vmsc->comp; + + lockdep_assert_held(&mpam_list_lock); + + list_del_rcu(&vmsc->comp_list); + add_to_garbage(vmsc); + + if (list_empty(&comp->vmsc)) + mpam_component_destroy(comp); +} + +static struct mpam_vmsc * +mpam_vmsc_find(struct mpam_component *comp, struct mpam_msc *msc) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + if (vmsc->msc->id == msc->id) + return vmsc; + } + + return mpam_vmsc_alloc(comp, msc); +} + +/* + * The cacheinfo structures are only populated when CPUs are online. + * This helper walks the acpi tables to include offline CPUs too. + */ +int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, + cpumask_t *affinity) +{ + return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity); +} + +/* + * cpumask_of_node() only knows about online CPUs. This can't tell us whether + * a class is represented on all possible CPUs. + */ +static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (node_id == cpu_to_node(cpu)) + cpumask_set_cpu(cpu, affinity); + } +} + +static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity, + enum mpam_class_types type, + struct mpam_class *class, + struct mpam_component *comp) +{ + int err; + + switch (type) { + case MPAM_CLASS_CACHE: + err = mpam_get_cpumask_from_cache_id(comp->comp_id, class->level, + affinity); + if (err) { + dev_warn_once(&msc->pdev->dev, + "Failed to determine CPU affinity\n"); + return err; + } + + if (cpumask_empty(affinity)) + dev_warn_once(&msc->pdev->dev, "no CPUs associated with cache node\n"); + + break; + case MPAM_CLASS_MEMORY: + get_cpumask_from_node_id(comp->comp_id, affinity); + /* affinity may be empty for CPU-less memory nodes */ + break; + case MPAM_CLASS_UNKNOWN: + return 0; + } + + cpumask_and(affinity, affinity, &msc->accessibility); + + return 0; +} + +static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, + int component_id) +{ + int err; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + struct mpam_class *class; + struct mpam_component *comp; + struct platform_device *pdev = msc->pdev; + + lockdep_assert_held(&mpam_list_lock); + + if (ris_idx > MPAM_MSC_MAX_NUM_RIS) + return -EINVAL; + + if (test_and_set_bit(ris_idx, &msc->ris_idxs)) + return -EBUSY; + + ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), GFP_KERNEL); + if (!ris) + return -ENOMEM; + init_garbage(&ris->garbage); + ris->garbage.pdev = pdev; + + class = mpam_class_find(class_id, type); + if (IS_ERR(class)) + return PTR_ERR(class); + + comp = mpam_component_find(class, component_id); + if (IS_ERR(comp)) { + if (list_empty(&class->components)) + mpam_class_destroy(class); + return PTR_ERR(comp); + } + + vmsc = mpam_vmsc_find(comp, msc); + if (IS_ERR(vmsc)) { + if (list_empty(&comp->vmsc)) + mpam_component_destroy(comp); + return PTR_ERR(vmsc); + } + + err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp); + if (err) { + if (list_empty(&vmsc->ris)) + mpam_vmsc_destroy(vmsc); + return err; + } + + ris->ris_idx = ris_idx; + INIT_LIST_HEAD_RCU(&ris->msc_list); + INIT_LIST_HEAD_RCU(&ris->vmsc_list); + ris->vmsc = vmsc; + + cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity); + cpumask_or(&class->affinity, &class->affinity, &ris->affinity); + list_add_rcu(&ris->vmsc_list, &vmsc->ris); + list_add_rcu(&ris->msc_list, &msc->ris); + + return 0; +} + +static void mpam_ris_destroy(struct mpam_msc_ris *ris) +{ + struct mpam_vmsc *vmsc = ris->vmsc; + struct mpam_msc *msc = vmsc->msc; + struct mpam_component *comp = vmsc->comp; + struct mpam_class *class = comp->class; + + lockdep_assert_held(&mpam_list_lock); + + /* + * It is assumed affinities don't overlap. If they do the class becomes + * unusable immediately. + */ + cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity); + cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity); + clear_bit(ris->ris_idx, &msc->ris_idxs); + list_del_rcu(&ris->msc_list); + list_del_rcu(&ris->vmsc_list); + add_to_garbage(ris); + + if (list_empty(&vmsc->ris)) + mpam_vmsc_destroy(vmsc); +} + +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, int component_id) +{ + int err; + + mutex_lock(&mpam_list_lock); + err = mpam_ris_create_locked(msc, ris_idx, type, class_id, + component_id); + mutex_unlock(&mpam_list_lock); + if (err) + mpam_free_garbage(); + + return err; +} + /* * An MSC can control traffic from a set of CPUs, but may only be accessible * from a (hopefully wider) set of CPUs. The common reason for this is power @@ -56,14 +433,25 @@ static void update_msc_accessibility(struct mpam_msc *msc) acpi_pptt_get_cpus_from_container(affinity_id, &msc->accessibility); } +/* + * There are two ways of reaching a struct mpam_msc_ris. Via the + * class->component->vmsc->ris, or via the msc. + * When destroying the msc, the other side needs unlinking and cleaning up too. + */ static void mpam_msc_destroy(struct mpam_msc *msc) { struct platform_device *pdev = msc->pdev; + struct mpam_msc_ris *ris, *tmp; lockdep_assert_held(&mpam_list_lock); + list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list) + mpam_ris_destroy(ris); + list_del_rcu(&msc->all_msc_list); platform_set_drvdata(pdev, NULL); + + add_to_garbage(msc); } static void mpam_msc_drv_remove(struct platform_device *pdev) @@ -74,7 +462,7 @@ static void mpam_msc_drv_remove(struct platform_device *pdev) mpam_msc_destroy(msc); mutex_unlock(&mpam_list_lock); - synchronize_srcu(&mpam_srcu); + mpam_free_garbage(); } static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) @@ -90,6 +478,8 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) msc = devm_kzalloc(&pdev->dev, sizeof(*msc), GFP_KERNEL); if (!msc) return ERR_PTR(-ENOMEM); + init_garbage(&msc->garbage); + msc->garbage.pdev = pdev; err = devm_mutex_init(dev, &msc->probe_lock); if (err) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 540066903eca7..8f7a28d2c021a 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -7,11 +7,30 @@ #include #include #include +#include #include +#include #include +#define MPAM_MSC_MAX_NUM_RIS 16 + struct platform_device; +/* + * Structures protected by SRCU may not be freed for a surprising amount of + * time (especially if perf is running). To ensure the MPAM error interrupt can + * tear down all the structures, build a list of objects that can be garbage + * collected once synchronize_srcu() has returned. + * If pdev is non-NULL, use devm_kfree(). + */ +struct mpam_garbage { + /* member of mpam_garbage */ + struct llist_node llist; + + void *to_free; + struct platform_device *pdev; +}; + struct mpam_msc { /* member of mpam_all_msc */ struct list_head all_msc_list; @@ -45,5 +64,80 @@ struct mpam_msc { void __iomem *mapped_hwpage; size_t mapped_hwpage_sz; + + struct mpam_garbage garbage; +}; + +struct mpam_class { + /* mpam_components in this class */ + struct list_head components; + + cpumask_t affinity; + + u8 level; + enum mpam_class_types type; + + /* member of mpam_classes */ + struct list_head classes_list; + + struct mpam_garbage garbage; +}; + +struct mpam_component { + u32 comp_id; + + /* mpam_vmsc in this component */ + struct list_head vmsc; + + cpumask_t affinity; + + /* member of mpam_class:components */ + struct list_head class_list; + + /* parent: */ + struct mpam_class *class; + + struct mpam_garbage garbage; +}; + +struct mpam_vmsc { + /* member of mpam_component:vmsc_list */ + struct list_head comp_list; + + /* mpam_msc_ris in this vmsc */ + struct list_head ris; + + /* All RIS in this vMSC are members of this MSC */ + struct mpam_msc *msc; + + /* parent: */ + struct mpam_component *comp; + + struct mpam_garbage garbage; +}; + +struct mpam_msc_ris { + u8 ris_idx; + + cpumask_t affinity; + + /* member of mpam_vmsc:ris */ + struct list_head vmsc_list; + + /* member of mpam_msc:ris */ + struct list_head msc_list; + + /* parent: */ + struct mpam_vmsc *vmsc; + + struct mpam_garbage garbage; }; + +/* List of all classes - protected by srcu*/ +extern struct srcu_struct mpam_srcu; +extern struct list_head mpam_classes; + +int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, + cpumask_t *affinity); + #endif /* MPAM_INTERNAL_H */ diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 4b7f335181e0a..13a8ac5c2cbda 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -37,11 +37,16 @@ static inline int acpi_mpam_parse_resources(struct mpam_msc *msc, static inline int acpi_mpam_count_msc(void) { return -EINVAL; } #endif +#ifdef CONFIG_ARM64_MPAM_DRIVER +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, + enum mpam_class_types type, u8 class_id, int component_id); +#else static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, enum mpam_class_types type, u8 class_id, int component_id) { return -EINVAL; } +#endif #endif /* __LINUX_ARM_MPAM_H */ From 9a3b3b61de49c8990263caf1fae2959f30ad9c14 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:42 +0000 Subject: [PATCH 12/74] arm_mpam: Add MPAM MSC register layout definitions Memory Partitioning and Monitoring (MPAM) has memory mapped devices (MSCs) with an identity/configuration page. Add the definitions for these registers as offset within the page(s). Link: https://developer.arm.com/documentation/ihi0099/aa/ Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit aa64b9e110515610b6498df0f8fce9b1c6c44f72) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_internal.h | 267 ++++++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 8f7a28d2c021a..51f791cc207bb 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -140,4 +140,271 @@ extern struct list_head mpam_classes; int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); +/* + * MPAM MSCs have the following register layout. See: + * Arm Memory System Resource Partitioning and Monitoring (MPAM) System + * Component Specification. + * https://developer.arm.com/documentation/ihi0099/aa/ + */ +#define MPAM_ARCHITECTURE_V1 0x10 + +/* Memory mapped control pages */ +/* ID Register offsets in the memory mapped page */ +#define MPAMF_IDR 0x0000 /* features id register */ +#define MPAMF_IIDR 0x0018 /* implementer id register */ +#define MPAMF_AIDR 0x0020 /* architectural id register */ +#define MPAMF_IMPL_IDR 0x0028 /* imp-def partitioning */ +#define MPAMF_CPOR_IDR 0x0030 /* cache-portion partitioning */ +#define MPAMF_CCAP_IDR 0x0038 /* cache-capacity partitioning */ +#define MPAMF_MBW_IDR 0x0040 /* mem-bw partitioning */ +#define MPAMF_PRI_IDR 0x0048 /* priority partitioning */ +#define MPAMF_MSMON_IDR 0x0080 /* performance monitoring features */ +#define MPAMF_CSUMON_IDR 0x0088 /* cache-usage monitor */ +#define MPAMF_MBWUMON_IDR 0x0090 /* mem-bw usage monitor */ +#define MPAMF_PARTID_NRW_IDR 0x0050 /* partid-narrowing */ + +/* Configuration and Status Register offsets in the memory mapped page */ +#define MPAMCFG_PART_SEL 0x0100 /* partid to configure */ +#define MPAMCFG_CPBM 0x1000 /* cache-portion config */ +#define MPAMCFG_CMAX 0x0108 /* cache-capacity config */ +#define MPAMCFG_CMIN 0x0110 /* cache-capacity config */ +#define MPAMCFG_CASSOC 0x0118 /* cache-associativity config */ +#define MPAMCFG_MBW_MIN 0x0200 /* min mem-bw config */ +#define MPAMCFG_MBW_MAX 0x0208 /* max mem-bw config */ +#define MPAMCFG_MBW_WINWD 0x0220 /* mem-bw accounting window config */ +#define MPAMCFG_MBW_PBM 0x2000 /* mem-bw portion bitmap config */ +#define MPAMCFG_PRI 0x0400 /* priority partitioning config */ +#define MPAMCFG_MBW_PROP 0x0500 /* mem-bw stride config */ +#define MPAMCFG_INTPARTID 0x0600 /* partid-narrowing config */ + +#define MSMON_CFG_MON_SEL 0x0800 /* monitor selector */ +#define MSMON_CFG_CSU_FLT 0x0810 /* cache-usage monitor filter */ +#define MSMON_CFG_CSU_CTL 0x0818 /* cache-usage monitor config */ +#define MSMON_CFG_MBWU_FLT 0x0820 /* mem-bw monitor filter */ +#define MSMON_CFG_MBWU_CTL 0x0828 /* mem-bw monitor config */ +#define MSMON_CSU 0x0840 /* current cache-usage */ +#define MSMON_CSU_CAPTURE 0x0848 /* last cache-usage value captured */ +#define MSMON_MBWU 0x0860 /* current mem-bw usage value */ +#define MSMON_MBWU_CAPTURE 0x0868 /* last mem-bw value captured */ +#define MSMON_MBWU_L 0x0880 /* current long mem-bw usage value */ +#define MSMON_MBWU_L_CAPTURE 0x0890 /* last long mem-bw value captured */ +#define MSMON_CAPT_EVNT 0x0808 /* signal a capture event */ +#define MPAMF_ESR 0x00F8 /* error status register */ +#define MPAMF_ECR 0x00F0 /* error control register */ + +/* MPAMF_IDR - MPAM features ID register */ +#define MPAMF_IDR_PARTID_MAX GENMASK(15, 0) +#define MPAMF_IDR_PMG_MAX GENMASK(23, 16) +#define MPAMF_IDR_HAS_CCAP_PART BIT(24) +#define MPAMF_IDR_HAS_CPOR_PART BIT(25) +#define MPAMF_IDR_HAS_MBW_PART BIT(26) +#define MPAMF_IDR_HAS_PRI_PART BIT(27) +#define MPAMF_IDR_EXT BIT(28) +#define MPAMF_IDR_HAS_IMPL_IDR BIT(29) +#define MPAMF_IDR_HAS_MSMON BIT(30) +#define MPAMF_IDR_HAS_PARTID_NRW BIT(31) +#define MPAMF_IDR_HAS_RIS BIT(32) +#define MPAMF_IDR_HAS_EXTD_ESR BIT(38) +#define MPAMF_IDR_HAS_ESR BIT(39) +#define MPAMF_IDR_RIS_MAX GENMASK(59, 56) + +/* MPAMF_MSMON_IDR - MPAM performance monitoring ID register */ +#define MPAMF_MSMON_IDR_MSMON_CSU BIT(16) +#define MPAMF_MSMON_IDR_MSMON_MBWU BIT(17) +#define MPAMF_MSMON_IDR_HAS_LOCAL_CAPT_EVNT BIT(31) + +/* MPAMF_CPOR_IDR - MPAM features cache portion partitioning ID register */ +#define MPAMF_CPOR_IDR_CPBM_WD GENMASK(15, 0) + +/* MPAMF_CCAP_IDR - MPAM features cache capacity partitioning ID register */ +#define MPAMF_CCAP_IDR_CMAX_WD GENMASK(5, 0) +#define MPAMF_CCAP_IDR_CASSOC_WD GENMASK(12, 8) +#define MPAMF_CCAP_IDR_HAS_CASSOC BIT(28) +#define MPAMF_CCAP_IDR_HAS_CMIN BIT(29) +#define MPAMF_CCAP_IDR_NO_CMAX BIT(30) +#define MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM BIT(31) + +/* MPAMF_MBW_IDR - MPAM features memory bandwidth partitioning ID register */ +#define MPAMF_MBW_IDR_BWA_WD GENMASK(5, 0) +#define MPAMF_MBW_IDR_HAS_MIN BIT(10) +#define MPAMF_MBW_IDR_HAS_MAX BIT(11) +#define MPAMF_MBW_IDR_HAS_PBM BIT(12) +#define MPAMF_MBW_IDR_HAS_PROP BIT(13) +#define MPAMF_MBW_IDR_WINDWR BIT(14) +#define MPAMF_MBW_IDR_BWPBM_WD GENMASK(28, 16) + +/* MPAMF_PRI_IDR - MPAM features priority partitioning ID register */ +#define MPAMF_PRI_IDR_HAS_INTPRI BIT(0) +#define MPAMF_PRI_IDR_INTPRI_0_IS_LOW BIT(1) +#define MPAMF_PRI_IDR_INTPRI_WD GENMASK(9, 4) +#define MPAMF_PRI_IDR_HAS_DSPRI BIT(16) +#define MPAMF_PRI_IDR_DSPRI_0_IS_LOW BIT(17) +#define MPAMF_PRI_IDR_DSPRI_WD GENMASK(25, 20) + +/* MPAMF_CSUMON_IDR - MPAM cache storage usage monitor ID register */ +#define MPAMF_CSUMON_IDR_NUM_MON GENMASK(15, 0) +#define MPAMF_CSUMON_IDR_HAS_OFLOW_CAPT BIT(24) +#define MPAMF_CSUMON_IDR_HAS_CEVNT_OFLW BIT(25) +#define MPAMF_CSUMON_IDR_HAS_OFSR BIT(26) +#define MPAMF_CSUMON_IDR_HAS_OFLOW_LNKG BIT(27) +#define MPAMF_CSUMON_IDR_HAS_XCL BIT(29) +#define MPAMF_CSUMON_IDR_CSU_RO BIT(30) +#define MPAMF_CSUMON_IDR_HAS_CAPTURE BIT(31) + +/* MPAMF_MBWUMON_IDR - MPAM memory bandwidth usage monitor ID register */ +#define MPAMF_MBWUMON_IDR_NUM_MON GENMASK(15, 0) +#define MPAMF_MBWUMON_IDR_HAS_RWBW BIT(28) +#define MPAMF_MBWUMON_IDR_LWD BIT(29) +#define MPAMF_MBWUMON_IDR_HAS_LONG BIT(30) +#define MPAMF_MBWUMON_IDR_HAS_CAPTURE BIT(31) + +/* MPAMF_PARTID_NRW_IDR - MPAM PARTID narrowing ID register */ +#define MPAMF_PARTID_NRW_IDR_INTPARTID_MAX GENMASK(15, 0) + +/* MPAMF_IIDR - MPAM implementation ID register */ +#define MPAMF_IIDR_IMPLEMENTER GENMASK(11, 0) +#define MPAMF_IIDR_REVISION GENMASK(15, 12) +#define MPAMF_IIDR_VARIANT GENMASK(19, 16) +#define MPAMF_IIDR_PRODUCTID GENMASK(31, 20) + +/* MPAMF_AIDR - MPAM architecture ID register */ +#define MPAMF_AIDR_ARCH_MINOR_REV GENMASK(3, 0) +#define MPAMF_AIDR_ARCH_MAJOR_REV GENMASK(7, 4) + +/* MPAMCFG_PART_SEL - MPAM partition configuration selection register */ +#define MPAMCFG_PART_SEL_PARTID_SEL GENMASK(15, 0) +#define MPAMCFG_PART_SEL_INTERNAL BIT(16) +#define MPAMCFG_PART_SEL_RIS GENMASK(27, 24) + +/* MPAMCFG_CASSOC - MPAM cache maximum associativity partition configuration register */ +#define MPAMCFG_CASSOC_CASSOC GENMASK(15, 0) + +/* MPAMCFG_CMAX - MPAM cache capacity configuration register */ +#define MPAMCFG_CMAX_SOFTLIM BIT(31) +#define MPAMCFG_CMAX_CMAX GENMASK(15, 0) + +/* MPAMCFG_CMIN - MPAM cache capacity configuration register */ +#define MPAMCFG_CMIN_CMIN GENMASK(15, 0) + +/* + * MPAMCFG_MBW_MIN - MPAM memory minimum bandwidth partitioning configuration + * register + */ +#define MPAMCFG_MBW_MIN_MIN GENMASK(15, 0) + +/* + * MPAMCFG_MBW_MAX - MPAM memory maximum bandwidth partitioning configuration + * register + */ +#define MPAMCFG_MBW_MAX_MAX GENMASK(15, 0) +#define MPAMCFG_MBW_MAX_HARDLIM BIT(31) + +/* + * MPAMCFG_MBW_WINWD - MPAM memory bandwidth partitioning window width + * register + */ +#define MPAMCFG_MBW_WINWD_US_FRAC GENMASK(7, 0) +#define MPAMCFG_MBW_WINWD_US_INT GENMASK(23, 8) + +/* MPAMCFG_PRI - MPAM priority partitioning configuration register */ +#define MPAMCFG_PRI_INTPRI GENMASK(15, 0) +#define MPAMCFG_PRI_DSPRI GENMASK(31, 16) + +/* + * MPAMCFG_MBW_PROP - Memory bandwidth proportional stride partitioning + * configuration register + */ +#define MPAMCFG_MBW_PROP_STRIDEM1 GENMASK(15, 0) +#define MPAMCFG_MBW_PROP_EN BIT(31) + +/* + * MPAMCFG_INTPARTID - MPAM internal partition narrowing configuration register + */ +#define MPAMCFG_INTPARTID_INTPARTID GENMASK(15, 0) +#define MPAMCFG_INTPARTID_INTERNAL BIT(16) + +/* MSMON_CFG_MON_SEL - Memory system performance monitor selection register */ +#define MSMON_CFG_MON_SEL_MON_SEL GENMASK(15, 0) +#define MSMON_CFG_MON_SEL_RIS GENMASK(27, 24) + +/* MPAMF_ESR - MPAM Error Status Register */ +#define MPAMF_ESR_PARTID_MON GENMASK(15, 0) +#define MPAMF_ESR_PMG GENMASK(23, 16) +#define MPAMF_ESR_ERRCODE GENMASK(27, 24) +#define MPAMF_ESR_OVRWR BIT(31) +#define MPAMF_ESR_RIS GENMASK(35, 32) + +/* MPAMF_ECR - MPAM Error Control Register */ +#define MPAMF_ECR_INTEN BIT(0) + +/* Error conditions in accessing memory mapped registers */ +#define MPAM_ERRCODE_NONE 0 +#define MPAM_ERRCODE_PARTID_SEL_RANGE 1 +#define MPAM_ERRCODE_REQ_PARTID_RANGE 2 +#define MPAM_ERRCODE_MSMONCFG_ID_RANGE 3 +#define MPAM_ERRCODE_REQ_PMG_RANGE 4 +#define MPAM_ERRCODE_MONITOR_RANGE 5 +#define MPAM_ERRCODE_INTPARTID_RANGE 6 +#define MPAM_ERRCODE_UNEXPECTED_INTERNAL 7 +#define MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL 8 +#define MPAM_ERRCODE_RIS_NO_CONTROL 9 +#define MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL 10 +#define MPAM_ERRCODE_RIS_NO_MONITOR 11 + +/* + * MSMON_CFG_CSU_CTL - Memory system performance monitor configure cache storage + * usage monitor control register + * MSMON_CFG_MBWU_CTL - Memory system performance monitor configure memory + * bandwidth usage monitor control register + */ +#define MSMON_CFG_x_CTL_TYPE GENMASK(7, 0) +#define MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L BIT(15) +#define MSMON_CFG_x_CTL_MATCH_PARTID BIT(16) +#define MSMON_CFG_x_CTL_MATCH_PMG BIT(17) +#define MSMON_CFG_MBWU_CTL_SCLEN BIT(19) +#define MSMON_CFG_x_CTL_SUBTYPE GENMASK(22, 20) +#define MSMON_CFG_x_CTL_OFLOW_FRZ BIT(24) +#define MSMON_CFG_x_CTL_OFLOW_INTR BIT(25) +#define MSMON_CFG_x_CTL_OFLOW_STATUS BIT(26) +#define MSMON_CFG_x_CTL_CAPT_RESET BIT(27) +#define MSMON_CFG_x_CTL_CAPT_EVNT GENMASK(30, 28) +#define MSMON_CFG_x_CTL_EN BIT(31) + +#define MSMON_CFG_MBWU_CTL_TYPE_MBWU 0x42 +#define MSMON_CFG_CSU_CTL_TYPE_CSU 0x43 + +/* + * MSMON_CFG_CSU_FLT - Memory system performance monitor configure cache storage + * usage monitor filter register + * MSMON_CFG_MBWU_FLT - Memory system performance monitor configure memory + * bandwidth usage monitor filter register + */ +#define MSMON_CFG_x_FLT_PARTID GENMASK(15, 0) +#define MSMON_CFG_x_FLT_PMG GENMASK(23, 16) + +#define MSMON_CFG_MBWU_FLT_RWBW GENMASK(31, 30) +#define MSMON_CFG_CSU_FLT_XCL BIT(31) + +/* + * MSMON_CSU - Memory system performance monitor cache storage usage monitor + * register + * MSMON_CSU_CAPTURE - Memory system performance monitor cache storage usage + * capture register + * MSMON_MBWU - Memory system performance monitor memory bandwidth usage + * monitor register + * MSMON_MBWU_CAPTURE - Memory system performance monitor memory bandwidth usage + * capture register + */ +#define MSMON___VALUE GENMASK(30, 0) +#define MSMON___NRDY BIT(31) +#define MSMON___L_NRDY BIT(63) +#define MSMON___L_VALUE GENMASK(43, 0) +#define MSMON___LWD_VALUE GENMASK(62, 0) + +/* + * MSMON_CAPT_EVNT - Memory system performance monitoring capture event + * generation register + */ +#define MSMON_CAPT_EVNT_NOW BIT(0) + #endif /* MPAM_INTERNAL_H */ From 5a08ad5f3cdc90aaa9eff79248e2c77128d294b1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:43 +0000 Subject: [PATCH 13/74] arm_mpam: Add cpuhp callbacks to probe MSC hardware Because an MSC can only by accessed from the CPUs in its cpu-affinity set we need to be running on one of those CPUs to probe the MSC hardware. Do this work in the cpuhp callback. Probing the hardware will only happen before MPAM is enabled, walk all the MSCs and probe those we can reach that haven't already been probed as each CPU's online call is made. This adds the low-level MSC register read accessors. Once all MSCs reported by the firmware have been probed from a CPU in their respective cpu-affinity set, the probe-time cpuhp callbacks are replaced. The replacement callbacks will ultimately need to handle save/restore of the runtime MSC state across power transitions, but for now there is nothing to do in them: so do nothing. The architecture's context switch code will be enabled by a static-key, this can be set by mpam_enable(), but must be done from process context, not a cpuhp callback because both take the cpuhp lock. Whenever a new MSC has been probed, the mpam_enable() work is scheduled to test if all the MSCs have been probed. If probing fails, mpam_disable() is scheduled to unregister the cpuhp callbacks and free memory. CC: Lecopzer Chen Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 8f8d0ac1da7885c0d619636f93e0983239dc145c) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 176 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 5 + 2 files changed, 180 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index f1dcf9bb14f25..51284f55ae9b6 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -4,8 +4,10 @@ #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ #include +#include #include #include +#include #include #include #include @@ -17,6 +19,7 @@ #include #include #include +#include #include "mpam_internal.h" @@ -36,6 +39,25 @@ struct srcu_struct mpam_srcu; */ static atomic_t mpam_num_msc; +static int mpam_cpuhp_state; +static DEFINE_MUTEX(mpam_cpuhp_state_lock); + +/* + * mpam is enabled once all devices have been probed from CPU online callbacks, + * scheduled via this work_struct. If access to an MSC depends on a CPU that + * was not brought online at boot, this can happen surprisingly late. + */ +static DECLARE_WORK(mpam_enable_work, &mpam_enable); + +/* + * All mpam error interrupts indicate a software bug. On receipt, disable the + * driver. + */ +static DECLARE_WORK(mpam_broken_work, &mpam_disable); + +/* When mpam is disabled, the printed reason to aid debugging */ +static char *mpam_disable_reason; + /* * An MSC is a physical container for controls and monitors, each identified by * their RIS index. These share a base-address, interrupts and some MMIO @@ -106,6 +128,21 @@ static void mpam_free_garbage(void) } } +static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) +{ + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + return readl_relaxed(msc->mapped_hwpage + reg); +} + +static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg) +{ + lockdep_assert_held_once(&msc->part_sel_lock); + return __mpam_read_reg(msc, reg); +} + +#define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg) + static struct mpam_class * mpam_class_alloc(u8 level_idx, enum mpam_class_types type) { @@ -413,6 +450,86 @@ int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, return err; } +static int mpam_msc_hw_probe(struct mpam_msc *msc) +{ + u64 idr; + struct device *dev = &msc->pdev->dev; + + lockdep_assert_held(&msc->probe_lock); + + idr = __mpam_read_reg(msc, MPAMF_AIDR); + if ((idr & MPAMF_AIDR_ARCH_MAJOR_REV) != MPAM_ARCHITECTURE_V1) { + dev_err_once(dev, "MSC does not match MPAM architecture v1.x\n"); + return -EIO; + } + + msc->probed = true; + + return 0; +} + +static int mpam_cpu_online(unsigned int cpu) +{ + return 0; +} + +/* Before mpam is enabled, try to probe new MSC */ +static int mpam_discovery_cpu_online(unsigned int cpu) +{ + int err = 0; + struct mpam_msc *msc; + bool new_device_probed = false; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + mutex_lock(&msc->probe_lock); + if (!msc->probed) + err = mpam_msc_hw_probe(msc); + mutex_unlock(&msc->probe_lock); + + if (err) + break; + new_device_probed = true; + } + + if (new_device_probed && !err) + schedule_work(&mpam_enable_work); + if (err) { + mpam_disable_reason = "error during probing"; + schedule_work(&mpam_broken_work); + } + + return err; +} + +static int mpam_cpu_offline(unsigned int cpu) +{ + return 0; +} + +static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online), + int (*offline)(unsigned int offline), + char *name) +{ + mutex_lock(&mpam_cpuhp_state_lock); + if (mpam_cpuhp_state) { + cpuhp_remove_state(mpam_cpuhp_state); + mpam_cpuhp_state = 0; + } + + mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, name, online, + offline); + if (mpam_cpuhp_state <= 0) { + pr_err("Failed to register cpuhp callbacks"); + mpam_cpuhp_state = 0; + } + mutex_unlock(&mpam_cpuhp_state_lock); +} + /* * An MSC can control traffic from a set of CPUs, but may only be accessible * from a (hopefully wider) set of CPUs. The common reason for this is power @@ -549,7 +666,8 @@ static int mpam_msc_drv_probe(struct platform_device *pdev) } if (atomic_add_return(1, &mpam_num_msc) == fw_num_msc) - pr_info("Discovered all MSCs\n"); + mpam_register_cpuhp_callbacks(mpam_discovery_cpu_online, NULL, + "mpam:drv_probe"); return 0; } @@ -562,6 +680,62 @@ static struct platform_driver mpam_msc_driver = { .remove = mpam_msc_drv_remove, }; +static void mpam_enable_once(void) +{ + mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, + "mpam:online"); + + pr_info("MPAM enabled\n"); +} + +void mpam_disable(struct work_struct *ignored) +{ + struct mpam_msc *msc, *tmp; + + mutex_lock(&mpam_cpuhp_state_lock); + if (mpam_cpuhp_state) { + cpuhp_remove_state(mpam_cpuhp_state); + mpam_cpuhp_state = 0; + } + mutex_unlock(&mpam_cpuhp_state_lock); + + mutex_lock(&mpam_list_lock); + list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list) + mpam_msc_destroy(msc); + mutex_unlock(&mpam_list_lock); + mpam_free_garbage(); + + pr_err_once("MPAM disabled due to %s\n", mpam_disable_reason); +} + +/* + * Enable mpam once all devices have been probed. + * Scheduled by mpam_discovery_cpu_online() once all devices have been created. + * Also scheduled when new devices are probed when new CPUs come online. + */ +void mpam_enable(struct work_struct *work) +{ + static atomic_t once; + struct mpam_msc *msc; + bool all_devices_probed = true; + + /* Have we probed all the hw devices? */ + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + mutex_lock(&msc->probe_lock); + if (!msc->probed) + all_devices_probed = false; + mutex_unlock(&msc->probe_lock); + + if (!all_devices_probed) + break; + } + + if (all_devices_probed && !atomic_fetch_inc(&once)) + mpam_enable_once(); +} + static int __init mpam_msc_driver_init(void) { if (!system_supports_mpam()) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 51f791cc207bb..4e1538d297837 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -48,6 +48,7 @@ struct mpam_msc { * properties become read-only and the lists are protected by SRCU. */ struct mutex probe_lock; + bool probed; unsigned long ris_idxs; u32 ris_max; @@ -137,6 +138,10 @@ struct mpam_msc_ris { extern struct srcu_struct mpam_srcu; extern struct list_head mpam_classes; +/* Scheduled work callback to enable mpam once all MSC have been probed */ +void mpam_enable(struct work_struct *work); +void mpam_disable(struct work_struct *work); + int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From b80cd460f5552a9cf43d98dad2681e0c8a5ae356 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:44 +0000 Subject: [PATCH 14/74] arm_mpam: Probe hardware to find the supported partid/pmg values CPUs can generate traffic with a range of PARTID and PMG values, but each MSC may also have its own maximum size for these fields. Before MPAM can be used, the driver needs to probe each RIS on each MSC, to find the system-wide smallest value that can be used. The limits from requestors (e.g. CPUs) also need taking into account. While doing this, RIS entries that firmware didn't describe are created under MPAM_CLASS_UNKNOWN. This adds the low level MSC write accessors. While we're here, implement the mpam_register_requestor() call for the arch code to register the CPU limits. Future callers of this will tell us about the SMMU and ITS. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit bd221f9f82afb616887e0b88b43fbb937479d744) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 148 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 6 ++ include/linux/arm_mpam.h | 14 +++ 3 files changed, 167 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 51284f55ae9b6..3d9b87a9727a2 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,15 @@ static atomic_t mpam_num_msc; static int mpam_cpuhp_state; static DEFINE_MUTEX(mpam_cpuhp_state_lock); +/* + * The smallest common values for any CPU or MSC in the system. + * Generating traffic outside this range will result in screaming interrupts. + */ +u16 mpam_partid_max; +u8 mpam_pmg_max; +static bool partid_max_init, partid_max_published; +static DEFINE_SPINLOCK(partid_max_lock); + /* * mpam is enabled once all devices have been probed from CPU online callbacks, * scheduled via this work_struct. If access to an MSC depends on a CPU that @@ -143,6 +153,70 @@ static inline u32 _mpam_read_partsel_reg(struct mpam_msc *msc, u16 reg) #define mpam_read_partsel_reg(msc, reg) _mpam_read_partsel_reg(msc, MPAMF_##reg) +static void __mpam_write_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + WARN_ON_ONCE(reg + sizeof(u32) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + writel_relaxed(val, msc->mapped_hwpage + reg); +} + +static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + lockdep_assert_held_once(&msc->part_sel_lock); + __mpam_write_reg(msc, reg, val); +} + +#define mpam_write_partsel_reg(msc, reg, val) _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val) + +static u64 mpam_msc_read_idr(struct mpam_msc *msc) +{ + u64 idr_high = 0, idr_low; + + lockdep_assert_held(&msc->part_sel_lock); + + idr_low = mpam_read_partsel_reg(msc, IDR); + if (FIELD_GET(MPAMF_IDR_EXT, idr_low)) + idr_high = mpam_read_partsel_reg(msc, IDR + 4); + + return (idr_high << 32) | idr_low; +} + +static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc) +{ + lockdep_assert_held(&msc->part_sel_lock); + + mpam_write_partsel_reg(msc, PART_SEL, partsel); +} + +static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc) +{ + u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) | + FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, partid); + + __mpam_part_sel_raw(partsel, msc); +} + +int mpam_register_requestor(u16 partid_max, u8 pmg_max) +{ + guard(spinlock)(&partid_max_lock); + if (!partid_max_init) { + mpam_partid_max = partid_max; + mpam_pmg_max = pmg_max; + partid_max_init = true; + } else if (!partid_max_published) { + mpam_partid_max = min(mpam_partid_max, partid_max); + mpam_pmg_max = min(mpam_pmg_max, pmg_max); + } else { + /* New requestors can't lower the values */ + if (partid_max < mpam_partid_max || pmg_max < mpam_pmg_max) + return -EBUSY; + } + + return 0; +} +EXPORT_SYMBOL(mpam_register_requestor); + static struct mpam_class * mpam_class_alloc(u8 level_idx, enum mpam_class_types type) { @@ -450,9 +524,35 @@ int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, return err; } +static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, + u8 ris_idx) +{ + int err; + struct mpam_msc_ris *ris; + + lockdep_assert_held(&mpam_list_lock); + + if (!test_bit(ris_idx, &msc->ris_idxs)) { + err = mpam_ris_create_locked(msc, ris_idx, MPAM_CLASS_UNKNOWN, + 0, 0); + if (err) + return ERR_PTR(err); + } + + list_for_each_entry(ris, &msc->ris, msc_list) { + if (ris->ris_idx == ris_idx) + return ris; + } + + return ERR_PTR(-ENOENT); +} + static int mpam_msc_hw_probe(struct mpam_msc *msc) { u64 idr; + u16 partid_max; + u8 ris_idx, pmg_max; + struct mpam_msc_ris *ris; struct device *dev = &msc->pdev->dev; lockdep_assert_held(&msc->probe_lock); @@ -463,6 +563,40 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return -EIO; } + /* Grab an IDR value to find out how many RIS there are */ + mutex_lock(&msc->part_sel_lock); + idr = mpam_msc_read_idr(msc); + mutex_unlock(&msc->part_sel_lock); + + msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr); + + /* Use these values so partid/pmg always starts with a valid value */ + msc->partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); + msc->pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); + + for (ris_idx = 0; ris_idx <= msc->ris_max; ris_idx++) { + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris_idx, 0, msc); + idr = mpam_msc_read_idr(msc); + mutex_unlock(&msc->part_sel_lock); + + partid_max = FIELD_GET(MPAMF_IDR_PARTID_MAX, idr); + pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); + msc->partid_max = min(msc->partid_max, partid_max); + msc->pmg_max = min(msc->pmg_max, pmg_max); + + mutex_lock(&mpam_list_lock); + ris = mpam_get_or_create_ris(msc, ris_idx); + mutex_unlock(&mpam_list_lock); + if (IS_ERR(ris)) + return PTR_ERR(ris); + } + + spin_lock(&partid_max_lock); + mpam_partid_max = min(mpam_partid_max, msc->partid_max); + mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max); + spin_unlock(&partid_max_lock); + msc->probed = true; return 0; @@ -682,10 +816,20 @@ static struct platform_driver mpam_msc_driver = { static void mpam_enable_once(void) { + /* + * Once the cpuhp callbacks have been changed, mpam_partid_max can no + * longer change. + */ + spin_lock(&partid_max_lock); + partid_max_published = true; + spin_unlock(&partid_max_lock); + mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); - pr_info("MPAM enabled\n"); + /* Use printk() to avoid the pr_fmt adding the function name. */ + printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n", + mpam_partid_max + 1, mpam_pmg_max + 1); } void mpam_disable(struct work_struct *ignored) @@ -751,4 +895,6 @@ static int __init mpam_msc_driver_init(void) return platform_driver_register(&mpam_msc_driver); } + +/* Must occur after arm64_mpam_register_cpus() from arch_initcall() */ subsys_initcall(mpam_msc_driver_init); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 4e1538d297837..768a58a3ab275 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -49,6 +49,8 @@ struct mpam_msc { */ struct mutex probe_lock; bool probed; + u16 partid_max; + u8 pmg_max; unsigned long ris_idxs; u32 ris_max; @@ -138,6 +140,10 @@ struct mpam_msc_ris { extern struct srcu_struct mpam_srcu; extern struct list_head mpam_classes; +/* System wide partid/pmg values */ +extern u16 mpam_partid_max; +extern u8 mpam_pmg_max; + /* Scheduled work callback to enable mpam once all MSC have been probed */ void mpam_enable(struct work_struct *work); void mpam_disable(struct work_struct *work); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 13a8ac5c2cbda..7f00c5285a326 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -49,4 +49,18 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, } #endif +/** + * mpam_register_requestor() - Register a requestor with the MPAM driver + * @partid_max: The maximum PARTID value the requestor can generate. + * @pmg_max: The maximum PMG value the requestor can generate. + * + * Registers a requestor with the MPAM driver to ensure the chosen system-wide + * minimum PARTID and PMG values will allow the requestors features to be used. + * + * Returns an error if the registration is too late, and a larger PARTID/PMG + * value has been advertised to user-space. In this case the requestor should + * not use its MPAM features. Returns 0 on success. + */ +int mpam_register_requestor(u16 partid_max, u8 pmg_max); + #endif /* __LINUX_ARM_MPAM_H */ From a3a702eac561706ff1adf61948724c2092ea4f2a Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:45 +0000 Subject: [PATCH 15/74] arm_mpam: Add helpers for managing the locking around the mon_sel registers The MSC MON_SEL register needs to be accessed from hardirq for the overflow interrupt, and when taking an IPI to access these registers on platforms where MSC are not accessible from every CPU. This makes an irqsave spinlock the obvious lock to protect these registers. On systems with SCMI or PCC mailboxes it must be able to sleep, meaning a mutex must be used. The SCMI or PCC platforms can't support an overflow interrupt, and can't access the registers from hardirq context. Clearly these two can't exist for one MSC at the same time. Add helpers for the MON_SEL locking. For now, use a irqsave spinlock and only support 'real' MMIO platforms. In the future this lock will be split in two allowing SCMI/PCC platforms to take a mutex. Because there are contexts where the SCMI/PCC platforms can't make an access, mpam_mon_sel_lock() needs to be able to fail. Do this now, so that all the error handling on these paths is present. This allows the relevant paths to fail if they are needed on a platform where this isn't possible, instead of having to make explicit checks of the interface type. Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit d02beb06ca2a624e17004659c79d26a23484aa8b) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 2 ++ drivers/resctrl/mpam_internal.h | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 3d9b87a9727a2..dcbc9cf5581d1 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -740,6 +741,7 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) if (err) return ERR_PTR(err); + mpam_mon_sel_lock_init(msc); msc->id = pdev->id; msc->pdev = pdev; INIT_LIST_HEAD_RCU(&msc->all_msc_list); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 768a58a3ab275..97f02cf92d7a8 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #define MPAM_MSC_MAX_NUM_RIS 16 @@ -65,12 +66,52 @@ struct mpam_msc { */ struct mutex part_sel_lock; + /* + * mon_sel_lock protects access to the MSC hardware registers that are + * affected by MPAMCFG_MON_SEL, and the mbwu_state. + * Access to mon_sel is needed from both process and interrupt contexts, + * but is complicated by firmware-backed platforms that can't make any + * access unless they can sleep. + * Always use the mpam_mon_sel_lock() helpers. + * Accesses to mon_sel need to be able to fail if they occur in the wrong + * context. + * If needed, take msc->probe_lock first. + */ + raw_spinlock_t _mon_sel_lock; + unsigned long _mon_sel_flags; + void __iomem *mapped_hwpage; size_t mapped_hwpage_sz; struct mpam_garbage garbage; }; +/* Returning false here means accesses to mon_sel must fail and report an error. */ +static inline bool __must_check mpam_mon_sel_lock(struct mpam_msc *msc) +{ + /* Locking will require updating to support a firmware backed interface */ + if (WARN_ON_ONCE(msc->iface != MPAM_IFACE_MMIO)) + return false; + + raw_spin_lock_irqsave(&msc->_mon_sel_lock, msc->_mon_sel_flags); + return true; +} + +static inline void mpam_mon_sel_unlock(struct mpam_msc *msc) +{ + raw_spin_unlock_irqrestore(&msc->_mon_sel_lock, msc->_mon_sel_flags); +} + +static inline void mpam_mon_sel_lock_held(struct mpam_msc *msc) +{ + lockdep_assert_held_once(&msc->_mon_sel_lock); +} + +static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) +{ + raw_spin_lock_init(&msc->_mon_sel_lock); +} + struct mpam_class { /* mpam_components in this class */ struct list_head components; From d2231c560104d6c312ace322e92e1c4e452d48f4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:46 +0000 Subject: [PATCH 16/74] arm_mpam: Probe the hardware features resctrl supports Expand the probing support with the control and monitor types we can use with resctrl. CC: Dave Martin Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 8c90dc68a5de4349ef9ba51449fb0a29cd690547) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 149 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 33 +++++++ 2 files changed, 182 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index dcbc9cf5581d1..ff561a08cd0db 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -170,6 +170,22 @@ static inline void _mpam_write_partsel_reg(struct mpam_msc *msc, u16 reg, u32 va #define mpam_write_partsel_reg(msc, reg, val) _mpam_write_partsel_reg(msc, MPAMCFG_##reg, val) +static inline u32 _mpam_read_monsel_reg(struct mpam_msc *msc, u16 reg) +{ + mpam_mon_sel_lock_held(msc); + return __mpam_read_reg(msc, reg); +} + +#define mpam_read_monsel_reg(msc, reg) _mpam_read_monsel_reg(msc, MSMON_##reg) + +static inline void _mpam_write_monsel_reg(struct mpam_msc *msc, u16 reg, u32 val) +{ + mpam_mon_sel_lock_held(msc); + __mpam_write_reg(msc, reg, val); +} + +#define mpam_write_monsel_reg(msc, reg, val) _mpam_write_monsel_reg(msc, MSMON_##reg, val) + static u64 mpam_msc_read_idr(struct mpam_msc *msc) { u64 idr_high = 0, idr_low; @@ -548,6 +564,133 @@ static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, return ERR_PTR(-ENOENT); } +/* + * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour + * of NRDY, software can use this bit for any purpose" - so hardware might not + * implement this - but it isn't RES0. + * + * Try and see what values stick in this bit. If we can write either value, + * its probably not implemented by hardware. + */ +static bool _mpam_ris_hw_probe_hw_nrdy(struct mpam_msc_ris *ris, u32 mon_reg) +{ + u32 now; + u64 mon_sel; + bool can_set, can_clear; + struct mpam_msc *msc = ris->vmsc->msc; + + if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + return false; + + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, 0) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + _mpam_write_monsel_reg(msc, mon_reg, mon_sel); + + _mpam_write_monsel_reg(msc, mon_reg, MSMON___NRDY); + now = _mpam_read_monsel_reg(msc, mon_reg); + can_set = now & MSMON___NRDY; + + _mpam_write_monsel_reg(msc, mon_reg, 0); + now = _mpam_read_monsel_reg(msc, mon_reg); + can_clear = !(now & MSMON___NRDY); + mpam_mon_sel_unlock(msc); + + return (!can_set || !can_clear); +} + +#define mpam_ris_hw_probe_hw_nrdy(_ris, _mon_reg) \ + _mpam_ris_hw_probe_hw_nrdy(_ris, MSMON_##_mon_reg) + +static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) +{ + int err; + struct mpam_msc *msc = ris->vmsc->msc; + struct device *dev = &msc->pdev->dev; + struct mpam_props *props = &ris->props; + + lockdep_assert_held(&msc->probe_lock); + lockdep_assert_held(&msc->part_sel_lock); + + /* Cache Portion partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { + u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR); + + props->cpbm_wd = FIELD_GET(MPAMF_CPOR_IDR_CPBM_WD, cpor_features); + if (props->cpbm_wd) + mpam_set_feature(mpam_feat_cpor_part, props); + } + + /* Memory bandwidth partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_MBW_PART, ris->idr)) { + u32 mbw_features = mpam_read_partsel_reg(msc, MBW_IDR); + + /* portion bitmap resolution */ + props->mbw_pbm_bits = FIELD_GET(MPAMF_MBW_IDR_BWPBM_WD, mbw_features); + if (props->mbw_pbm_bits && + FIELD_GET(MPAMF_MBW_IDR_HAS_PBM, mbw_features)) + mpam_set_feature(mpam_feat_mbw_part, props); + + props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) + mpam_set_feature(mpam_feat_mbw_max, props); + } + + /* Performance Monitoring */ + if (FIELD_GET(MPAMF_IDR_HAS_MSMON, ris->idr)) { + u32 msmon_features = mpam_read_partsel_reg(msc, MSMON_IDR); + + /* + * If the firmware max-nrdy-us property is missing, the + * CSU counters can't be used. Should we wait forever? + */ + err = device_property_read_u32(&msc->pdev->dev, + "arm,not-ready-us", + &msc->nrdy_usec); + + if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_CSU, msmon_features)) { + u32 csumonidr; + + csumonidr = mpam_read_partsel_reg(msc, CSUMON_IDR); + props->num_csu_mon = FIELD_GET(MPAMF_CSUMON_IDR_NUM_MON, csumonidr); + if (props->num_csu_mon) { + bool hw_managed; + + mpam_set_feature(mpam_feat_msmon_csu, props); + + /* Is NRDY hardware managed? */ + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_csu_hw_nrdy, props); + } + + /* + * Accept the missing firmware property if NRDY appears + * un-implemented. + */ + if (err && mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, props)) + dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware."); + } + if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { + bool hw_managed; + u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR); + + props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr); + if (props->num_mbwu_mon) + mpam_set_feature(mpam_feat_msmon_mbwu, props); + + /* Is NRDY hardware managed? */ + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); + + /* + * Don't warn about any missing firmware property for + * MBWU NRDY - it doesn't make any sense! + */ + } + } +} + static int mpam_msc_hw_probe(struct mpam_msc *msc) { u64 idr; @@ -591,6 +734,12 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) mutex_unlock(&mpam_list_lock); if (IS_ERR(ris)) return PTR_ERR(ris); + ris->idr = idr; + + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris_idx, 0, msc); + mpam_ris_hw_probe(ris); + mutex_unlock(&msc->part_sel_lock); } spin_lock(&partid_max_lock); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 97f02cf92d7a8..cdaa019367e95 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -5,12 +5,14 @@ #define MPAM_INTERNAL_H #include +#include #include #include #include #include #include #include +#include #include #define MPAM_MSC_MAX_NUM_RIS 16 @@ -112,6 +114,33 @@ static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) raw_spin_lock_init(&msc->_mon_sel_lock); } +/* Bits for mpam features bitmaps */ +enum mpam_device_features { + mpam_feat_cpor_part, + mpam_feat_mbw_part, + mpam_feat_mbw_min, + mpam_feat_mbw_max, + mpam_feat_msmon, + mpam_feat_msmon_csu, + mpam_feat_msmon_csu_hw_nrdy, + mpam_feat_msmon_mbwu, + mpam_feat_msmon_mbwu_hw_nrdy, + MPAM_FEATURE_LAST +}; + +struct mpam_props { + DECLARE_BITMAP(features, MPAM_FEATURE_LAST); + + u16 cpbm_wd; + u16 mbw_pbm_bits; + u16 bwa_wd; + u16 num_csu_mon; + u16 num_mbwu_mon; +}; + +#define mpam_has_feature(_feat, x) test_bit(_feat, (x)->features) +#define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) + struct mpam_class { /* mpam_components in this class */ struct list_head components; @@ -151,6 +180,8 @@ struct mpam_vmsc { /* mpam_msc_ris in this vmsc */ struct list_head ris; + struct mpam_props props; + /* All RIS in this vMSC are members of this MSC */ struct mpam_msc *msc; @@ -162,6 +193,8 @@ struct mpam_vmsc { struct mpam_msc_ris { u8 ris_idx; + u64 idr; + struct mpam_props props; cpumask_t affinity; From c00313a9a99155c07d7c72fa77eb0b4ae10baa50 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:47 +0000 Subject: [PATCH 17/74] arm_mpam: Merge supported features during mpam_enable() into mpam_class To make a decision about whether to expose an mpam class as a resctrl resource we need to know its overall supported features and properties. Once we've probed all the resources, we can walk the tree and produce overall values by merging the bitmaps. This eliminates features that are only supported by some MSC that make up a component or class. If bitmap properties are mismatched within a component we cannot support the mismatched feature. Care has to be taken as vMSC may hold mismatched RIS. Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit c10ca83a778304f976cbea60bbbb2f1fac003f5c) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 214 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 3 + 2 files changed, 217 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index ff561a08cd0db..f9ac88bf06b70 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -965,6 +965,216 @@ static struct platform_driver mpam_msc_driver = { .remove = mpam_msc_drv_remove, }; +/* Any of these features mean the BWA_WD field is valid. */ +static bool mpam_has_bwa_wd_feature(struct mpam_props *props) +{ + if (mpam_has_feature(mpam_feat_mbw_min, props)) + return true; + if (mpam_has_feature(mpam_feat_mbw_max, props)) + return true; + return false; +} + +#define MISMATCHED_HELPER(parent, child, helper, field, alias) \ + helper(parent) && \ + ((helper(child) && (parent)->field != (child)->field) || \ + (!helper(child) && !(alias))) + +#define MISMATCHED_FEAT(parent, child, feat, field, alias) \ + mpam_has_feature((feat), (parent)) && \ + ((mpam_has_feature((feat), (child)) && (parent)->field != (child)->field) || \ + (!mpam_has_feature((feat), (child)) && !(alias))) + +#define CAN_MERGE_FEAT(parent, child, feat, alias) \ + (alias) && !mpam_has_feature((feat), (parent)) && \ + mpam_has_feature((feat), (child)) + +/* + * Combine two props fields. + * If this is for controls that alias the same resource, it is safe to just + * copy the values over. If two aliasing controls implement the same scheme + * a safe value must be picked. + * For non-aliasing controls, these control different resources, and the + * resulting safe value must be compatible with both. When merging values in + * the tree, all the aliasing resources must be handled first. + * On mismatch, parent is modified. + */ +static void __props_mismatch(struct mpam_props *parent, + struct mpam_props *child, bool alias) +{ + if (CAN_MERGE_FEAT(parent, child, mpam_feat_cpor_part, alias)) { + parent->cpbm_wd = child->cpbm_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_cpor_part, + cpbm_wd, alias)) { + pr_debug("cleared cpor_part\n"); + mpam_clear_feature(mpam_feat_cpor_part, parent); + parent->cpbm_wd = 0; + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_mbw_part, alias)) { + parent->mbw_pbm_bits = child->mbw_pbm_bits; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_mbw_part, + mbw_pbm_bits, alias)) { + pr_debug("cleared mbw_part\n"); + mpam_clear_feature(mpam_feat_mbw_part, parent); + parent->mbw_pbm_bits = 0; + } + + /* bwa_wd is a count of bits, fewer bits means less precision */ + if (alias && !mpam_has_bwa_wd_feature(parent) && + mpam_has_bwa_wd_feature(child)) { + parent->bwa_wd = child->bwa_wd; + } else if (MISMATCHED_HELPER(parent, child, mpam_has_bwa_wd_feature, + bwa_wd, alias)) { + pr_debug("took the min bwa_wd\n"); + parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd); + } + + /* For num properties, take the minimum */ + if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) { + parent->num_csu_mon = child->num_csu_mon; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_csu, + num_csu_mon, alias)) { + pr_debug("took the min num_csu_mon\n"); + parent->num_csu_mon = min(parent->num_csu_mon, + child->num_csu_mon); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_mbwu, alias)) { + parent->num_mbwu_mon = child->num_mbwu_mon; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_msmon_mbwu, + num_mbwu_mon, alias)) { + pr_debug("took the min num_mbwu_mon\n"); + parent->num_mbwu_mon = min(parent->num_mbwu_mon, + child->num_mbwu_mon); + } + + if (alias) { + /* Merge features for aliased resources */ + bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); + } else { + /* Clear missing features for non aliasing */ + bitmap_and(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); + } +} + +/* + * If a vmsc doesn't match class feature/configuration, do the right thing(tm). + * For 'num' properties we can just take the minimum. + * For properties where the mismatched unused bits would make a difference, we + * nobble the class feature, as we can't configure all the resources. + * e.g. The L3 cache is composed of two resources with 13 and 17 portion + * bitmaps respectively. + */ +static void +__class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc) +{ + struct mpam_props *cprops = &class->props; + struct mpam_props *vprops = &vmsc->props; + struct device *dev = &vmsc->msc->pdev->dev; + + lockdep_assert_held(&mpam_list_lock); /* we modify class */ + + dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n", + (long)cprops->features, (long)vprops->features); + + /* Take the safe value for any common features */ + __props_mismatch(cprops, vprops, false); +} + +static void +__vmsc_props_mismatch(struct mpam_vmsc *vmsc, struct mpam_msc_ris *ris) +{ + struct mpam_props *rprops = &ris->props; + struct mpam_props *vprops = &vmsc->props; + struct device *dev = &vmsc->msc->pdev->dev; + + lockdep_assert_held(&mpam_list_lock); /* we modify vmsc */ + + dev_dbg(dev, "Merging features for vmsc:0x%lx |= ris:0x%lx\n", + (long)vprops->features, (long)rprops->features); + + /* + * Merge mismatched features - Copy any features that aren't common, + * but take the safe value for any common features. + */ + __props_mismatch(vprops, rprops, true); +} + +/* + * Copy the first component's first vMSC's properties and features to the + * class. __class_props_mismatch() will remove conflicts. + * It is not possible to have a class with no components, or a component with + * no resources. The vMSC properties have already been built. + */ +static void mpam_enable_init_class_features(struct mpam_class *class) +{ + struct mpam_vmsc *vmsc; + struct mpam_component *comp; + + comp = list_first_entry(&class->components, + struct mpam_component, class_list); + vmsc = list_first_entry(&comp->vmsc, + struct mpam_vmsc, comp_list); + + class->props = vmsc->props; +} + +static void mpam_enable_merge_vmsc_features(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + struct mpam_class *class = comp->class; + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + list_for_each_entry(ris, &vmsc->ris, vmsc_list) { + __vmsc_props_mismatch(vmsc, ris); + class->nrdy_usec = max(class->nrdy_usec, + vmsc->msc->nrdy_usec); + } + } +} + +static void mpam_enable_merge_class_features(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + struct mpam_class *class = comp->class; + + list_for_each_entry(vmsc, &comp->vmsc, comp_list) + __class_props_mismatch(class, vmsc); +} + +/* + * Merge all the common resource features into class. + * vmsc features are bitwise-or'd together by mpam_enable_merge_vmsc_features() + * as the first step so that mpam_enable_init_class_features() can initialise + * the class with a representative set of features. + * Next the mpam_enable_merge_class_features() bitwise-and's all the vmsc + * features to form the class features. + * Other features are the min/max as appropriate. + * + * To avoid walking the whole tree twice, the class->nrdy_usec property is + * updated when working with the vmsc as it is a max(), and doesn't need + * initialising first. + */ +static void mpam_enable_merge_features(struct list_head *all_classes_list) +{ + struct mpam_class *class; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, all_classes_list, classes_list) { + list_for_each_entry(comp, &class->components, class_list) + mpam_enable_merge_vmsc_features(comp); + + mpam_enable_init_class_features(class); + + list_for_each_entry(comp, &class->components, class_list) + mpam_enable_merge_class_features(comp); + } +} + static void mpam_enable_once(void) { /* @@ -975,6 +1185,10 @@ static void mpam_enable_once(void) partid_max_published = true; spin_unlock(&partid_max_lock); + mutex_lock(&mpam_list_lock); + mpam_enable_merge_features(&mpam_classes); + mutex_unlock(&mpam_list_lock); + mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index cdaa019367e95..4749ac223adcf 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -140,6 +140,7 @@ struct mpam_props { #define mpam_has_feature(_feat, x) test_bit(_feat, (x)->features) #define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) +#define mpam_clear_feature(_feat, x) clear_bit(_feat, (x)->features) struct mpam_class { /* mpam_components in this class */ @@ -147,6 +148,8 @@ struct mpam_class { cpumask_t affinity; + struct mpam_props props; + u32 nrdy_usec; u8 level; enum mpam_class_types type; From b674b6f6b3369666ad6f12a02f9a437143de539f Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:48 +0000 Subject: [PATCH 18/74] arm_mpam: Reset MSC controls from cpuhp callbacks When a CPU comes online, it may bring a newly accessible MSC with it. Only the default partid has its value reset by hardware, and even then the MSC might not have been reset since its config was previously dirtied. e.g. Kexec. Any in-use partid must have its configuration restored, or reset. In-use partids may be held in caches and evicted later. MSC are also reset when CPUs are taken offline to cover cases where firmware doesn't reset the MSC over reboot using UEFI, or kexec where there is no firmware involvement. If the configuration for a RIS has not been touched since it was brought online, it does not need resetting again. To reset, write the maximum values for all discovered controls. CC: Rohit Mathew Signed-off-by: James Morse Reviewed-by: Fenghua Yu Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Gavin Shan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit f188a36ca2416e8090453eacbabd2925b20eb906) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 109 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 3 + 2 files changed, 112 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index f9ac88bf06b70..4bd4d57a3baaa 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -752,8 +753,104 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return 0; } +static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) +{ + u32 num_words, msb; + u32 bm = ~0; + int i; + + lockdep_assert_held(&msc->part_sel_lock); + + if (wd == 0) + return; + + /* + * Write all ~0 to all but the last 32bit-word, which may + * have fewer bits... + */ + num_words = DIV_ROUND_UP(wd, 32); + for (i = 0; i < num_words - 1; i++, reg += sizeof(bm)) + __mpam_write_reg(msc, reg, bm); + + /* + * ....and then the last (maybe) partial 32bit word. When wd is a + * multiple of 32, msb should be 31 to write a full 32bit word. + */ + msb = (wd - 1) % 32; + bm = GENMASK(msb, 0); + __mpam_write_reg(msc, reg, bm); +} + +static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) +{ + struct mpam_msc *msc = ris->vmsc->msc; + struct mpam_props *rprops = &ris->props; + + WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); + + mutex_lock(&msc->part_sel_lock); + __mpam_part_sel(ris->ris_idx, partid, msc); + + if (mpam_has_feature(mpam_feat_cpor_part, rprops)) + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); + + if (mpam_has_feature(mpam_feat_mbw_part, rprops)) + mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); + + if (mpam_has_feature(mpam_feat_mbw_min, rprops)) + mpam_write_partsel_reg(msc, MBW_MIN, 0); + + if (mpam_has_feature(mpam_feat_mbw_max, rprops)) + mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); + + mutex_unlock(&msc->part_sel_lock); +} + +static void mpam_reset_ris(struct mpam_msc_ris *ris) +{ + u16 partid, partid_max; + + WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); + + if (ris->in_reset_state) + return; + + spin_lock(&partid_max_lock); + partid_max = mpam_partid_max; + spin_unlock(&partid_max_lock); + for (partid = 0; partid <= partid_max; partid++) + mpam_reset_ris_partid(ris, partid); +} + +static void mpam_reset_msc(struct mpam_msc *msc, bool online) +{ + struct mpam_msc_ris *ris; + + list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { + mpam_reset_ris(ris); + + /* + * Set in_reset_state when coming online. The reset state + * for non-zero partid may be lost while the CPUs are offline. + */ + ris->in_reset_state = online; + } +} + static int mpam_cpu_online(unsigned int cpu) { + struct mpam_msc *msc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + if (atomic_fetch_inc(&msc->online_refs) == 0) + mpam_reset_msc(msc, true); + } + return 0; } @@ -792,6 +889,18 @@ static int mpam_discovery_cpu_online(unsigned int cpu) static int mpam_cpu_offline(unsigned int cpu) { + struct mpam_msc *msc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &msc->accessibility)) + continue; + + if (atomic_dec_and_test(&msc->online_refs)) + mpam_reset_msc(msc, false); + } + return 0; } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 4749ac223adcf..dec485cd8a912 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -5,6 +5,7 @@ #define MPAM_INTERNAL_H #include +#include #include #include #include @@ -45,6 +46,7 @@ struct mpam_msc { enum mpam_msc_iface iface; u32 nrdy_usec; cpumask_t accessibility; + atomic_t online_refs; /* * probe_lock is only taken during discovery. After discovery these @@ -198,6 +200,7 @@ struct mpam_msc_ris { u8 ris_idx; u64 idr; struct mpam_props props; + bool in_reset_state; cpumask_t affinity; From e6b2443955a5a98c07f37913777c853fc04a943e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:49 +0000 Subject: [PATCH 19/74] arm_mpam: Add a helper to touch an MSC from any CPU Resetting RIS entries from the cpuhp callback is easy as the callback occurs on the correct CPU. This won't be true for any other caller that wants to reset or configure an MSC. Add a helper that schedules the provided function if necessary. Callers should take the cpuhp lock to prevent the cpuhp callbacks from changing the MSC state. Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 475228d15dd653584b840b8e6c5828cdc3884b1c) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 37 +++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 4bd4d57a3baaa..7941b093396ef 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -806,20 +806,51 @@ static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) mutex_unlock(&msc->part_sel_lock); } -static void mpam_reset_ris(struct mpam_msc_ris *ris) +/* + * Called via smp_call_on_cpu() to prevent migration, while still being + * pre-emptible. + */ +static int mpam_reset_ris(void *arg) { u16 partid, partid_max; + struct mpam_msc_ris *ris = arg; WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); if (ris->in_reset_state) - return; + return 0; spin_lock(&partid_max_lock); partid_max = mpam_partid_max; spin_unlock(&partid_max_lock); for (partid = 0; partid <= partid_max; partid++) mpam_reset_ris_partid(ris, partid); + + return 0; +} + +/* + * Get the preferred CPU for this MSC. If it is accessible from this CPU, + * this CPU is preferred. This can be preempted/migrated, it will only result + * in more work. + */ +static int mpam_get_msc_preferred_cpu(struct mpam_msc *msc) +{ + int cpu = raw_smp_processor_id(); + + if (cpumask_test_cpu(cpu, &msc->accessibility)) + return cpu; + + return cpumask_first_and(&msc->accessibility, cpu_online_mask); +} + +static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg) +{ + lockdep_assert_irqs_enabled(); + lockdep_assert_cpus_held(); + WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); + + return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true); } static void mpam_reset_msc(struct mpam_msc *msc, bool online) @@ -827,7 +858,7 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) struct mpam_msc_ris *ris; list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { - mpam_reset_ris(ris); + mpam_touch_msc(msc, &mpam_reset_ris, ris); /* * Set in_reset_state when coming online. The reset state From 0f2f900e9fc4896662a7b2cae74a4431254a7496 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:50 +0000 Subject: [PATCH 20/74] arm_mpam: Extend reset logic to allow devices to be reset any time cpuhp callbacks aren't the only time the MSC configuration may need to be reset. Resctrl has an API call to reset a class. If an MPAM error interrupt arrives it indicates the driver has misprogrammed an MSC. The safest thing to do is reset all the MSCs and disable MPAM. Add a helper to reset RIS via their class. Call this from mpam_disable(), which can be scheduled from the error interrupt handler. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Fenghua Yu Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 3bd04fe7d807bbdcfe75b29ca82fae4e2d7dc524) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 57 ++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 7941b093396ef..7943d174b3f46 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -808,15 +808,13 @@ static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) /* * Called via smp_call_on_cpu() to prevent migration, while still being - * pre-emptible. + * pre-emptible. Caller must hold mpam_srcu. */ static int mpam_reset_ris(void *arg) { u16 partid, partid_max; struct mpam_msc_ris *ris = arg; - WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); - if (ris->in_reset_state) return 0; @@ -1337,8 +1335,55 @@ static void mpam_enable_once(void) mpam_partid_max + 1, mpam_pmg_max + 1); } +static void mpam_reset_component_locked(struct mpam_component *comp) +{ + struct mpam_vmsc *vmsc; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_msc *msc = vmsc->msc; + struct mpam_msc_ris *ris; + + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!ris->in_reset_state) + mpam_touch_msc(msc, mpam_reset_ris, ris); + ris->in_reset_state = true; + } + } +} + +static void mpam_reset_class_locked(struct mpam_class *class) +{ + struct mpam_component *comp; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) + mpam_reset_component_locked(comp); +} + +static void mpam_reset_class(struct mpam_class *class) +{ + cpus_read_lock(); + mpam_reset_class_locked(class); + cpus_read_unlock(); +} + +/* + * Called in response to an error IRQ. + * All of MPAMs errors indicate a software bug, restore any modified + * controls to their reset values. + */ void mpam_disable(struct work_struct *ignored) { + int idx; + struct mpam_class *class; struct mpam_msc *msc, *tmp; mutex_lock(&mpam_cpuhp_state_lock); @@ -1348,6 +1393,12 @@ void mpam_disable(struct work_struct *ignored) } mutex_unlock(&mpam_cpuhp_state_lock); + idx = srcu_read_lock(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) + mpam_reset_class(class); + srcu_read_unlock(&mpam_srcu, idx); + mutex_lock(&mpam_list_lock); list_for_each_entry_safe(msc, tmp, &mpam_all_msc, all_msc_list) mpam_msc_destroy(msc); From dde6c0f2f6c7ba8388832f994c247b775a97fdc0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:51 +0000 Subject: [PATCH 21/74] arm_mpam: Register and enable IRQs Register and enable error IRQs. All the MPAM error interrupts indicate a software bug, e.g. out of range partid. If the error interrupt is ever signalled, attempt to disable MPAM. Only the irq handler accesses the MPAMF_ESR register, so no locking is needed. The work to disable MPAM after an error needs to happen at process context as it takes mutex. It also unregisters the interrupts, meaning it can't be done from the threaded part of a threaded interrupt. Instead, mpam_disable() gets scheduled. Enabling the IRQs in the MSC may involve cross calling to a CPU that can access the MSC. Once the IRQ is requested, the mpam_disable() path can be called asynchronously, which will walk structures sized by max_partid. Ensure this size is fixed before the interrupt is requested. CC: Rohit Mathew Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Rohit Mathew Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 49aa621c4dcaf8e3cfeb9e73d07a9746b889f9e8) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 280 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 13 ++ 2 files changed, 293 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 7943d174b3f46..21fccc3ff0025 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -200,6 +203,35 @@ static u64 mpam_msc_read_idr(struct mpam_msc *msc) return (idr_high << 32) | idr_low; } +static void mpam_msc_clear_esr(struct mpam_msc *msc) +{ + u64 esr_low = __mpam_read_reg(msc, MPAMF_ESR); + + if (!esr_low) + return; + + /* + * Clearing the high/low bits of MPAMF_ESR can not be atomic. + * Clear the top half first, so that the pending error bits in the + * lower half prevent hardware from updating either half of the + * register. + */ + if (msc->has_extd_esr) + __mpam_write_reg(msc, MPAMF_ESR + 4, 0); + __mpam_write_reg(msc, MPAMF_ESR, 0); +} + +static u64 mpam_msc_read_esr(struct mpam_msc *msc) +{ + u64 esr_high = 0, esr_low; + + esr_low = __mpam_read_reg(msc, MPAMF_ESR); + if (msc->has_extd_esr) + esr_high = __mpam_read_reg(msc, MPAMF_ESR + 4); + + return (esr_high << 32) | esr_low; +} + static void __mpam_part_sel_raw(u32 partsel, struct mpam_msc *msc) { lockdep_assert_held(&msc->part_sel_lock); @@ -729,6 +761,7 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) pmg_max = FIELD_GET(MPAMF_IDR_PMG_MAX, idr); msc->partid_max = min(msc->partid_max, partid_max); msc->pmg_max = min(msc->pmg_max, pmg_max); + msc->has_extd_esr = FIELD_GET(MPAMF_IDR_HAS_EXTD_ESR, idr); mutex_lock(&mpam_list_lock); ris = mpam_get_or_create_ris(msc, ris_idx); @@ -743,6 +776,9 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) mutex_unlock(&msc->part_sel_lock); } + /* Clear any stale errors */ + mpam_msc_clear_esr(msc); + spin_lock(&partid_max_lock); mpam_partid_max = min(mpam_partid_max, msc->partid_max); mpam_pmg_max = min(mpam_pmg_max, msc->pmg_max); @@ -866,6 +902,13 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online) } } +static void _enable_percpu_irq(void *_irq) +{ + int *irq = _irq; + + enable_percpu_irq(*irq, IRQ_TYPE_NONE); +} + static int mpam_cpu_online(unsigned int cpu) { struct mpam_msc *msc; @@ -876,6 +919,9 @@ static int mpam_cpu_online(unsigned int cpu) if (!cpumask_test_cpu(cpu, &msc->accessibility)) continue; + if (msc->reenable_error_ppi) + _enable_percpu_irq(&msc->reenable_error_ppi); + if (atomic_fetch_inc(&msc->online_refs) == 0) mpam_reset_msc(msc, true); } @@ -926,6 +972,9 @@ static int mpam_cpu_offline(unsigned int cpu) if (!cpumask_test_cpu(cpu, &msc->accessibility)) continue; + if (msc->reenable_error_ppi) + disable_percpu_irq(msc->reenable_error_ppi); + if (atomic_dec_and_test(&msc->online_refs)) mpam_reset_msc(msc, false); } @@ -952,6 +1001,42 @@ static void mpam_register_cpuhp_callbacks(int (*online)(unsigned int online), mutex_unlock(&mpam_cpuhp_state_lock); } +static int __setup_ppi(struct mpam_msc *msc) +{ + int cpu; + + msc->error_dev_id = alloc_percpu(struct mpam_msc *); + if (!msc->error_dev_id) + return -ENOMEM; + + for_each_cpu(cpu, &msc->accessibility) + *per_cpu_ptr(msc->error_dev_id, cpu) = msc; + + return 0; +} + +static int mpam_msc_setup_error_irq(struct mpam_msc *msc) +{ + int irq; + + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + return 0; + + /* Allocate and initialise the percpu device pointer for PPI */ + if (irq_is_percpu(irq)) + return __setup_ppi(msc); + + /* sanity check: shared interrupts can be routed anywhere? */ + if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) { + pr_err_once("msc:%u is a private resource with a shared error interrupt", + msc->id); + return -EINVAL; + } + + return 0; +} + /* * An MSC can control traffic from a set of CPUs, but may only be accessible * from a (hopefully wider) set of CPUs. The common reason for this is power @@ -1028,6 +1113,9 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) if (err) return ERR_PTR(err); + err = devm_mutex_init(dev, &msc->error_irq_lock); + if (err) + return ERR_PTR(err); mpam_mon_sel_lock_init(msc); msc->id = pdev->id; msc->pdev = pdev; @@ -1040,6 +1128,10 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) return ERR_PTR(-EINVAL); } + err = mpam_msc_setup_error_irq(msc); + if (err) + return ERR_PTR(err); + if (device_property_read_u32(&pdev->dev, "pcc-channel", &tmp)) msc->iface = MPAM_IFACE_MMIO; else @@ -1313,8 +1405,177 @@ static void mpam_enable_merge_features(struct list_head *all_classes_list) } } +static char *mpam_errcode_names[16] = { + [MPAM_ERRCODE_NONE] = "No error", + [MPAM_ERRCODE_PARTID_SEL_RANGE] = "PARTID_SEL_Range", + [MPAM_ERRCODE_REQ_PARTID_RANGE] = "Req_PARTID_Range", + [MPAM_ERRCODE_MSMONCFG_ID_RANGE] = "MSMONCFG_ID_RANGE", + [MPAM_ERRCODE_REQ_PMG_RANGE] = "Req_PMG_Range", + [MPAM_ERRCODE_MONITOR_RANGE] = "Monitor_Range", + [MPAM_ERRCODE_INTPARTID_RANGE] = "intPARTID_Range", + [MPAM_ERRCODE_UNEXPECTED_INTERNAL] = "Unexpected_INTERNAL", + [MPAM_ERRCODE_UNDEFINED_RIS_PART_SEL] = "Undefined_RIS_PART_SEL", + [MPAM_ERRCODE_RIS_NO_CONTROL] = "RIS_No_Control", + [MPAM_ERRCODE_UNDEFINED_RIS_MON_SEL] = "Undefined_RIS_MON_SEL", + [MPAM_ERRCODE_RIS_NO_MONITOR] = "RIS_No_Monitor", + [12 ... 15] = "Reserved" +}; + +static int mpam_enable_msc_ecr(void *_msc) +{ + struct mpam_msc *msc = _msc; + + __mpam_write_reg(msc, MPAMF_ECR, MPAMF_ECR_INTEN); + + return 0; +} + +/* This can run in mpam_disable(), and the interrupt handler on the same CPU */ +static int mpam_disable_msc_ecr(void *_msc) +{ + struct mpam_msc *msc = _msc; + + __mpam_write_reg(msc, MPAMF_ECR, 0); + + return 0; +} + +static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) +{ + u64 reg; + u16 partid; + u8 errcode, pmg, ris; + + if (WARN_ON_ONCE(!msc) || + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), + &msc->accessibility))) + return IRQ_NONE; + + reg = mpam_msc_read_esr(msc); + + errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg); + if (!errcode) + return IRQ_NONE; + + /* Clear level triggered irq */ + mpam_msc_clear_esr(msc); + + partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg); + pmg = FIELD_GET(MPAMF_ESR_PMG, reg); + ris = FIELD_GET(MPAMF_ESR_RIS, reg); + + pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n", + msc->id, mpam_errcode_names[errcode], partid, pmg, + ris); + + /* Disable this interrupt. */ + mpam_disable_msc_ecr(msc); + + /* + * Schedule the teardown work. Don't use a threaded IRQ as we can't + * unregister the interrupt from the threaded part of the handler. + */ + mpam_disable_reason = "hardware error interrupt"; + schedule_work(&mpam_broken_work); + + return IRQ_HANDLED; +} + +static irqreturn_t mpam_ppi_handler(int irq, void *dev_id) +{ + struct mpam_msc *msc = *(struct mpam_msc **)dev_id; + + return __mpam_irq_handler(irq, msc); +} + +static irqreturn_t mpam_spi_handler(int irq, void *dev_id) +{ + struct mpam_msc *msc = dev_id; + + return __mpam_irq_handler(irq, msc); +} + +static int mpam_register_irqs(void) +{ + int err, irq; + struct mpam_msc *msc; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + continue; + + /* The MPAM spec says the interrupt can be SPI, PPI or LPI */ + /* We anticipate sharing the interrupt with other MSCs */ + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, &mpam_ppi_handler, + "mpam:msc:error", + msc->error_dev_id); + if (err) + return err; + + msc->reenable_error_ppi = irq; + smp_call_function_many(&msc->accessibility, + &_enable_percpu_irq, &irq, + true); + } else { + err = devm_request_irq(&msc->pdev->dev, irq, + &mpam_spi_handler, IRQF_SHARED, + "mpam:msc:error", msc); + if (err) + return err; + } + + mutex_lock(&msc->error_irq_lock); + msc->error_irq_req = true; + mpam_touch_msc(msc, mpam_enable_msc_ecr, msc); + msc->error_irq_hw_enabled = true; + mutex_unlock(&msc->error_irq_lock); + } + + return 0; +} + +static void mpam_unregister_irqs(void) +{ + int irq; + struct mpam_msc *msc; + + guard(cpus_read_lock)(); + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, + srcu_read_lock_held(&mpam_srcu)) { + irq = platform_get_irq_byname_optional(msc->pdev, "error"); + if (irq <= 0) + continue; + + mutex_lock(&msc->error_irq_lock); + if (msc->error_irq_hw_enabled) { + mpam_touch_msc(msc, mpam_disable_msc_ecr, msc); + msc->error_irq_hw_enabled = false; + } + + if (msc->error_irq_req) { + if (irq_is_percpu(irq)) { + msc->reenable_error_ppi = 0; + free_percpu_irq(irq, msc->error_dev_id); + } else { + devm_free_irq(&msc->pdev->dev, irq, msc); + } + msc->error_irq_req = false; + } + mutex_unlock(&msc->error_irq_lock); + } +} + static void mpam_enable_once(void) { + int err; + /* * Once the cpuhp callbacks have been changed, mpam_partid_max can no * longer change. @@ -1323,9 +1584,26 @@ static void mpam_enable_once(void) partid_max_published = true; spin_unlock(&partid_max_lock); + /* + * If all the MSC have been probed, enabling the IRQs happens next. + * That involves cross-calling to a CPU that can reach the MSC, and + * the locks must be taken in this order: + */ + cpus_read_lock(); mutex_lock(&mpam_list_lock); mpam_enable_merge_features(&mpam_classes); + + err = mpam_register_irqs(); + mutex_unlock(&mpam_list_lock); + cpus_read_unlock(); + + if (err) { + pr_warn("Failed to register irqs: %d\n", err); + mpam_disable_reason = "Failed to enable."; + schedule_work(&mpam_broken_work); + return; + } mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); @@ -1393,6 +1671,8 @@ void mpam_disable(struct work_struct *ignored) } mutex_unlock(&mpam_cpuhp_state_lock); + mpam_unregister_irqs(); + idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_srcu(class, &mpam_classes, classes_list, srcu_read_lock_held(&mpam_srcu)) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index dec485cd8a912..fa9d9a176a543 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -46,6 +46,11 @@ struct mpam_msc { enum mpam_msc_iface iface; u32 nrdy_usec; cpumask_t accessibility; + bool has_extd_esr; + + int reenable_error_ppi; + struct mpam_msc * __percpu *error_dev_id; + atomic_t online_refs; /* @@ -59,6 +64,14 @@ struct mpam_msc { unsigned long ris_idxs; u32 ris_max; + /* + * error_irq_lock is taken when registering/unregistering the error + * interrupt and maniupulating the below flags. + */ + struct mutex error_irq_lock; + bool error_irq_req; + bool error_irq_hw_enabled; + /* mpam_msc_ris of this component */ struct list_head ris; From 1db9f99484d976bc5bba68ac25bf5e74ccbc0a20 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:52 +0000 Subject: [PATCH 22/74] arm_mpam: Use a static key to indicate when mpam is enabled Once all the MSC have been probed, the system wide usable number of PARTID is known and the configuration arrays can be allocated. After this point, checking all the MSC have been probed is pointless, and the cpuhp callbacks should restore the configuration, instead of just resetting the MSC. Add a static key to enable this behaviour. This will also allow MPAM to be disabled in response to an error, and the architecture code to enable/disable the context switch of the MPAM system registers. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 3796f75aa7958d26b93a2508de5fc1e0b2f8a853) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 12 ++++++++++++ drivers/resctrl/mpam_internal.h | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 21fccc3ff0025..c126a95490f16 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -29,6 +29,8 @@ #include "mpam_internal.h" +DEFINE_STATIC_KEY_FALSE(mpam_enabled); /* This moves to arch code */ + /* * mpam_list_lock protects the SRCU lists when writing. Once the * mpam_enabled key is enabled these lists are read-only, @@ -936,6 +938,9 @@ static int mpam_discovery_cpu_online(unsigned int cpu) struct mpam_msc *msc; bool new_device_probed = false; + if (mpam_is_enabled()) + return 0; + guard(srcu)(&mpam_srcu); list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, srcu_read_lock_held(&mpam_srcu)) { @@ -1471,6 +1476,10 @@ static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) /* Disable this interrupt. */ mpam_disable_msc_ecr(msc); + /* Are we racing with the thread disabling MPAM? */ + if (!mpam_is_enabled()) + return IRQ_HANDLED; + /* * Schedule the teardown work. Don't use a threaded IRQ as we can't * unregister the interrupt from the threaded part of the handler. @@ -1605,6 +1614,7 @@ static void mpam_enable_once(void) return; } + static_branch_enable(&mpam_enabled); mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); @@ -1671,6 +1681,8 @@ void mpam_disable(struct work_struct *ignored) } mutex_unlock(&mpam_cpuhp_state_lock); + static_branch_disable(&mpam_enabled); + mpam_unregister_irqs(); idx = srcu_read_lock(&mpam_srcu); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index fa9d9a176a543..93a629f6e15a7 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,13 @@ struct platform_device; +DECLARE_STATIC_KEY_FALSE(mpam_enabled); + +static inline bool mpam_is_enabled(void) +{ + return static_branch_likely(&mpam_enabled); +} + /* * Structures protected by SRCU may not be freed for a surprising amount of * time (especially if perf is running). To ensure the MPAM error interrupt can From 3fbeae1eb353614ea70fb45498d88b3bdacce218 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:53 +0000 Subject: [PATCH 23/74] arm_mpam: Allow configuration to be applied and restored during cpu online When CPUs come online the MSC's original configuration should be restored. Add struct mpam_config to hold the configuration. For each component, this has a bitmap of features that have been changed from the reset values. The mpam_config is also used on RIS reset where all bits are set to ensure all features are reset. Once the maximum partid is known, allocate a configuration array for each component, and reprogram each RIS configuration from this. CC: Dave Martin Signed-off-by: James Morse Cc: Fujitsu Fujitsu Cc: Peter Newman peternewman@google.com Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 09b89d2a72f37b078198cbb09d5b9e13ba9d68b9) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 288 +++++++++++++++++++++++++++++--- drivers/resctrl/mpam_internal.h | 27 +++ 2 files changed, 290 insertions(+), 25 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index c126a95490f16..6dbd378acdcf3 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -145,6 +145,16 @@ static void mpam_free_garbage(void) } } +/* + * Once mpam is enabled, new requestors cannot further reduce the available + * partid. Assert that the size is fixed, and new requestors will be turned + * away. + */ +static void mpam_assert_partid_sizes_fixed(void) +{ + WARN_ON_ONCE(!partid_max_published); +} + static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg) { WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); @@ -338,12 +348,16 @@ mpam_component_alloc(struct mpam_class *class, int id) return comp; } +static void __destroy_component_cfg(struct mpam_component *comp); + static void mpam_component_destroy(struct mpam_component *comp) { struct mpam_class *class = comp->class; lockdep_assert_held(&mpam_list_lock); + __destroy_component_cfg(comp); + list_del_rcu(&comp->class_list); add_to_garbage(comp); @@ -819,31 +833,57 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) __mpam_write_reg(msc, reg, bm); } -static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) +/* Called via IPI. Call while holding an SRCU reference */ +static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, + struct mpam_config *cfg) { struct mpam_msc *msc = ris->vmsc->msc; struct mpam_props *rprops = &ris->props; - WARN_ON_ONCE(!srcu_read_lock_held((&mpam_srcu))); - mutex_lock(&msc->part_sel_lock); __mpam_part_sel(ris->ris_idx, partid, msc); - if (mpam_has_feature(mpam_feat_cpor_part, rprops)) - mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); + if (mpam_has_feature(mpam_feat_cpor_part, rprops) && + mpam_has_feature(mpam_feat_cpor_part, cfg)) { + if (cfg->reset_cpbm) + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); + else + mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); + } - if (mpam_has_feature(mpam_feat_mbw_part, rprops)) - mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); + if (mpam_has_feature(mpam_feat_mbw_part, rprops) && + mpam_has_feature(mpam_feat_mbw_part, cfg)) { + if (cfg->reset_mbw_pbm) + mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); + else + mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); + } - if (mpam_has_feature(mpam_feat_mbw_min, rprops)) + if (mpam_has_feature(mpam_feat_mbw_min, rprops) && + mpam_has_feature(mpam_feat_mbw_min, cfg)) mpam_write_partsel_reg(msc, MBW_MIN, 0); - if (mpam_has_feature(mpam_feat_mbw_max, rprops)) - mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); + if (mpam_has_feature(mpam_feat_mbw_max, rprops) && + mpam_has_feature(mpam_feat_mbw_max, cfg)) { + if (cfg->reset_mbw_max) + mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); + else + mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); + } mutex_unlock(&msc->part_sel_lock); } +static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) +{ + *reset_cfg = (struct mpam_config) { + .reset_cpbm = true, + .reset_mbw_pbm = true, + .reset_mbw_max = true, + }; + bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST); +} + /* * Called via smp_call_on_cpu() to prevent migration, while still being * pre-emptible. Caller must hold mpam_srcu. @@ -851,16 +891,19 @@ static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid) static int mpam_reset_ris(void *arg) { u16 partid, partid_max; + struct mpam_config reset_cfg; struct mpam_msc_ris *ris = arg; if (ris->in_reset_state) return 0; + mpam_init_reset_cfg(&reset_cfg); + spin_lock(&partid_max_lock); partid_max = mpam_partid_max; spin_unlock(&partid_max_lock); for (partid = 0; partid <= partid_max; partid++) - mpam_reset_ris_partid(ris, partid); + mpam_reprogram_ris_partid(ris, partid, &reset_cfg); return 0; } @@ -889,19 +932,58 @@ static int mpam_touch_msc(struct mpam_msc *msc, int (*fn)(void *a), void *arg) return smp_call_on_cpu(mpam_get_msc_preferred_cpu(msc), fn, arg, true); } -static void mpam_reset_msc(struct mpam_msc *msc, bool online) +struct mpam_write_config_arg { + struct mpam_msc_ris *ris; + struct mpam_component *comp; + u16 partid; +}; + +static int __write_config(void *arg) +{ + struct mpam_write_config_arg *c = arg; + + mpam_reprogram_ris_partid(c->ris, c->partid, &c->comp->cfg[c->partid]); + + return 0; +} + +static void mpam_reprogram_msc(struct mpam_msc *msc) { + u16 partid; + bool reset; + struct mpam_config *cfg; struct mpam_msc_ris *ris; + struct mpam_write_config_arg arg; + + /* + * No lock for mpam_partid_max as partid_max_published has been + * set by mpam_enabled(), so the values can no longer change. + */ + mpam_assert_partid_sizes_fixed(); - list_for_each_entry_srcu(ris, &msc->ris, msc_list, srcu_read_lock_held(&mpam_srcu)) { - mpam_touch_msc(msc, &mpam_reset_ris, ris); + mutex_lock(&msc->cfg_lock); + list_for_each_entry_srcu(ris, &msc->ris, msc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_is_enabled() && !ris->in_reset_state) { + mpam_touch_msc(msc, &mpam_reset_ris, ris); + ris->in_reset_state = true; + continue; + } - /* - * Set in_reset_state when coming online. The reset state - * for non-zero partid may be lost while the CPUs are offline. - */ - ris->in_reset_state = online; + arg.comp = ris->vmsc->comp; + arg.ris = ris; + reset = true; + for (partid = 0; partid <= mpam_partid_max; partid++) { + cfg = &ris->vmsc->comp->cfg[partid]; + if (!bitmap_empty(cfg->features, MPAM_FEATURE_LAST)) + reset = false; + + arg.partid = partid; + mpam_touch_msc(msc, __write_config, &arg); + } + ris->in_reset_state = reset; } + mutex_unlock(&msc->cfg_lock); } static void _enable_percpu_irq(void *_irq) @@ -925,7 +1007,7 @@ static int mpam_cpu_online(unsigned int cpu) _enable_percpu_irq(&msc->reenable_error_ppi); if (atomic_fetch_inc(&msc->online_refs) == 0) - mpam_reset_msc(msc, true); + mpam_reprogram_msc(msc); } return 0; @@ -980,8 +1062,22 @@ static int mpam_cpu_offline(unsigned int cpu) if (msc->reenable_error_ppi) disable_percpu_irq(msc->reenable_error_ppi); - if (atomic_dec_and_test(&msc->online_refs)) - mpam_reset_msc(msc, false); + if (atomic_dec_and_test(&msc->online_refs)) { + struct mpam_msc_ris *ris; + + mutex_lock(&msc->cfg_lock); + list_for_each_entry_srcu(ris, &msc->ris, msc_list, + srcu_read_lock_held(&mpam_srcu)) { + mpam_touch_msc(msc, &mpam_reset_ris, ris); + + /* + * The reset state for non-zero partid may be + * lost while the CPUs are offline. + */ + ris->in_reset_state = false; + } + mutex_unlock(&msc->cfg_lock); + } } return 0; @@ -1121,6 +1217,11 @@ static struct mpam_msc *do_mpam_msc_drv_probe(struct platform_device *pdev) err = devm_mutex_init(dev, &msc->error_irq_lock); if (err) return ERR_PTR(err); + + err = devm_mutex_init(dev, &msc->cfg_lock); + if (err) + return ERR_PTR(err); + mpam_mon_sel_lock_init(msc); msc->id = pdev->id; msc->pdev = pdev; @@ -1581,6 +1682,72 @@ static void mpam_unregister_irqs(void) } } +static void __destroy_component_cfg(struct mpam_component *comp) +{ + add_to_garbage(comp->cfg); +} + +static void mpam_reset_component_cfg(struct mpam_component *comp) +{ + int i; + struct mpam_props *cprops = &comp->class->props; + + mpam_assert_partid_sizes_fixed(); + + if (!comp->cfg) + return; + + for (i = 0; i <= mpam_partid_max; i++) { + comp->cfg[i] = (struct mpam_config) {}; + if (cprops->cpbm_wd) + comp->cfg[i].cpbm = GENMASK(cprops->cpbm_wd - 1, 0); + if (cprops->mbw_pbm_bits) + comp->cfg[i].mbw_pbm = GENMASK(cprops->mbw_pbm_bits - 1, 0); + if (cprops->bwa_wd) + comp->cfg[i].mbw_max = GENMASK(15, 16 - cprops->bwa_wd); + } +} + +static int __allocate_component_cfg(struct mpam_component *comp) +{ + mpam_assert_partid_sizes_fixed(); + + if (comp->cfg) + return 0; + + comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL); + if (!comp->cfg) + return -ENOMEM; + + /* + * The array is free()d in one go, so only cfg[0]'s structure needs + * to be initialised. + */ + init_garbage(&comp->cfg[0].garbage); + + mpam_reset_component_cfg(comp); + + return 0; +} + +static int mpam_allocate_config(void) +{ + struct mpam_class *class; + struct mpam_component *comp; + + lockdep_assert_held(&mpam_list_lock); + + list_for_each_entry(class, &mpam_classes, classes_list) { + list_for_each_entry(comp, &class->components, class_list) { + int err = __allocate_component_cfg(comp); + if (err) + return err; + } + } + + return 0; +} + static void mpam_enable_once(void) { int err; @@ -1600,15 +1767,25 @@ static void mpam_enable_once(void) */ cpus_read_lock(); mutex_lock(&mpam_list_lock); - mpam_enable_merge_features(&mpam_classes); + do { + mpam_enable_merge_features(&mpam_classes); - err = mpam_register_irqs(); + err = mpam_register_irqs(); + if (err) { + pr_warn("Failed to register irqs: %d\n", err); + break; + } + err = mpam_allocate_config(); + if (err) { + pr_err("Failed to allocate configuration arrays.\n"); + break; + } + } while (0); mutex_unlock(&mpam_list_lock); cpus_read_unlock(); if (err) { - pr_warn("Failed to register irqs: %d\n", err); mpam_disable_reason = "Failed to enable."; schedule_work(&mpam_broken_work); return; @@ -1628,6 +1805,9 @@ static void mpam_reset_component_locked(struct mpam_component *comp) struct mpam_vmsc *vmsc; lockdep_assert_cpus_held(); + mpam_assert_partid_sizes_fixed(); + + mpam_reset_component_cfg(comp); guard(srcu)(&mpam_srcu); list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, @@ -1728,6 +1908,64 @@ void mpam_enable(struct work_struct *work) mpam_enable_once(); } +#define maybe_update_config(cfg, feature, newcfg, member, changes) do { \ + if (mpam_has_feature(feature, newcfg) && \ + (newcfg)->member != (cfg)->member) { \ + (cfg)->member = (newcfg)->member; \ + mpam_set_feature(feature, cfg); \ + \ + (changes) = true; \ + } \ +} while (0) + +static bool mpam_update_config(struct mpam_config *cfg, + const struct mpam_config *newcfg) +{ + bool has_changes = false; + + maybe_update_config(cfg, mpam_feat_cpor_part, newcfg, cpbm, has_changes); + maybe_update_config(cfg, mpam_feat_mbw_part, newcfg, mbw_pbm, has_changes); + maybe_update_config(cfg, mpam_feat_mbw_max, newcfg, mbw_max, has_changes); + + return has_changes; +} + +int mpam_apply_config(struct mpam_component *comp, u16 partid, + struct mpam_config *cfg) +{ + struct mpam_write_config_arg arg; + struct mpam_msc_ris *ris; + struct mpam_vmsc *vmsc; + struct mpam_msc *msc; + + lockdep_assert_cpus_held(); + + /* Don't pass in the current config! */ + WARN_ON_ONCE(&comp->cfg[partid] == cfg); + + if (!mpam_update_config(&comp->cfg[partid], cfg)) + return 0; + + arg.comp = comp; + arg.partid = partid; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + msc = vmsc->msc; + + mutex_lock(&msc->cfg_lock); + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + arg.ris = ris; + mpam_touch_msc(msc, __write_config, &arg); + } + mutex_unlock(&msc->cfg_lock); + } + + return 0; +} + static int __init mpam_msc_driver_init(void) { if (!system_supports_mpam()) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 93a629f6e15a7..b8fdbd7ab7a55 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -91,6 +91,9 @@ struct mpam_msc { */ struct mutex part_sel_lock; + /* cfg_lock protects the msc configuration. */ + struct mutex cfg_lock; + /* * mon_sel_lock protects access to the MSC hardware registers that are * affected by MPAMCFG_MON_SEL, and the mbwu_state. @@ -182,6 +185,21 @@ struct mpam_class { struct mpam_garbage garbage; }; +struct mpam_config { + /* Which configuration values are valid. */ + DECLARE_BITMAP(features, MPAM_FEATURE_LAST); + + u32 cpbm; + u32 mbw_pbm; + u16 mbw_max; + + bool reset_cpbm; + bool reset_mbw_pbm; + bool reset_mbw_max; + + struct mpam_garbage garbage; +}; + struct mpam_component { u32 comp_id; @@ -190,6 +208,12 @@ struct mpam_component { cpumask_t affinity; + /* + * Array of configuration values, indexed by partid. + * Read from cpuhp callbacks, hold the cpuhp lock when writing. + */ + struct mpam_config *cfg; + /* member of mpam_class:components */ struct list_head class_list; @@ -249,6 +273,9 @@ extern u8 mpam_pmg_max; void mpam_enable(struct work_struct *work); void mpam_disable(struct work_struct *work); +int mpam_apply_config(struct mpam_component *comp, u16 partid, + struct mpam_config *cfg); + int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From 7257218ff637fb0d98206b2e0eb885696e764da0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:54 +0000 Subject: [PATCH 24/74] arm_mpam: Probe and reset the rest of the features MPAM supports more features than are going to be exposed to resctrl. For partid other than 0, the reset values of these controls isn't known. Discover the rest of the features so they can be reset to avoid any side effects when resctrl is in use. PARTID narrowing allows MSC/RIS to support less configuration space than is usable. If this feature is found on a class of device we are likely to use, then reduce the partid_max to make it usable. This allows us to map a PARTID to itself. CC: Rohit Mathew CC: Zeng Heng CC: Dave Martin Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 880df85d8673f8e2395f139d3618661366e5d4d8) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 188 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 18 +++ 2 files changed, 206 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 6dbd378acdcf3..67eb0c79ca494 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -259,6 +259,15 @@ static void __mpam_part_sel(u8 ris_idx, u16 partid, struct mpam_msc *msc) __mpam_part_sel_raw(partsel, msc); } +static void __mpam_intpart_sel(u8 ris_idx, u16 intpartid, struct mpam_msc *msc) +{ + u32 partsel = FIELD_PREP(MPAMCFG_PART_SEL_RIS, ris_idx) | + FIELD_PREP(MPAMCFG_PART_SEL_PARTID_SEL, intpartid) | + MPAMCFG_PART_SEL_INTERNAL; + + __mpam_part_sel_raw(partsel, msc); +} + int mpam_register_requestor(u16 partid_max, u8 pmg_max) { guard(spinlock)(&partid_max_lock); @@ -656,10 +665,34 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) struct mpam_msc *msc = ris->vmsc->msc; struct device *dev = &msc->pdev->dev; struct mpam_props *props = &ris->props; + struct mpam_class *class = ris->vmsc->comp->class; lockdep_assert_held(&msc->probe_lock); lockdep_assert_held(&msc->part_sel_lock); + /* Cache Capacity Partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_CCAP_PART, ris->idr)) { + u32 ccap_features = mpam_read_partsel_reg(msc, CCAP_IDR); + + props->cmax_wd = FIELD_GET(MPAMF_CCAP_IDR_CMAX_WD, ccap_features); + if (props->cmax_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMAX_SOFTLIM, ccap_features)) + mpam_set_feature(mpam_feat_cmax_softlim, props); + + if (props->cmax_wd && + !FIELD_GET(MPAMF_CCAP_IDR_NO_CMAX, ccap_features)) + mpam_set_feature(mpam_feat_cmax_cmax, props); + + if (props->cmax_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CMIN, ccap_features)) + mpam_set_feature(mpam_feat_cmax_cmin, props); + + props->cassoc_wd = FIELD_GET(MPAMF_CCAP_IDR_CASSOC_WD, ccap_features); + if (props->cassoc_wd && + FIELD_GET(MPAMF_CCAP_IDR_HAS_CASSOC, ccap_features)) + mpam_set_feature(mpam_feat_cmax_cassoc, props); + } + /* Cache Portion partitioning */ if (FIELD_GET(MPAMF_IDR_HAS_CPOR_PART, ris->idr)) { u32 cpor_features = mpam_read_partsel_reg(msc, CPOR_IDR); @@ -682,6 +715,31 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) mpam_set_feature(mpam_feat_mbw_max, props); + + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MIN, mbw_features)) + mpam_set_feature(mpam_feat_mbw_min, props); + + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_PROP, mbw_features)) + mpam_set_feature(mpam_feat_mbw_prop, props); + } + + /* Priority partitioning */ + if (FIELD_GET(MPAMF_IDR_HAS_PRI_PART, ris->idr)) { + u32 pri_features = mpam_read_partsel_reg(msc, PRI_IDR); + + props->intpri_wd = FIELD_GET(MPAMF_PRI_IDR_INTPRI_WD, pri_features); + if (props->intpri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_INTPRI, pri_features)) { + mpam_set_feature(mpam_feat_intpri_part, props); + if (FIELD_GET(MPAMF_PRI_IDR_INTPRI_0_IS_LOW, pri_features)) + mpam_set_feature(mpam_feat_intpri_part_0_low, props); + } + + props->dspri_wd = FIELD_GET(MPAMF_PRI_IDR_DSPRI_WD, pri_features); + if (props->dspri_wd && FIELD_GET(MPAMF_PRI_IDR_HAS_DSPRI, pri_features)) { + mpam_set_feature(mpam_feat_dspri_part, props); + if (FIELD_GET(MPAMF_PRI_IDR_DSPRI_0_IS_LOW, pri_features)) + mpam_set_feature(mpam_feat_dspri_part_0_low, props); + } } /* Performance Monitoring */ @@ -706,6 +764,9 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) mpam_set_feature(mpam_feat_msmon_csu, props); + if (FIELD_GET(MPAMF_CSUMON_IDR_HAS_XCL, csumonidr)) + mpam_set_feature(mpam_feat_msmon_csu_xcl, props); + /* Is NRDY hardware managed? */ hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, CSU); if (hw_managed) @@ -727,6 +788,9 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) if (props->num_mbwu_mon) mpam_set_feature(mpam_feat_msmon_mbwu, props); + if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); + /* Is NRDY hardware managed? */ hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); if (hw_managed) @@ -738,6 +802,21 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) */ } } + + /* + * RIS with PARTID narrowing don't have enough storage for one + * configuration per PARTID. If these are in a class we could use, + * reduce the supported partid_max to match the number of intpartid. + * If the class is unknown, just ignore it. + */ + if (FIELD_GET(MPAMF_IDR_HAS_PARTID_NRW, ris->idr) && + class->type != MPAM_CLASS_UNKNOWN) { + u32 nrwidr = mpam_read_partsel_reg(msc, PARTID_NRW_IDR); + u16 partid_max = FIELD_GET(MPAMF_PARTID_NRW_IDR_INTPARTID_MAX, nrwidr); + + mpam_set_feature(mpam_feat_partid_nrw, props); + msc->partid_max = min(msc->partid_max, partid_max); + } } static int mpam_msc_hw_probe(struct mpam_msc *msc) @@ -837,12 +916,28 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, struct mpam_config *cfg) { + u32 pri_val = 0; + u16 cmax = MPAMCFG_CMAX_CMAX; struct mpam_msc *msc = ris->vmsc->msc; struct mpam_props *rprops = &ris->props; + u16 dspri = GENMASK(rprops->dspri_wd, 0); + u16 intpri = GENMASK(rprops->intpri_wd, 0); mutex_lock(&msc->part_sel_lock); __mpam_part_sel(ris->ris_idx, partid, msc); + if (mpam_has_feature(mpam_feat_partid_nrw, rprops)) { + /* Update the intpartid mapping */ + mpam_write_partsel_reg(msc, INTPARTID, + MPAMCFG_INTPARTID_INTERNAL | partid); + + /* + * Then switch to the 'internal' partid to update the + * configuration. + */ + __mpam_intpart_sel(ris->ris_idx, partid, msc); + } + if (mpam_has_feature(mpam_feat_cpor_part, rprops) && mpam_has_feature(mpam_feat_cpor_part, cfg)) { if (cfg->reset_cpbm) @@ -871,6 +966,35 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); } + if (mpam_has_feature(mpam_feat_mbw_prop, rprops) && + mpam_has_feature(mpam_feat_mbw_prop, cfg)) + mpam_write_partsel_reg(msc, MBW_PROP, 0); + + if (mpam_has_feature(mpam_feat_cmax_cmax, rprops)) + mpam_write_partsel_reg(msc, CMAX, cmax); + + if (mpam_has_feature(mpam_feat_cmax_cmin, rprops)) + mpam_write_partsel_reg(msc, CMIN, 0); + + if (mpam_has_feature(mpam_feat_cmax_cassoc, rprops)) + mpam_write_partsel_reg(msc, CASSOC, MPAMCFG_CASSOC_CASSOC); + + if (mpam_has_feature(mpam_feat_intpri_part, rprops) || + mpam_has_feature(mpam_feat_dspri_part, rprops)) { + /* aces high? */ + if (!mpam_has_feature(mpam_feat_intpri_part_0_low, rprops)) + intpri = 0; + if (!mpam_has_feature(mpam_feat_dspri_part_0_low, rprops)) + dspri = 0; + + if (mpam_has_feature(mpam_feat_intpri_part, rprops)) + pri_val |= FIELD_PREP(MPAMCFG_PRI_INTPRI, intpri); + if (mpam_has_feature(mpam_feat_dspri_part, rprops)) + pri_val |= FIELD_PREP(MPAMCFG_PRI_DSPRI, dspri); + + mpam_write_partsel_reg(msc, PRI, pri_val); + } + mutex_unlock(&msc->part_sel_lock); } @@ -1308,6 +1432,18 @@ static bool mpam_has_bwa_wd_feature(struct mpam_props *props) return true; if (mpam_has_feature(mpam_feat_mbw_max, props)) return true; + if (mpam_has_feature(mpam_feat_mbw_prop, props)) + return true; + return false; +} + +/* Any of these features mean the CMAX_WD field is valid. */ +static bool mpam_has_cmax_wd_feature(struct mpam_props *props) +{ + if (mpam_has_feature(mpam_feat_cmax_cmax, props)) + return true; + if (mpam_has_feature(mpam_feat_cmax_cmin, props)) + return true; return false; } @@ -1366,6 +1502,23 @@ static void __props_mismatch(struct mpam_props *parent, parent->bwa_wd = min(parent->bwa_wd, child->bwa_wd); } + if (alias && !mpam_has_cmax_wd_feature(parent) && mpam_has_cmax_wd_feature(child)) { + parent->cmax_wd = child->cmax_wd; + } else if (MISMATCHED_HELPER(parent, child, mpam_has_cmax_wd_feature, + cmax_wd, alias)) { + pr_debug("%s took the min cmax_wd\n", __func__); + parent->cmax_wd = min(parent->cmax_wd, child->cmax_wd); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_cmax_cassoc, alias)) { + parent->cassoc_wd = child->cassoc_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_cmax_cassoc, + cassoc_wd, alias)) { + pr_debug("%s cleared cassoc_wd\n", __func__); + mpam_clear_feature(mpam_feat_cmax_cassoc, parent); + parent->cassoc_wd = 0; + } + /* For num properties, take the minimum */ if (CAN_MERGE_FEAT(parent, child, mpam_feat_msmon_csu, alias)) { parent->num_csu_mon = child->num_csu_mon; @@ -1385,6 +1538,41 @@ static void __props_mismatch(struct mpam_props *parent, child->num_mbwu_mon); } + if (CAN_MERGE_FEAT(parent, child, mpam_feat_intpri_part, alias)) { + parent->intpri_wd = child->intpri_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_intpri_part, + intpri_wd, alias)) { + pr_debug("%s took the min intpri_wd\n", __func__); + parent->intpri_wd = min(parent->intpri_wd, child->intpri_wd); + } + + if (CAN_MERGE_FEAT(parent, child, mpam_feat_dspri_part, alias)) { + parent->dspri_wd = child->dspri_wd; + } else if (MISMATCHED_FEAT(parent, child, mpam_feat_dspri_part, + dspri_wd, alias)) { + pr_debug("%s took the min dspri_wd\n", __func__); + parent->dspri_wd = min(parent->dspri_wd, child->dspri_wd); + } + + /* TODO: alias support for these two */ + /* {int,ds}pri may not have differing 0-low behaviour */ + if (mpam_has_feature(mpam_feat_intpri_part, parent) && + (!mpam_has_feature(mpam_feat_intpri_part, child) || + mpam_has_feature(mpam_feat_intpri_part_0_low, parent) != + mpam_has_feature(mpam_feat_intpri_part_0_low, child))) { + pr_debug("%s cleared intpri_part\n", __func__); + mpam_clear_feature(mpam_feat_intpri_part, parent); + mpam_clear_feature(mpam_feat_intpri_part_0_low, parent); + } + if (mpam_has_feature(mpam_feat_dspri_part, parent) && + (!mpam_has_feature(mpam_feat_dspri_part, child) || + mpam_has_feature(mpam_feat_dspri_part_0_low, parent) != + mpam_has_feature(mpam_feat_dspri_part_0_low, child))) { + pr_debug("%s cleared dspri_part\n", __func__); + mpam_clear_feature(mpam_feat_dspri_part, parent); + mpam_clear_feature(mpam_feat_dspri_part_0_low, parent); + } + if (alias) { /* Merge features for aliased resources */ bitmap_or(parent->features, parent->features, child->features, MPAM_FEATURE_LAST); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index b8fdbd7ab7a55..618e5355a95e9 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -143,14 +143,28 @@ static inline void mpam_mon_sel_lock_init(struct mpam_msc *msc) /* Bits for mpam features bitmaps */ enum mpam_device_features { mpam_feat_cpor_part, + mpam_feat_cmax_softlim, + mpam_feat_cmax_cmax, + mpam_feat_cmax_cmin, + mpam_feat_cmax_cassoc, mpam_feat_mbw_part, mpam_feat_mbw_min, mpam_feat_mbw_max, + mpam_feat_mbw_prop, + mpam_feat_intpri_part, + mpam_feat_intpri_part_0_low, + mpam_feat_dspri_part, + mpam_feat_dspri_part_0_low, mpam_feat_msmon, mpam_feat_msmon_csu, + mpam_feat_msmon_csu_capture, + mpam_feat_msmon_csu_xcl, mpam_feat_msmon_csu_hw_nrdy, mpam_feat_msmon_mbwu, + mpam_feat_msmon_mbwu_capture, + mpam_feat_msmon_mbwu_rwbw, mpam_feat_msmon_mbwu_hw_nrdy, + mpam_feat_partid_nrw, MPAM_FEATURE_LAST }; @@ -160,6 +174,10 @@ struct mpam_props { u16 cpbm_wd; u16 mbw_pbm_bits; u16 bwa_wd; + u16 cmax_wd; + u16 cassoc_wd; + u16 intpri_wd; + u16 dspri_wd; u16 num_csu_mon; u16 num_mbwu_mon; }; From d6eabb56526c50674bd4c6f75001898cc41bfbbc Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:55 +0000 Subject: [PATCH 25/74] arm_mpam: Add helpers to allocate monitors MPAM's MSC support a number of monitors, each of which supports bandwidth counters, or cache-storage-utilisation counters. To use a counter, a monitor needs to be configured. Add helpers to allocate and free CSU or MBWU monitors. Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit c891bae66423bc69a680ca1de34940132e2c8ace) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 2 ++ drivers/resctrl/mpam_internal.h | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 67eb0c79ca494..a7ba07ac5a2f8 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -305,6 +305,8 @@ mpam_class_alloc(u8 level_idx, enum mpam_class_types type) class->level = level_idx; class->type = type; INIT_LIST_HEAD_RCU(&class->classes_list); + ida_init(&class->ida_csu_mon); + ida_init(&class->ida_mbwu_mon); list_add_rcu(&class->classes_list, &mpam_classes); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 618e5355a95e9..8bbc67df6d975 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -200,6 +200,9 @@ struct mpam_class { /* member of mpam_classes */ struct list_head classes_list; + struct ida ida_csu_mon; + struct ida ida_mbwu_mon; + struct mpam_garbage garbage; }; @@ -279,6 +282,38 @@ struct mpam_msc_ris { struct mpam_garbage garbage; }; +static inline int mpam_alloc_csu_mon(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) + return -EOPNOTSUPP; + + return ida_alloc_max(&class->ida_csu_mon, cprops->num_csu_mon - 1, + GFP_KERNEL); +} + +static inline void mpam_free_csu_mon(struct mpam_class *class, int csu_mon) +{ + ida_free(&class->ida_csu_mon, csu_mon); +} + +static inline int mpam_alloc_mbwu_mon(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops)) + return -EOPNOTSUPP; + + return ida_alloc_max(&class->ida_mbwu_mon, cprops->num_mbwu_mon - 1, + GFP_KERNEL); +} + +static inline void mpam_free_mbwu_mon(struct mpam_class *class, int mbwu_mon) +{ + ida_free(&class->ida_mbwu_mon, mbwu_mon); +} + /* List of all classes - protected by srcu*/ extern struct srcu_struct mpam_srcu; extern struct list_head mpam_classes; From 4fd69833d79bb74daa7a36cd079b3064f567e513 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:56 +0000 Subject: [PATCH 26/74] arm_mpam: Add mpam_msmon_read() to read monitor value Reading a monitor involves configuring what you want to monitor, and reading the value. Components made up of multiple MSC may need values from each MSC. MSCs may take time to configure, returning 'not ready'. The maximum 'not ready' time should have been provided by firmware. Add mpam_msmon_read() to hide all this. If (one of) the MSC returns not ready, then wait the full timeout value before trying again. CC: Shanker Donthineni Cc: Shaopeng Tan (Fujitsu) Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 823e7c3712c584641b4ef890a8af34884c677197) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 235 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 19 +++ 2 files changed, 254 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index a7ba07ac5a2f8..4859c8b096c3e 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -886,6 +886,241 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) return 0; } +struct mon_read { + struct mpam_msc_ris *ris; + struct mon_cfg *ctx; + enum mpam_device_features type; + u64 *val; + int err; +}; + +static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, + u32 *flt_val) +{ + struct mon_cfg *ctx = m->ctx; + + /* + * For CSU counters its implementation-defined what happens when not + * filtering by partid. + */ + *ctl_val = MSMON_CFG_x_CTL_MATCH_PARTID; + + *flt_val = FIELD_PREP(MSMON_CFG_x_FLT_PARTID, ctx->partid); + + if (m->ctx->match_pmg) { + *ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG; + *flt_val |= FIELD_PREP(MSMON_CFG_x_FLT_PMG, ctx->pmg); + } + + switch (m->type) { + case mpam_feat_msmon_csu: + *ctl_val |= MSMON_CFG_CSU_CTL_TYPE_CSU; + + if (mpam_has_feature(mpam_feat_msmon_csu_xcl, &m->ris->props)) + *flt_val |= FIELD_PREP(MSMON_CFG_CSU_FLT_XCL, ctx->csu_exclude_clean); + + break; + case mpam_feat_msmon_mbwu: + *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU; + + if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props)) + *flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts); + + break; + default: + pr_warn("Unexpected monitor type %d\n", m->type); + } +} + +static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, + u32 *flt_val) +{ + struct mpam_msc *msc = m->ris->vmsc->msc; + + switch (m->type) { + case mpam_feat_msmon_csu: + *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL); + *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT); + break; + case mpam_feat_msmon_mbwu: + *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); + *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); + break; + default: + pr_warn("Unexpected monitor type %d\n", m->type); + } +} + +/* Remove values set by the hardware to prevent apparent mismatches. */ +static inline void clean_msmon_ctl_val(u32 *cur_ctl) +{ + *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS; +} + +static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, + u32 flt_val) +{ + struct mpam_msc *msc = m->ris->vmsc->msc; + + /* + * Write the ctl_val with the enable bit cleared, reset the counter, + * then enable counter. + */ + switch (m->type) { + case mpam_feat_msmon_csu: + mpam_write_monsel_reg(msc, CFG_CSU_FLT, flt_val); + mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val); + mpam_write_monsel_reg(msc, CSU, 0); + mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); + break; + case mpam_feat_msmon_mbwu: + mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); + /* Counting monitors require NRDY to be reset by software */ + mpam_write_monsel_reg(msc, MBWU, 0); + break; + default: + pr_warn("Unexpected monitor type %d\n", m->type); + } +} + +static void __ris_msmon_read(void *arg) +{ + u64 now; + bool nrdy = false; + bool config_mismatch; + struct mon_read *m = arg; + struct mon_cfg *ctx = m->ctx; + struct mpam_msc_ris *ris = m->ris; + struct mpam_props *rprops = &ris->props; + struct mpam_msc *msc = m->ris->vmsc->msc; + u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; + + if (!mpam_mon_sel_lock(msc)) { + m->err = -EIO; + return; + } + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, ctx->mon) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + + /* + * Read the existing configuration to avoid re-writing the same values. + * This saves waiting for 'nrdy' on subsequent reads. + */ + read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); + clean_msmon_ctl_val(&cur_ctl); + gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); + config_mismatch = cur_flt != flt_val || + cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); + + if (config_mismatch) + write_msmon_ctl_flt_vals(m, ctl_val, flt_val); + + switch (m->type) { + case mpam_feat_msmon_csu: + now = mpam_read_monsel_reg(msc, CSU); + if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + break; + case mpam_feat_msmon_mbwu: + now = mpam_read_monsel_reg(msc, MBWU); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + break; + default: + m->err = -EINVAL; + } + mpam_mon_sel_unlock(msc); + + if (nrdy) { + m->err = -EBUSY; + return; + } + + now = FIELD_GET(MSMON___VALUE, now); + *m->val += now; +} + +static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) +{ + int err, any_err = 0; + struct mpam_vmsc *vmsc; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_msc *msc = vmsc->msc; + struct mpam_msc_ris *ris; + + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + arg->ris = ris; + + err = smp_call_function_any(&msc->accessibility, + __ris_msmon_read, arg, + true); + if (!err && arg->err) + err = arg->err; + + /* + * Save one error to be returned to the caller, but + * keep reading counters so that get reprogrammed. On + * platforms with NRDY this lets us wait once. + */ + if (err) + any_err = err; + } + } + + return any_err; +} + +int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, + enum mpam_device_features type, u64 *val) +{ + int err; + struct mon_read arg; + u64 wait_jiffies = 0; + struct mpam_props *cprops = &comp->class->props; + + might_sleep(); + + if (!mpam_is_enabled()) + return -EIO; + + if (!mpam_has_feature(type, cprops)) + return -EOPNOTSUPP; + + arg = (struct mon_read) { + .ctx = ctx, + .type = type, + .val = val, + }; + *val = 0; + + err = _msmon_read(comp, &arg); + if (err == -EBUSY && comp->class->nrdy_usec) + wait_jiffies = usecs_to_jiffies(comp->class->nrdy_usec); + + while (wait_jiffies) + wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies); + + if (err == -EBUSY) { + arg = (struct mon_read) { + .ctx = ctx, + .type = type, + .val = val, + }; + *val = 0; + + err = _msmon_read(comp, &arg); + } + + return err; +} + static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) { u32 num_words, msb; diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 8bbc67df6d975..12f0a5b7f39eb 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -186,6 +186,22 @@ struct mpam_props { #define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) #define mpam_clear_feature(_feat, x) clear_bit(_feat, (x)->features) +/* The values for MSMON_CFG_MBWU_FLT.RWBW */ +enum mon_filter_options { + COUNT_BOTH = 0, + COUNT_WRITE = 1, + COUNT_READ = 2, +}; + +struct mon_cfg { + u16 mon; + u8 pmg; + bool match_pmg; + bool csu_exclude_clean; + u32 partid; + enum mon_filter_options opts; +}; + struct mpam_class { /* mpam_components in this class */ struct list_head components; @@ -329,6 +345,9 @@ void mpam_disable(struct work_struct *work); int mpam_apply_config(struct mpam_component *comp, u16 partid, struct mpam_config *cfg); +int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, + enum mpam_device_features, u64 *val); + int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From de2d431ddaed012a7f17d7021e7e0704e7d636c1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:22:57 +0000 Subject: [PATCH 27/74] arm_mpam: Track bandwidth counter state for power management Bandwidth counters need to run continuously to correctly reflect the bandwidth. Save the counter state when the hardware is reset due to CPU hotplug. Add struct mbwu_state to track the bandwidth counter. Support for tracking overflow with the same structure will be added in a subsequent commit. Cc: Zeng Heng Reviewed-by: Gavin Shan Reviewed-by: Zeng Heng Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 41e8a14950e1732af51cfec8fa09f8ded02a5ca9) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 126 +++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 21 +++++- 2 files changed, 145 insertions(+), 2 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 4859c8b096c3e..c8ea37558f690 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -993,6 +993,7 @@ static void __ris_msmon_read(void *arg) struct mon_read *m = arg; struct mon_cfg *ctx = m->ctx; struct mpam_msc_ris *ris = m->ris; + struct msmon_mbwu_state *mbwu_state; struct mpam_props *rprops = &ris->props; struct mpam_msc *msc = m->ris->vmsc->msc; u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt; @@ -1023,11 +1024,21 @@ static void __ris_msmon_read(void *arg) now = mpam_read_monsel_reg(msc, CSU); if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops)) nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); break; case mpam_feat_msmon_mbwu: now = mpam_read_monsel_reg(msc, MBWU); if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); + + if (nrdy) + break; + + mbwu_state = &ris->mbwu_state[ctx->mon]; + + /* Include bandwidth consumed before the last hardware reset */ + now += mbwu_state->correction; break; default: m->err = -EINVAL; @@ -1039,7 +1050,6 @@ static void __ris_msmon_read(void *arg) return; } - now = FIELD_GET(MSMON___VALUE, now); *m->val += now; } @@ -1235,6 +1245,67 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mutex_unlock(&msc->part_sel_lock); } +/* Call with msc cfg_lock held */ +static int mpam_restore_mbwu_state(void *_ris) +{ + int i; + struct mon_read mwbu_arg; + struct mpam_msc_ris *ris = _ris; + + for (i = 0; i < ris->props.num_mbwu_mon; i++) { + if (ris->mbwu_state[i].enabled) { + mwbu_arg.ris = ris; + mwbu_arg.ctx = &ris->mbwu_state[i].cfg; + mwbu_arg.type = mpam_feat_msmon_mbwu; + + __ris_msmon_read(&mwbu_arg); + } + } + + return 0; +} + +/* Call with MSC cfg_lock held */ +static int mpam_save_mbwu_state(void *arg) +{ + int i; + u64 val; + struct mon_cfg *cfg; + u32 cur_flt, cur_ctl, mon_sel; + struct mpam_msc_ris *ris = arg; + struct msmon_mbwu_state *mbwu_state; + struct mpam_msc *msc = ris->vmsc->msc; + + for (i = 0; i < ris->props.num_mbwu_mon; i++) { + mbwu_state = &ris->mbwu_state[i]; + cfg = &mbwu_state->cfg; + + if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + return -EIO; + + mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) | + FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); + mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + + cur_flt = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); + cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0); + + val = mpam_read_monsel_reg(msc, MBWU); + mpam_write_monsel_reg(msc, MBWU, 0); + + cfg->mon = i; + cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt); + cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl); + cfg->partid = FIELD_GET(MSMON_CFG_x_FLT_PARTID, cur_flt); + mbwu_state->correction += val; + mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl); + mpam_mon_sel_unlock(msc); + } + + return 0; +} + static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) { *reset_cfg = (struct mpam_config) { @@ -1343,6 +1414,9 @@ static void mpam_reprogram_msc(struct mpam_msc *msc) mpam_touch_msc(msc, __write_config, &arg); } ris->in_reset_state = reset; + + if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) + mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris); } mutex_unlock(&msc->cfg_lock); } @@ -1436,6 +1510,9 @@ static int mpam_cpu_offline(unsigned int cpu) * lost while the CPUs are offline. */ ris->in_reset_state = false; + + if (mpam_is_enabled()) + mpam_touch_msc(msc, &mpam_save_mbwu_state, ris); } mutex_unlock(&msc->cfg_lock); } @@ -2109,7 +2186,22 @@ static void mpam_unregister_irqs(void) static void __destroy_component_cfg(struct mpam_component *comp) { + struct mpam_msc *msc; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + + lockdep_assert_held(&mpam_list_lock); + add_to_garbage(comp->cfg); + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + msc = vmsc->msc; + + if (mpam_mon_sel_lock(msc)) { + list_for_each_entry(ris, &vmsc->ris, vmsc_list) + add_to_garbage(ris->mbwu_state); + mpam_mon_sel_unlock(msc); + } + } } static void mpam_reset_component_cfg(struct mpam_component *comp) @@ -2135,6 +2227,8 @@ static void mpam_reset_component_cfg(struct mpam_component *comp) static int __allocate_component_cfg(struct mpam_component *comp) { + struct mpam_vmsc *vmsc; + mpam_assert_partid_sizes_fixed(); if (comp->cfg) @@ -2152,6 +2246,36 @@ static int __allocate_component_cfg(struct mpam_component *comp) mpam_reset_component_cfg(comp); + list_for_each_entry(vmsc, &comp->vmsc, comp_list) { + struct mpam_msc *msc; + struct mpam_msc_ris *ris; + struct msmon_mbwu_state *mbwu_state; + + if (!vmsc->props.num_mbwu_mon) + continue; + + msc = vmsc->msc; + list_for_each_entry(ris, &vmsc->ris, vmsc_list) { + if (!ris->props.num_mbwu_mon) + continue; + + mbwu_state = kcalloc(ris->props.num_mbwu_mon, + sizeof(*ris->mbwu_state), + GFP_KERNEL); + if (!mbwu_state) { + __destroy_component_cfg(comp); + return -ENOMEM; + } + + init_garbage(&mbwu_state[0].garbage); + + if (mpam_mon_sel_lock(msc)) { + ris->mbwu_state = mbwu_state; + mpam_mon_sel_unlock(msc); + } + } + } + return 0; } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 12f0a5b7f39eb..12ce80bc7ff7f 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -91,7 +91,10 @@ struct mpam_msc { */ struct mutex part_sel_lock; - /* cfg_lock protects the msc configuration. */ + /* + * cfg_lock protects the msc configuration and guards against mbwu_state + * save and restore racing. + */ struct mutex cfg_lock; /* @@ -202,6 +205,19 @@ struct mon_cfg { enum mon_filter_options opts; }; +/* Changes to msmon_mbwu_state are protected by the msc's mon_sel_lock. */ +struct msmon_mbwu_state { + bool enabled; + struct mon_cfg cfg; + + /* + * The value to add to the new reading to account for power management. + */ + u64 correction; + + struct mpam_garbage garbage; +}; + struct mpam_class { /* mpam_components in this class */ struct list_head components; @@ -295,6 +311,9 @@ struct mpam_msc_ris { /* parent: */ struct mpam_vmsc *vmsc; + /* msmon mbwu configuration is preserved over reset */ + struct msmon_mbwu_state *mbwu_state; + struct mpam_garbage garbage; }; From 8dc259f3fc0d1bcd4576248b6176db4cfdf35f08 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:22:58 +0000 Subject: [PATCH 28/74] arm_mpam: Consider overflow in bandwidth counter state Use the overflow status bit to track overflow on each bandwidth counter read and add the counter size to the correction when overflow is detected. This assumes that only a single overflow has occurred since the last read of the counter. Overflow interrupts, on hardware that supports them could be used to remove this limitation. Cc: Zeng Heng Reviewed-by: Gavin Shan Reviewed-by: Zeng Heng Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit b35363793291e36c91d4a5b62d7ae7079c70d826) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 24 ++++++++++++++++++++++-- drivers/resctrl/mpam_internal.h | 3 ++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index c8ea37558f690..ecb5ecad50f8b 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -985,11 +985,18 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, } } +static u64 mpam_msmon_overflow_val(enum mpam_device_features type) +{ + /* TODO: scaling, and long counters */ + return BIT_ULL(hweight_long(MSMON___VALUE)); +} + static void __ris_msmon_read(void *arg) { u64 now; bool nrdy = false; bool config_mismatch; + bool overflow; struct mon_read *m = arg; struct mon_cfg *ctx = m->ctx; struct mpam_msc_ris *ris = m->ris; @@ -1011,13 +1018,20 @@ static void __ris_msmon_read(void *arg) * This saves waiting for 'nrdy' on subsequent reads. */ read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); + overflow = cur_ctl & MSMON_CFG_x_CTL_OFLOW_STATUS; + clean_msmon_ctl_val(&cur_ctl); gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); config_mismatch = cur_flt != flt_val || cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); - if (config_mismatch) + if (config_mismatch) { write_msmon_ctl_flt_vals(m, ctl_val, flt_val); + overflow = false; + } else if (overflow) { + mpam_write_monsel_reg(msc, CFG_MBWU_CTL, + cur_ctl & ~MSMON_CFG_x_CTL_OFLOW_STATUS); + } switch (m->type) { case mpam_feat_msmon_csu: @@ -1037,7 +1051,13 @@ static void __ris_msmon_read(void *arg) mbwu_state = &ris->mbwu_state[ctx->mon]; - /* Include bandwidth consumed before the last hardware reset */ + if (overflow) + mbwu_state->correction += mpam_msmon_overflow_val(m->type); + + /* + * Include bandwidth consumed before the last hardware reset and + * a counter size increment for each overflow. + */ now += mbwu_state->correction; break; default: diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 12ce80bc7ff7f..218e2f48c7bf2 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -211,7 +211,8 @@ struct msmon_mbwu_state { struct mon_cfg cfg; /* - * The value to add to the new reading to account for power management. + * The value to add to the new reading to account for power management, + * and overflow. */ u64 correction; From ecf1711fe03c6008941e8bc61ab82f297a55147e Mon Sep 17 00:00:00 2001 From: Rohit Mathew Date: Wed, 19 Nov 2025 12:22:59 +0000 Subject: [PATCH 29/74] arm_mpam: Probe for long/lwd mbwu counters mpam v0.1 and versions above v1.0 support optional long counter for memory bandwidth monitoring. The MPAMF_MBWUMON_IDR register has fields indicating support for long counters. Probe these feature bits. The mpam_feat_msmon_mbwu feature is used to indicate that bandwidth monitors are supported, instead of muddling this with which size of bandwidth monitors, add an explicit 31 bit counter feature. Signed-off-by: Rohit Mathew [ morse: Added 31bit counter feature to simplify later logic ] Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit fdc29a141d6364645509cb20129cba1f84e4c10f) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 35 ++++++++++++++++++++++----------- drivers/resctrl/mpam_internal.h | 3 +++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index ecb5ecad50f8b..380386cceb741 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -783,25 +783,36 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) dev_err_once(dev, "Counters are not usable because not-ready timeout was not provided by firmware."); } if (FIELD_GET(MPAMF_MSMON_IDR_MSMON_MBWU, msmon_features)) { - bool hw_managed; + bool has_long, hw_managed; u32 mbwumon_idr = mpam_read_partsel_reg(msc, MBWUMON_IDR); props->num_mbwu_mon = FIELD_GET(MPAMF_MBWUMON_IDR_NUM_MON, mbwumon_idr); - if (props->num_mbwu_mon) + if (props->num_mbwu_mon) { mpam_set_feature(mpam_feat_msmon_mbwu, props); - if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr)) - mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); + if (FIELD_GET(MPAMF_MBWUMON_IDR_HAS_RWBW, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_rwbw, props); - /* Is NRDY hardware managed? */ - hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); - if (hw_managed) - mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); + has_long = FIELD_GET(MPAMF_MBWUMON_IDR_HAS_LONG, mbwumon_idr); + if (has_long) { + if (FIELD_GET(MPAMF_MBWUMON_IDR_LWD, mbwumon_idr)) + mpam_set_feature(mpam_feat_msmon_mbwu_63counter, props); + else + mpam_set_feature(mpam_feat_msmon_mbwu_44counter, props); + } else { + mpam_set_feature(mpam_feat_msmon_mbwu_31counter, props); + } - /* - * Don't warn about any missing firmware property for - * MBWU NRDY - it doesn't make any sense! - */ + /* Is NRDY hardware managed? */ + hw_managed = mpam_ris_hw_probe_hw_nrdy(ris, MBWU); + if (hw_managed) + mpam_set_feature(mpam_feat_msmon_mbwu_hw_nrdy, props); + + /* + * Don't warn about any missing firmware property for + * MBWU NRDY - it doesn't make any sense! + */ + } } } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 218e2f48c7bf2..693a315c47107 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -164,6 +164,9 @@ enum mpam_device_features { mpam_feat_msmon_csu_xcl, mpam_feat_msmon_csu_hw_nrdy, mpam_feat_msmon_mbwu, + mpam_feat_msmon_mbwu_31counter, + mpam_feat_msmon_mbwu_44counter, + mpam_feat_msmon_mbwu_63counter, mpam_feat_msmon_mbwu_capture, mpam_feat_msmon_mbwu_rwbw, mpam_feat_msmon_mbwu_hw_nrdy, From 1b3fd0fbe7ab4d23fe18c5684bedefe83ee0297f Mon Sep 17 00:00:00 2001 From: Rohit Mathew Date: Wed, 19 Nov 2025 12:23:00 +0000 Subject: [PATCH 30/74] arm_mpam: Use long MBWU counters if supported Now that the larger counter sizes are probed, make use of them. Callers of mpam_msmon_read() may not know (or care!) about the different counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the driver pick the counter to use. Only 32bit accesses to the MSC are required to be supported by the spec, but these registers are 64bits. The lower half may overflow into the higher half between two 32bit reads. To avoid this, use a helper that reads the top half multiple times to check for overflow. Signed-off-by: Rohit Mathew [morse: merged multiple patches from Rohit, added explicit counter selection ] Signed-off-by: James Morse Cc: Peter Newman Reviewed-by: Ben Horgan Reviewed-by: Jonathan Cameron Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Shaopeng Tan Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 9e5afb7c32830bcd123976a7729ef4e2dff0cd77) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 145 ++++++++++++++++++++++++++++----- 1 file changed, 126 insertions(+), 19 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 380386cceb741..0fb08222b91d0 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -905,6 +905,50 @@ struct mon_read { int err; }; +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris) +{ + return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) || + mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props)); +} + +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc) +{ + int retry = 3; + u32 mbwu_l_low; + u64 mbwu_l_high1, mbwu_l_high2; + + mpam_mon_sel_lock_held(msc); + + WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4); + do { + mbwu_l_high1 = mbwu_l_high2; + mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L); + mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4); + + retry--; + } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0); + + if (mbwu_l_high1 == mbwu_l_high2) + return (mbwu_l_high1 << 32) | mbwu_l_low; + + pr_warn("Failed to read a stable value\n"); + return MSMON___L_NRDY; +} + +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc) +{ + mpam_mon_sel_lock_held(msc); + + WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz); + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility)); + + __mpam_write_reg(msc, MSMON_MBWU_L, 0); + __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0); +} + static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, u32 *flt_val) { @@ -931,7 +975,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, *flt_val |= FIELD_PREP(MSMON_CFG_CSU_FLT_XCL, ctx->csu_exclude_clean); break; - case mpam_feat_msmon_mbwu: + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU; if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props)) @@ -953,7 +999,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL); *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT); break; - case mpam_feat_msmon_mbwu: + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT); break; @@ -966,6 +1014,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val, static inline void clean_msmon_ctl_val(u32 *cur_ctl) { *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS; + + if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU) + *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L; } static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, @@ -984,12 +1035,17 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, mpam_write_monsel_reg(msc, CSU, 0); mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); break; - case mpam_feat_msmon_mbwu: + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN); /* Counting monitors require NRDY to be reset by software */ - mpam_write_monsel_reg(msc, MBWU, 0); + if (m->type == mpam_feat_msmon_mbwu_31counter) + mpam_write_monsel_reg(msc, MBWU, 0); + else + mpam_msc_zero_mbwu_l(m->ris->vmsc->msc); break; default: pr_warn("Unexpected monitor type %d\n", m->type); @@ -998,8 +1054,17 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, static u64 mpam_msmon_overflow_val(enum mpam_device_features type) { - /* TODO: scaling, and long counters */ - return BIT_ULL(hweight_long(MSMON___VALUE)); + /* TODO: implement scaling counters */ + switch (type) { + case mpam_feat_msmon_mbwu_63counter: + return BIT_ULL(hweight_long(MSMON___LWD_VALUE)); + case mpam_feat_msmon_mbwu_44counter: + return BIT_ULL(hweight_long(MSMON___L_VALUE)); + case mpam_feat_msmon_mbwu_31counter: + return BIT_ULL(hweight_long(MSMON___VALUE)); + default: + return 0; + } } static void __ris_msmon_read(void *arg) @@ -1029,7 +1094,12 @@ static void __ris_msmon_read(void *arg) * This saves waiting for 'nrdy' on subsequent reads. */ read_msmon_ctl_flt_vals(m, &cur_ctl, &cur_flt); - overflow = cur_ctl & MSMON_CFG_x_CTL_OFLOW_STATUS; + + if (mpam_feat_msmon_mbwu_31counter == m->type) + overflow = cur_ctl & MSMON_CFG_x_CTL_OFLOW_STATUS; + else if (mpam_feat_msmon_mbwu_44counter == m->type || + mpam_feat_msmon_mbwu_63counter == m->type) + overflow = cur_ctl & MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L; clean_msmon_ctl_val(&cur_ctl); gen_msmon_ctl_flt_vals(m, &ctl_val, &flt_val); @@ -1041,7 +1111,9 @@ static void __ris_msmon_read(void *arg) overflow = false; } else if (overflow) { mpam_write_monsel_reg(msc, CFG_MBWU_CTL, - cur_ctl & ~MSMON_CFG_x_CTL_OFLOW_STATUS); + cur_ctl & + ~(MSMON_CFG_x_CTL_OFLOW_STATUS | + MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L)); } switch (m->type) { @@ -1051,11 +1123,24 @@ static void __ris_msmon_read(void *arg) nrdy = now & MSMON___NRDY; now = FIELD_GET(MSMON___VALUE, now); break; - case mpam_feat_msmon_mbwu: - now = mpam_read_monsel_reg(msc, MBWU); - if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) - nrdy = now & MSMON___NRDY; - now = FIELD_GET(MSMON___VALUE, now); + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + if (m->type != mpam_feat_msmon_mbwu_31counter) { + now = mpam_msc_read_mbwu_l(msc); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___L_NRDY; + + if (m->type == mpam_feat_msmon_mbwu_63counter) + now = FIELD_GET(MSMON___LWD_VALUE, now); + else + now = FIELD_GET(MSMON___L_VALUE, now); + } else { + now = mpam_read_monsel_reg(msc, MBWU); + if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops)) + nrdy = now & MSMON___NRDY; + now = FIELD_GET(MSMON___VALUE, now); + } if (nrdy) break; @@ -1118,13 +1203,26 @@ static int _msmon_read(struct mpam_component *comp, struct mon_read *arg) return any_err; } +static enum mpam_device_features mpam_msmon_choose_counter(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops)) + return mpam_feat_msmon_mbwu_63counter; + if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, cprops)) + return mpam_feat_msmon_mbwu_44counter; + + return mpam_feat_msmon_mbwu_31counter; +} + int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features type, u64 *val) { int err; struct mon_read arg; u64 wait_jiffies = 0; - struct mpam_props *cprops = &comp->class->props; + struct mpam_class *class = comp->class; + struct mpam_props *cprops = &class->props; might_sleep(); @@ -1134,6 +1232,9 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, if (!mpam_has_feature(type, cprops)) return -EOPNOTSUPP; + if (type == mpam_feat_msmon_mbwu) + type = mpam_msmon_choose_counter(class); + arg = (struct mon_read) { .ctx = ctx, .type = type, @@ -1142,8 +1243,8 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, *val = 0; err = _msmon_read(comp, &arg); - if (err == -EBUSY && comp->class->nrdy_usec) - wait_jiffies = usecs_to_jiffies(comp->class->nrdy_usec); + if (err == -EBUSY && class->nrdy_usec) + wait_jiffies = usecs_to_jiffies(class->nrdy_usec); while (wait_jiffies) wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies); @@ -1282,12 +1383,13 @@ static int mpam_restore_mbwu_state(void *_ris) int i; struct mon_read mwbu_arg; struct mpam_msc_ris *ris = _ris; + struct mpam_class *class = ris->vmsc->comp->class; for (i = 0; i < ris->props.num_mbwu_mon; i++) { if (ris->mbwu_state[i].enabled) { mwbu_arg.ris = ris; mwbu_arg.ctx = &ris->mbwu_state[i].cfg; - mwbu_arg.type = mpam_feat_msmon_mbwu; + mwbu_arg.type = mpam_msmon_choose_counter(class); __ris_msmon_read(&mwbu_arg); } @@ -1322,8 +1424,13 @@ static int mpam_save_mbwu_state(void *arg) cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL); mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0); - val = mpam_read_monsel_reg(msc, MBWU); - mpam_write_monsel_reg(msc, MBWU, 0); + if (mpam_ris_has_mbwu_long_counter(ris)) { + val = mpam_msc_read_mbwu_l(msc); + mpam_msc_zero_mbwu_l(msc); + } else { + val = mpam_read_monsel_reg(msc, MBWU); + mpam_write_monsel_reg(msc, MBWU, 0); + } cfg->mon = i; cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt); From 1450b364576729fb8ad7a65606e4353d53b450bb Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:23:01 +0000 Subject: [PATCH 31/74] arm_mpam: Add helper to reset saved mbwu state resctrl expects to reset the bandwidth counters when the filesystem is mounted. To allow this, add a helper that clears the saved mbwu state. Instead of cross calling to each CPU that can access the component MSC to write to the counter, set a flag that causes it to be zero'd on the the next read. This is easily done by forcing a configuration update. Signed-off-by: James Morse Cc: Peter Newman Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 201d96ca4c867695880450930258cd5c97f099d4) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 48 ++++++++++++++++++++++++++++++++- drivers/resctrl/mpam_internal.h | 2 ++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 0fb08222b91d0..b4aa817994295 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -1075,6 +1075,7 @@ static void __ris_msmon_read(void *arg) bool overflow; struct mon_read *m = arg; struct mon_cfg *ctx = m->ctx; + bool reset_on_next_read = false; struct mpam_msc_ris *ris = m->ris; struct msmon_mbwu_state *mbwu_state; struct mpam_props *rprops = &ris->props; @@ -1089,6 +1090,20 @@ static void __ris_msmon_read(void *arg) FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx); mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel); + switch (m->type) { + case mpam_feat_msmon_mbwu_31counter: + case mpam_feat_msmon_mbwu_44counter: + case mpam_feat_msmon_mbwu_63counter: + mbwu_state = &ris->mbwu_state[ctx->mon]; + if (mbwu_state) { + reset_on_next_read = mbwu_state->reset_on_next_read; + mbwu_state->reset_on_next_read = false; + } + break; + default: + break; + } + /* * Read the existing configuration to avoid re-writing the same values. * This saves waiting for 'nrdy' on subsequent reads. @@ -1106,7 +1121,7 @@ static void __ris_msmon_read(void *arg) config_mismatch = cur_flt != flt_val || cur_ctl != (ctl_val | MSMON_CFG_x_CTL_EN); - if (config_mismatch) { + if (config_mismatch || reset_on_next_read) { write_msmon_ctl_flt_vals(m, ctl_val, flt_val); overflow = false; } else if (overflow) { @@ -1263,6 +1278,37 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, return err; } +void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) +{ + struct mpam_msc *msc; + struct mpam_vmsc *vmsc; + struct mpam_msc_ris *ris; + + if (!mpam_is_enabled()) + return; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(vmsc, &comp->vmsc, comp_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &vmsc->props)) + continue; + + msc = vmsc->msc; + list_for_each_entry_srcu(ris, &vmsc->ris, vmsc_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) + continue; + + if (WARN_ON_ONCE(!mpam_mon_sel_lock(msc))) + continue; + + ris->mbwu_state[ctx->mon].correction = 0; + ris->mbwu_state[ctx->mon].reset_on_next_read = true; + mpam_mon_sel_unlock(msc); + } + } +} + static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) { u32 num_words, msb; diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 693a315c47107..18d53c07b3d7a 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -211,6 +211,7 @@ struct mon_cfg { /* Changes to msmon_mbwu_state are protected by the msc's mon_sel_lock. */ struct msmon_mbwu_state { bool enabled; + bool reset_on_next_read; struct mon_cfg cfg; /* @@ -370,6 +371,7 @@ int mpam_apply_config(struct mpam_component *comp, u16 partid, int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features, u64 *val); +void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); From 412623027b71da4cdb3f10ba6b17eb28328035f9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:23:02 +0000 Subject: [PATCH 32/74] arm_mpam: Add kunit test for bitmap reset The bitmap reset code has been a source of bugs. Add a unit test. This currently has to be built in, as the rest of the driver is builtin. Suggested-by: Jonathan Cameron Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit e3565d1fd4dcf2c7ee6912094066e47c7500eaf2) Signed-off-by: Lee Trager --- drivers/resctrl/Kconfig | 9 ++++ drivers/resctrl/mpam_devices.c | 4 ++ drivers/resctrl/test_mpam_devices.c | 69 +++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 drivers/resctrl/test_mpam_devices.c diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig index 5f7f748e611e1..c808e04703946 100644 --- a/drivers/resctrl/Kconfig +++ b/drivers/resctrl/Kconfig @@ -12,4 +12,13 @@ config ARM64_MPAM_DRIVER_DEBUG help Say yes here to enable debug messages from the MPAM driver. +config MPAM_KUNIT_TEST + bool "KUnit tests for MPAM driver " if !KUNIT_ALL_TESTS + depends on KUNIT=y + default KUNIT_ALL_TESTS + help + Enable this option to run tests in the MPAM driver. + + If unsure, say N. + endif diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index b4aa817994295..0b5b158e1aafb 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -2717,3 +2717,7 @@ static int __init mpam_msc_driver_init(void) /* Must occur after arm64_mpam_register_cpus() from arch_initcall() */ subsys_initcall(mpam_msc_driver_init); + +#ifdef CONFIG_MPAM_KUNIT_TEST +#include "test_mpam_devices.c" +#endif diff --git a/drivers/resctrl/test_mpam_devices.c b/drivers/resctrl/test_mpam_devices.c new file mode 100644 index 0000000000000..0cfb41b665c4c --- /dev/null +++ b/drivers/resctrl/test_mpam_devices.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. +/* This file is intended to be included into mpam_devices.c */ + +#include + +static void test_mpam_reset_msc_bitmap(struct kunit *test) +{ + char __iomem *buf = kunit_kzalloc(test, SZ_16K, GFP_KERNEL); + struct mpam_msc fake_msc = {}; + u32 *test_result; + + if (!buf) + return; + + fake_msc.mapped_hwpage = buf; + fake_msc.mapped_hwpage_sz = SZ_16K; + cpumask_copy(&fake_msc.accessibility, cpu_possible_mask); + + /* Satisfy lockdep checks */ + mutex_init(&fake_msc.part_sel_lock); + mutex_lock(&fake_msc.part_sel_lock); + + test_result = (u32 *)(buf + MPAMCFG_CPBM); + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 0); + KUNIT_EXPECT_EQ(test, test_result[0], 0); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 1); + KUNIT_EXPECT_EQ(test, test_result[0], 1); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 16); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffff); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 32); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); + KUNIT_EXPECT_EQ(test, test_result[1], 0); + test_result[0] = 0; + test_result[1] = 0; + + mpam_reset_msc_bitmap(&fake_msc, MPAMCFG_CPBM, 33); + KUNIT_EXPECT_EQ(test, test_result[0], 0xffffffff); + KUNIT_EXPECT_EQ(test, test_result[1], 1); + test_result[0] = 0; + test_result[1] = 0; + + mutex_unlock(&fake_msc.part_sel_lock); +} + +static struct kunit_case mpam_devices_test_cases[] = { + KUNIT_CASE(test_mpam_reset_msc_bitmap), + {} +}; + +static struct kunit_suite mpam_devices_test_suite = { + .name = "mpam_devices_test_suite", + .test_cases = mpam_devices_test_cases, +}; + +kunit_test_suites(&mpam_devices_test_suite); From d203c9051e446b98c24b10c4e02960f5b6adaa99 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 19 Nov 2025 12:23:03 +0000 Subject: [PATCH 33/74] arm_mpam: Add kunit tests for props_mismatch() When features are mismatched between MSC the way features are combined to the class determines whether resctrl can support this SoC. Add some tests to illustrate the sort of thing that is expected to work, and those that must be removed. Signed-off-by: James Morse Reviewed-by: Ben Horgan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Reviewed-by: Shaopeng Tan Tested-by: Fenghua Yu Tested-by: Carl Worth Tested-by: Gavin Shan Tested-by: Zeng Heng Tested-by: Shaopeng Tan Tested-by: Hanjun Guo Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit 2557e0eafec1547aa9e0e768d2376e66252dada4) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_internal.h | 14 +- drivers/resctrl/test_mpam_devices.c | 320 ++++++++++++++++++++++++++++ 2 files changed, 333 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 18d53c07b3d7a..e79c3c47259c9 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -23,6 +23,12 @@ struct platform_device; DECLARE_STATIC_KEY_FALSE(mpam_enabled); +#ifdef CONFIG_MPAM_KUNIT_TEST +#define PACKED_FOR_KUNIT __packed +#else +#define PACKED_FOR_KUNIT +#endif + static inline bool mpam_is_enabled(void) { return static_branch_likely(&mpam_enabled); @@ -186,7 +192,13 @@ struct mpam_props { u16 dspri_wd; u16 num_csu_mon; u16 num_mbwu_mon; -}; + +/* + * Kunit tests use memset() to set up feature combinations that should be + * removed, and will false-positive if the compiler introduces padding that + * isn't cleared during sanitisation. + */ +} PACKED_FOR_KUNIT; #define mpam_has_feature(_feat, x) test_bit(_feat, (x)->features) #define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) diff --git a/drivers/resctrl/test_mpam_devices.c b/drivers/resctrl/test_mpam_devices.c index 0cfb41b665c4c..3e8d564a0c647 100644 --- a/drivers/resctrl/test_mpam_devices.c +++ b/drivers/resctrl/test_mpam_devices.c @@ -4,6 +4,324 @@ #include +/* + * This test catches fields that aren't being sanitised - but can't tell you + * which one... + */ +static void test__props_mismatch(struct kunit *test) +{ + struct mpam_props parent = { 0 }; + struct mpam_props child; + + memset(&child, 0xff, sizeof(child)); + __props_mismatch(&parent, &child, false); + + memset(&child, 0, sizeof(child)); + KUNIT_EXPECT_EQ(test, memcmp(&parent, &child, sizeof(child)), 0); + + memset(&child, 0xff, sizeof(child)); + __props_mismatch(&parent, &child, true); + + KUNIT_EXPECT_EQ(test, memcmp(&parent, &child, sizeof(child)), 0); +} + +static struct list_head fake_classes_list; +static struct mpam_class fake_class = { 0 }; +static struct mpam_component fake_comp1 = { 0 }; +static struct mpam_component fake_comp2 = { 0 }; +static struct mpam_vmsc fake_vmsc1 = { 0 }; +static struct mpam_vmsc fake_vmsc2 = { 0 }; +static struct mpam_msc fake_msc1 = { 0 }; +static struct mpam_msc fake_msc2 = { 0 }; +static struct mpam_msc_ris fake_ris1 = { 0 }; +static struct mpam_msc_ris fake_ris2 = { 0 }; +static struct platform_device fake_pdev = { 0 }; + +static inline void reset_fake_hierarchy(void) +{ + INIT_LIST_HEAD(&fake_classes_list); + + memset(&fake_class, 0, sizeof(fake_class)); + fake_class.level = 3; + fake_class.type = MPAM_CLASS_CACHE; + INIT_LIST_HEAD_RCU(&fake_class.components); + INIT_LIST_HEAD(&fake_class.classes_list); + + memset(&fake_comp1, 0, sizeof(fake_comp1)); + memset(&fake_comp2, 0, sizeof(fake_comp2)); + fake_comp1.comp_id = 1; + fake_comp2.comp_id = 2; + INIT_LIST_HEAD(&fake_comp1.vmsc); + INIT_LIST_HEAD(&fake_comp1.class_list); + INIT_LIST_HEAD(&fake_comp2.vmsc); + INIT_LIST_HEAD(&fake_comp2.class_list); + + memset(&fake_vmsc1, 0, sizeof(fake_vmsc1)); + memset(&fake_vmsc2, 0, sizeof(fake_vmsc2)); + INIT_LIST_HEAD(&fake_vmsc1.ris); + INIT_LIST_HEAD(&fake_vmsc1.comp_list); + fake_vmsc1.msc = &fake_msc1; + INIT_LIST_HEAD(&fake_vmsc2.ris); + INIT_LIST_HEAD(&fake_vmsc2.comp_list); + fake_vmsc2.msc = &fake_msc2; + + memset(&fake_ris1, 0, sizeof(fake_ris1)); + memset(&fake_ris2, 0, sizeof(fake_ris2)); + fake_ris1.ris_idx = 1; + INIT_LIST_HEAD(&fake_ris1.msc_list); + fake_ris2.ris_idx = 2; + INIT_LIST_HEAD(&fake_ris2.msc_list); + + fake_msc1.pdev = &fake_pdev; + fake_msc2.pdev = &fake_pdev; + + list_add(&fake_class.classes_list, &fake_classes_list); +} + +static void test_mpam_enable_merge_features(struct kunit *test) +{ + reset_fake_hierarchy(); + + mutex_lock(&mpam_list_lock); + + /* One Class+Comp, two RIS in one vMSC with common features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = NULL; + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc1; + list_add(&fake_ris2.vmsc_list, &fake_vmsc1.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two RIS in one vMSC with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = NULL; + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc1; + list_add(&fake_ris2.vmsc_list, &fake_vmsc1.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* Multiple RIS within one MSC controlling the same resource can be mismatched */ + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_vmsc1.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + KUNIT_EXPECT_EQ(test, fake_vmsc1.props.cmax_wd, 4); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple RIS in different MSC can't control the same resource, + * mismatched features can not be supported. + */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 0); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with incompatible overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + mpam_set_feature(mpam_feat_mbw_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_mbw_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 5; + fake_ris2.props.cpbm_wd = 3; + fake_ris1.props.mbw_pbm_bits = 5; + fake_ris2.props.mbw_pbm_bits = 3; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple RIS in different MSC can't control the same resource, + * mismatched features can not be supported. + */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_mbw_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.mbw_pbm_bits, 0); + + reset_fake_hierarchy(); + + /* One Class+Comp, two MSC with overlapping features that need tweaking */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = NULL; + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp1; + list_add(&fake_vmsc2.comp_list, &fake_comp1.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_mbw_min, &fake_ris1.props); + mpam_set_feature(mpam_feat_mbw_min, &fake_ris2.props); + mpam_set_feature(mpam_feat_cmax_cmax, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmax, &fake_ris2.props); + fake_ris1.props.bwa_wd = 5; + fake_ris2.props.bwa_wd = 3; + fake_ris1.props.cmax_wd = 5; + fake_ris2.props.cmax_wd = 3; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * RIS with different control properties need to be sanitised so the + * class has the common set of properties. + */ + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_mbw_min, &fake_class.props)); + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cmax_cmax, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.bwa_wd, 3); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 3); + + reset_fake_hierarchy(); + + /* One Class Two Comp with overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = &fake_class; + list_add(&fake_comp2.class_list, &fake_class.components); + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp2; + list_add(&fake_vmsc2.comp_list, &fake_comp2.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cpor_part, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cpbm_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + KUNIT_EXPECT_TRUE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 4); + + reset_fake_hierarchy(); + + /* One Class Two Comp with non-overlapping features */ + fake_comp1.class = &fake_class; + list_add(&fake_comp1.class_list, &fake_class.components); + fake_comp2.class = &fake_class; + list_add(&fake_comp2.class_list, &fake_class.components); + fake_vmsc1.comp = &fake_comp1; + list_add(&fake_vmsc1.comp_list, &fake_comp1.vmsc); + fake_vmsc2.comp = &fake_comp2; + list_add(&fake_vmsc2.comp_list, &fake_comp2.vmsc); + fake_ris1.vmsc = &fake_vmsc1; + list_add(&fake_ris1.vmsc_list, &fake_vmsc1.ris); + fake_ris2.vmsc = &fake_vmsc2; + list_add(&fake_ris2.vmsc_list, &fake_vmsc2.ris); + + mpam_set_feature(mpam_feat_cpor_part, &fake_ris1.props); + mpam_set_feature(mpam_feat_cmax_cmin, &fake_ris2.props); + fake_ris1.props.cpbm_wd = 4; + fake_ris2.props.cmax_wd = 4; + + mpam_enable_merge_features(&fake_classes_list); + + /* + * Multiple components can't control the same resource, mismatched features can + * not be supported. + */ + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cpor_part, &fake_class.props)); + KUNIT_EXPECT_FALSE(test, mpam_has_feature(mpam_feat_cmax_cmin, &fake_class.props)); + KUNIT_EXPECT_EQ(test, fake_class.props.cpbm_wd, 0); + KUNIT_EXPECT_EQ(test, fake_class.props.cmax_wd, 0); + + mutex_unlock(&mpam_list_lock); +} + static void test_mpam_reset_msc_bitmap(struct kunit *test) { char __iomem *buf = kunit_kzalloc(test, SZ_16K, GFP_KERNEL); @@ -58,6 +376,8 @@ static void test_mpam_reset_msc_bitmap(struct kunit *test) static struct kunit_case mpam_devices_test_cases[] = { KUNIT_CASE(test_mpam_reset_msc_bitmap), + KUNIT_CASE(test_mpam_enable_merge_features), + KUNIT_CASE(test__props_mismatch), {} }; From 980b738a9f76bae709bf385bc31638ab815648e1 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Wed, 19 Nov 2025 12:23:04 +0000 Subject: [PATCH 34/74] MAINTAINERS: new entry for MPAM Driver Create a maintainer entry for the new MPAM Driver. Add myself and James Morse as maintainers. James created the driver and I have taken up the later versions of his series. Cc: James Morse Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Acked-by: Catalin Marinas Signed-off-by: Ben Horgan Signed-off-by: Catalin Marinas (cherry picked from commit ce1e1421f8d8cdb5e05e13dbb516caedd67e5ee8) Signed-off-by: Lee Trager --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 554e881b05bea..3a9816a5e0cbd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17468,6 +17468,16 @@ S: Maintained F: Documentation/devicetree/bindings/leds/backlight/mps,mp3309c.yaml F: drivers/video/backlight/mp3309c.c +MPAM DRIVER +M: James Morse +M: Ben Horgan +R: Reinette Chatre +R: Fenghua Yu +S: Maintained +F: drivers/resctrl/mpam_* +F: drivers/resctrl/test_mpam_* +F: include/linux/arm_mpam.h + MPS MP2869 DRIVER M: Wensheng Wang L: linux-hwmon@vger.kernel.org From 4d3eedfa8fb92ac73d218a47e39c8b3d7b70df7b Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Fri, 13 Mar 2026 14:45:38 +0000 Subject: [PATCH 35/74] arm_mpam: Ensure in_reset_state is false after applying configuration The per-RIS flag, in_reset_state, indicates whether or not the MSC registers are in reset state, and allows avoiding resetting when they are already in reset state. However, when mpam_apply_config() updates the configuration it doesn't update the in_reset_state flag and so even after the configuration update in_reset_state can be true and mpam_reset_ris() will skip the actual register restoration on subsequent resets. Once resctrl has a MPAM backend it will use resctrl_arch_reset_all_ctrls() to reset the MSC configuration on unmount and, if the in_reset_state flag is bogusly true, fail to reset the MSC configuration. The resulting non-reset MSC configuration can lead to persistent performance restrictions even after resctrl is unmounted. Fix by clearing in_reset_state to false immediately after successful configuration application, ensuring that the next reset operation properly restores MSC register defaults. Fixes: 09b89d2a72f3 ("arm_mpam: Allow configuration to be applied and restored during cpu online") Signed-off-by: Zeng Heng Acked-by: Ben Horgan [Horgan: rewrite commit message to not be specific to resctrl unmount] Signed-off-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: James Morse Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick Signed-off-by: James Morse (cherry picked from commit f91e913355f49c878fc77f995fd71b7800352bd2) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 0b5b158e1aafb..11f633bd4adac 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -2692,6 +2692,7 @@ int mpam_apply_config(struct mpam_component *comp, u16 partid, srcu_read_lock_held(&mpam_srcu)) { arg.ris = ris; mpam_touch_msc(msc, __write_config, &arg); + ris->in_reset_state = false; } mutex_unlock(&msc->cfg_lock); } From 8da160cc91473ed6af48cdc15da86a6add25c08d Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:45:39 +0000 Subject: [PATCH 36/74] arm_mpam: Reset when feature configuration bit unset To indicate that the configuration, of the controls used by resctrl, in a RIS need resetting to driver defaults the reset flags in mpam_config are set. However, these flags are only ever set temporarily at RIS scope in mpam_reset_ris() and hence mpam_cpu_online() will never reset these controls to default. As the hardware reset is unknown this leads to unknown configuration when the control values haven't been configured away from the defaults. Use the policy that an unset feature configuration bit means reset. In this way the mpam_config in the component can encode that it should be in reset state and mpam_reprogram_msc() will reset controls as needed. Fixes: 09b89d2a72f3 ("arm_mpam: Allow configuration to be applied and restored during cpu online") Signed-off-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: James Morse Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick [ morse: Removed unused reset flags from config structure ] Signed-off-by: James Morse (cherry picked from commit a1cb6577f575ba5ec2583caf4f791a86754dbf69) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 40 ++++++++++----------------------- drivers/resctrl/mpam_internal.h | 4 ---- 2 files changed, 12 insertions(+), 32 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 11f633bd4adac..2b529152d2b5d 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -1363,17 +1363,15 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, __mpam_intpart_sel(ris->ris_idx, partid, msc); } - if (mpam_has_feature(mpam_feat_cpor_part, rprops) && - mpam_has_feature(mpam_feat_cpor_part, cfg)) { - if (cfg->reset_cpbm) - mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); - else + if (mpam_has_feature(mpam_feat_cpor_part, rprops)) { + if (mpam_has_feature(mpam_feat_cpor_part, cfg)) mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); + else + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd); } - if (mpam_has_feature(mpam_feat_mbw_part, rprops) && - mpam_has_feature(mpam_feat_mbw_part, cfg)) { - if (cfg->reset_mbw_pbm) + if (mpam_has_feature(mpam_feat_mbw_part, rprops)) { + if (mpam_has_feature(mpam_feat_mbw_part, cfg)) mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits); else mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); @@ -1383,16 +1381,14 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mpam_has_feature(mpam_feat_mbw_min, cfg)) mpam_write_partsel_reg(msc, MBW_MIN, 0); - if (mpam_has_feature(mpam_feat_mbw_max, rprops) && - mpam_has_feature(mpam_feat_mbw_max, cfg)) { - if (cfg->reset_mbw_max) - mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); - else + if (mpam_has_feature(mpam_feat_mbw_max, rprops)) { + if (mpam_has_feature(mpam_feat_mbw_max, cfg)) mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max); + else + mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX); } - if (mpam_has_feature(mpam_feat_mbw_prop, rprops) && - mpam_has_feature(mpam_feat_mbw_prop, cfg)) + if (mpam_has_feature(mpam_feat_mbw_prop, rprops)) mpam_write_partsel_reg(msc, MBW_PROP, 0); if (mpam_has_feature(mpam_feat_cmax_cmax, rprops)) @@ -1490,16 +1486,6 @@ static int mpam_save_mbwu_state(void *arg) return 0; } -static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) -{ - *reset_cfg = (struct mpam_config) { - .reset_cpbm = true, - .reset_mbw_pbm = true, - .reset_mbw_max = true, - }; - bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST); -} - /* * Called via smp_call_on_cpu() to prevent migration, while still being * pre-emptible. Caller must hold mpam_srcu. @@ -1507,14 +1493,12 @@ static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) static int mpam_reset_ris(void *arg) { u16 partid, partid_max; - struct mpam_config reset_cfg; + struct mpam_config reset_cfg = {}; struct mpam_msc_ris *ris = arg; if (ris->in_reset_state) return 0; - mpam_init_reset_cfg(&reset_cfg); - spin_lock(&partid_max_lock); partid_max = mpam_partid_max; spin_unlock(&partid_max_lock); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index e79c3c47259c9..380dbbe0ddd68 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -263,10 +263,6 @@ struct mpam_config { u32 mbw_pbm; u16 mbw_max; - bool reset_cpbm; - bool reset_mbw_pbm; - bool reset_mbw_max; - struct mpam_garbage garbage; }; From 2fae201b6ac999d01867027e590f27099844215b Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:45:40 +0000 Subject: [PATCH 37/74] arm64/sysreg: Add MPAMSM_EL1 register The MPAMSM_EL1 register determines the MPAM configuration for an SMCU. Add the register definition. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Acked-by: Catalin Marinas Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 29fa1be82b83f87e603ed4c21fe86c6e05fd0282) Signed-off-by: Lee Trager --- arch/arm64/tools/sysreg | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 1c6cdf9d54bba..43c2759eb640b 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -5031,6 +5031,14 @@ Field 31:16 PARTID_D Field 15:0 PARTID_I EndSysreg +Sysreg MPAMSM_EL1 3 0 10 5 3 +Res0 63:48 +Field 47:40 PMG_D +Res0 39:32 +Field 31:16 PARTID_D +Res0 15:0 +EndSysreg + Sysreg ISR_EL1 3 0 12 1 0 Res0 63:11 Field 10 IS From b496a66dddd5c2718ffb6b53f019ec6ea454b30f Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:45:41 +0000 Subject: [PATCH 38/74] KVM: arm64: Preserve host MPAM configuration when changing traps When KVM enables or disables MPAM traps to EL2 it clears all other bits in MPAM2_EL2. Notably, it clears the partition ids (PARTIDs) and performance monitoring groups (PMGs). Avoid changing these bits in anticipation of adding support for MPAM in the kernel. Otherwise, on a VHE system with the host running at EL2 where MPAM2_EL2 and MPAM1_EL1 access the same register, any attempt to use MPAM to monitor or partition resources for kernel space would be foiled by running a KVM guest. Additionally, MPAM2_EL2.EnMPAMSM is always set to 0 which causes MPAMSM_EL1 to always trap. Keep EnMPAMSM set to 1 when not in a guest so that the kernel can use MPAMSM_EL1. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Acked-by: Marc Zyngier Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit eda1cd1f9d29b382a07d757cf8b29f9ee636355f) Signed-off-by: Lee Trager --- arch/arm64/kvm/hyp/include/hyp/switch.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index c5d5e5b86eaf0..63195275a8b82 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -269,7 +269,8 @@ static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu) { - u64 r = MPAM2_EL2_TRAPMPAM0EL1 | MPAM2_EL2_TRAPMPAM1EL1; + u64 clr = MPAM2_EL2_EnMPAMSM; + u64 set = MPAM2_EL2_TRAPMPAM0EL1 | MPAM2_EL2_TRAPMPAM1EL1; if (!system_supports_mpam()) return; @@ -279,18 +280,21 @@ static inline void __activate_traps_mpam(struct kvm_vcpu *vcpu) write_sysreg_s(MPAMHCR_EL2_TRAP_MPAMIDR_EL1, SYS_MPAMHCR_EL2); } else { /* From v1.1 TIDR can trap MPAMIDR, set it unconditionally */ - r |= MPAM2_EL2_TIDR; + set |= MPAM2_EL2_TIDR; } - write_sysreg_s(r, SYS_MPAM2_EL2); + sysreg_clear_set_s(SYS_MPAM2_EL2, clr, set); } static inline void __deactivate_traps_mpam(void) { + u64 clr = MPAM2_EL2_TRAPMPAM0EL1 | MPAM2_EL2_TRAPMPAM1EL1 | MPAM2_EL2_TIDR; + u64 set = MPAM2_EL2_EnMPAMSM; + if (!system_supports_mpam()) return; - write_sysreg_s(0, SYS_MPAM2_EL2); + sysreg_clear_set_s(SYS_MPAM2_EL2, clr, set); if (system_supports_mpam_hcr()) write_sysreg_s(MPAMHCR_HOST_FLAGS, SYS_MPAMHCR_EL2); From c69df8400557bfc0cee9fbccd33698e5beb2498b Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:45:42 +0000 Subject: [PATCH 39/74] KVM: arm64: Make MPAMSM_EL1 accesses UNDEF The MPAMSM_EL1 register controls the MPAM labeling for an SMCU, Streaming Mode Compute Unit. As there is no MPAM support in KVM, make sure MPAMSM_EL1 accesses trigger an UNDEF. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Acked-by: Marc Zyngier Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 2e7c684bdb50cfaf98da80ebaab4a961fdcd1aa2) Signed-off-by: Lee Trager --- arch/arm64/kvm/sys_regs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8d7309646ea64..9b63356eb12aa 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -3367,6 +3367,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_MPAM1_EL1), undef_access }, { SYS_DESC(SYS_MPAM0_EL1), undef_access }, + { SYS_DESC(SYS_MPAMSM_EL1), undef_access }, + { SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 }, { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, From 777a2f469ae35e5ea07ba5e6fef70608ef2e6b5c Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:43 +0000 Subject: [PATCH 40/74] arm64: mpam: Context switch the MPAM registers MPAM allows traffic in the SoC to be labeled by the OS, these labels are used to apply policy in caches and bandwidth regulators, and to monitor traffic in the SoC. The label is made up of a PARTID and PMG value. The x86 equivalent calls these CLOSID and RMID, but they don't map precisely. MPAM has two CPU system registers that is used to hold the PARTID and PMG values that traffic generated at each exception level will use. These can be set per-task by the resctrl file system. (resctrl is the defacto interface for controlling this stuff). Add a helper to switch this. struct task_struct's separate CLOSID and RMID fields are insufficient to implement resctrl using MPAM, as resctrl can change the PARTID (CLOSID) and PMG (sort of like the RMID) separately. On x86, the rmid is an independent number, so a race that writes a mismatched closid and rmid into hardware is benign. On arm64, the pmg bits extend the partid. (i.e. partid-5 has a pmg-0 that is not the same as partid-6's pmg-0). In this case, mismatching the values will 'dirty' a pmg value that resctrl believes is clean, and is not tracking with its 'limbo' code. To avoid this, the partid and pmg are always read and written as a pair. This requires a new u64 field. In struct task_struct there are two u32, rmid and closid for the x86 case, but as we can't use them here do something else. Add this new field, mpam_partid_pmg, to struct thread_info to avoid adding more architecture specific code to struct task_struct. Always use READ_ONCE()/WRITE_ONCE() when accessing this field. Resctrl allows a per-cpu 'default' value to be set, this overrides the values when scheduling a task in the default control-group, which has PARTID 0. The way 'code data prioritisation' gets emulated means the register value for the default group needs to be a variable. The current system register value is kept in a per-cpu variable to avoid writing to the system register if the value isn't going to change. Writes to this register may reset the hardware state for regulating bandwidth. Finally, there is no reason to context switch these registers unless there is a driver changing the values in struct task_struct. Hide the whole thing behind a static key. This also allows the driver to disable MPAM in response to errors reported by hardware. Move the existing static key to belong to the arch code, as in the future the MPAM driver may become a loadable module. All this should depend on whether there is an MPAM driver, hide it behind CONFIG_ARM64_MPAM. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick CC: Amit Singh Tomar Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Catalin Marinas Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 8e06d04ff1cf764066c62e5677bfb0b0c1d1fbbc) Signed-off-by: Lee Trager --- arch/arm64/Kconfig | 2 + arch/arm64/include/asm/mpam.h | 67 ++++++++++++++++++++++++++++ arch/arm64/include/asm/thread_info.h | 3 ++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/mpam.c | 13 ++++++ arch/arm64/kernel/process.c | 7 +++ drivers/resctrl/mpam_devices.c | 2 - drivers/resctrl/mpam_internal.h | 4 +- 8 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/include/asm/mpam.h create mode 100644 arch/arm64/kernel/mpam.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d17fc19e5eecc..2e8b57a35139d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2090,6 +2090,8 @@ config ARM64_MPAM MPAM is exposed to user-space via the resctrl pseudo filesystem. + This option enables the extra context switch code. + endmenu # "ARMv8.4 architectural features" menu "ARMv8.5 architectural features" diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h new file mode 100644 index 0000000000000..0747e0526927d --- /dev/null +++ b/arch/arm64/include/asm/mpam.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2025 Arm Ltd. */ + +#ifndef __ASM__MPAM_H +#define __ASM__MPAM_H + +#include +#include +#include + +#include + +DECLARE_STATIC_KEY_FALSE(mpam_enabled); +DECLARE_PER_CPU(u64, arm64_mpam_default); +DECLARE_PER_CPU(u64, arm64_mpam_current); + +/* + * The value of the MPAM0_EL1 sysreg when a task is in resctrl's default group. + * This is used by the context switch code to use the resctrl CPU property + * instead. The value is modified when CDP is enabled/disabled by mounting + * the resctrl filesystem. + */ +extern u64 arm64_mpam_global_default; + +/* + * The resctrl filesystem writes to the partid/pmg values for threads and CPUs, + * which may race with reads in mpam_thread_switch(). Ensure only one of the old + * or new values are used. Particular care should be taken with the pmg field as + * mpam_thread_switch() may read a partid and pmg that don't match, causing this + * value to be stored with cache allocations, despite being considered 'free' by + * resctrl. + */ +#ifdef CONFIG_ARM64_MPAM +static inline u64 mpam_get_regval(struct task_struct *tsk) +{ + return READ_ONCE(task_thread_info(tsk)->mpam_partid_pmg); +} + +static inline void mpam_thread_switch(struct task_struct *tsk) +{ + u64 oldregval; + int cpu = smp_processor_id(); + u64 regval = mpam_get_regval(tsk); + + if (!static_branch_likely(&mpam_enabled)) + return; + + if (regval == READ_ONCE(arm64_mpam_global_default)) + regval = READ_ONCE(per_cpu(arm64_mpam_default, cpu)); + + oldregval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); + if (oldregval == regval) + return; + + write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1); + isb(); + + /* Synchronising the EL0 write is left until the ERET to EL0 */ + write_sysreg_s(regval, SYS_MPAM0_EL1); + + WRITE_ONCE(per_cpu(arm64_mpam_current, cpu), regval); +} +#else +static inline void mpam_thread_switch(struct task_struct *tsk) {} +#endif /* CONFIG_ARM64_MPAM */ + +#endif /* __ASM__MPAM_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f241b8601ebd9..c226dabd50191 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -41,6 +41,9 @@ struct thread_info { #ifdef CONFIG_SHADOW_CALL_STACK void *scs_base; void *scs_sp; +#endif +#ifdef CONFIG_ARM64_MPAM + u64 mpam_partid_pmg; #endif u32 cpu; }; diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 76f32e424065e..15979f3665196 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o +obj-$(CONFIG_ARM64_MPAM) += mpam.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c new file mode 100644 index 0000000000000..9866d2ca0faa9 --- /dev/null +++ b/arch/arm64/kernel/mpam.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Arm Ltd. */ + +#include + +#include +#include + +DEFINE_STATIC_KEY_FALSE(mpam_enabled); +DEFINE_PER_CPU(u64, arm64_mpam_default); +DEFINE_PER_CPU(u64, arm64_mpam_current); + +u64 arm64_mpam_global_default; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4c328b7c79ba3..d9994a46fe79f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -774,6 +775,12 @@ struct task_struct *__switch_to(struct task_struct *prev, if (prev->thread.sctlr_user != next->thread.sctlr_user) update_sctlr_el1(next->thread.sctlr_user); + /* + * MPAM thread switch happens after the DSB to ensure prev's accesses + * use prev's MPAM settings. + */ + mpam_thread_switch(next); + /* the actual thread switch */ last = cpu_switch_to(prev, next); diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 2b529152d2b5d..e7b244d226fb6 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -29,8 +29,6 @@ #include "mpam_internal.h" -DEFINE_STATIC_KEY_FALSE(mpam_enabled); /* This moves to arch code */ - /* * mpam_list_lock protects the SRCU lists when writing. Once the * mpam_enabled key is enabled these lists are read-only, diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 380dbbe0ddd68..50bab8b92064f 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -17,12 +17,12 @@ #include #include +#include + #define MPAM_MSC_MAX_NUM_RIS 16 struct platform_device; -DECLARE_STATIC_KEY_FALSE(mpam_enabled); - #ifdef CONFIG_MPAM_KUNIT_TEST #define PACKED_FOR_KUNIT __packed #else From bd3b10d5d175a30f4a85e0f8a52deef828509db8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:44 +0000 Subject: [PATCH 41/74] arm64: mpam: Re-initialise MPAM regs when CPU comes online Now that the MPAM system registers are expected to have values that change, reprogram them based on the previous value when a CPU is brought online. Previously MPAM's 'default PARTID' of 0 was always used for MPAM in kernel-space as this is the PARTID that hardware guarantees to reset. Because there are a limited number of PARTID, this value is exposed to user-space, meaning resctrl changes to the resctrl default group would also affect kernel threads. Instead, use the task's PARTID value for kernel work on behalf of user-space too. The default of 0 is kept for both user-space and kernel-space when MPAM is not enabled. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Catalin Marinas Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 87b78a5d70e83d4dbe31e1afda2be736a3330b31) Signed-off-by: Lee Trager --- arch/arm64/kernel/cpufeature.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5b26739af16b8..e92d4c306ea5d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #include @@ -2452,13 +2453,17 @@ test_has_mpam(const struct arm64_cpu_capabilities *entry, int scope) static void cpu_enable_mpam(const struct arm64_cpu_capabilities *entry) { - /* - * Access by the kernel (at EL1) should use the reserved PARTID - * which is configured unrestricted. This avoids priority-inversion - * where latency sensitive tasks have to wait for a task that has - * been throttled to release the lock. - */ - write_sysreg_s(0, SYS_MPAM1_EL1); + int cpu = smp_processor_id(); + u64 regval = 0; + + if (IS_ENABLED(CONFIG_ARM64_MPAM) && static_branch_likely(&mpam_enabled)) + regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); + + write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1); + isb(); + + /* Synchronising the EL0 write is left until the ERET to EL0 */ + write_sysreg_s(regval, SYS_MPAM0_EL1); } static bool From f126c6c126ad16c1951413b6f95ba7bb3e61ed42 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:45:45 +0000 Subject: [PATCH 42/74] arm64: mpam: Drop the CONFIG_EXPERT restriction In anticipation of MPAM being useful remove the CONFIG_EXPERT restriction. This was done to prevent the driver being enabled before the user-space interface was wired up. Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Ben Horgan [ morse: Added second paragraph ] Signed-off-by: James Morse (cherry picked from commit c544f00a473239835d22e7109b403314d8b85974) Signed-off-by: Lee Trager --- arch/arm64/Kconfig | 2 +- drivers/resctrl/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2e8b57a35139d..3a2e397fb3e10 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2067,7 +2067,7 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" - select ARM64_MPAM_DRIVER if EXPERT # does nothing yet + select ARM64_MPAM_DRIVER select ACPI_MPAM if ACPI help Memory System Resource Partitioning and Monitoring (MPAM) is an diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig index c808e04703946..c34e059c6e41f 100644 --- a/drivers/resctrl/Kconfig +++ b/drivers/resctrl/Kconfig @@ -1,6 +1,6 @@ menuconfig ARM64_MPAM_DRIVER bool "MPAM driver" - depends on ARM64 && ARM64_MPAM && EXPERT + depends on ARM64 && ARM64_MPAM help Memory System Resource Partitioning and Monitoring (MPAM) driver for System IP, e.g. caches and memory controllers. From dffc881b56c531f96e42cfcb6eac0c2dc16adffa Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:46 +0000 Subject: [PATCH 43/74] arm64: mpam: Advertise the CPUs MPAM limits to the driver Requesters need to populate the MPAM fields for any traffic they send on the interconnect. For the CPUs these values are taken from the corresponding MPAMy_ELx register. Each requester may have a limit on the largest PARTID or PMG value that can be used. The MPAM driver has to determine the system-wide minimum supported PARTID and PMG values. To do this, the driver needs to be told what each requestor's limit is. CPUs are special, but this infrastructure is also needed for the SMMU and GIC ITS. Call the helper to tell the MPAM driver what the CPUs can do. The return value can be ignored by the arch code as it runs well before the MPAM driver starts probing. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Catalin Marinas Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan [ morse: requestor->requester as argued by ispell ] Signed-off-by: James Morse (cherry picked from commit 831a7f16728c5ceef04ab99a699c3d9e519dc4b8) Signed-off-by: Lee Trager --- arch/arm64/kernel/mpam.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c index 9866d2ca0faa9..e6feff2324acb 100644 --- a/arch/arm64/kernel/mpam.c +++ b/arch/arm64/kernel/mpam.c @@ -3,6 +3,7 @@ #include +#include #include #include @@ -11,3 +12,14 @@ DEFINE_PER_CPU(u64, arm64_mpam_default); DEFINE_PER_CPU(u64, arm64_mpam_current); u64 arm64_mpam_global_default; + +static int __init arm64_mpam_register_cpus(void) +{ + u64 mpamidr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); + u16 partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, mpamidr); + u8 pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, mpamidr); + + return mpam_register_requestor(partid_max, pmg_max); +} +/* Must occur before mpam_msc_driver_init() from subsys_initcall() */ +arch_initcall(arm64_mpam_register_cpus) From 2a4125bcabf2678d60e9be187ea166d2f0647495 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:47 +0000 Subject: [PATCH 44/74] arm64: mpam: Add cpu_pm notifier to restore MPAM sysregs The MPAM system registers will be lost if the CPU is reset during PSCI's CPU_SUSPEND. Add a PM notifier to restore them. mpam_thread_switch(current) can't be used as this won't make any changes if the in-memory copy says the register already has the correct value. In reality the system register is UNKNOWN out of reset. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Catalin Marinas Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 735dad999905dfd246be1994bb8d203063aeb0d6) Signed-off-by: Lee Trager --- arch/arm64/kernel/mpam.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c index e6feff2324acb..48ec0ffd59997 100644 --- a/arch/arm64/kernel/mpam.c +++ b/arch/arm64/kernel/mpam.c @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -13,12 +14,44 @@ DEFINE_PER_CPU(u64, arm64_mpam_current); u64 arm64_mpam_global_default; +static int mpam_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + u64 regval; + int cpu = smp_processor_id(); + + switch (cmd) { + case CPU_PM_EXIT: + /* + * Don't use mpam_thread_switch() as the system register + * value has changed under our feet. + */ + regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); + write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1); + isb(); + + write_sysreg_s(regval, SYS_MPAM0_EL1); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block mpam_pm_nb = { + .notifier_call = mpam_pm_notifier, +}; + static int __init arm64_mpam_register_cpus(void) { u64 mpamidr = read_sanitised_ftr_reg(SYS_MPAMIDR_EL1); u16 partid_max = FIELD_GET(MPAMIDR_EL1_PARTID_MAX, mpamidr); u8 pmg_max = FIELD_GET(MPAMIDR_EL1_PMG_MAX, mpamidr); + if (!system_supports_mpam()) + return 0; + + cpu_pm_register_notifier(&mpam_pm_nb); return mpam_register_requestor(partid_max, pmg_max); } /* Must occur before mpam_msc_driver_init() from subsys_initcall() */ From 47c5e27fcfa86e90424c73f6b4c17289ecc2d56f Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:45:48 +0000 Subject: [PATCH 45/74] arm64: mpam: Initialise and context switch the MPAMSM_EL1 register The MPAMSM_EL1 sets the MPAM labels, PMG and PARTID, for loads and stores generated by a shared SMCU. Disable the traps so the kernel can use it and set it to the same configuration as the per-EL cpu MPAM configuration. If an SMCU is not shared with other cpus then it is implementation defined whether the configuration from MPAMSM_EL1 is used or that from the appropriate MPAMy_ELx. As we set the same, PMG_D and PARTID_D, configuration for MPAM0_EL1, MPAM1_EL1 and MPAMSM_EL1 the resulting configuration is the same regardless. The range of valid configurations for the PARTID and PMG in MPAMSM_EL1 is not currently specified in Arm Architectural Reference Manual but the architect has confirmed that it is intended to be the same as that for the cpu configuration in the MPAMy_ELx registers. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Catalin Marinas Reviewed-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 37fe0f984d9ca60e8d95fc9a85d37f4300159625) Signed-off-by: Lee Trager --- arch/arm64/include/asm/el2_setup.h | 3 ++- arch/arm64/include/asm/mpam.h | 2 ++ arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/mpam.c | 4 ++++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 9dbd0da8eee8c..42cebf1dac4a7 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -514,7 +514,8 @@ check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2 .Linit_mpam_\@: - msr_s SYS_MPAM2_EL2, xzr // use the default partition + mov x0, #MPAM2_EL2_EnMPAMSM_MASK + msr_s SYS_MPAM2_EL2, x0 // use the default partition, // and disable lower traps mrs_s x0, SYS_MPAMIDR_EL1 tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h index 0747e0526927d..6bccbfdccb87e 100644 --- a/arch/arm64/include/asm/mpam.h +++ b/arch/arm64/include/asm/mpam.h @@ -53,6 +53,8 @@ static inline void mpam_thread_switch(struct task_struct *tsk) return; write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1); + if (system_supports_sme()) + write_sysreg_s(regval & (MPAMSM_EL1_PARTID_D | MPAMSM_EL1_PMG_D), SYS_MPAMSM_EL1); isb(); /* Synchronising the EL0 write is left until the ERET to EL0 */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e92d4c306ea5d..e42c66d60aa5b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2460,6 +2460,8 @@ cpu_enable_mpam(const struct arm64_cpu_capabilities *entry) regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1); + if (cpus_have_cap(ARM64_SME)) + write_sysreg_s(regval & (MPAMSM_EL1_PARTID_D | MPAMSM_EL1_PMG_D), SYS_MPAMSM_EL1); isb(); /* Synchronising the EL0 write is left until the ERET to EL0 */ diff --git a/arch/arm64/kernel/mpam.c b/arch/arm64/kernel/mpam.c index 48ec0ffd59997..3a490de4fa125 100644 --- a/arch/arm64/kernel/mpam.c +++ b/arch/arm64/kernel/mpam.c @@ -28,6 +28,10 @@ static int mpam_pm_notifier(struct notifier_block *self, */ regval = READ_ONCE(per_cpu(arm64_mpam_current, cpu)); write_sysreg_s(regval | MPAM1_EL1_MPAMEN, SYS_MPAM1_EL1); + if (system_supports_sme()) { + write_sysreg_s(regval & (MPAMSM_EL1_PARTID_D | MPAMSM_EL1_PMG_D), + SYS_MPAMSM_EL1); + } isb(); write_sysreg_s(regval, SYS_MPAM0_EL1); From e2cc1cd3659bd7b53b96cb4051d5148513da0a45 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:49 +0000 Subject: [PATCH 46/74] arm64: mpam: Add helpers to change a task or cpu's MPAM PARTID/PMG values Care must be taken when modifying the PARTID and PMG of a task in any per-task structure as writing these values may race with the task being scheduled in, and reading the modified values. Add helpers to set the task properties, and the CPU default value. These use WRITE_ONCE() that pairs with the READ_ONCE() in mpam_get_regval() to avoid causing torn values. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Cc: Dave Martin Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Catalin Marinas Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 2cf9ca3fae38b7894e7f1435cec92f9a679b42f9) Signed-off-by: Lee Trager --- arch/arm64/include/asm/mpam.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h index 6bccbfdccb87e..05aa71200f61a 100644 --- a/arch/arm64/include/asm/mpam.h +++ b/arch/arm64/include/asm/mpam.h @@ -4,6 +4,7 @@ #ifndef __ASM__MPAM_H #define __ASM__MPAM_H +#include #include #include #include @@ -22,6 +23,23 @@ DECLARE_PER_CPU(u64, arm64_mpam_current); */ extern u64 arm64_mpam_global_default; +#ifdef CONFIG_ARM64_MPAM +static inline u64 __mpam_regval(u16 partid_d, u16 partid_i, u8 pmg_d, u8 pmg_i) +{ + return FIELD_PREP(MPAM0_EL1_PARTID_D, partid_d) | + FIELD_PREP(MPAM0_EL1_PARTID_I, partid_i) | + FIELD_PREP(MPAM0_EL1_PMG_D, pmg_d) | + FIELD_PREP(MPAM0_EL1_PMG_I, pmg_i); +} + +static inline void mpam_set_cpu_defaults(int cpu, u16 partid_d, u16 partid_i, + u8 pmg_d, u8 pmg_i) +{ + u64 default_val = __mpam_regval(partid_d, partid_i, pmg_d, pmg_i); + + WRITE_ONCE(per_cpu(arm64_mpam_default, cpu), default_val); +} + /* * The resctrl filesystem writes to the partid/pmg values for threads and CPUs, * which may race with reads in mpam_thread_switch(). Ensure only one of the old @@ -30,12 +48,20 @@ extern u64 arm64_mpam_global_default; * value to be stored with cache allocations, despite being considered 'free' by * resctrl. */ -#ifdef CONFIG_ARM64_MPAM static inline u64 mpam_get_regval(struct task_struct *tsk) { return READ_ONCE(task_thread_info(tsk)->mpam_partid_pmg); } +static inline void mpam_set_task_partid_pmg(struct task_struct *tsk, + u16 partid_d, u16 partid_i, + u8 pmg_d, u8 pmg_i) +{ + u64 regval = __mpam_regval(partid_d, partid_i, pmg_d, pmg_i); + + WRITE_ONCE(task_thread_info(tsk)->mpam_partid_pmg, regval); +} + static inline void mpam_thread_switch(struct task_struct *tsk) { u64 oldregval; From 732e593aef32018345b12bbfe8afd146608f5ad8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:50 +0000 Subject: [PATCH 47/74] KVM: arm64: Force guest EL1 to use user-space's partid configuration While we trap the guest's attempts to read/write the MPAM control registers, the hardware continues to use them. Guest-EL0 uses KVM's user-space's configuration, as the value is left in the register, and guest-EL1 uses either the host kernel's configuration, or in the case of VHE, the UNKNOWN reset value of MPAM1_EL1. We want to force the guest-EL1 to use KVM's user-space's MPAM configuration. On nVHE rely on MPAM0_EL1 and MPAM1_EL1 always being programmed the same and on VHE copy MPAM0_EL1 into the guest's MPAM1_EL1. There is no need to restore as this is out of context once TGE is set. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Acked-by: Marc Zyngier Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 67faed4ccb4f4b8055f50e9cc5670f1fc09ea287) Signed-off-by: Lee Trager --- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index f28c6cf4fe1be..7ba3240e9c3ff 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -183,6 +183,21 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) } NOKPROBE_SYMBOL(sysreg_restore_guest_state_vhe); +/* + * The _EL0 value was written by the host's context switch and belongs to the + * VMM. Copy this into the guest's _EL1 register. + */ +static inline void __mpam_guest_load(void) +{ + u64 mask = MPAM0_EL1_PARTID_D | MPAM0_EL1_PARTID_I | MPAM0_EL1_PMG_D | MPAM0_EL1_PMG_I; + + if (system_supports_mpam()) { + u64 val = (read_sysreg_s(SYS_MPAM0_EL1) & mask) | MPAM1_EL1_MPAMEN; + + write_sysreg_el1(val, SYS_MPAM1); + } +} + /** * __vcpu_load_switch_sysregs - Load guest system registers to the physical CPU * @@ -222,6 +237,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu) */ __sysreg32_restore_state(vcpu); __sysreg_restore_user_state(guest_ctxt); + __mpam_guest_load(); if (unlikely(is_hyp_ctxt(vcpu))) { __sysreg_restore_vel2_state(vcpu); From 48727d71aa37bf9dccbe6c390bb1da7889c0625b Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:51 +0000 Subject: [PATCH 48/74] arm_mpam: resctrl: Add boilerplate cpuhp and domain allocation resctrl has its own data structures to describe its resources. We can't use these directly as we play tricks with the 'MBA' resource, picking the MPAM controls or monitors that best apply. We may export the same component as both L3 and MBA. Add mpam_resctrl_res[] as the array of class->resctrl mappings we are exporting, and add the cpuhp hooks that allocated and free the resctrl domain structures. Only the mpam control feature are considered here and monitor support will be added later. While we're here, plumb in a few other obvious things. CONFIG_ARM_CPU_RESCTRL is used to allow this code to be built even though it can't yet be linked against resctrl. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 09e61daf8e96b9bdb04dd112bdecf9382fd3f919) Signed-off-by: Lee Trager --- drivers/resctrl/Makefile | 1 + drivers/resctrl/mpam_devices.c | 12 ++ drivers/resctrl/mpam_internal.h | 22 ++- drivers/resctrl/mpam_resctrl.c | 324 ++++++++++++++++++++++++++++++++ include/linux/arm_mpam.h | 3 + 5 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 drivers/resctrl/mpam_resctrl.c diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile index 898199dcf80d5..40beaf999582c 100644 --- a/drivers/resctrl/Makefile +++ b/drivers/resctrl/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o mpam-y += mpam_devices.o +mpam-$(CONFIG_ARM_CPU_RESCTRL) += mpam_resctrl.o ccflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index e7b244d226fb6..e3835a5a20432 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -1611,6 +1611,9 @@ static int mpam_cpu_online(unsigned int cpu) mpam_reprogram_msc(msc); } + if (mpam_is_enabled()) + return mpam_resctrl_online_cpu(cpu); + return 0; } @@ -1654,6 +1657,9 @@ static int mpam_cpu_offline(unsigned int cpu) { struct mpam_msc *msc; + if (mpam_is_enabled()) + mpam_resctrl_offline_cpu(cpu); + guard(srcu)(&mpam_srcu); list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list, srcu_read_lock_held(&mpam_srcu)) { @@ -2500,6 +2506,12 @@ static void mpam_enable_once(void) mutex_unlock(&mpam_list_lock); cpus_read_unlock(); + if (!err) { + err = mpam_resctrl_setup(); + if (err) + pr_err("Failed to initialise resctrl: %d\n", err); + } + if (err) { mpam_disable_reason = "Failed to enable."; schedule_work(&mpam_broken_work); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 50bab8b92064f..cc46628856757 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -330,6 +330,16 @@ struct mpam_msc_ris { struct mpam_garbage garbage; }; +struct mpam_resctrl_dom { + struct mpam_component *ctrl_comp; + struct rdt_ctrl_domain resctrl_ctrl_dom; +}; + +struct mpam_resctrl_res { + struct mpam_class *class; + struct rdt_resource resctrl_res; +}; + static inline int mpam_alloc_csu_mon(struct mpam_class *class) { struct mpam_props *cprops = &class->props; @@ -384,6 +394,16 @@ void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); +#ifdef CONFIG_RESCTRL_FS +int mpam_resctrl_setup(void); +int mpam_resctrl_online_cpu(unsigned int cpu); +void mpam_resctrl_offline_cpu(unsigned int cpu); +#else +static inline int mpam_resctrl_setup(void) { return 0; } +static inline int mpam_resctrl_online_cpu(unsigned int cpu) { return 0; } +static inline void mpam_resctrl_offline_cpu(unsigned int cpu) { } +#endif /* CONFIG_RESCTRL_FS */ + /* * MPAM MSCs have the following register layout. See: * Arm Memory System Resource Partitioning and Monitoring (MPAM) System diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c new file mode 100644 index 0000000000000..9a30709704142 --- /dev/null +++ b/drivers/resctrl/mpam_resctrl.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. + +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mpam_internal.h" + +/* + * The classes we've picked to map to resctrl resources, wrapped + * in with their resctrl structure. + * Class pointer may be NULL. + */ +static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; + +#define for_each_mpam_resctrl_control(res, rid) \ + for (rid = 0, res = &mpam_resctrl_controls[rid]; \ + rid < RDT_NUM_RESOURCES; \ + rid++, res = &mpam_resctrl_controls[rid]) + +/* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ +static DEFINE_MUTEX(domain_list_lock); + +bool resctrl_arch_alloc_capable(void) +{ + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + + for_each_mpam_resctrl_control(res, rid) { + if (res->resctrl_res.alloc_capable) + return true; + } + + return false; +} + +/* + * MSC may raise an error interrupt if it sees an out or range partid/pmg, + * and go on to truncate the value. Regardless of what the hardware supports, + * only the system wide safe value is safe to use. + */ +u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) +{ + return mpam_partid_max + 1; +} + +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) +{ + if (l >= RDT_NUM_RESOURCES) + return NULL; + + return &mpam_resctrl_controls[l].resctrl_res; +} + +static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) +{ + /* TODO: initialise the resctrl resources */ + + return 0; +} + +static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) +{ + struct mpam_class *class = comp->class; + + if (class->type == MPAM_CLASS_CACHE) + return comp->comp_id; + + /* TODO: repaint domain ids to match the L3 domain ids */ + /* Otherwise, expose the ID used by the firmware table code. */ + return comp->comp_id; +} + +static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp, + enum resctrl_res_level rid, + struct rdt_domain_hdr *hdr) +{ + lockdep_assert_cpus_held(); + + INIT_LIST_HEAD(&hdr->list); + hdr->id = mpam_resctrl_pick_domain_id(cpu, comp); + hdr->rid = rid; + cpumask_set_cpu(cpu, &hdr->cpu_mask); +} + +static void mpam_resctrl_online_domain_hdr(unsigned int cpu, + struct rdt_domain_hdr *hdr) +{ + lockdep_assert_cpus_held(); + + cpumask_set_cpu(cpu, &hdr->cpu_mask); +} + +/** + * mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU. + * @cpu: The CPU to remove from the domain. + * @hdr: The domain's header. + * + * Removes @cpu from the header mask. If this was the last CPU in the domain, + * the domain header is removed from its parent list and true is returned, + * indicating the parent structure can be freed. + * If there are other CPUs in the domain, returns false. + */ +static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu, + struct rdt_domain_hdr *hdr) +{ + lockdep_assert_held(&domain_list_lock); + + cpumask_clear_cpu(cpu, &hdr->cpu_mask); + if (cpumask_empty(&hdr->cpu_mask)) { + list_del_rcu(&hdr->list); + synchronize_rcu(); + return true; + } + + return false; +} + +static void mpam_resctrl_domain_insert(struct list_head *list, + struct rdt_domain_hdr *new) +{ + struct rdt_domain_hdr *err; + struct list_head *pos = NULL; + + lockdep_assert_held(&domain_list_lock); + + err = resctrl_find_domain(list, new->id, &pos); + if (WARN_ON_ONCE(err)) + return; + + list_add_tail_rcu(&new->list, pos); +} + +static struct mpam_resctrl_dom * +mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) +{ + int err; + struct mpam_resctrl_dom *dom; + struct rdt_ctrl_domain *ctrl_d; + struct mpam_class *class = res->class; + struct mpam_component *comp_iter, *ctrl_comp; + struct rdt_resource *r = &res->resctrl_res; + + lockdep_assert_held(&domain_list_lock); + + ctrl_comp = NULL; + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp_iter, &class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (cpumask_test_cpu(cpu, &comp_iter->affinity)) { + ctrl_comp = comp_iter; + break; + } + } + + /* class has no component for this CPU */ + if (WARN_ON_ONCE(!ctrl_comp)) + return ERR_PTR(-EINVAL); + + dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu)); + if (!dom) + return ERR_PTR(-ENOMEM); + + if (r->alloc_capable) { + dom->ctrl_comp = ctrl_comp; + + ctrl_d = &dom->resctrl_ctrl_dom; + mpam_resctrl_domain_hdr_init(cpu, ctrl_comp, r->rid, &ctrl_d->hdr); + ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN; + err = resctrl_online_ctrl_domain(r, ctrl_d); + if (err) + goto free_domain; + + mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr); + } else { + pr_debug("Skipped control domain online - no controls\n"); + } + return dom; + +free_domain: + kfree(dom); + dom = ERR_PTR(err); + + return dom; +} + +static struct mpam_resctrl_dom * +mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) +{ + struct mpam_resctrl_dom *dom; + struct rdt_resource *r = &res->resctrl_res; + + lockdep_assert_cpus_held(); + + list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) { + if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity)) + return dom; + } + + return NULL; +} + +int mpam_resctrl_online_cpu(unsigned int cpu) +{ + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + + guard(mutex)(&domain_list_lock); + for_each_mpam_resctrl_control(res, rid) { + struct mpam_resctrl_dom *dom; + struct rdt_resource *r = &res->resctrl_res; + + if (!res->class) + continue; // dummy_resource; + + dom = mpam_resctrl_get_domain_from_cpu(cpu, res); + if (!dom) { + dom = mpam_resctrl_alloc_domain(cpu, res); + if (IS_ERR(dom)) + return PTR_ERR(dom); + } else { + if (r->alloc_capable) { + struct rdt_ctrl_domain *ctrl_d = &dom->resctrl_ctrl_dom; + + mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr); + } + } + } + + resctrl_online_cpu(cpu); + + return 0; +} + +void mpam_resctrl_offline_cpu(unsigned int cpu) +{ + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + + resctrl_offline_cpu(cpu); + + guard(mutex)(&domain_list_lock); + for_each_mpam_resctrl_control(res, rid) { + struct mpam_resctrl_dom *dom; + struct rdt_ctrl_domain *ctrl_d; + bool ctrl_dom_empty; + struct rdt_resource *r = &res->resctrl_res; + + if (!res->class) + continue; // dummy resource + + dom = mpam_resctrl_get_domain_from_cpu(cpu, res); + if (WARN_ON_ONCE(!dom)) + continue; + + if (r->alloc_capable) { + ctrl_d = &dom->resctrl_ctrl_dom; + ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); + if (ctrl_dom_empty) + resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); + } else { + ctrl_dom_empty = true; + } + + if (ctrl_dom_empty) + kfree(dom); + } +} + +int mpam_resctrl_setup(void) +{ + int err = 0; + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + + cpus_read_lock(); + for_each_mpam_resctrl_control(res, rid) { + INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains); + res->resctrl_res.rid = rid; + } + + /* TODO: pick MPAM classes to map to resctrl resources */ + + /* Initialise the resctrl structures from the classes */ + for_each_mpam_resctrl_control(res, rid) { + if (!res->class) + continue; // dummy resource + + err = mpam_resctrl_control_init(res); + if (err) { + pr_debug("Failed to initialise rid %u\n", rid); + break; + } + } + cpus_read_unlock(); + + if (err) { + pr_debug("Internal error %d - resctrl not supported\n", err); + return err; + } + + if (!resctrl_arch_alloc_capable()) { + pr_debug("No alloc(%u) found - resctrl not supported\n", + resctrl_arch_alloc_capable()); + return -EOPNOTSUPP; + } + + /* TODO: call resctrl_init() */ + + return 0; +} diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 7f00c5285a326..2c7d1413a401f 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -49,6 +49,9 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, } #endif +bool resctrl_arch_alloc_capable(void); +bool resctrl_arch_mon_capable(void); + /** * mpam_register_requestor() - Register a requestor with the MPAM driver * @partid_max: The maximum PARTID value the requestor can generate. From abe6213d27cd625e88ebd65c570aec51a5a76980 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:52 +0000 Subject: [PATCH 49/74] arm_mpam: resctrl: Pick the caches we will use as resctrl resources Systems with MPAM support may have a variety of control types at any point of their system layout. We can only expose certain types of control, and only if they exist at particular locations. Start with the well-known caches. These have to be depth 2 or 3 and support MPAM's cache portion bitmap controls, with a number of portions fewer than resctrl's limit. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 52a4edb16121d07734e4e392767d26d286f08c35) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 91 +++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 9a30709704142..65bb670dc3fb1 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -65,9 +65,95 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) return &mpam_resctrl_controls[l].resctrl_res; } +static bool cache_has_usable_cpor(struct mpam_class *class) +{ + struct mpam_props *cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_cpor_part, cprops)) + return false; + + /* resctrl uses u32 for all bitmap configurations */ + return class->props.cpbm_wd <= 32; +} + +/* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ +static void mpam_resctrl_pick_caches(void) +{ + struct mpam_class *class; + struct mpam_resctrl_res *res; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) { + if (class->type != MPAM_CLASS_CACHE) { + pr_debug("class %u is not a cache\n", class->level); + continue; + } + + if (class->level != 2 && class->level != 3) { + pr_debug("class %u is not L2 or L3\n", class->level); + continue; + } + + if (!cache_has_usable_cpor(class)) { + pr_debug("class %u cache misses CPOR\n", class->level); + continue; + } + + if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { + pr_debug("class %u has missing CPUs, mask %*pb != %*pb\n", class->level, + cpumask_pr_args(&class->affinity), + cpumask_pr_args(cpu_possible_mask)); + continue; + } + + if (class->level == 2) + res = &mpam_resctrl_controls[RDT_RESOURCE_L2]; + else + res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + res->class = class; + } +} + static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) { - /* TODO: initialise the resctrl resources */ + struct mpam_class *class = res->class; + struct rdt_resource *r = &res->resctrl_res; + + switch (r->rid) { + case RDT_RESOURCE_L2: + case RDT_RESOURCE_L3: + r->schema_fmt = RESCTRL_SCHEMA_BITMAP; + r->cache.arch_has_sparse_bitmasks = true; + + r->cache.cbm_len = class->props.cpbm_wd; + /* mpam_devices will reject empty bitmaps */ + r->cache.min_cbm_bits = 1; + + if (r->rid == RDT_RESOURCE_L2) { + r->name = "L2"; + r->ctrl_scope = RESCTRL_L2_CACHE; + r->cdp_capable = true; + } else { + r->name = "L3"; + r->ctrl_scope = RESCTRL_L3_CACHE; + r->cdp_capable = true; + } + + /* + * Which bits are shared with other ...things... Unknown + * devices use partid-0 which uses all the bitmap fields. Until + * we have configured the SMMU and GIC not to do this 'all the + * bits' is the correct answer here. + */ + r->cache.shareable_bits = resctrl_get_default_ctrl(r); + r->alloc_capable = true; + break; + default: + return -EINVAL; + } return 0; } @@ -292,7 +378,8 @@ int mpam_resctrl_setup(void) res->resctrl_res.rid = rid; } - /* TODO: pick MPAM classes to map to resctrl resources */ + /* Find some classes to use for controls */ + mpam_resctrl_pick_caches(); /* Initialise the resctrl structures from the classes */ for_each_mpam_resctrl_control(res, rid) { From 0fe5719149a1ec64b8e6238758600e3258a62806 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:53 +0000 Subject: [PATCH 50/74] arm_mpam: resctrl: Implement resctrl_arch_reset_all_ctrls() We already have a helper for resetting an mpam class and component. Hook it up to resctrl_arch_reset_all_ctrls() and the domain offline path. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Shaopeng Tan Reviewed-by: Zeng Heng Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 370d166d878d0c0aa06568d67387a1151a200501) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 2 +- drivers/resctrl/mpam_internal.h | 3 +++ drivers/resctrl/mpam_resctrl.c | 13 +++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index e3835a5a20432..f0c7c6c296578 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -2551,7 +2551,7 @@ static void mpam_reset_component_locked(struct mpam_component *comp) } } -static void mpam_reset_class_locked(struct mpam_class *class) +void mpam_reset_class_locked(struct mpam_class *class) { struct mpam_component *comp; diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index cc46628856757..662b52dc3f3b6 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -384,6 +384,9 @@ extern u8 mpam_pmg_max; void mpam_enable(struct work_struct *work); void mpam_disable(struct work_struct *work); +/* Reset all the RIS in a class under cpus_read_lock() */ +void mpam_reset_class_locked(struct mpam_class *class); + int mpam_apply_config(struct mpam_component *comp, u16 partid, struct mpam_config *cfg); diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 65bb670dc3fb1..b2217d11561d8 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -170,6 +170,19 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) return comp->comp_id; } +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) +{ + struct mpam_resctrl_res *res; + + lockdep_assert_cpus_held(); + + if (!mpam_is_enabled()) + return; + + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + mpam_reset_class_locked(res->class); +} + static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp, enum resctrl_res_level rid, struct rdt_domain_hdr *hdr) From 2b0e0d0d55796687ddf00e79ff359707ecc52237 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:54 +0000 Subject: [PATCH 51/74] arm_mpam: resctrl: Add resctrl_arch_get_config() Implement resctrl_arch_get_config() by testing the live configuration for a CPOR bitmap. For any other configuration type return the default. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 02cc661687886563a0e08ecee51c5ef7d1737237) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index b2217d11561d8..3af57b6f2c1b5 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -170,6 +170,49 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) return comp->comp_id; } +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, + u32 closid, enum resctrl_conf_type type) +{ + u32 partid; + struct mpam_config *cfg; + struct mpam_props *cprops; + struct mpam_resctrl_res *res; + struct mpam_resctrl_dom *dom; + enum mpam_device_features configured_by; + + lockdep_assert_cpus_held(); + + if (!mpam_is_enabled()) + return resctrl_get_default_ctrl(r); + + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); + cprops = &res->class->props; + + partid = resctrl_get_config_index(closid, type); + cfg = &dom->ctrl_comp->cfg[partid]; + + switch (r->rid) { + case RDT_RESOURCE_L2: + case RDT_RESOURCE_L3: + configured_by = mpam_feat_cpor_part; + break; + default: + return resctrl_get_default_ctrl(r); + } + + if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r) || + !mpam_has_feature(configured_by, cfg)) + return resctrl_get_default_ctrl(r); + + switch (configured_by) { + case mpam_feat_cpor_part: + return cfg->cpbm; + default: + return resctrl_get_default_ctrl(r); + } +} + void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) { struct mpam_resctrl_res *res; From e0fa334be7e3e02c11ba1076842fc2e5bf814f26 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:55 +0000 Subject: [PATCH 52/74] arm_mpam: resctrl: Implement helpers to update configuration resctrl has two helpers for updating the configuration. resctrl_arch_update_one() updates a single value, and is used by the software-controller to apply feedback to the bandwidth controls, it has to be called on one of the CPUs in the resctrl:domain. resctrl_arch_update_domains() copies multiple staged configurations, it can be called from anywhere. Both helpers should update any changes to the underlying hardware. Implement resctrl_arch_update_domains() to use resctrl_arch_update_one(). Neither need to be called on a specific CPU as the mpam driver will send IPIs as needed. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 9cd2b522be2cc64fab179d75537d2e8df38d26a6) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 70 ++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 3af57b6f2c1b5..ea60777934ffd 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -213,6 +213,76 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, } } +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val) +{ + u32 partid; + struct mpam_config cfg; + struct mpam_props *cprops; + struct mpam_resctrl_res *res; + struct mpam_resctrl_dom *dom; + + lockdep_assert_cpus_held(); + lockdep_assert_irqs_enabled(); + + /* + * No need to check the CPU as mpam_apply_config() doesn't care, and + * resctrl_arch_update_domains() relies on this. + */ + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); + cprops = &res->class->props; + + partid = resctrl_get_config_index(closid, t); + if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) { + pr_debug("Not alloc capable or computed PARTID out of range\n"); + return -EINVAL; + } + + /* + * Copy the current config to avoid clearing other resources when the + * same component is exposed multiple times through resctrl. + */ + cfg = dom->ctrl_comp->cfg[partid]; + + switch (r->rid) { + case RDT_RESOURCE_L2: + case RDT_RESOURCE_L3: + cfg.cpbm = cfg_val; + mpam_set_feature(mpam_feat_cpor_part, &cfg); + break; + default: + return -EINVAL; + } + + return mpam_apply_config(dom->ctrl_comp, partid, &cfg); +} + +int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) +{ + int err; + struct rdt_ctrl_domain *d; + + lockdep_assert_cpus_held(); + lockdep_assert_irqs_enabled(); + + list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) { + for (enum resctrl_conf_type t = 0; t < CDP_NUM_TYPES; t++) { + struct resctrl_staged_config *cfg = &d->staged_config[t]; + + if (!cfg->have_new_ctrl) + continue; + + err = resctrl_arch_update_one(r, d, closid, t, + cfg->new_ctrl); + if (err) + return err; + } + } + + return 0; +} + void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) { struct mpam_resctrl_res *res; From dec9bc55343018c5bafb4c46c5ca74474b7d6d1f Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:56 +0000 Subject: [PATCH 53/74] arm_mpam: resctrl: Add plumbing against arm64 task and cpu hooks arm64 provides helpers for changing a task's and a cpu's mpam partid/pmg values. These are used to back a number of resctrl_arch_ functions. Connect them up. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 9d2e1a99fae58ce992f147bdf83b5d9089f70b27) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 58 ++++++++++++++++++++++++++++++++++ include/linux/arm_mpam.h | 5 +++ 2 files changed, 63 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index ea60777934ffd..9cde5b7e644cc 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,8 @@ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ static DEFINE_MUTEX(domain_list_lock); +static bool cdp_enabled; + bool resctrl_arch_alloc_capable(void) { struct mpam_resctrl_res *res; @@ -57,6 +60,61 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) return mpam_partid_max + 1; } +void resctrl_arch_sched_in(struct task_struct *tsk) +{ + lockdep_assert_preemption_disabled(); + + mpam_thread_switch(tsk); +} + +void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid) +{ + WARN_ON_ONCE(closid > U16_MAX); + WARN_ON_ONCE(rmid > U8_MAX); + + if (!cdp_enabled) { + mpam_set_cpu_defaults(cpu, closid, closid, rmid, rmid); + } else { + /* + * When CDP is enabled, resctrl halves the closid range and we + * use odd/even partid for one closid. + */ + u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); + u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); + + mpam_set_cpu_defaults(cpu, partid_d, partid_i, rmid, rmid); + } +} + +void resctrl_arch_sync_cpu_closid_rmid(void *info) +{ + struct resctrl_cpu_defaults *r = info; + + lockdep_assert_preemption_disabled(); + + if (r) { + resctrl_arch_set_cpu_default_closid_rmid(smp_processor_id(), + r->closid, r->rmid); + } + + resctrl_arch_sched_in(current); +} + +void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) +{ + WARN_ON_ONCE(closid > U16_MAX); + WARN_ON_ONCE(rmid > U8_MAX); + + if (!cdp_enabled) { + mpam_set_task_partid_pmg(tsk, closid, closid, rmid, rmid); + } else { + u32 partid_d = resctrl_get_config_index(closid, CDP_DATA); + u32 partid_i = resctrl_get_config_index(closid, CDP_CODE); + + mpam_set_task_partid_pmg(tsk, partid_d, partid_i, rmid, rmid); + } +} + struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) { if (l >= RDT_NUM_RESOURCES) diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 2c7d1413a401f..5a78299ec464b 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -52,6 +52,11 @@ static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx, bool resctrl_arch_alloc_capable(void); bool resctrl_arch_mon_capable(void); +void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid); +void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid); +void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid); +void resctrl_arch_sched_in(struct task_struct *tsk); + /** * mpam_register_requestor() - Register a requestor with the MPAM driver * @partid_max: The maximum PARTID value the requestor can generate. From 7398a32bf0927152f7d4642a65ed1afa60b9ceb1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:45:57 +0000 Subject: [PATCH 54/74] arm_mpam: resctrl: Add CDP emulation Intel RDT's CDP feature allows the cache to use a different control value depending on whether the accesses was for instruction fetch or a data access. MPAM's equivalent feature is the other way up: the CPU assigns a different partid label to traffic depending on whether it was instruction fetch or a data access, which causes the cache to use a different control value based solely on the partid. MPAM can emulate CDP, with the side effect that the alternative partid is seen by all MSC, it can't be enabled per-MSC. Add the resctrl hooks to turn this on or off. Add the helpers that match a closid against a task, which need to be aware that the value written to hardware is not the same as the one resctrl is using. Update the 'arm64_mpam_global_default' variable the arch code uses during context switch to know when the per-cpu value should be used instead. Also, update these per-cpu values and sync the resulting mpam partid/pmg configuration to hardware. resctrl can enable CDP for L2 caches, L3 caches or both. When it is enabled by one and not the other MPAM globally enabled CDP but hides the effect on the other cache resource. This hiding is possible as CPOR is the only supported cache control and that uses a resource bitmap; two partids with the same bitmap act as one. Awkwardly, the MB controls don't implement CDP and CDP can't be hidden as the memory bandwidth control is a maximum per partid which can't be modelled with more partids. If the total maximum is used for both the data and instruction partids then then the maximum may be exceeded and if it is split in two then the one using more bandwidth will hit a lower limit. Hence, hide the MB controls completely if CDP is enabled for any resource. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Cc: Dave Martin Cc: Amit Singh Tomar Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 6789fb99282c0a8e8e84701b7edf456f4a9e71e2) Signed-off-by: Lee Trager --- arch/arm64/include/asm/mpam.h | 1 + drivers/resctrl/mpam_internal.h | 1 + drivers/resctrl/mpam_resctrl.c | 122 ++++++++++++++++++++++++++++++++ include/linux/arm_mpam.h | 2 + 4 files changed, 126 insertions(+) diff --git a/arch/arm64/include/asm/mpam.h b/arch/arm64/include/asm/mpam.h index 05aa71200f61a..70d396e7b6da8 100644 --- a/arch/arm64/include/asm/mpam.h +++ b/arch/arm64/include/asm/mpam.h @@ -4,6 +4,7 @@ #ifndef __ASM__MPAM_H #define __ASM__MPAM_H +#include #include #include #include diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 662b52dc3f3b6..46ddb5e912f6c 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -338,6 +338,7 @@ struct mpam_resctrl_dom { struct mpam_resctrl_res { struct mpam_class *class; struct rdt_resource resctrl_res; + bool cdp_enabled; }; static inline int mpam_alloc_csu_mon(struct mpam_class *class) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 9cde5b7e644cc..2111542f485e1 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -35,6 +35,10 @@ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ static DEFINE_MUTEX(domain_list_lock); +/* + * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1. + * This applies globally to all traffic the CPU generates. + */ static bool cdp_enabled; bool resctrl_arch_alloc_capable(void) @@ -50,6 +54,74 @@ bool resctrl_arch_alloc_capable(void) return false; } +bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) +{ + return mpam_resctrl_controls[rid].cdp_enabled; +} + +/** + * resctrl_reset_task_closids() - Reset the PARTID/PMG values for all tasks. + * + * At boot, all existing tasks use partid zero for D and I. + * To enable/disable CDP emulation, all these tasks need relabelling. + */ +static void resctrl_reset_task_closids(void) +{ + struct task_struct *p, *t; + + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + resctrl_arch_set_closid_rmid(t, RESCTRL_RESERVED_CLOSID, + RESCTRL_RESERVED_RMID); + } + read_unlock(&tasklist_lock); +} + +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) +{ + u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID; + int cpu; + + /* + * resctrl_arch_set_cdp_enabled() is only called with enable set to + * false on error and unmount. + */ + cdp_enabled = enable; + mpam_resctrl_controls[rid].cdp_enabled = enable; + + /* The mbw_max feature can't hide cdp as it's a per-partid maximum. */ + if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled) + mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false; + + if (mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled && + mpam_resctrl_controls[RDT_RESOURCE_MBA].class) + mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = true; + + if (enable) { + if (mpam_partid_max < 1) + return -EINVAL; + + partid_d = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_DATA); + partid_i = resctrl_get_config_index(RESCTRL_RESERVED_CLOSID, CDP_CODE); + } + + mpam_set_task_partid_pmg(current, partid_d, partid_i, 0, 0); + WRITE_ONCE(arm64_mpam_global_default, mpam_get_regval(current)); + + resctrl_reset_task_closids(); + + for_each_possible_cpu(cpu) + mpam_set_cpu_defaults(cpu, partid_d, partid_i, 0, 0); + on_each_cpu(resctrl_arch_sync_cpu_closid_rmid, NULL, 1); + + return 0; +} + +static bool mpam_resctrl_hide_cdp(enum resctrl_res_level rid) +{ + return cdp_enabled && !resctrl_arch_get_cdp_enabled(rid); +} + /* * MSC may raise an error interrupt if it sees an out or range partid/pmg, * and go on to truncate the value. Regardless of what the hardware supports, @@ -115,6 +187,30 @@ void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid) } } +bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid) +{ + u64 regval = mpam_get_regval(tsk); + u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); + + if (cdp_enabled) + tsk_closid >>= 1; + + return tsk_closid == closid; +} + +/* The task's pmg is not unique, the partid must be considered too */ +bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid) +{ + u64 regval = mpam_get_regval(tsk); + u32 tsk_closid = FIELD_GET(MPAM0_EL1_PARTID_D, regval); + u32 tsk_rmid = FIELD_GET(MPAM0_EL1_PMG_D, regval); + + if (cdp_enabled) + tsk_closid >>= 1; + + return (tsk_closid == closid) && (tsk_rmid == rmid); +} + struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) { if (l >= RDT_NUM_RESOURCES) @@ -247,6 +343,14 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); cprops = &res->class->props; + /* + * When CDP is enabled, but the resource doesn't support it, + * the control is cloned across both partids. + * Pick one at random to read: + */ + if (mpam_resctrl_hide_cdp(r->rid)) + type = CDP_DATA; + partid = resctrl_get_config_index(closid, type); cfg = &dom->ctrl_comp->cfg[partid]; @@ -274,6 +378,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val) { + int err; u32 partid; struct mpam_config cfg; struct mpam_props *cprops; @@ -291,6 +396,9 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); cprops = &res->class->props; + if (mpam_resctrl_hide_cdp(r->rid)) + t = CDP_DATA; + partid = resctrl_get_config_index(closid, t); if (!r->alloc_capable || partid >= resctrl_arch_get_num_closid(r)) { pr_debug("Not alloc capable or computed PARTID out of range\n"); @@ -313,6 +421,20 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, return -EINVAL; } + /* + * When CDP is enabled, but the resource doesn't support it, we need to + * apply the same configuration to the other partid. + */ + if (mpam_resctrl_hide_cdp(r->rid)) { + partid = resctrl_get_config_index(closid, CDP_CODE); + err = mpam_apply_config(dom->ctrl_comp, partid, &cfg); + if (err) + return err; + + partid = resctrl_get_config_index(closid, CDP_DATA); + return mpam_apply_config(dom->ctrl_comp, partid, &cfg); + } + return mpam_apply_config(dom->ctrl_comp, partid, &cfg); } diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 5a78299ec464b..d329b1dc148ba 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -56,6 +56,8 @@ void resctrl_arch_set_cpu_default_closid(int cpu, u32 closid); void resctrl_arch_set_closid_rmid(struct task_struct *tsk, u32 closid, u32 rmid); void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid); void resctrl_arch_sched_in(struct task_struct *tsk); +bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid); +bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid); /** * mpam_register_requestor() - Register a requestor with the MPAM driver From a054fee3388a0046d746aff4fba9074260d9e9a3 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:45:58 +0000 Subject: [PATCH 55/74] arm_mpam: resctrl: Hide CDP emulation behind CONFIG_EXPERT When CDP is not enabled, the 'rmid_entry's in the limbo list, rmid_busy_llc, map directly to a (PARTID,PMG) pair and when CDP is enabled the mapping is to two different pairs. As the limbo list is reused between mounts and CDP disabled on unmount this can lead to stale mapping and the limbo handler will then make monitor reads with potentially out of range PARTID. This may then cause an MPAM error interrupt and the driver will disable MPAM. No problems are expected if you just mount the resctrl file system once with CDP enabled and never unmount it. Hide CDP emulation behind CONFIG_EXPERT to protect the unwary. Signed-off-by: Ben Horgan Reviewed-by: Gavin Shan Reviewed-by: Zeng Heng Reviewed-by: James Morse Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick Signed-off-by: James Morse (cherry picked from commit 01a0021f6c39557037bfc41ede7230a0696677ff) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 2111542f485e1..2331e6ddb814b 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -82,6 +82,18 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID; int cpu; + if (!IS_ENABLED(CONFIG_EXPERT) && enable) { + /* + * If the resctrl fs is mounted more than once, sequentially, + * then CDP can lead to the use of out of range PARTIDs. + */ + pr_warn("CDP not supported\n"); + return -EOPNOTSUPP; + } + + if (enable) + pr_warn("CDP is an expert feature and may cause MPAM to malfunction.\n"); + /* * resctrl_arch_set_cdp_enabled() is only called with enable set to * false on error and unmount. From 725bf1ea8a7ade15fea80ebc12e64053b9a24d73 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 13 Mar 2026 14:45:59 +0000 Subject: [PATCH 56/74] arm_mpam: resctrl: Convert to/from MPAMs fixed-point formats MPAM uses a fixed-point formats for some hardware controls. Resctrl provides the bandwidth controls as a percentage. Add helpers to convert between these. Ensure bwa_wd is at most 16 to make it clear higher values have no meaning. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: Dave Martin Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 80d147d293130ee3c8a395cbbea1813e26ab9a1b) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 7 +++++ drivers/resctrl/mpam_resctrl.c | 51 ++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index f0c7c6c296578..c9687880bfb64 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -713,6 +713,13 @@ static void mpam_ris_hw_probe(struct mpam_msc_ris *ris) mpam_set_feature(mpam_feat_mbw_part, props); props->bwa_wd = FIELD_GET(MPAMF_MBW_IDR_BWA_WD, mbw_features); + + /* + * The BWA_WD field can represent 0-63, but the control fields it + * describes have a maximum of 16 bits. + */ + props->bwa_wd = min(props->bwa_wd, 16); + if (props->bwa_wd && FIELD_GET(MPAMF_MBW_IDR_HAS_MAX, mbw_features)) mpam_set_feature(mpam_feat_mbw_max, props); diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 2331e6ddb814b..240a06df2f079 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -242,6 +243,56 @@ static bool cache_has_usable_cpor(struct mpam_class *class) return class->props.cpbm_wd <= 32; } +/* + * Each fixed-point hardware value architecturally represents a range + * of values: the full range 0% - 100% is split contiguously into + * (1 << cprops->bwa_wd) equal bands. + * + * Although the bwa_bwd fields have 6 bits the maximum valid value is 16 + * as it reports the width of fields that are at most 16 bits. When + * fewer than 16 bits are valid the least significant bits are + * ignored. The implied binary point is kept between bits 15 and 16 and + * so the valid bits are leftmost. + * + * See ARM IHI0099B.a "MPAM system component specification", Section 9.3, + * "The fixed-point fractional format" for more information. + * + * Find the nearest percentage value to the upper bound of the selected band: + */ +static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) +{ + u32 val = mbw_max; + + val >>= 16 - cprops->bwa_wd; + val += 1; + val *= MAX_MBA_BW; + val = DIV_ROUND_CLOSEST(val, 1 << cprops->bwa_wd); + + return val; +} + +/* + * Find the band whose upper bound is closest to the specified percentage. + * + * A round-to-nearest policy is followed here as a balanced compromise + * between unexpected under-commit of the resource (where the total of + * a set of resource allocations after conversion is less than the + * expected total, due to rounding of the individual converted + * percentages) and over-commit (where the total of the converted + * allocations is greater than expected). + */ +static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) +{ + u32 val = pc; + + val <<= cprops->bwa_wd; + val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW); + val = max(val, 1) - 1; + val <<= 16 - cprops->bwa_wd; + + return val; +} + /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ static void mpam_resctrl_pick_caches(void) { From 295923d18b400c729238f0e851c2e0811c84680c Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:46:00 +0000 Subject: [PATCH 57/74] arm_mpam: resctrl: Add rmid index helpers Because MPAM's pmg aren't identical to RDT's rmid, resctrl handles some data structures by index. This allows x86 to map indexes to RMID, and MPAM to map them to partid-and-pmg. Add the helpers to do this. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Suggested-by: James Morse Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 3e9b35823aabcb85cc039960256426e50f1fd601) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 16 ++++++++++++++++ include/linux/arm_mpam.h | 3 +++ 2 files changed, 19 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 240a06df2f079..370830ab11197 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -145,6 +145,22 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *ignored) return mpam_partid_max + 1; } +u32 resctrl_arch_system_num_rmid_idx(void) +{ + return (mpam_pmg_max + 1) * (mpam_partid_max + 1); +} + +u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) +{ + return closid * (mpam_pmg_max + 1) + rmid; +} + +void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid) +{ + *closid = idx / (mpam_pmg_max + 1); + *rmid = idx % (mpam_pmg_max + 1); +} + void resctrl_arch_sched_in(struct task_struct *tsk) { lockdep_assert_preemption_disabled(); diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index d329b1dc148ba..7d23c90f077dc 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -58,6 +58,9 @@ void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid, u32 rmid); void resctrl_arch_sched_in(struct task_struct *tsk); bool resctrl_arch_match_closid(struct task_struct *tsk, u32 closid); bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 closid, u32 rmid); +u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid); +void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid); +u32 resctrl_arch_system_num_rmid_idx(void); /** * mpam_register_requestor() - Register a requestor with the MPAM driver From a4ae886d348ffa7e283919ef8823778f86de313f Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:46:01 +0000 Subject: [PATCH 58/74] arm_mpam: resctrl: Wait for cacheinfo to be ready In order to calculate the rmid realloc threshold the size of the cache needs to be known. Cache domains will also be named after the cache id. So that this information can be extracted from cacheinfo we need to wait for it to be ready. The cacheinfo information is populated in device_initcall() so we wait for that. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 1c1e2968a860c5af9fca67f1c0e88aab83ace0b3) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 370830ab11197..bf91cff05daf7 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -42,6 +43,13 @@ static DEFINE_MUTEX(domain_list_lock); */ static bool cdp_enabled; +/* + * We use cacheinfo to discover the size of the caches and their id. cacheinfo + * populates this from a device_initcall(). mpam_resctrl_setup() must wait. + */ +static bool cacheinfo_ready; +static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready); + bool resctrl_arch_alloc_capable(void) { struct mpam_resctrl_res *res; @@ -757,6 +765,8 @@ int mpam_resctrl_setup(void) struct mpam_resctrl_res *res; enum resctrl_res_level rid; + wait_event(wait_cacheinfo_ready, cacheinfo_ready); + cpus_read_lock(); for_each_mpam_resctrl_control(res, rid) { INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains); @@ -794,3 +804,12 @@ int mpam_resctrl_setup(void) return 0; } + +static int __init __cacheinfo_ready(void) +{ + cacheinfo_ready = true; + wake_up(&wait_cacheinfo_ready); + + return 0; +} +device_initcall_sync(__cacheinfo_ready); From 9bb9bb245bac2ac2fc51842db4b75f4443ac61dc Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:02 +0000 Subject: [PATCH 59/74] arm_mpam: resctrl: Add support for 'MB' resource resctrl supports 'MB', as a percentage throttling of traffic from the L3. This is the control that mba_sc uses, so ideally the class chosen should be as close as possible to the counters used for mbm_total. If there is a single L3, it's the last cache, and the topology of the memory matches then the traffic at the memory controller will be equivalent to that at egress of the L3. If these conditions are met allow the memory class to back MB. MB's percentage control should be backed either with the fixed point fraction MBW_MAX or bandwidth portion bitmaps. The bandwidth portion bitmaps is not used as its tricky to pick which bits to use to avoid contention, and may be possible to expose this as something other than a percentage in the future. Tested-by: Shaopeng Tan Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Gavin Shan Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Dave Martin Signed-off-by: Dave Martin Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 36528c7681b8093f5f9270d2af7c4326d771f181) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 281 ++++++++++++++++++++++++++++++++- 1 file changed, 280 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index bf91cff05daf7..60d111f7abfd5 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -267,6 +267,33 @@ static bool cache_has_usable_cpor(struct mpam_class *class) return class->props.cpbm_wd <= 32; } +static bool mba_class_use_mbw_max(struct mpam_props *cprops) +{ + return (mpam_has_feature(mpam_feat_mbw_max, cprops) && + cprops->bwa_wd); +} + +static bool class_has_usable_mba(struct mpam_props *cprops) +{ + return mba_class_use_mbw_max(cprops); +} + +/* + * Calculate the worst-case percentage change from each implemented step + * in the control. + */ +static u32 get_mba_granularity(struct mpam_props *cprops) +{ + if (!mba_class_use_mbw_max(cprops)) + return 0; + + /* + * bwa_wd is the number of bits implemented in the 0.xxx + * fixed point fraction. 1 bit is 50%, 2 is 25% etc. + */ + return DIV_ROUND_UP(MAX_MBA_BW, 1 << cprops->bwa_wd); +} + /* * Each fixed-point hardware value architecturally represents a range * of values: the full range 0% - 100% is split contiguously into @@ -317,6 +344,160 @@ static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops) return val; } +static u32 get_mba_min(struct mpam_props *cprops) +{ + if (!mba_class_use_mbw_max(cprops)) { + WARN_ON_ONCE(1); + return 0; + } + + return mbw_max_to_percent(0, cprops); +} + +/* Find the L3 cache that has affinity with this CPU */ +static int find_l3_equivalent_bitmask(int cpu, cpumask_var_t tmp_cpumask) +{ + u32 cache_id = get_cpu_cacheinfo_id(cpu, 3); + + lockdep_assert_cpus_held(); + + return mpam_get_cpumask_from_cache_id(cache_id, 3, tmp_cpumask); +} + +/* + * topology_matches_l3() - Is the provided class the same shape as L3 + * @victim: The class we'd like to pretend is L3. + * + * resctrl expects all the world's a Xeon, and all counters are on the + * L3. We allow some mapping counters on other classes. This requires + * that the CPU->domain mapping is the same kind of shape. + * + * Using cacheinfo directly would make this work even if resctrl can't + * use the L3 - but cacheinfo can't tell us anything about offline CPUs. + * Using the L3 resctrl domain list also depends on CPUs being online. + * Using the mpam_class we picked for L3 so we can use its domain list + * assumes that there are MPAM controls on the L3. + * Instead, this path eventually uses the mpam_get_cpumask_from_cache_id() + * helper which can tell us about offline CPUs ... but getting the cache_id + * to start with relies on at least one CPU per L3 cache being online at + * boot. + * + * Walk the victim component list and compare the affinity mask with the + * corresponding L3. The topology matches if each victim:component's affinity + * mask is the same as the CPU's corresponding L3's. These lists/masks are + * computed from firmware tables so don't change at runtime. + */ +static bool topology_matches_l3(struct mpam_class *victim) +{ + int cpu, err; + struct mpam_component *victim_iter; + + lockdep_assert_cpus_held(); + + cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; + if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) + return false; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(victim_iter, &victim->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (cpumask_empty(&victim_iter->affinity)) { + pr_debug("class %u has CPU-less component %u - can't match L3!\n", + victim->level, victim_iter->comp_id); + return false; + } + + cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask); + if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) + return false; + + cpumask_clear(tmp_cpumask); + err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); + if (err) { + pr_debug("Failed to find L3's equivalent component to class %u component %u\n", + victim->level, victim_iter->comp_id); + return false; + } + + /* Any differing bits in the affinity mask? */ + if (!cpumask_equal(tmp_cpumask, &victim_iter->affinity)) { + pr_debug("class %u component %u has Mismatched CPU mask with L3 equivalent\n" + "L3:%*pbl != victim:%*pbl\n", + victim->level, victim_iter->comp_id, + cpumask_pr_args(tmp_cpumask), + cpumask_pr_args(&victim_iter->affinity)); + + return false; + } + } + + return true; +} + +/* + * Test if the traffic for a class matches that at egress from the L3. For + * MSC at memory controllers this is only possible if there is a single L3 + * as otherwise the counters at the memory can include bandwidth from the + * non-local L3. + */ +static bool traffic_matches_l3(struct mpam_class *class) +{ + int err, cpu; + + lockdep_assert_cpus_held(); + + if (class->type == MPAM_CLASS_CACHE && class->level == 3) + return true; + + if (class->type == MPAM_CLASS_CACHE && class->level != 3) { + pr_debug("class %u is a different cache from L3\n", class->level); + return false; + } + + if (class->type != MPAM_CLASS_MEMORY) { + pr_debug("class %u is neither of type cache or memory\n", class->level); + return false; + } + + cpumask_var_t __free(free_cpumask_var) tmp_cpumask = CPUMASK_VAR_NULL; + if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) { + pr_debug("cpumask allocation failed\n"); + return false; + } + + cpu = cpumask_any_and(&class->affinity, cpu_online_mask); + err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); + if (err) { + pr_debug("Failed to find L3 downstream to cpu %d\n", cpu); + return false; + } + + if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) { + pr_debug("There is more than one L3\n"); + return false; + } + + /* Be strict; the traffic might stop in the intermediate cache. */ + if (get_cpu_cacheinfo_id(cpu, 4) != -1) { + pr_debug("L3 isn't the last level of cache\n"); + return false; + } + + if (num_possible_nodes() > 1) { + pr_debug("There is more than one numa node\n"); + return false; + } + +#ifdef CONFIG_HMEM_REPORTING + if (node_devices[cpu_to_node(cpu)]->cache_dev) { + pr_debug("There is a memory side cache\n"); + return false; + } +#endif + + return true; +} + /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? */ static void mpam_resctrl_pick_caches(void) { @@ -358,9 +539,68 @@ static void mpam_resctrl_pick_caches(void) } } +static void mpam_resctrl_pick_mba(void) +{ + struct mpam_class *class, *candidate_class = NULL; + struct mpam_resctrl_res *res; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) { + struct mpam_props *cprops = &class->props; + + if (class->level != 3 && class->type == MPAM_CLASS_CACHE) { + pr_debug("class %u is a cache but not the L3\n", class->level); + continue; + } + + if (!class_has_usable_mba(cprops)) { + pr_debug("class %u has no bandwidth control\n", + class->level); + continue; + } + + if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { + pr_debug("class %u has missing CPUs\n", class->level); + continue; + } + + if (!topology_matches_l3(class)) { + pr_debug("class %u topology doesn't match L3\n", + class->level); + continue; + } + + if (!traffic_matches_l3(class)) { + pr_debug("class %u traffic doesn't match L3 egress\n", + class->level); + continue; + } + + /* + * Pick a resource to be MBA that as close as possible to + * the L3. mbm_total counts the bandwidth leaving the L3 + * cache and MBA should correspond as closely as possible + * for proper operation of mba_sc. + */ + if (!candidate_class || class->level < candidate_class->level) + candidate_class = class; + } + + if (candidate_class) { + pr_debug("selected class %u to back MBA\n", + candidate_class->level); + res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; + res->class = candidate_class; + } +} + static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) { struct mpam_class *class = res->class; + struct mpam_props *cprops = &class->props; struct rdt_resource *r = &res->resctrl_res; switch (r->rid) { @@ -392,6 +632,19 @@ static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) r->cache.shareable_bits = resctrl_get_default_ctrl(r); r->alloc_capable = true; break; + case RDT_RESOURCE_MBA: + r->schema_fmt = RESCTRL_SCHEMA_RANGE; + r->ctrl_scope = RESCTRL_L3_CACHE; + + r->membw.delay_linear = true; + r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED; + r->membw.min_bw = get_mba_min(cprops); + r->membw.max_bw = MAX_MBA_BW; + r->membw.bw_gran = get_mba_granularity(cprops); + + r->name = "MB"; + r->alloc_capable = true; + break; default: return -EINVAL; } @@ -406,7 +659,17 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) if (class->type == MPAM_CLASS_CACHE) return comp->comp_id; - /* TODO: repaint domain ids to match the L3 domain ids */ + if (topology_matches_l3(class)) { + /* Use the corresponding L3 component ID as the domain ID */ + int id = get_cpu_cacheinfo_id(cpu, 3); + + /* Implies topology_matches_l3() made a mistake */ + if (WARN_ON_ONCE(id == -1)) + return comp->comp_id; + + return id; + } + /* Otherwise, expose the ID used by the firmware table code. */ return comp->comp_id; } @@ -446,6 +709,12 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, case RDT_RESOURCE_L3: configured_by = mpam_feat_cpor_part; break; + case RDT_RESOURCE_MBA: + if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { + configured_by = mpam_feat_mbw_max; + break; + } + fallthrough; default: return resctrl_get_default_ctrl(r); } @@ -457,6 +726,8 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, switch (configured_by) { case mpam_feat_cpor_part: return cfg->cpbm; + case mpam_feat_mbw_max: + return mbw_max_to_percent(cfg->mbw_max, cprops); default: return resctrl_get_default_ctrl(r); } @@ -504,6 +775,13 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, cfg.cpbm = cfg_val; mpam_set_feature(mpam_feat_cpor_part, &cfg); break; + case RDT_RESOURCE_MBA: + if (mpam_has_feature(mpam_feat_mbw_max, cprops)) { + cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops); + mpam_set_feature(mpam_feat_mbw_max, &cfg); + break; + } + fallthrough; default: return -EINVAL; } @@ -775,6 +1053,7 @@ int mpam_resctrl_setup(void) /* Find some classes to use for controls */ mpam_resctrl_pick_caches(); + mpam_resctrl_pick_mba(); /* Initialise the resctrl structures from the classes */ for_each_mpam_resctrl_control(res, rid) { From 64300710bf5e339499ae72ca96b90ecce42221c0 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 13 Mar 2026 14:46:03 +0000 Subject: [PATCH 60/74] arm_mpam: resctrl: Add kunit test for control format conversions resctrl specifies the format of the control schemes, and these don't match the hardware. Some of the conversions are a bit hairy - add some kunit tests. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: Dave Martin [morse: squashed enough of Dave's fixes in here that it's his patch now!] Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 5dc8f73eaa5dfccb229b9a25c797720e6379f8e0) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 4 + drivers/resctrl/test_mpam_resctrl.c | 315 ++++++++++++++++++++++++++++ 2 files changed, 319 insertions(+) create mode 100644 drivers/resctrl/test_mpam_resctrl.c diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 60d111f7abfd5..f8d4666fbaa85 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -1092,3 +1092,7 @@ static int __init __cacheinfo_ready(void) return 0; } device_initcall_sync(__cacheinfo_ready); + +#ifdef CONFIG_MPAM_KUNIT_TEST +#include "test_mpam_resctrl.c" +#endif diff --git a/drivers/resctrl/test_mpam_resctrl.c b/drivers/resctrl/test_mpam_resctrl.c new file mode 100644 index 0000000000000..b93d6ad87e43f --- /dev/null +++ b/drivers/resctrl/test_mpam_resctrl.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2025 Arm Ltd. +/* This file is intended to be included into mpam_resctrl.c */ + +#include +#include +#include +#include +#include + +struct percent_value_case { + u8 pc; + u8 width; + u16 value; +}; + +/* + * Mysterious inscriptions taken from the union of ARM DDI 0598D.b, + * "Arm Architecture Reference Manual Supplement - Memory System + * Resource Partitioning and Monitoring (MPAM), for A-profile + * architecture", Section 9.8, "About the fixed-point fractional + * format" (exact percentage entries only) and ARM IHI0099B.a + * "MPAM system component specification", Section 9.3, + * "The fixed-point fractional format": + */ +static const struct percent_value_case percent_value_cases[] = { + /* Architectural cases: */ + { 1, 8, 1 }, { 1, 12, 0x27 }, { 1, 16, 0x28e }, + { 25, 8, 0x3f }, { 25, 12, 0x3ff }, { 25, 16, 0x3fff }, + { 33, 8, 0x53 }, { 33, 12, 0x546 }, { 33, 16, 0x5479 }, + { 35, 8, 0x58 }, { 35, 12, 0x598 }, { 35, 16, 0x5998 }, + { 45, 8, 0x72 }, { 45, 12, 0x732 }, { 45, 16, 0x7332 }, + { 50, 8, 0x7f }, { 50, 12, 0x7ff }, { 50, 16, 0x7fff }, + { 52, 8, 0x84 }, { 52, 12, 0x850 }, { 52, 16, 0x851d }, + { 55, 8, 0x8b }, { 55, 12, 0x8cb }, { 55, 16, 0x8ccb }, + { 58, 8, 0x93 }, { 58, 12, 0x946 }, { 58, 16, 0x9479 }, + { 75, 8, 0xbf }, { 75, 12, 0xbff }, { 75, 16, 0xbfff }, + { 80, 8, 0xcb }, { 80, 12, 0xccb }, { 80, 16, 0xcccb }, + { 88, 8, 0xe0 }, { 88, 12, 0xe13 }, { 88, 16, 0xe146 }, + { 95, 8, 0xf2 }, { 95, 12, 0xf32 }, { 95, 16, 0xf332 }, + { 100, 8, 0xff }, { 100, 12, 0xfff }, { 100, 16, 0xffff }, +}; + +static void test_percent_value_desc(const struct percent_value_case *param, + char *desc) +{ + snprintf(desc, KUNIT_PARAM_DESC_SIZE, + "pc=%d, width=%d, value=0x%.*x\n", + param->pc, param->width, + DIV_ROUND_UP(param->width, 4), param->value); +} + +KUNIT_ARRAY_PARAM(test_percent_value, percent_value_cases, + test_percent_value_desc); + +struct percent_value_test_info { + u32 pc; /* result of value-to-percent conversion */ + u32 value; /* result of percent-to-value conversion */ + u32 max_value; /* maximum raw value allowed by test params */ + unsigned int shift; /* promotes raw testcase value to 16 bits */ +}; + +/* + * Convert a reference percentage to a fixed-point MAX value and + * vice-versa, based on param (not test->param_value!) + */ +static void __prepare_percent_value_test(struct kunit *test, + struct percent_value_test_info *res, + const struct percent_value_case *param) +{ + struct mpam_props fake_props = { }; + + /* Reject bogus test parameters that would break the tests: */ + KUNIT_ASSERT_GE(test, param->width, 1); + KUNIT_ASSERT_LE(test, param->width, 16); + KUNIT_ASSERT_LT(test, param->value, 1 << param->width); + + mpam_set_feature(mpam_feat_mbw_max, &fake_props); + fake_props.bwa_wd = param->width; + + res->shift = 16 - param->width; + res->max_value = GENMASK_U32(param->width - 1, 0); + res->value = percent_to_mbw_max(param->pc, &fake_props); + res->pc = mbw_max_to_percent(param->value << res->shift, &fake_props); +} + +static void test_get_mba_granularity(struct kunit *test) +{ + int ret; + struct mpam_props fake_props = { }; + + /* Use MBW_MAX */ + mpam_set_feature(mpam_feat_mbw_max, &fake_props); + + fake_props.bwa_wd = 0; + KUNIT_EXPECT_FALSE(test, mba_class_use_mbw_max(&fake_props)); + + fake_props.bwa_wd = 1; + KUNIT_EXPECT_TRUE(test, mba_class_use_mbw_max(&fake_props)); + + /* Architectural maximum: */ + fake_props.bwa_wd = 16; + KUNIT_EXPECT_TRUE(test, mba_class_use_mbw_max(&fake_props)); + + /* No usable control... */ + fake_props.bwa_wd = 0; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 0); + + fake_props.bwa_wd = 1; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 50); /* DIV_ROUND_UP(100, 1 << 1)% = 50% */ + + fake_props.bwa_wd = 2; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 25); /* DIV_ROUND_UP(100, 1 << 2)% = 25% */ + + fake_props.bwa_wd = 3; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 13); /* DIV_ROUND_UP(100, 1 << 3)% = 13% */ + + fake_props.bwa_wd = 6; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 2); /* DIV_ROUND_UP(100, 1 << 6)% = 2% */ + + fake_props.bwa_wd = 7; + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 1); /* DIV_ROUND_UP(100, 1 << 7)% = 1% */ + + /* Granularity saturates at 1% */ + fake_props.bwa_wd = 16; /* architectural maximum */ + ret = get_mba_granularity(&fake_props); + KUNIT_EXPECT_EQ(test, ret, 1); /* DIV_ROUND_UP(100, 1 << 16)% = 1% */ +} + +static void test_mbw_max_to_percent(struct kunit *test) +{ + const struct percent_value_case *param = test->param_value; + struct percent_value_test_info res; + + /* + * Since the reference values in percent_value_cases[] all + * correspond to exact percentages, round-to-nearest will + * always give the exact percentage back when the MPAM max + * value has precision of 0.5% or finer. (Always true for the + * reference data, since they all specify 8 bits or more of + * precision. + * + * So, keep it simple and demand an exact match: + */ + __prepare_percent_value_test(test, &res, param); + KUNIT_EXPECT_EQ(test, res.pc, param->pc); +} + +static void test_percent_to_mbw_max(struct kunit *test) +{ + const struct percent_value_case *param = test->param_value; + struct percent_value_test_info res; + + __prepare_percent_value_test(test, &res, param); + + KUNIT_EXPECT_GE(test, res.value, param->value << res.shift); + KUNIT_EXPECT_LE(test, res.value, (param->value + 1) << res.shift); + KUNIT_EXPECT_LE(test, res.value, res.max_value << res.shift); + + /* No flexibility allowed for 0% and 100%! */ + + if (param->pc == 0) + KUNIT_EXPECT_EQ(test, res.value, 0); + + if (param->pc == 100) + KUNIT_EXPECT_EQ(test, res.value, res.max_value << res.shift); +} + +static const void *test_all_bwa_wd_gen_params(struct kunit *test, const void *prev, + char *desc) +{ + uintptr_t param = (uintptr_t)prev; + + if (param > 15) + return NULL; + + param++; + + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "wd=%u\n", (unsigned int)param); + + return (void *)param; +} + +static unsigned int test_get_bwa_wd(struct kunit *test) +{ + uintptr_t param = (uintptr_t)test->param_value; + + KUNIT_ASSERT_GE(test, param, 1); + KUNIT_ASSERT_LE(test, param, 16); + + return param; +} + +static void test_mbw_max_to_percent_limits(struct kunit *test) +{ + struct mpam_props fake_props = {0}; + u32 max_value; + + mpam_set_feature(mpam_feat_mbw_max, &fake_props); + fake_props.bwa_wd = test_get_bwa_wd(test); + max_value = GENMASK(15, 16 - fake_props.bwa_wd); + + KUNIT_EXPECT_EQ(test, mbw_max_to_percent(max_value, &fake_props), + MAX_MBA_BW); + KUNIT_EXPECT_EQ(test, mbw_max_to_percent(0, &fake_props), + get_mba_min(&fake_props)); + + /* + * Rounding policy dependent 0% sanity-check: + * With round-to-nearest, the minimum mbw_max value really + * should map to 0% if there are at least 200 steps. + * (100 steps may be enough for some other rounding policies.) + */ + if (fake_props.bwa_wd >= 8) + KUNIT_EXPECT_EQ(test, mbw_max_to_percent(0, &fake_props), 0); + + if (fake_props.bwa_wd < 8 && + mbw_max_to_percent(0, &fake_props) == 0) + kunit_warn(test, "wd=%d: Testsuite/driver Rounding policy mismatch?", + fake_props.bwa_wd); +} + +/* + * Check that converting a percentage to mbw_max and back again (or, as + * appropriate, vice-versa) always restores the original value: + */ +static void test_percent_max_roundtrip_stability(struct kunit *test) +{ + struct mpam_props fake_props = {0}; + unsigned int shift; + u32 pc, max, pc2, max2; + + mpam_set_feature(mpam_feat_mbw_max, &fake_props); + fake_props.bwa_wd = test_get_bwa_wd(test); + shift = 16 - fake_props.bwa_wd; + + /* + * Converting a valid value from the coarser scale to the finer + * scale and back again must yield the original value: + */ + if (fake_props.bwa_wd >= 7) { + /* More than 100 steps: only test exact pc values: */ + for (pc = get_mba_min(&fake_props); pc <= MAX_MBA_BW; pc++) { + max = percent_to_mbw_max(pc, &fake_props); + pc2 = mbw_max_to_percent(max, &fake_props); + KUNIT_EXPECT_EQ(test, pc2, pc); + } + } else { + /* Fewer than 100 steps: only test exact mbw_max values: */ + for (max = 0; max < 1 << 16; max += 1 << shift) { + pc = mbw_max_to_percent(max, &fake_props); + max2 = percent_to_mbw_max(pc, &fake_props); + KUNIT_EXPECT_EQ(test, max2, max); + } + } +} + +static void test_percent_to_max_rounding(struct kunit *test) +{ + const struct percent_value_case *param = test->param_value; + unsigned int num_rounded_up = 0, total = 0; + struct percent_value_test_info res; + + for (param = percent_value_cases, total = 0; + param < &percent_value_cases[ARRAY_SIZE(percent_value_cases)]; + param++, total++) { + __prepare_percent_value_test(test, &res, param); + if (res.value > param->value << res.shift) + num_rounded_up++; + } + + /* + * The MPAM driver applies a round-to-nearest policy, whereas a + * round-down policy seems to have been applied in the + * reference table from which the test vectors were selected. + * + * For a large and well-distributed suite of test vectors, + * about half should be rounded up and half down compared with + * the reference table. The actual test vectors are few in + * number and probably not very well distributed however, so + * tolerate a round-up rate of between 1/4 and 3/4 before + * crying foul: + */ + + kunit_info(test, "Round-up rate: %u%% (%u/%u)\n", + DIV_ROUND_CLOSEST(num_rounded_up * 100, total), + num_rounded_up, total); + + KUNIT_EXPECT_GE(test, 4 * num_rounded_up, 1 * total); + KUNIT_EXPECT_LE(test, 4 * num_rounded_up, 3 * total); +} + +static struct kunit_case mpam_resctrl_test_cases[] = { + KUNIT_CASE(test_get_mba_granularity), + KUNIT_CASE_PARAM(test_mbw_max_to_percent, test_percent_value_gen_params), + KUNIT_CASE_PARAM(test_percent_to_mbw_max, test_percent_value_gen_params), + KUNIT_CASE_PARAM(test_mbw_max_to_percent_limits, test_all_bwa_wd_gen_params), + KUNIT_CASE(test_percent_to_max_rounding), + KUNIT_CASE_PARAM(test_percent_max_roundtrip_stability, + test_all_bwa_wd_gen_params), + {} +}; + +static struct kunit_suite mpam_resctrl_test_suite = { + .name = "mpam_resctrl_test_suite", + .test_cases = mpam_resctrl_test_cases, +}; + +kunit_test_suites(&mpam_resctrl_test_suite); From 629ae140e49da4b67222a077a31e7c4414917d5c Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:46:04 +0000 Subject: [PATCH 61/74] arm_mpam: resctrl: Add monitor initialisation and domain boilerplate Add the boilerplate that tells resctrl about the mpam monitors that are available. resctrl expects all (non-telemetry) monitors to be on the L3 and so advertise them there and invent an L3 resctrl resource if required. The L3 cache itself has to exist as the cache ids are used as the domain ids. Bring the resctrl monitor domains online and offline based on the cpus they contain. Support for specific monitor types is left to later. Tested-by: Punit Agrawal Reviewed-by: Zeng Heng Reviewed-by: Jonathan Cameron Signed-off-by: Ben Horgan Reviewed-by: Gavin Shan Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick Signed-off-by: James Morse (cherry picked from commit 264c285999fce128fc52743bce582468b26e9f65) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_internal.h | 15 +++ drivers/resctrl/mpam_resctrl.c | 231 ++++++++++++++++++++++++++++++-- 2 files changed, 235 insertions(+), 11 deletions(-) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 46ddb5e912f6c..da11690b2ebcd 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -332,7 +332,16 @@ struct mpam_msc_ris { struct mpam_resctrl_dom { struct mpam_component *ctrl_comp; + + /* + * There is no single mon_comp because different events may be backed + * by different class/components. mon_comp is indexed by the event + * number. + */ + struct mpam_component *mon_comp[QOS_NUM_EVENTS]; + struct rdt_ctrl_domain resctrl_ctrl_dom; + struct rdt_l3_mon_domain resctrl_mon_dom; }; struct mpam_resctrl_res { @@ -341,6 +350,12 @@ struct mpam_resctrl_res { bool cdp_enabled; }; +struct mpam_resctrl_mon { + struct mpam_class *class; + + /* per-class data that resctrl needs will live here */ +}; + static inline int mpam_alloc_csu_mon(struct mpam_class *class) { struct mpam_props *cprops = &class->props; diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index f8d4666fbaa85..e03d0f400993c 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -34,6 +34,23 @@ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; rid < RDT_NUM_RESOURCES; \ rid++, res = &mpam_resctrl_controls[rid]) +/* + * The classes we've picked to map to resctrl events. + * Resctrl believes all the worlds a Xeon, and these are all on the L3. This + * array lets us find the actual class backing the event counters. e.g. + * the only memory bandwidth counters may be on the memory controller, but to + * make use of them, we pretend they are on L3. Restrict the events considered + * to those supported by MPAM. + * Class pointer may be NULL. + */ +#define MPAM_MAX_EVENT QOS_L3_MBM_TOTAL_EVENT_ID +static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1]; + +#define for_each_mpam_resctrl_mon(mon, eventid) \ + for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \ + eventid <= MPAM_MAX_EVENT; \ + eventid++, mon = &mpam_resctrl_counters[eventid]) + /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ static DEFINE_MUTEX(domain_list_lock); @@ -63,6 +80,15 @@ bool resctrl_arch_alloc_capable(void) return false; } +bool resctrl_arch_mon_capable(void) +{ + struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + struct rdt_resource *l3 = &res->resctrl_res; + + /* All monitors are presented as being on the L3 cache */ + return l3->mon_capable; +} + bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) { return mpam_resctrl_controls[rid].cdp_enabled; @@ -89,6 +115,8 @@ static void resctrl_reset_task_closids(void) int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) { u32 partid_i = RESCTRL_RESERVED_CLOSID, partid_d = RESCTRL_RESERVED_CLOSID; + struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + struct rdt_resource *l3 = &res->resctrl_res; int cpu; if (!IS_ENABLED(CONFIG_EXPERT) && enable) { @@ -110,6 +138,11 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level rid, bool enable) cdp_enabled = enable; mpam_resctrl_controls[rid].cdp_enabled = enable; + if (enable) + l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx() / 2; + else + l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); + /* The mbw_max feature can't hide cdp as it's a per-partid maximum. */ if (cdp_enabled && !mpam_resctrl_controls[RDT_RESOURCE_MBA].cdp_enabled) mpam_resctrl_controls[RDT_RESOURCE_MBA].resctrl_res.alloc_capable = false; @@ -674,6 +707,56 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) return comp->comp_id; } +static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, + enum resctrl_event_id type) +{ + struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + struct rdt_resource *l3 = &res->resctrl_res; + + lockdep_assert_cpus_held(); + + /* + * There also needs to be an L3 cache present. + * The check just requires any online CPU and it can't go offline as we + * hold the cpu lock. + */ + if (get_cpu_cacheinfo_id(raw_smp_processor_id(), 3) == -1) + return 0; + + /* + * If there are no MPAM resources on L3, force it into existence. + * topology_matches_l3() already ensures this looks like the L3. + * The domain-ids will be fixed up by mpam_resctrl_domain_hdr_init(). + */ + if (!res->class) { + pr_warn_once("Faking L3 MSC to enable counters.\n"); + res->class = mpam_resctrl_counters[type].class; + } + + /* + * Called multiple times!, once per event type that has a + * monitoring class. + * Setting name is necessary on monitor only platforms. + */ + l3->name = "L3"; + l3->mon_scope = RESCTRL_L3_CACHE; + + /* + * num-rmid is the upper bound for the number of monitoring groups that + * can exist simultaneously, including the default monitoring group for + * each control group. Hence, advertise the whole rmid_idx space even + * though each control group has its own pmg/rmid space. Unfortunately, + * this does mean userspace needs to know the architecture to correctly + * interpret this value. + */ + l3->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); + + if (resctrl_enable_mon_event(type, false, 0, NULL)) + l3->mon_capable = true; + + return 0; +} + u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type) { @@ -901,11 +984,26 @@ static void mpam_resctrl_domain_insert(struct list_head *list, list_add_tail_rcu(&new->list, pos); } +static struct mpam_component *find_component(struct mpam_class *class, int cpu) +{ + struct mpam_component *comp; + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (cpumask_test_cpu(cpu, &comp->affinity)) + return comp; + } + + return NULL; +} + static struct mpam_resctrl_dom * mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) { int err; struct mpam_resctrl_dom *dom; + struct rdt_l3_mon_domain *mon_d; struct rdt_ctrl_domain *ctrl_d; struct mpam_class *class = res->class; struct mpam_component *comp_iter, *ctrl_comp; @@ -945,8 +1043,56 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) } else { pr_debug("Skipped control domain online - no controls\n"); } + + if (r->mon_capable) { + struct mpam_component *any_mon_comp; + struct mpam_resctrl_mon *mon; + enum resctrl_event_id eventid; + + /* + * Even if the monitor domain is backed by a different + * component, the L3 component IDs need to be used... only + * there may be no ctrl_comp for the L3. + * Search each event's class list for a component with + * overlapping CPUs and set up the dom->mon_comp array. + */ + + for_each_mpam_resctrl_mon(mon, eventid) { + struct mpam_component *mon_comp; + + if (!mon->class) + continue; // dummy resource + + mon_comp = find_component(mon->class, cpu); + dom->mon_comp[eventid] = mon_comp; + if (mon_comp) + any_mon_comp = mon_comp; + } + if (!any_mon_comp) { + WARN_ON_ONCE(0); + err = -EFAULT; + goto offline_ctrl_domain; + } + + mon_d = &dom->resctrl_mon_dom; + mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr); + mon_d->hdr.type = RESCTRL_MON_DOMAIN; + err = resctrl_online_mon_domain(r, &mon_d->hdr); + if (err) + goto offline_ctrl_domain; + + mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr); + } else { + pr_debug("Skipped monitor domain online - no monitors\n"); + } + return dom; +offline_ctrl_domain: + if (r->alloc_capable) { + mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); + resctrl_offline_ctrl_domain(r, ctrl_d); + } free_domain: kfree(dom); dom = ERR_PTR(err); @@ -954,6 +1100,35 @@ mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res *res) return dom; } +/* + * We know all the monitors are associated with the L3, even if there are no + * controls and therefore no control component. Find the cache-id for the CPU + * and use that to search for existing resctrl domains. + * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id + * for anything that is not a cache. + */ +static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) +{ + int cache_id; + struct mpam_resctrl_dom *dom; + struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + + lockdep_assert_cpus_held(); + + if (!l3->class) + return NULL; + cache_id = get_cpu_cacheinfo_id(cpu, 3); + if (cache_id < 0) + return NULL; + + list_for_each_entry_rcu(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) { + if (dom->resctrl_mon_dom.hdr.id == cache_id) + return dom; + } + + return NULL; +} + static struct mpam_resctrl_dom * mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) { @@ -967,7 +1142,11 @@ mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) return dom; } - return NULL; + if (r->rid != RDT_RESOURCE_L3) + return NULL; + + /* Search the mon domain list too - needed on monitor only platforms. */ + return mpam_resctrl_get_mon_domain_from_cpu(cpu); } int mpam_resctrl_online_cpu(unsigned int cpu) @@ -994,6 +1173,11 @@ int mpam_resctrl_online_cpu(unsigned int cpu) mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr); } + if (r->mon_capable) { + struct rdt_l3_mon_domain *mon_d = &dom->resctrl_mon_dom; + + mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr); + } } } @@ -1012,8 +1196,9 @@ void mpam_resctrl_offline_cpu(unsigned int cpu) guard(mutex)(&domain_list_lock); for_each_mpam_resctrl_control(res, rid) { struct mpam_resctrl_dom *dom; + struct rdt_l3_mon_domain *mon_d; struct rdt_ctrl_domain *ctrl_d; - bool ctrl_dom_empty; + bool ctrl_dom_empty, mon_dom_empty; struct rdt_resource *r = &res->resctrl_res; if (!res->class) @@ -1032,7 +1217,16 @@ void mpam_resctrl_offline_cpu(unsigned int cpu) ctrl_dom_empty = true; } - if (ctrl_dom_empty) + if (r->mon_capable) { + mon_d = &dom->resctrl_mon_dom; + mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr); + if (mon_dom_empty) + resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr); + } else { + mon_dom_empty = true; + } + + if (ctrl_dom_empty && mon_dom_empty) kfree(dom); } } @@ -1042,12 +1236,15 @@ int mpam_resctrl_setup(void) int err = 0; struct mpam_resctrl_res *res; enum resctrl_res_level rid; + struct mpam_resctrl_mon *mon; + enum resctrl_event_id eventid; wait_event(wait_cacheinfo_ready, cacheinfo_ready); cpus_read_lock(); for_each_mpam_resctrl_control(res, rid) { INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains); + INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains); res->resctrl_res.rid = rid; } @@ -1063,25 +1260,37 @@ int mpam_resctrl_setup(void) err = mpam_resctrl_control_init(res); if (err) { pr_debug("Failed to initialise rid %u\n", rid); - break; + goto internal_error; } } - cpus_read_unlock(); - if (err) { - pr_debug("Internal error %d - resctrl not supported\n", err); - return err; + for_each_mpam_resctrl_mon(mon, eventid) { + if (!mon->class) + continue; // dummy resource + + err = mpam_resctrl_monitor_init(mon, eventid); + if (err) { + pr_debug("Failed to initialise event %u\n", eventid); + goto internal_error; + } } - if (!resctrl_arch_alloc_capable()) { - pr_debug("No alloc(%u) found - resctrl not supported\n", - resctrl_arch_alloc_capable()); + cpus_read_unlock(); + + if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) { + pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n", + resctrl_arch_alloc_capable(), resctrl_arch_mon_capable()); return -EOPNOTSUPP; } /* TODO: call resctrl_init() */ return 0; + +internal_error: + cpus_read_unlock(); + pr_debug("Internal error %d - resctrl not supported\n", err); + return err; } static int __init __cacheinfo_ready(void) From 612a22f92c5e5ff4bf5a4894f1d6ca7d66065d07 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:05 +0000 Subject: [PATCH 62/74] arm_mpam: resctrl: Add support for csu counters resctrl exposes a counter via a file named llc_occupancy. This isn't really a counter as its value goes up and down, this is a snapshot of the cache storage usage monitor. Add some picking code which will only find an L3. The resctrl counter file is called llc_occupancy but we don't check it is the last one as it is already identified as L3. Tested-by: Shaopeng Tan Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Gavin Shan Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Dave Martin Signed-off-by: Dave Martin Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 1458c4f053355f88cc5d190ca02243d2c60fa010) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 83 ++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index e03d0f400993c..07bb20a01b383 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -311,6 +311,28 @@ static bool class_has_usable_mba(struct mpam_props *cprops) return mba_class_use_mbw_max(cprops); } +static bool cache_has_usable_csu(struct mpam_class *class) +{ + struct mpam_props *cprops; + + if (!class) + return false; + + cprops = &class->props; + + if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) + return false; + + /* + * CSU counters settle on the value, so we can get away with + * having only one. + */ + if (!cprops->num_csu_mon) + return false; + + return true; +} + /* * Calculate the worst-case percentage change from each implemented step * in the control. @@ -630,6 +652,64 @@ static void mpam_resctrl_pick_mba(void) } } +static void counter_update_class(enum resctrl_event_id evt_id, + struct mpam_class *class) +{ + struct mpam_class *existing_class = mpam_resctrl_counters[evt_id].class; + + if (existing_class) { + if (class->level == 3) { + pr_debug("Existing class is L3 - L3 wins\n"); + return; + } + + if (existing_class->level < class->level) { + pr_debug("Existing class is closer to L3, %u versus %u - closer is better\n", + existing_class->level, class->level); + return; + } + } + + mpam_resctrl_counters[evt_id].class = class; +} + +static void mpam_resctrl_pick_counters(void) +{ + struct mpam_class *class; + + lockdep_assert_cpus_held(); + + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(class, &mpam_classes, classes_list, + srcu_read_lock_held(&mpam_srcu)) { + /* The name of the resource is L3... */ + if (class->type == MPAM_CLASS_CACHE && class->level != 3) { + pr_debug("class %u is a cache but not the L3", class->level); + continue; + } + + if (!cpumask_equal(&class->affinity, cpu_possible_mask)) { + pr_debug("class %u does not cover all CPUs", + class->level); + continue; + } + + if (cache_has_usable_csu(class)) { + pr_debug("class %u has usable CSU", + class->level); + + /* CSU counters only make sense on a cache. */ + switch (class->type) { + case MPAM_CLASS_CACHE: + counter_update_class(QOS_L3_OCCUP_EVENT_ID, class); + break; + default: + break; + } + } + } +} + static int mpam_resctrl_control_init(struct mpam_resctrl_res *res) { struct mpam_class *class = res->class; @@ -1264,6 +1344,9 @@ int mpam_resctrl_setup(void) } } + /* Find some classes to use for monitors */ + mpam_resctrl_pick_counters(); + for_each_mpam_resctrl_mon(mon, eventid) { if (!mon->class) continue; // dummy resource From 7ae96e5b152ec83fd638fa6b6ce55498d09853cd Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:06 +0000 Subject: [PATCH 63/74] arm_mpam: resctrl: Allow resctrl to allocate monitors When resctrl wants to read a domain's 'QOS_L3_OCCUP', it needs to allocate a monitor on the corresponding resource. Monitors are allocated by class instead of component. Add helpers to allocate a CSU monitor. These helper return an out of range value for MBM counters. Allocating a montitor context is expected to block until hardware resources become available. This only makes sense for QOS_L3_OCCUP as unallocated MBM counters are losing data. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 2a3c79c61539779a09928893518c8286d7774b54) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_internal.h | 14 ++++++- drivers/resctrl/mpam_resctrl.c | 67 +++++++++++++++++++++++++++++++++ include/linux/arm_mpam.h | 5 +++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index da11690b2ebcd..1251abbb45ae7 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -29,6 +29,14 @@ struct platform_device; #define PACKED_FOR_KUNIT #endif +/* + * This 'mon' values must not alias an actual monitor, so must be larger than + * U16_MAX, but not be confused with an errno value, so smaller than + * (u32)-SZ_4K. + * USE_PRE_ALLOCATED is used to avoid confusion with an actual monitor. + */ +#define USE_PRE_ALLOCATED (U16_MAX + 1) + static inline bool mpam_is_enabled(void) { return static_branch_likely(&mpam_enabled); @@ -212,7 +220,11 @@ enum mon_filter_options { }; struct mon_cfg { - u16 mon; + /* + * mon must be large enough to hold out of range values like + * USE_PRE_ALLOCATED + */ + u32 mon; u8 pmg; bool match_pmg; bool csu_exclude_clean; diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 07bb20a01b383..9682ffb151846 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -22,6 +22,8 @@ #include "mpam_internal.h" +DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters); + /* * The classes we've picked to map to resctrl resources, wrapped * in with their resctrl structure. @@ -289,6 +291,71 @@ struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) return &mpam_resctrl_controls[l].resctrl_res; } +static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + + if (!mon->class) + return -EINVAL; + + switch (evtid) { + case QOS_L3_OCCUP_EVENT_ID: + /* With CDP, one monitor gets used for both code/data reads */ + return mpam_alloc_csu_mon(mon->class); + case QOS_L3_MBM_LOCAL_EVENT_ID: + case QOS_L3_MBM_TOTAL_EVENT_ID: + return USE_PRE_ALLOCATED; + default: + return -EOPNOTSUPP; + } +} + +void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, + enum resctrl_event_id evtid) +{ + DEFINE_WAIT(wait); + int *ret; + + ret = kmalloc_obj(*ret); + if (!ret) + return ERR_PTR(-ENOMEM); + + do { + prepare_to_wait(&resctrl_mon_ctx_waiters, &wait, + TASK_INTERRUPTIBLE); + *ret = resctrl_arch_mon_ctx_alloc_no_wait(evtid); + if (*ret == -ENOSPC) + schedule(); + } while (*ret == -ENOSPC && !signal_pending(current)); + finish_wait(&resctrl_mon_ctx_waiters, &wait); + + return ret; +} + +static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid, + u32 mon_idx) +{ + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + + if (!mon->class) + return; + + if (evtid == QOS_L3_OCCUP_EVENT_ID) + mpam_free_csu_mon(mon->class, mon_idx); + + wake_up(&resctrl_mon_ctx_waiters); +} + +void resctrl_arch_mon_ctx_free(struct rdt_resource *r, + enum resctrl_event_id evtid, void *arch_mon_ctx) +{ + u32 mon_idx = *(u32 *)arch_mon_ctx; + + kfree(arch_mon_ctx); + + resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx); +} + static bool cache_has_usable_cpor(struct mpam_class *class) { struct mpam_props *cprops = &class->props; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 7d23c90f077dc..e1461e32af756 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -5,6 +5,7 @@ #define __LINUX_ARM_MPAM_H #include +#include #include struct mpam_msc; @@ -62,6 +63,10 @@ u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid); void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid); u32 resctrl_arch_system_num_rmid_idx(void); +struct rdt_resource; +void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid); +void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx); + /** * mpam_register_requestor() - Register a requestor with the MPAM driver * @partid_max: The maximum PARTID value the requestor can generate. From ba60ed486d3f261f600d5d3ae2e2bf97dedb017e Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:07 +0000 Subject: [PATCH 64/74] arm_mpam: resctrl: Add resctrl_arch_rmid_read() resctrl uses resctrl_arch_rmid_read() to read counters. CDP emulation means the counter may need reading in three different ways. The helpers behind the resctrl_arch_ functions will be re-used for the ABMC equivalent functions. Add the rounding helper for checking monitor values while we're here. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Jesse Chick Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit fb56b29932ca276df268806ad52ed80f40f99a6e) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 82 ++++++++++++++++++++++++++++++++++ include/linux/arm_mpam.h | 5 +++ 2 files changed, 87 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 9682ffb151846..9a15ddd340f73 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -356,6 +356,88 @@ void resctrl_arch_mon_ctx_free(struct rdt_resource *r, resctrl_arch_mon_ctx_free_no_wait(evtid, mon_idx); } +static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, + enum mpam_device_features mon_type, + int mon_idx, + enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val) +{ + struct mon_cfg cfg; + + if (!mpam_is_enabled()) + return -EINVAL; + + /* Shift closid to account for CDP */ + closid = resctrl_get_config_index(closid, cdp_type); + + if (irqs_disabled()) { + /* Check if we can access this domain without an IPI */ + return -EIO; + } + + cfg = (struct mon_cfg) { + .mon = mon_idx, + .match_pmg = true, + .partid = closid, + .pmg = rmid, + }; + + return mpam_msmon_read(mon_comp, &cfg, mon_type, val); +} + +static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, + enum mpam_device_features mon_type, + int mon_idx, u32 closid, u32 rmid, u64 *val) +{ + if (cdp_enabled) { + u64 code_val = 0, data_val = 0; + int err; + + err = __read_mon(mon, mon_comp, mon_type, mon_idx, + CDP_CODE, closid, rmid, &code_val); + if (err) + return err; + + err = __read_mon(mon, mon_comp, mon_type, mon_idx, + CDP_DATA, closid, rmid, &data_val); + if (err) + return err; + + *val += code_val + data_val; + return 0; + } + + return __read_mon(mon, mon_comp, mon_type, mon_idx, + CDP_NONE, closid, rmid, val); +} + +/* MBWU when not in ABMC mode (not supported), and CSU counters. */ +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, + u32 closid, u32 rmid, enum resctrl_event_id eventid, + void *arch_priv, u64 *val, void *arch_mon_ctx) +{ + struct mpam_resctrl_dom *l3_dom; + struct mpam_component *mon_comp; + u32 mon_idx = *(u32 *)arch_mon_ctx; + enum mpam_device_features mon_type; + struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; + + resctrl_arch_rmid_read_context_check(); + + if (eventid >= QOS_NUM_EVENTS || !mon->class) + return -EINVAL; + + l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr); + mon_comp = l3_dom->mon_comp[eventid]; + + if (eventid != QOS_L3_OCCUP_EVENT_ID) + return -EINVAL; + + mon_type = mpam_feat_msmon_csu; + + return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx, + closid, rmid, val); +} + static bool cache_has_usable_cpor(struct mpam_class *class) { struct mpam_props *cprops = &class->props; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index e1461e32af756..86d5e326d2bd3 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -67,6 +67,11 @@ struct rdt_resource; void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid); void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx); +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + return val; +} + /** * mpam_register_requestor() - Register a requestor with the MPAM driver * @partid_max: The maximum PARTID value the requestor can generate. From 8bbfe7a37ac2700644307f872bdb05d74a274cdb Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:08 +0000 Subject: [PATCH 65/74] arm_mpam: resctrl: Update the rmid reallocation limit resctrl's limbo code needs to be told when the data left in a cache is small enough for the partid+pmg value to be re-allocated. x86 uses the cache size divided by the number of rmid users the cache may have. Do the same, but for the smallest cache, and with the number of partid-and-pmg users. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 49b04e401825431529e866470d8d2dcd8e9ef058) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 39 ++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 9a15ddd340f73..f82fff3519df4 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -438,6 +438,42 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, closid, rmid, val); } +/* + * The rmid realloc threshold should be for the smallest cache exposed to + * resctrl. + */ +static int update_rmid_limits(struct mpam_class *class) +{ + u32 num_unique_pmg = resctrl_arch_system_num_rmid_idx(); + struct mpam_props *cprops = &class->props; + struct cacheinfo *ci; + + lockdep_assert_cpus_held(); + + if (!mpam_has_feature(mpam_feat_msmon_csu, cprops)) + return 0; + + /* + * Assume cache levels are the same size for all CPUs... + * The check just requires any online CPU and it can't go offline as we + * hold the cpu lock. + */ + ci = get_cpu_cacheinfo_level(raw_smp_processor_id(), class->level); + if (!ci || ci->size == 0) { + pr_debug("Could not read cache size for class %u\n", + class->level); + return -EINVAL; + } + + if (!resctrl_rmid_realloc_limit || + ci->size < resctrl_rmid_realloc_limit) { + resctrl_rmid_realloc_limit = ci->size; + resctrl_rmid_realloc_threshold = ci->size / num_unique_pmg; + } + + return 0; +} + static bool cache_has_usable_cpor(struct mpam_class *class) { struct mpam_props *cprops = &class->props; @@ -850,6 +886,9 @@ static void mpam_resctrl_pick_counters(void) /* CSU counters only make sense on a cache. */ switch (class->type) { case MPAM_CLASS_CACHE: + if (update_rmid_limits(class)) + break; + counter_update_class(QOS_L3_OCCUP_EVENT_ID, class); break; default: From 45a4598bb6fba89c7e152fbd4df34e09b0f2218f Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:09 +0000 Subject: [PATCH 66/74] arm_mpam: resctrl: Add empty definitions for assorted resctrl functions A few resctrl features and hooks need to be provided, but aren't needed or supported on MPAM platforms. resctrl has individual hooks to separately enable and disable the closid/partid and rmid/pmg context switching code. For MPAM this is all the same thing, as the value in struct task_struct is used to cache the value that should be written to hardware. arm64's context switching code is enabled once MPAM is usable, but doesn't touch the hardware unless the value has changed. For now event configuration is not supported, and can be turned off by returning 'false' from resctrl_arch_is_evt_configurable(). The new io_alloc feature is not supported either, always return false from the enable helper to indicate and fail the enable. Add this, and empty definitions for the other hooks. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit efc775eadce2c6e0921c21d9c29a7b6686022281) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_resctrl.c | 65 ++++++++++++++++++++++++++++++++++ include/linux/arm_mpam.h | 9 +++++ 2 files changed, 74 insertions(+) diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index f82fff3519df4..777ecdc2d0f85 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -91,6 +91,71 @@ bool resctrl_arch_mon_capable(void) return l3->mon_capable; } +bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) +{ + return false; +} + +void resctrl_arch_mon_event_config_read(void *info) +{ +} + +void resctrl_arch_mon_event_config_write(void *info) +{ +} + +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) +{ +} + +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + u32 closid, u32 rmid, enum resctrl_event_id eventid) +{ +} + +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + u32 closid, u32 rmid, int cntr_id, + enum resctrl_event_id eventid) +{ +} + +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + enum resctrl_event_id evtid, u32 rmid, u32 closid, + u32 cntr_id, bool assign) +{ +} + +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, + u32 unused, u32 rmid, int cntr_id, + enum resctrl_event_id eventid, u64 *val) +{ + return -EOPNOTSUPP; +} + +bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) +{ + return false; +} + +int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) +{ + return -EINVAL; +} + +int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) +{ + return -EOPNOTSUPP; +} + +bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r) +{ + return false; +} + +void resctrl_arch_pre_mount(void) +{ +} + bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) { return mpam_resctrl_controls[rid].cdp_enabled; diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index 86d5e326d2bd3..f92a36187a527 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -67,6 +67,15 @@ struct rdt_resource; void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid); void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx); +/* + * The CPU configuration for MPAM is cheap to write, and is only written if it + * has changed. No need for fine grained enables. + */ +static inline void resctrl_arch_enable_mon(void) { } +static inline void resctrl_arch_disable_mon(void) { } +static inline void resctrl_arch_enable_alloc(void) { } +static inline void resctrl_arch_disable_alloc(void) { } + static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) { return val; From 61b57cea8bdb15a4312d8e1b2acd9affa48c4d16 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:10 +0000 Subject: [PATCH 67/74] arm64: mpam: Select ARCH_HAS_CPU_RESCTRL Enough MPAM support is present to enable ARCH_HAS_CPU_RESCTRL. Let it rip^Wlink! ARCH_HAS_CPU_RESCTRL indicates resctrl can be enabled. It is enabled by the arch code simply because it has 'arch' in its name. This removes ARM_CPU_RESCTRL as a mimic of X86_CPU_RESCTRL. While here, move the ACPI dependency to the driver's Kconfig file. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Acked-by: Catalin Marinas Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 4aab135bda1661a795e4fe96418bf840833e1119) Signed-off-by: Lee Trager --- arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/resctrl.h | 2 ++ drivers/resctrl/Kconfig | 7 +++++++ drivers/resctrl/Makefile | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/resctrl.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3a2e397fb3e10..a87305c21ad49 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2068,7 +2068,7 @@ config ARM64_TLB_RANGE config ARM64_MPAM bool "Enable support for MPAM" select ARM64_MPAM_DRIVER - select ACPI_MPAM if ACPI + select ARCH_HAS_CPU_RESCTRL help Memory System Resource Partitioning and Monitoring (MPAM) is an optional extension to the Arm architecture that allows each diff --git a/arch/arm64/include/asm/resctrl.h b/arch/arm64/include/asm/resctrl.h new file mode 100644 index 0000000000000..b506e95cf6e37 --- /dev/null +++ b/arch/arm64/include/asm/resctrl.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include diff --git a/drivers/resctrl/Kconfig b/drivers/resctrl/Kconfig index c34e059c6e41f..672abea3b03cc 100644 --- a/drivers/resctrl/Kconfig +++ b/drivers/resctrl/Kconfig @@ -1,6 +1,7 @@ menuconfig ARM64_MPAM_DRIVER bool "MPAM driver" depends on ARM64 && ARM64_MPAM + select ACPI_MPAM if ACPI help Memory System Resource Partitioning and Monitoring (MPAM) driver for System IP, e.g. caches and memory controllers. @@ -22,3 +23,9 @@ config MPAM_KUNIT_TEST If unsure, say N. endif + +config ARM64_MPAM_RESCTRL_FS + bool + default y if ARM64_MPAM_DRIVER && RESCTRL_FS + select RESCTRL_RMID_DEPENDS_ON_CLOSID + select RESCTRL_ASSIGN_FIXED diff --git a/drivers/resctrl/Makefile b/drivers/resctrl/Makefile index 40beaf999582c..4f6d0e81f9b8f 100644 --- a/drivers/resctrl/Makefile +++ b/drivers/resctrl/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_ARM64_MPAM_DRIVER) += mpam.o mpam-y += mpam_devices.o -mpam-$(CONFIG_ARM_CPU_RESCTRL) += mpam_resctrl.o +mpam-$(CONFIG_ARM64_MPAM_RESCTRL_FS) += mpam_resctrl.o ccflags-$(CONFIG_ARM64_MPAM_DRIVER_DEBUG) += -DDEBUG From e0d4f9bae383b50f4472e7cc5ad6109cfd417cc8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:11 +0000 Subject: [PATCH 68/74] arm_mpam: resctrl: Call resctrl_init() on platforms that can support resctrl Now that MPAM links against resctrl, call resctrl_init() to register the filesystem and setup resctrl's structures. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Peter Newman Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit fb481ec08699e9daf08ab839a79ab37b1bcca94d) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 32 ++++++++++++++--- drivers/resctrl/mpam_internal.h | 4 +++ drivers/resctrl/mpam_resctrl.c | 63 ++++++++++++++++++++++++++++++++- 3 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index c9687880bfb64..46760bb50f99d 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -73,6 +73,14 @@ static DECLARE_WORK(mpam_broken_work, &mpam_disable); /* When mpam is disabled, the printed reason to aid debugging */ static char *mpam_disable_reason; +/* + * Whether resctrl has been setup. Used by cpuhp in preference to + * mpam_is_enabled(). The disable call after an error interrupt makes + * mpam_is_enabled() false before the cpuhp callbacks are made. + * Reads/writes should hold mpam_cpuhp_state_lock, (or be cpuhp callbacks). + */ +static bool mpam_resctrl_enabled; + /* * An MSC is a physical container for controls and monitors, each identified by * their RIS index. These share a base-address, interrupts and some MMIO @@ -1618,7 +1626,7 @@ static int mpam_cpu_online(unsigned int cpu) mpam_reprogram_msc(msc); } - if (mpam_is_enabled()) + if (mpam_resctrl_enabled) return mpam_resctrl_online_cpu(cpu); return 0; @@ -1664,7 +1672,7 @@ static int mpam_cpu_offline(unsigned int cpu) { struct mpam_msc *msc; - if (mpam_is_enabled()) + if (mpam_resctrl_enabled) mpam_resctrl_offline_cpu(cpu); guard(srcu)(&mpam_srcu); @@ -2526,6 +2534,7 @@ static void mpam_enable_once(void) } static_branch_enable(&mpam_enabled); + mpam_resctrl_enabled = true; mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline, "mpam:online"); @@ -2585,24 +2594,39 @@ static void mpam_reset_class(struct mpam_class *class) void mpam_disable(struct work_struct *ignored) { int idx; + bool do_resctrl_exit; struct mpam_class *class; struct mpam_msc *msc, *tmp; + if (mpam_is_enabled()) + static_branch_disable(&mpam_enabled); + mutex_lock(&mpam_cpuhp_state_lock); if (mpam_cpuhp_state) { cpuhp_remove_state(mpam_cpuhp_state); mpam_cpuhp_state = 0; } + + /* + * Removing the cpuhp state called mpam_cpu_offline() and told resctrl + * all the CPUs are offline. + */ + do_resctrl_exit = mpam_resctrl_enabled; + mpam_resctrl_enabled = false; mutex_unlock(&mpam_cpuhp_state_lock); - static_branch_disable(&mpam_enabled); + if (do_resctrl_exit) + mpam_resctrl_exit(); mpam_unregister_irqs(); idx = srcu_read_lock(&mpam_srcu); list_for_each_entry_srcu(class, &mpam_classes, classes_list, - srcu_read_lock_held(&mpam_srcu)) + srcu_read_lock_held(&mpam_srcu)) { mpam_reset_class(class); + if (do_resctrl_exit) + mpam_resctrl_teardown_class(class); + } srcu_read_unlock(&mpam_srcu, idx); mutex_lock(&mpam_list_lock); diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 1251abbb45ae7..4cf50fb4fcb86 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -427,12 +427,16 @@ int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, #ifdef CONFIG_RESCTRL_FS int mpam_resctrl_setup(void); +void mpam_resctrl_exit(void); int mpam_resctrl_online_cpu(unsigned int cpu); void mpam_resctrl_offline_cpu(unsigned int cpu); +void mpam_resctrl_teardown_class(struct mpam_class *class); #else static inline int mpam_resctrl_setup(void) { return 0; } +static inline void mpam_resctrl_exit(void) { } static inline int mpam_resctrl_online_cpu(unsigned int cpu) { return 0; } static inline void mpam_resctrl_offline_cpu(unsigned int cpu) { } +static inline void mpam_resctrl_teardown_class(struct mpam_class *class) { } #endif /* CONFIG_RESCTRL_FS */ /* diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 777ecdc2d0f85..a9938006d0e6e 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ b/drivers/resctrl/mpam_resctrl.c @@ -69,6 +69,12 @@ static bool cdp_enabled; static bool cacheinfo_ready; static DECLARE_WAIT_QUEUE_HEAD(wait_cacheinfo_ready); +/* + * If resctrl_init() succeeded, resctrl_exit() can be used to remove support + * for the filesystem in the event of an error. + */ +static bool resctrl_enabled; + bool resctrl_arch_alloc_capable(void) { struct mpam_resctrl_res *res; @@ -360,6 +366,9 @@ static int resctrl_arch_mon_ctx_alloc_no_wait(enum resctrl_event_id evtid) { struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + if (!mpam_is_enabled()) + return -EINVAL; + if (!mon->class) return -EINVAL; @@ -402,6 +411,9 @@ static void resctrl_arch_mon_ctx_free_no_wait(enum resctrl_event_id evtid, { struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[evtid]; + if (!mpam_is_enabled()) + return; + if (!mon->class) return; @@ -488,6 +500,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, resctrl_arch_rmid_read_context_check(); + if (!mpam_is_enabled()) + return -EINVAL; + if (eventid >= QOS_NUM_EVENTS || !mon->class) return -EINVAL; @@ -1162,6 +1177,9 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, lockdep_assert_cpus_held(); lockdep_assert_irqs_enabled(); + if (!mpam_is_enabled()) + return -EINVAL; + /* * No need to check the CPU as mpam_apply_config() doesn't care, and * resctrl_arch_update_domains() relies on this. @@ -1227,6 +1245,9 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) lockdep_assert_cpus_held(); lockdep_assert_irqs_enabled(); + if (!mpam_is_enabled()) + return -EINVAL; + list_for_each_entry_rcu(d, &r->ctrl_domains, hdr.list) { for (enum resctrl_conf_type t = 0; t < CDP_NUM_TYPES; t++) { struct resctrl_staged_config *cfg = &d->staged_config[t]; @@ -1619,7 +1640,11 @@ int mpam_resctrl_setup(void) return -EOPNOTSUPP; } - /* TODO: call resctrl_init() */ + err = resctrl_init(); + if (err) + return err; + + WRITE_ONCE(resctrl_enabled, true); return 0; @@ -1629,6 +1654,42 @@ int mpam_resctrl_setup(void) return err; } +void mpam_resctrl_exit(void) +{ + if (!READ_ONCE(resctrl_enabled)) + return; + + WRITE_ONCE(resctrl_enabled, false); + resctrl_exit(); +} + +/* + * The driver is detaching an MSC from this class, if resctrl was using it, + * pull on resctrl_exit(). + */ +void mpam_resctrl_teardown_class(struct mpam_class *class) +{ + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + struct mpam_resctrl_mon *mon; + enum resctrl_event_id eventid; + + might_sleep(); + + for_each_mpam_resctrl_control(res, rid) { + if (res->class == class) { + res->class = NULL; + break; + } + } + for_each_mpam_resctrl_mon(mon, eventid) { + if (mon->class == class) { + mon->class = NULL; + break; + } + } +} + static int __init __cacheinfo_ready(void) { cacheinfo_ready = true; From 9b6745fe15387a6e63344061172722387e868b1a Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Fri, 13 Mar 2026 14:46:12 +0000 Subject: [PATCH 69/74] arm_mpam: Add quirk framework The MPAM specification includes the MPAMF_IIDR, which serves to uniquely identify the MSC implementation through a combination of implementer details, product ID, variant, and revision. Certain hardware issues/errata can be resolved using software workarounds. Introduce a quirk framework to allow workarounds to be enabled based on the MPAMF_IIDR value. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Zeng Heng Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: Shanker Donthineni Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Co-developed-by: James Morse Signed-off-by: James Morse (cherry picked from commit fa7745218c9828ac4849ef62bccad684aec0f422) Signed-off-by: Lee Trager --- drivers/resctrl/mpam_devices.c | 32 ++++++++++++++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 25 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 46760bb50f99d..3515b83c109d1 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -630,6 +630,30 @@ static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, return ERR_PTR(-ENOENT); } +static const struct mpam_quirk mpam_quirks[] = { + { NULL } /* Sentinel */ +}; + +static void mpam_enable_quirks(struct mpam_msc *msc) +{ + const struct mpam_quirk *quirk; + + for (quirk = &mpam_quirks[0]; quirk->iidr_mask; quirk++) { + int err = 0; + + if (quirk->iidr != (msc->iidr & quirk->iidr_mask)) + continue; + + if (quirk->init) + err = quirk->init(msc, quirk); + + if (err) + continue; + + mpam_set_quirk(quirk->workaround, msc); + } +} + /* * IHI009A.a has this nugget: "If a monitor does not support automatic behaviour * of NRDY, software can use this bit for any purpose" - so hardware might not @@ -864,8 +888,11 @@ static int mpam_msc_hw_probe(struct mpam_msc *msc) /* Grab an IDR value to find out how many RIS there are */ mutex_lock(&msc->part_sel_lock); idr = mpam_msc_read_idr(msc); + msc->iidr = mpam_read_partsel_reg(msc, IIDR); mutex_unlock(&msc->part_sel_lock); + mpam_enable_quirks(msc); + msc->ris_max = FIELD_GET(MPAMF_IDR_RIS_MAX, idr); /* Use these values so partid/pmg always starts with a valid value */ @@ -1971,6 +1998,7 @@ static bool mpam_has_cmax_wd_feature(struct mpam_props *props) * resulting safe value must be compatible with both. When merging values in * the tree, all the aliasing resources must be handled first. * On mismatch, parent is modified. + * Quirks on an MSC will apply to all MSC in that class. */ static void __props_mismatch(struct mpam_props *parent, struct mpam_props *child, bool alias) @@ -2090,6 +2118,7 @@ static void __props_mismatch(struct mpam_props *parent, * nobble the class feature, as we can't configure all the resources. * e.g. The L3 cache is composed of two resources with 13 and 17 portion * bitmaps respectively. + * Quirks on an MSC will apply to all MSC in that class. */ static void __class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc) @@ -2103,6 +2132,9 @@ __class_props_mismatch(struct mpam_class *class, struct mpam_vmsc *vmsc) dev_dbg(dev, "Merging features for class:0x%lx &= vmsc:0x%lx\n", (long)cprops->features, (long)vprops->features); + /* Merge quirks */ + class->quirks |= vmsc->msc->quirks; + /* Take the safe value for any common features */ __props_mismatch(cprops, vprops, false); } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 4cf50fb4fcb86..63e8ec3180b77 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -85,6 +85,8 @@ struct mpam_msc { u8 pmg_max; unsigned long ris_idxs; u32 ris_max; + u32 iidr; + u16 quirks; /* * error_irq_lock is taken when registering/unregistering the error @@ -212,6 +214,28 @@ struct mpam_props { #define mpam_set_feature(_feat, x) set_bit(_feat, (x)->features) #define mpam_clear_feature(_feat, x) clear_bit(_feat, (x)->features) +/* Workaround bits for msc->quirks */ +enum mpam_device_quirks { + MPAM_QUIRK_LAST +}; + +#define mpam_has_quirk(_quirk, x) ((1 << (_quirk) & (x)->quirks)) +#define mpam_set_quirk(_quirk, x) ((x)->quirks |= (1 << (_quirk))) + +struct mpam_quirk { + int (*init)(struct mpam_msc *msc, const struct mpam_quirk *quirk); + + u32 iidr; + u32 iidr_mask; + + enum mpam_device_quirks workaround; +}; + +#define MPAM_IIDR_MATCH_ONE (FIELD_PREP_CONST(MPAMF_IIDR_PRODUCTID, 0xfff) | \ + FIELD_PREP_CONST(MPAMF_IIDR_VARIANT, 0xf) | \ + FIELD_PREP_CONST(MPAMF_IIDR_REVISION, 0xf) | \ + FIELD_PREP_CONST(MPAMF_IIDR_IMPLEMENTER, 0xfff)) + /* The values for MSMON_CFG_MBWU_FLT.RWBW */ enum mon_filter_options { COUNT_BOTH = 0, @@ -255,6 +279,7 @@ struct mpam_class { struct mpam_props props; u32 nrdy_usec; + u16 quirks; u8 level; enum mpam_class_types type; From 72e7b7fe6a273bb8cb27c0acf8305ca4c8b499b1 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Fri, 13 Mar 2026 14:46:13 +0000 Subject: [PATCH 70/74] arm_mpam: Add workaround for T241-MPAM-1 The MPAM bandwidth partitioning controls will not be correctly configured, and hardware will retain default configuration register values, meaning generally that bandwidth will remain unprovisioned. To address the issue, follow the below steps after updating the MBW_MIN and/or MBW_MAX registers. - Perform 64b reads from all 12 bridge MPAM shadow registers at offsets (0x360048 + slice*0x10000 + partid*8). These registers are read-only. - Continue iterating until all 12 shadow register values match in a loop. pr_warn_once if the values fail to match within the loop count 1000. - Perform 64b writes with the value 0x0 to the two spare registers at offsets 0x1b0000 and 0x1c0000. In the hardware, writes to the MPAMCFG_MBW_MAX MPAMCFG_MBW_MIN registers are transformed into broadcast writes to the 12 shadow registers. The final two writes to the spare registers cause a final rank of downstream micro-architectural MPAM registers to be updated from the shadow copies. The intervening loop to read the 12 shadow registers helps avoid a race condition where writes to the spare registers occur before all shadow registers have been updated. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Punit Agrawal Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Gavin Shan Signed-off-by: Shanker Donthineni Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit 70e81fbedc6570b2397e07a645136af0a0eec907) Signed-off-by: Lee Trager --- Documentation/arch/arm64/silicon-errata.rst | 2 + drivers/resctrl/mpam_devices.c | 88 +++++++++++++++++++++ drivers/resctrl/mpam_internal.h | 9 +++ 3 files changed, 99 insertions(+) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 93cdf16937159..593315d1ff096 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -248,6 +248,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | NVIDIA | T241 GICv3/4.x | T241-FABRIC-4 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| NVIDIA | T241 MPAM | T241-MPAM-1 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 3515b83c109d1..4ca478c270677 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -29,6 +29,16 @@ #include "mpam_internal.h" +/* Values for the T241 errata workaround */ +#define T241_CHIPS_MAX 4 +#define T241_CHIP_NSLICES 12 +#define T241_SPARE_REG0_OFF 0x1b0000 +#define T241_SPARE_REG1_OFF 0x1c0000 +#define T241_CHIP_ID(phys) FIELD_GET(GENMASK_ULL(44, 43), phys) +#define T241_SHADOW_REG_OFF(sidx, pid) (0x360048 + (sidx) * 0x10000 + (pid) * 8) +#define SMCCC_SOC_ID_T241 0x036b0241 +static void __iomem *t241_scratch_regs[T241_CHIPS_MAX]; + /* * mpam_list_lock protects the SRCU lists when writing. Once the * mpam_enabled key is enabled these lists are read-only, @@ -630,7 +640,45 @@ static struct mpam_msc_ris *mpam_get_or_create_ris(struct mpam_msc *msc, return ERR_PTR(-ENOENT); } +static int mpam_enable_quirk_nvidia_t241_1(struct mpam_msc *msc, + const struct mpam_quirk *quirk) +{ + s32 soc_id = arm_smccc_get_soc_id_version(); + struct resource *r; + phys_addr_t phys; + + /* + * A mapping to a device other than the MSC is needed, check + * SOC_ID is NVIDIA T241 chip (036b:0241) + */ + if (soc_id < 0 || soc_id != SMCCC_SOC_ID_T241) + return -EINVAL; + + r = platform_get_resource(msc->pdev, IORESOURCE_MEM, 0); + if (!r) + return -EINVAL; + + /* Find the internal registers base addr from the CHIP ID */ + msc->t241_id = T241_CHIP_ID(r->start); + phys = FIELD_PREP(GENMASK_ULL(45, 44), msc->t241_id) | 0x19000000ULL; + + t241_scratch_regs[msc->t241_id] = ioremap(phys, SZ_8M); + if (WARN_ON_ONCE(!t241_scratch_regs[msc->t241_id])) + return -EINVAL; + + pr_info_once("Enabled workaround for NVIDIA T241 erratum T241-MPAM-1\n"); + + return 0; +} + static const struct mpam_quirk mpam_quirks[] = { + { + /* NVIDIA t241 erratum T241-MPAM-1 */ + .init = mpam_enable_quirk_nvidia_t241_1, + .iidr = MPAM_IIDR_NVIDIA_T241, + .iidr_mask = MPAM_IIDR_MATCH_ONE, + .workaround = T241_SCRUB_SHADOW_REGS, + }, { NULL } /* Sentinel */ }; @@ -1377,6 +1425,44 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) __mpam_write_reg(msc, reg, bm); } +static void mpam_apply_t241_erratum(struct mpam_msc_ris *ris, u16 partid) +{ + int sidx, i, lcount = 1000; + void __iomem *regs; + u64 val0, val; + + regs = t241_scratch_regs[ris->vmsc->msc->t241_id]; + + for (i = 0; i < lcount; i++) { + /* Read the shadow register at index 0 */ + val0 = readq_relaxed(regs + T241_SHADOW_REG_OFF(0, partid)); + + /* Check if all the shadow registers have the same value */ + for (sidx = 1; sidx < T241_CHIP_NSLICES; sidx++) { + val = readq_relaxed(regs + + T241_SHADOW_REG_OFF(sidx, partid)); + if (val != val0) + break; + } + if (sidx == T241_CHIP_NSLICES) + break; + } + + if (i == lcount) + pr_warn_once("t241: inconsistent values in shadow regs"); + + /* Write a value zero to spare registers to take effect of MBW conf */ + writeq_relaxed(0, regs + T241_SPARE_REG0_OFF); + writeq_relaxed(0, regs + T241_SPARE_REG1_OFF); +} + +static void mpam_quirk_post_config_change(struct mpam_msc_ris *ris, u16 partid, + struct mpam_config *cfg) +{ + if (mpam_has_quirk(T241_SCRUB_SHADOW_REGS, ris->vmsc->msc)) + mpam_apply_t241_erratum(ris, partid); +} + /* Called via IPI. Call while holding an SRCU reference */ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, struct mpam_config *cfg) @@ -1456,6 +1542,8 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mpam_write_partsel_reg(msc, PRI, pri_val); } + mpam_quirk_post_config_change(ris, partid, cfg); + mutex_unlock(&msc->part_sel_lock); } diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index 63e8ec3180b77..a4de763a6319a 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -130,6 +130,9 @@ struct mpam_msc { void __iomem *mapped_hwpage; size_t mapped_hwpage_sz; + /* Values only used on some platforms for quirks */ + u32 t241_id; + struct mpam_garbage garbage; }; @@ -216,6 +219,7 @@ struct mpam_props { /* Workaround bits for msc->quirks */ enum mpam_device_quirks { + T241_SCRUB_SHADOW_REGS, MPAM_QUIRK_LAST }; @@ -236,6 +240,11 @@ struct mpam_quirk { FIELD_PREP_CONST(MPAMF_IIDR_REVISION, 0xf) | \ FIELD_PREP_CONST(MPAMF_IIDR_IMPLEMENTER, 0xfff)) +#define MPAM_IIDR_NVIDIA_T241 (FIELD_PREP_CONST(MPAMF_IIDR_PRODUCTID, 0x241) | \ + FIELD_PREP_CONST(MPAMF_IIDR_VARIANT, 0) | \ + FIELD_PREP_CONST(MPAMF_IIDR_REVISION, 0) | \ + FIELD_PREP_CONST(MPAMF_IIDR_IMPLEMENTER, 0x36b)) + /* The values for MSMON_CFG_MBWU_FLT.RWBW */ enum mon_filter_options { COUNT_BOTH = 0, From a764811f96d6c5cbcc09acd5f62dd27661d0e3c2 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Fri, 13 Mar 2026 14:46:14 +0000 Subject: [PATCH 71/74] arm_mpam: Add workaround for T241-MPAM-4 In the T241 implementation of memory-bandwidth partitioning, in the absence of contention for bandwidth, the minimum bandwidth setting can affect the amount of achieved bandwidth. Specifically, the achieved bandwidth in the absence of contention can settle to any value between the values of MPAMCFG_MBW_MIN and MPAMCFG_MBW_MAX. Also, if MPAMCFG_MBW_MIN is set zero (below 0.78125%), once a core enters a throttled state, it will never leave that state. The first issue is not a concern if the MPAM software allows to program MPAMCFG_MBW_MIN through the sysfs interface. This patch ensures program MBW_MIN=1 (0.78125%) whenever MPAMCFG_MBW_MIN=0 is programmed. In the scenario where the resctrl doesn't support the MBW_MIN interface via sysfs, to achieve bandwidth closer to MBW_MAX in the absence of contention, software should configure a relatively narrow gap between MBW_MIN and MBW_MAX. The recommendation is to use a 5% gap to mitigate the problem. Clear the feature MBW_MIN feature from the class to ensure we don't accidentally change behaviour when resctrl adds support for a MBW_MIN interface. Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Fenghua Yu Reviewed-by: Gavin Shan Signed-off-by: Shanker Donthineni Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit a7efe23ed6dd08259ad1b238e9c33bb511666fd4) Signed-off-by: Lee Trager --- Documentation/arch/arm64/silicon-errata.rst | 2 + drivers/resctrl/mpam_devices.c | 55 +++++++++++++++++++-- drivers/resctrl/mpam_internal.h | 1 + 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 593315d1ff096..c10c4450364a2 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -250,6 +250,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | NVIDIA | T241 MPAM | T241-MPAM-1 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| NVIDIA | T241 MPAM | T241-MPAM-4 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 4ca478c270677..2ce1599b2808e 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -679,6 +679,12 @@ static const struct mpam_quirk mpam_quirks[] = { .iidr_mask = MPAM_IIDR_MATCH_ONE, .workaround = T241_SCRUB_SHADOW_REGS, }, + { + /* NVIDIA t241 erratum T241-MPAM-4 */ + .iidr = MPAM_IIDR_NVIDIA_T241, + .iidr_mask = MPAM_IIDR_MATCH_ONE, + .workaround = T241_FORCE_MBW_MIN_TO_ONE, + }, { NULL } /* Sentinel */ }; @@ -1463,6 +1469,37 @@ static void mpam_quirk_post_config_change(struct mpam_msc_ris *ris, u16 partid, mpam_apply_t241_erratum(ris, partid); } +static u16 mpam_wa_t241_force_mbw_min_to_one(struct mpam_props *props) +{ + u16 max_hw_value, min_hw_granule, res0_bits; + + res0_bits = 16 - props->bwa_wd; + max_hw_value = ((1 << props->bwa_wd) - 1) << res0_bits; + min_hw_granule = ~max_hw_value; + + return min_hw_granule + 1; +} + +static u16 mpam_wa_t241_calc_min_from_max(struct mpam_props *props, + struct mpam_config *cfg) +{ + u16 val = 0; + u16 max; + u16 delta = ((5 * MPAMCFG_MBW_MAX_MAX) / 100) - 1; + + if (mpam_has_feature(mpam_feat_mbw_max, cfg)) { + max = cfg->mbw_max; + } else { + /* Resetting. Hence, use the ris specific default. */ + max = GENMASK(15, 16 - props->bwa_wd); + } + + if (max > delta) + val = max - delta; + + return val; +} + /* Called via IPI. Call while holding an SRCU reference */ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, struct mpam_config *cfg) @@ -1503,9 +1540,18 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); } - if (mpam_has_feature(mpam_feat_mbw_min, rprops) && - mpam_has_feature(mpam_feat_mbw_min, cfg)) - mpam_write_partsel_reg(msc, MBW_MIN, 0); + if (mpam_has_feature(mpam_feat_mbw_min, rprops)) { + u16 val = 0; + + if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, msc)) { + u16 min = mpam_wa_t241_force_mbw_min_to_one(rprops); + + val = mpam_wa_t241_calc_min_from_max(rprops, cfg); + val = max(val, min); + } + + mpam_write_partsel_reg(msc, MBW_MIN, val); + } if (mpam_has_feature(mpam_feat_mbw_max, rprops)) { if (mpam_has_feature(mpam_feat_mbw_max, cfg)) @@ -2287,6 +2333,9 @@ static void mpam_enable_merge_class_features(struct mpam_component *comp) list_for_each_entry(vmsc, &comp->vmsc, comp_list) __class_props_mismatch(class, vmsc); + + if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, class)) + mpam_clear_feature(mpam_feat_mbw_min, &class->props); } /* diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index a4de763a6319a..c0230379aba68 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -220,6 +220,7 @@ struct mpam_props { /* Workaround bits for msc->quirks */ enum mpam_device_quirks { T241_SCRUB_SHADOW_REGS, + T241_FORCE_MBW_MIN_TO_ONE, MPAM_QUIRK_LAST }; From fb12dea89b4057f2d582ee2fbe387e185400c33b Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Fri, 13 Mar 2026 14:46:15 +0000 Subject: [PATCH 72/74] arm_mpam: Add workaround for T241-MPAM-6 The registers MSMON_MBWU_L and MSMON_MBWU return the number of requests rather than the number of bytes transferred. Bandwidth resource monitoring is performed at the last level cache, where each request arrive in 64Byte granularity. The current implementation returns the number of transactions received at the last level cache but does not provide the value in bytes. Scaling by 64 gives an accurate byte count to match the MPAM specification for the MSMON_MBWU and MSMON_MBWU_L registers. This patch fixes the issue by reporting the actual number of bytes instead of the number of transactions from __ris_msmon_read(). Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Punit Agrawal Tested-by: Peter Newman Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Gavin Shan Signed-off-by: Shanker Donthineni Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit dc48eb1ff27cc3169c3c5cca5eb20645d04d9e22) Signed-off-by: Lee Trager --- Documentation/arch/arm64/silicon-errata.rst | 2 ++ drivers/resctrl/mpam_devices.c | 26 +++++++++++++++++++-- drivers/resctrl/mpam_internal.h | 1 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index c10c4450364a2..25d98f09b575b 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -252,6 +252,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | NVIDIA | T241 MPAM | T241-MPAM-4 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| NVIDIA | T241 MPAM | T241-MPAM-6 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index 2ce1599b2808e..f9e350a24f020 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -685,6 +685,12 @@ static const struct mpam_quirk mpam_quirks[] = { .iidr_mask = MPAM_IIDR_MATCH_ONE, .workaround = T241_FORCE_MBW_MIN_TO_ONE, }, + { + /* NVIDIA t241 erratum T241-MPAM-6 */ + .iidr = MPAM_IIDR_NVIDIA_T241, + .iidr_mask = MPAM_IIDR_MATCH_ONE, + .workaround = T241_MBW_COUNTER_SCALE_64, + }, { NULL } /* Sentinel */ }; @@ -1146,7 +1152,7 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val, } } -static u64 mpam_msmon_overflow_val(enum mpam_device_features type) +static u64 __mpam_msmon_overflow_val(enum mpam_device_features type) { /* TODO: implement scaling counters */ switch (type) { @@ -1161,6 +1167,18 @@ static u64 mpam_msmon_overflow_val(enum mpam_device_features type) } } +static u64 mpam_msmon_overflow_val(enum mpam_device_features type, + struct mpam_msc *msc) +{ + u64 overflow_val = __mpam_msmon_overflow_val(type); + + if (mpam_has_quirk(T241_MBW_COUNTER_SCALE_64, msc) && + type != mpam_feat_msmon_mbwu_63counter) + overflow_val *= 64; + + return overflow_val; +} + static void __ris_msmon_read(void *arg) { u64 now; @@ -1251,13 +1269,17 @@ static void __ris_msmon_read(void *arg) now = FIELD_GET(MSMON___VALUE, now); } + if (mpam_has_quirk(T241_MBW_COUNTER_SCALE_64, msc) && + m->type != mpam_feat_msmon_mbwu_63counter) + now *= 64; + if (nrdy) break; mbwu_state = &ris->mbwu_state[ctx->mon]; if (overflow) - mbwu_state->correction += mpam_msmon_overflow_val(m->type); + mbwu_state->correction += mpam_msmon_overflow_val(m->type, msc); /* * Include bandwidth consumed before the last hardware reset and diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index c0230379aba68..a65d82b280e0a 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -221,6 +221,7 @@ struct mpam_props { enum mpam_device_quirks { T241_SCRUB_SHADOW_REGS, T241_FORCE_MBW_MIN_TO_ONE, + T241_MBW_COUNTER_SCALE_64, MPAM_QUIRK_LAST }; From dc1efe47a4f914e34d0e3e8c7a87730f8160c24b Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 13 Mar 2026 14:46:16 +0000 Subject: [PATCH 73/74] arm_mpam: Quirk CMN-650's CSU NRDY behaviour CMN-650 is afflicted with an erratum where the CSU NRDY bit never clears. This tells us the monitor never finishes scanning the cache. The erratum document says to wait the maximum time, then ignore the field. Add a flag to indicate whether this is the final attempt to read the counter, and when this quirk is applied, ignore the NRDY field. This means accesses to this counter will always retry, even if the counter was previously programmed to the same values. The counter value is not expected to be stable, it drifts up and down with each allocation and eviction. The CSU register provides the value for a point in time. Tested-by: Punit Agrawal Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick Reviewed-by: Zeng Heng Reviewed-by: Gavin Shan Co-developed-by: Ben Horgan Signed-off-by: Ben Horgan Signed-off-by: James Morse (cherry picked from commit aeb8595a5f8ba4aac8b5c265a8bcc3f18b473cb5) Signed-off-by: Lee Trager --- Documentation/arch/arm64/silicon-errata.rst | 3 +++ drivers/resctrl/mpam_devices.c | 12 ++++++++++++ drivers/resctrl/mpam_internal.h | 6 ++++++ 3 files changed, 21 insertions(+) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 25d98f09b575b..5650c4b6d2c6d 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -215,6 +215,9 @@ stable kernels. | ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| ARM | CMN-650 | #3642720 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 | +----------------+-----------------+-----------------+-----------------------------+ | Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 | diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index f9e350a24f020..f54882e4c039f 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -691,6 +691,12 @@ static const struct mpam_quirk mpam_quirks[] = { .iidr_mask = MPAM_IIDR_MATCH_ONE, .workaround = T241_MBW_COUNTER_SCALE_64, }, + { + /* ARM CMN-650 CSU erratum 3642720 */ + .iidr = MPAM_IIDR_ARM_CMN_650, + .iidr_mask = MPAM_IIDR_MATCH_ONE, + .workaround = IGNORE_CSU_NRDY, + }, { NULL } /* Sentinel */ }; @@ -1003,6 +1009,7 @@ struct mon_read { enum mpam_device_features type; u64 *val; int err; + bool waited_timeout; }; static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris) @@ -1249,6 +1256,10 @@ static void __ris_msmon_read(void *arg) if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops)) nrdy = now & MSMON___NRDY; now = FIELD_GET(MSMON___VALUE, now); + + if (mpam_has_quirk(IGNORE_CSU_NRDY, msc) && m->waited_timeout) + nrdy = false; + break; case mpam_feat_msmon_mbwu_31counter: case mpam_feat_msmon_mbwu_44counter: @@ -1385,6 +1396,7 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, .ctx = ctx, .type = type, .val = val, + .waited_timeout = true, }; *val = 0; diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index a65d82b280e0a..c458b75e87f13 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -222,6 +222,7 @@ enum mpam_device_quirks { T241_SCRUB_SHADOW_REGS, T241_FORCE_MBW_MIN_TO_ONE, T241_MBW_COUNTER_SCALE_64, + IGNORE_CSU_NRDY, MPAM_QUIRK_LAST }; @@ -247,6 +248,11 @@ struct mpam_quirk { FIELD_PREP_CONST(MPAMF_IIDR_REVISION, 0) | \ FIELD_PREP_CONST(MPAMF_IIDR_IMPLEMENTER, 0x36b)) +#define MPAM_IIDR_ARM_CMN_650 (FIELD_PREP_CONST(MPAMF_IIDR_PRODUCTID, 0) | \ + FIELD_PREP_CONST(MPAMF_IIDR_VARIANT, 0) | \ + FIELD_PREP_CONST(MPAMF_IIDR_REVISION, 0) | \ + FIELD_PREP_CONST(MPAMF_IIDR_IMPLEMENTER, 0x43b)) + /* The values for MSMON_CFG_MBWU_FLT.RWBW */ enum mon_filter_options { COUNT_BOTH = 0, From 49e3244c03f8541c0fa1df86bc129d0b5189d668 Mon Sep 17 00:00:00 2001 From: Ben Horgan Date: Fri, 13 Mar 2026 14:46:17 +0000 Subject: [PATCH 74/74] arm64: mpam: Add initial MPAM documentation MPAM (Memory Partitioning and Monitoring) is now exposed to user-space via resctrl. Add some documentation so the user knows what features to expect. Reviewed-by: Zeng Heng Reviewed-by: Shaopeng Tan Reviewed-by: Jonathan Cameron Acked-by: Catalin Marinas Signed-off-by: Ben Horgan Reviewed-by: Gavin Shan Tested-by: Gavin Shan Tested-by: Shaopeng Tan Tested-by: Jesse Chick Signed-off-by: James Morse (cherry picked from commit 4ce0a2ccc0358f3f746fa50815a599f861fd5d68) Signed-off-by: Lee Trager --- Documentation/arch/arm64/index.rst | 1 + Documentation/arch/arm64/mpam.rst | 72 ++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 Documentation/arch/arm64/mpam.rst diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst index 6a012c98bdcd3..189fa760dadea 100644 --- a/Documentation/arch/arm64/index.rst +++ b/Documentation/arch/arm64/index.rst @@ -23,6 +23,7 @@ ARM64 Architecture memory memory-tagging-extension mops + mpam perf pointer-authentication ptdump diff --git a/Documentation/arch/arm64/mpam.rst b/Documentation/arch/arm64/mpam.rst new file mode 100644 index 0000000000000..570f51a8d4ebf --- /dev/null +++ b/Documentation/arch/arm64/mpam.rst @@ -0,0 +1,72 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==== +MPAM +==== + +What is MPAM +============ +MPAM (Memory Partitioning and Monitoring) is a feature in the CPUs and memory +system components such as the caches or memory controllers that allow memory +traffic to be labelled, partitioned and monitored. + +Traffic is labelled by the CPU, based on the control or monitor group the +current task is assigned to using resctrl. Partitioning policy can be set +using the schemata file in resctrl, and monitor values read via resctrl. +See Documentation/filesystems/resctrl.rst for more details. + +This allows tasks that share memory system resources, such as caches, to be +isolated from each other according to the partitioning policy (so called noisy +neighbours). + +Supported Platforms +=================== +Use of this feature requires CPU support, support in the memory system +components, and a description from firmware of where the MPAM device controls +are in the MMIO address space. (e.g. the 'MPAM' ACPI table). + +The MMIO device that provides MPAM controls/monitors for a memory system +component is called a memory system component. (MSC). + +Because the user interface to MPAM is via resctrl, only MPAM features that are +compatible with resctrl can be exposed to user-space. + +MSC are considered as a group based on the topology. MSC that correspond with +the L3 cache are considered together, it is not possible to mix MSC between L2 +and L3 to 'cover' a resctrl schema. + +The supported features are: + +* Cache portion bitmap controls (CPOR) on the L2 or L3 caches. To expose + CPOR at L2 or L3, every CPU must have a corresponding CPU cache at this + level that also supports the feature. Mismatched big/little platforms are + not supported as resctrl's controls would then also depend on task + placement. + +* Memory bandwidth maximum controls (MBW_MAX) on or after the L3 cache. + resctrl uses the L3 cache-id to identify where the memory bandwidth + control is applied. For this reason the platform must have an L3 cache + with cache-id's supplied by firmware. (It doesn't need to support MPAM.) + + To be exported as the 'MB' schema, the topology of the group of MSC chosen + must match the topology of the L3 cache so that the cache-id's can be + repainted. For example: Platforms with Memory bandwidth maximum controls + on CPU-less NUMA nodes cannot expose the 'MB' schema to resctrl as these + nodes do not have a corresponding L3 cache. If the memory bandwidth + control is on the memory rather than the L3 then there must be a single + global L3 as otherwise it is unknown which L3 the traffic came from. There + must be no caches between the L3 and the memory so that the two ends of + the path have equivalent traffic. + + When the MPAM driver finds multiple groups of MSC it can use for the 'MB' + schema, it prefers the group closest to the L3 cache. + +* Cache Storage Usage (CSU) counters can expose the 'llc_occupancy' provided + there is at least one CSU monitor on each MSC that makes up the L3 group. + Exposing CSU counters from other caches or devices is not supported. + +Reporting Bugs +============== +If you are not seeing the counters or controls you expect please share the +debug messages produced when enabling dynamic debug and booting with: +dyndbg="file mpam_resctrl.c +pl"