Commit 2c8f7ef4 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'arch-cache-topo-5.20' of...

Merge tag 'arch-cache-topo-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux into driver-core-next

Sudeep writes:

cacheinfo and arch_topology updates for v5.20

These are updates to fix some discrepancies we have in the CPU topology
parsing from the device tree /cpu-map node and the divergence from the
behaviour on a ACPI enabled platform. The expectation is that both DT
and ACPI enabled systems must present consistent view of the CPU topology.

The current assignment of generated cluster count as the physical package
identifier for each CPU is wrong. The device tree bindings for CPU
topology supports sockets to infer the socket or physical package
identifier for a given CPU. It is now being made use of you address the
issue. These updates also assigns the cluster identifier as parsed from
the device tree cluster nodes within /cpu-map without support for
nesting of the clusters as there are no such reported/known platforms.

In order to be on par with ACPI PPTT physical package/socket support,
these updates also include support for socket nodes in /cpu-map.

The only exception is that the last level cache id information can be
inferred from the same ACPI PPTT while we need to parse CPU cache nodes
in the device tree. The cacheinfo changes here is to enable the re-use
of the cacheinfo to detect the cache attributes for all the CPU quite
early even before the scondardaries are booted so that the information
can be used to build the schedular domains especially the last level
cache(LLC).

* tag 'arch-cache-topo-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux: (21 commits)
  ACPI: Remove the unused find_acpi_cpu_cache_topology()
  arch_topology: Warn that topology for nested clusters is not supported
  arch_topology: Add support for parsing sockets in /cpu-map
  arch_topology: Set cluster identifier in each core/thread from /cpu-map
  arch_topology: Limit span of cpu_clustergroup_mask()
  arch_topology: Don't set cluster identifier as physical package identifier
  arch_topology: Avoid parsing through all the CPUs once a outlier CPU is found
  arch_topology: Check for non-negative value rather than -1 for IDs validity
  arch_topology: Set thread sibling cpumask only within the cluster
  arch_topology: Drop LLC identifier stash from the CPU topology
  arm64: topology: Remove redundant setting of llc_id in CPU topology
  arch_topology: Use the last level cache information from the cacheinfo
  arch_topology: Add support to parse and detect cache attributes
  cacheinfo: Align checks in cache_shared_cpu_map_{setup,remove} for readability
  cacheinfo: Use cache identifiers to check if the caches are shared if available
  cacheinfo: Allow early detection and population of cache attributes
  cacheinfo: Add support to check if last level cache(LLC) is valid or shared
  cacheinfo: Move cache_leaves_are_shared out of CONFIG_OF
  cacheinfo: Add helper to access any cache index for a given CPU
  cacheinfo: Use of_cpu_device_node_get instead cpu_dev->of_node
  ...
parents 2fd26970 7128af87
......@@ -89,8 +89,6 @@ int __init parse_acpi_topology(void)
return 0;
for_each_possible_cpu(cpu) {
int i, cache_id;
topology_id = find_acpi_cpu_topology(cpu, 0);
if (topology_id < 0)
return topology_id;
......@@ -107,18 +105,6 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].cluster_id = topology_id;
topology_id = find_acpi_cpu_topology_package(cpu);
cpu_topology[cpu].package_id = topology_id;
i = acpi_find_last_cache_level(cpu);
if (i > 0) {
/*
* this is the only part of cpu_topology that has
* a direct relationship with the cache topology
*/
cache_id = find_acpi_cpu_cache_topology(cpu, i);
if (cache_id > 0)
cpu_topology[cpu].llc_id = cache_id;
}
}
return 0;
......
......@@ -437,7 +437,8 @@ static void cache_setup_acpi_cpu(struct acpi_table_header *table,
pr_debug("found = %p %p\n", found_cache, cpu_node);
if (found_cache)
update_cache_properties(this_leaf, found_cache,
cpu_node, table->revision);
ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)),
table->revision);
index++;
}
......@@ -690,43 +691,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level)
return find_acpi_cpu_topology_tag(cpu, level, 0);
}
/**
* find_acpi_cpu_cache_topology() - Determine a unique cache topology value
* @cpu: Kernel logical CPU number
* @level: The cache level for which we would like a unique ID
*
* Determine a unique ID for each unified cache in the system
*
* Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
* Otherwise returns a value which represents a unique topological feature.
*/
int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
{
struct acpi_table_header *table;
struct acpi_pptt_cache *found_cache;
acpi_status status;
u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
struct acpi_pptt_processor *cpu_node = NULL;
int ret = -1;
status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
if (ACPI_FAILURE(status)) {
acpi_pptt_warn_missing();
return -ENOENT;
}
found_cache = acpi_find_cache_node(table, acpi_cpu_id,
CACHE_TYPE_UNIFIED,
level,
&cpu_node);
if (found_cache)
ret = ACPI_PTR_DIFF(cpu_node, table);
acpi_put_table(table);
return ret;
}
/**
* find_acpi_cpu_topology_package() - Determine a unique CPU package value
* @cpu: Kernel logical CPU number
......
......@@ -7,6 +7,7 @@
*/
#include <linux/acpi.h>
#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
......@@ -496,7 +497,7 @@ static int __init get_cpu_for_node(struct device_node *node)
}
static int __init parse_core(struct device_node *core, int package_id,
int core_id)
int cluster_id, int core_id)
{
char name[20];
bool leaf = true;
......@@ -512,6 +513,7 @@ static int __init parse_core(struct device_node *core, int package_id,
cpu = get_cpu_for_node(t);
if (cpu >= 0) {
cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].cluster_id = cluster_id;
cpu_topology[cpu].core_id = core_id;
cpu_topology[cpu].thread_id = i;
} else if (cpu != -ENODEV) {
......@@ -533,6 +535,7 @@ static int __init parse_core(struct device_node *core, int package_id,
}
cpu_topology[cpu].package_id = package_id;
cpu_topology[cpu].cluster_id = cluster_id;
cpu_topology[cpu].core_id = core_id;
} else if (leaf && cpu != -ENODEV) {
pr_err("%pOF: Can't get CPU for leaf core\n", core);
......@@ -542,13 +545,13 @@ static int __init parse_core(struct device_node *core, int package_id,
return 0;
}
static int __init parse_cluster(struct device_node *cluster, int depth)
static int __init parse_cluster(struct device_node *cluster, int package_id,
int cluster_id, int depth)
{
char name[20];
bool leaf = true;
bool has_cores = false;
struct device_node *c;
static int package_id __initdata;
int core_id = 0;
int i, ret;
......@@ -563,7 +566,9 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
c = of_get_child_by_name(cluster, name);
if (c) {
leaf = false;
ret = parse_cluster(c, depth + 1);
ret = parse_cluster(c, package_id, i, depth + 1);
if (depth > 0)
pr_warn("Topology for clusters of clusters not yet supported\n");
of_node_put(c);
if (ret != 0)
return ret;
......@@ -587,7 +592,8 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
}
if (leaf) {
ret = parse_core(c, package_id, core_id++);
ret = parse_core(c, package_id, cluster_id,
core_id++);
} else {
pr_err("%pOF: Non-leaf cluster with core %s\n",
cluster, name);
......@@ -604,10 +610,33 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
if (leaf && !has_cores)
pr_warn("%pOF: empty cluster\n", cluster);
if (leaf)
return 0;
}
static int __init parse_socket(struct device_node *socket)
{
char name[20];
struct device_node *c;
bool has_socket = false;
int package_id = 0, ret;
do {
snprintf(name, sizeof(name), "socket%d", package_id);
c = of_get_child_by_name(socket, name);
if (c) {
has_socket = true;
ret = parse_cluster(c, package_id, -1, 0);
of_node_put(c);
if (ret != 0)
return ret;
}
package_id++;
} while (c);
return 0;
if (!has_socket)
ret = parse_cluster(socket, 0, -1, 0);
return ret;
}
static int __init parse_dt_topology(void)
......@@ -630,7 +659,7 @@ static int __init parse_dt_topology(void)
if (!map)
goto out;
ret = parse_cluster(map, 0);
ret = parse_socket(map);
if (ret != 0)
goto out_map;
......@@ -641,8 +670,10 @@ static int __init parse_dt_topology(void)
* only mark cores described in the DT as possible.
*/
for_each_possible_cpu(cpu)
if (cpu_topology[cpu].package_id == -1)
if (cpu_topology[cpu].package_id < 0) {
ret = -EINVAL;
break;
}
out_map:
of_node_put(map);
......@@ -667,7 +698,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
/* not numa in package, lets use the package siblings */
core_mask = &cpu_topology[cpu].core_sibling;
}
if (cpu_topology[cpu].llc_id != -1) {
if (last_level_cache_is_valid(cpu)) {
if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
core_mask = &cpu_topology[cpu].llc_sibling;
}
......@@ -686,6 +718,14 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
const struct cpumask *cpu_clustergroup_mask(int cpu)
{
/*
* Forbid cpu_clustergroup_mask() to span more or the same CPUs as
* cpu_coregroup_mask().
*/
if (cpumask_subset(cpu_coregroup_mask(cpu),
&cpu_topology[cpu].cluster_sibling))
return get_cpu_mask(cpu);
return &cpu_topology[cpu].cluster_sibling;
}
......@@ -698,7 +738,7 @@ void update_siblings_masks(unsigned int cpuid)
for_each_online_cpu(cpu) {
cpu_topo = &cpu_topology[cpu];
if (cpu_topo->llc_id != -1 && cpuid_topo->llc_id == cpu_topo->llc_id) {
if (last_level_cache_is_shared(cpu, cpuid)) {
cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
}
......@@ -706,15 +746,17 @@ void update_siblings_masks(unsigned int cpuid)
if (cpuid_topo->package_id != cpu_topo->package_id)
continue;
if (cpuid_topo->cluster_id == cpu_topo->cluster_id &&
cpuid_topo->cluster_id != -1) {
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
continue;
if (cpuid_topo->cluster_id >= 0) {
cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling);
cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling);
}
cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
if (cpuid_topo->core_id != cpu_topo->core_id)
continue;
......@@ -750,7 +792,6 @@ void __init reset_cpu_topology(void)
cpu_topo->core_id = -1;
cpu_topo->cluster_id = -1;
cpu_topo->package_id = -1;
cpu_topo->llc_id = -1;
clear_cpu_topology(cpu);
}
......@@ -780,15 +821,28 @@ __weak int __init parse_acpi_topology(void)
#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
void __init init_cpu_topology(void)
{
int ret, cpu;
reset_cpu_topology();
ret = parse_acpi_topology();
if (!ret)
ret = of_have_populated_dt() && parse_dt_topology();
if (ret) {
/*
* Discard anything that was parsed if we hit an error so we
* don't use partial information.
*/
if (parse_acpi_topology())
reset_cpu_topology();
else if (of_have_populated_dt() && parse_dt_topology())
reset_cpu_topology();
return;
}
for_each_possible_cpu(cpu) {
ret = detect_cache_attributes(cpu);
if (ret) {
pr_info("Early cacheinfo failed, ret = %d\n", ret);
break;
}
}
}
#endif
......@@ -14,7 +14,7 @@
#include <linux/cpu.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smp.h>
......@@ -25,19 +25,60 @@ static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
#define ci_cacheinfo(cpu) (&per_cpu(ci_cpu_cacheinfo, cpu))
#define cache_leaves(cpu) (ci_cacheinfo(cpu)->num_leaves)
#define per_cpu_cacheinfo(cpu) (ci_cacheinfo(cpu)->info_list)
#define per_cpu_cacheinfo_idx(cpu, idx) \
(per_cpu_cacheinfo(cpu) + (idx))
struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
{
return ci_cacheinfo(cpu);
}
#ifdef CONFIG_OF
static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
struct cacheinfo *sib_leaf)
{
/*
* For non DT/ACPI systems, assume unique level 1 caches,
* system-wide shared caches for all other levels. This will be used
* only if arch specific code has not populated shared_cpu_map
*/
if (!(IS_ENABLED(CONFIG_OF) || IS_ENABLED(CONFIG_ACPI)))
return !(this_leaf->level == 1);
if ((sib_leaf->attributes & CACHE_ID) &&
(this_leaf->attributes & CACHE_ID))
return sib_leaf->id == this_leaf->id;
return sib_leaf->fw_token == this_leaf->fw_token;
}
bool last_level_cache_is_valid(unsigned int cpu)
{
struct cacheinfo *llc;
if (!cache_leaves(cpu))
return false;
llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
return (llc->attributes & CACHE_ID) || !!llc->fw_token;
}
bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y)
{
struct cacheinfo *llc_x, *llc_y;
if (!last_level_cache_is_valid(cpu_x) ||
!last_level_cache_is_valid(cpu_y))
return false;
llc_x = per_cpu_cacheinfo_idx(cpu_x, cache_leaves(cpu_x) - 1);
llc_y = per_cpu_cacheinfo_idx(cpu_y, cache_leaves(cpu_y) - 1);
return cache_leaves_are_shared(llc_x, llc_y);
}
#ifdef CONFIG_OF
/* OF properties to query for a given cache type */
struct cache_type_info {
const char *size_prop;
......@@ -157,27 +198,16 @@ static int cache_setup_of_node(unsigned int cpu)
{
struct device_node *np;
struct cacheinfo *this_leaf;
struct device *cpu_dev = get_cpu_device(cpu);
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
unsigned int index = 0;
/* skip if fw_token is already populated */
if (this_cpu_ci->info_list->fw_token) {
return 0;
}
if (!cpu_dev) {
pr_err("No cpu device for CPU %d\n", cpu);
return -ENODEV;
}
np = cpu_dev->of_node;
np = of_cpu_device_node_get(cpu);
if (!np) {
pr_err("Failed to find cpu%d device node\n", cpu);
return -ENOENT;
}
while (index < cache_leaves(cpu)) {
this_leaf = this_cpu_ci->info_list + index;
this_leaf = per_cpu_cacheinfo_idx(cpu, index);
if (this_leaf->level != 1)
np = of_find_next_cache_node(np);
else
......@@ -196,16 +226,6 @@ static int cache_setup_of_node(unsigned int cpu)
}
#else
static inline int cache_setup_of_node(unsigned int cpu) { return 0; }
static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
struct cacheinfo *sib_leaf)
{
/*
* For non-DT/ACPI systems, assume unique level 1 caches, system-wide
* shared caches for all other levels. This will be used only if
* arch specific code has not populated shared_cpu_map
*/
return !(this_leaf->level == 1);
}
#endif
int __weak cache_setup_acpi(unsigned int cpu)
......@@ -215,6 +235,18 @@ int __weak cache_setup_acpi(unsigned int cpu)
unsigned int coherency_max_size;
static int cache_setup_properties(unsigned int cpu)
{
int ret = 0;
if (of_have_populated_dt())
ret = cache_setup_of_node(cpu);
else if (!acpi_disabled)
ret = cache_setup_acpi(cpu);
return ret;
}
static int cache_shared_cpu_map_setup(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
......@@ -225,21 +257,21 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (this_cpu_ci->cpu_map_populated)
return 0;
if (of_have_populated_dt())
ret = cache_setup_of_node(cpu);
else if (!acpi_disabled)
ret = cache_setup_acpi(cpu);
/*
* skip setting up cache properties if LLC is valid, just need
* to update the shared cpu_map if the cache attributes were
* populated early before all the cpus are brought online
*/
if (!last_level_cache_is_valid(cpu)) {
ret = cache_setup_properties(cpu);
if (ret)
return ret;
}
for (index = 0; index < cache_leaves(cpu); index++) {
unsigned int i;
this_leaf = this_cpu_ci->info_list + index;
/* skip if shared_cpu_map is already populated */
if (!cpumask_empty(&this_leaf->shared_cpu_map))
continue;
this_leaf = per_cpu_cacheinfo_idx(cpu, index);
cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
for_each_online_cpu(i) {
......@@ -247,7 +279,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
if (i == cpu || !sib_cpu_ci->info_list)
continue;/* skip if itself or no cacheinfo */
sib_leaf = sib_cpu_ci->info_list + index;
sib_leaf = per_cpu_cacheinfo_idx(i, index);
if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
......@@ -263,23 +296,19 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
static void cache_shared_cpu_map_remove(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf, *sib_leaf;
unsigned int sibling, index;
for (index = 0; index < cache_leaves(cpu); index++) {
this_leaf = this_cpu_ci->info_list + index;
this_leaf = per_cpu_cacheinfo_idx(cpu, index);
for_each_cpu(sibling, &this_leaf->shared_cpu_map) {
struct cpu_cacheinfo *sib_cpu_ci;
if (sibling == cpu) /* skip itself */
continue;
struct cpu_cacheinfo *sib_cpu_ci =
get_cpu_cacheinfo(sibling);
sib_cpu_ci = get_cpu_cacheinfo(sibling);
if (!sib_cpu_ci->info_list)
continue;
if (sibling == cpu || !sib_cpu_ci->info_list)
continue;/* skip if itself or no cacheinfo */
sib_leaf = sib_cpu_ci->info_list + index;
sib_leaf = per_cpu_cacheinfo_idx(sibling, index);
cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
}
......@@ -310,17 +339,28 @@ int __weak populate_cache_leaves(unsigned int cpu)
return -ENOENT;
}
static int detect_cache_attributes(unsigned int cpu)
int detect_cache_attributes(unsigned int cpu)
{
int ret;
/* Since early detection of the cacheinfo is allowed via this
* function and this also gets called as CPU hotplug callbacks via
* cacheinfo_cpu_online, the initialisation can be skipped and only
* CPU maps can be updated as the CPU online status would be update
* if called via cacheinfo_cpu_online path.
*/
if (per_cpu_cacheinfo(cpu))
goto update_cpu_map;
if (init_cache_level(cpu) || !cache_leaves(cpu))
return -ENOENT;
per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
sizeof(struct cacheinfo), GFP_KERNEL);
if (per_cpu_cacheinfo(cpu) == NULL)
if (per_cpu_cacheinfo(cpu) == NULL) {
cache_leaves(cpu) = 0;
return -ENOMEM;
}
/*
* populate_cache_leaves() may completely setup the cache leaves and
......@@ -329,6 +369,8 @@ static int detect_cache_attributes(unsigned int cpu)
ret = populate_cache_leaves(cpu);
if (ret)
goto free_ci;
update_cpu_map:
/*
* For systems using DT for cache hierarchy, fw_token
* and shared_cpu_map will be set up here only if they are
......@@ -614,7 +656,6 @@ static int cache_add_dev(unsigned int cpu)
int rc;
struct device *ci_dev, *parent;
struct cacheinfo *this_leaf;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
const struct attribute_group **cache_groups;
rc = cpu_cache_sysfs_init(cpu);
......@@ -623,7 +664,7 @@ static int cache_add_dev(unsigned int cpu)
parent = per_cpu_cache_dev(cpu);
for (i = 0; i < cache_leaves(cpu); i++) {
this_leaf = this_cpu_ci->info_list + i;
this_leaf = per_cpu_cacheinfo_idx(cpu, i);
if (this_leaf->disable_sysfs)
continue;
if (this_leaf->type == CACHE_TYPE_NOCACHE)
......
......@@ -1429,7 +1429,6 @@ int find_acpi_cpu_topology(unsigned int cpu, int level);
int find_acpi_cpu_topology_cluster(unsigned int cpu);
int find_acpi_cpu_topology_package(unsigned int cpu);
int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
#else
static inline int acpi_pptt_cpu_is_thread(unsigned int cpu)
{
......@@ -1451,10 +1450,6 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
{
return -EINVAL;
}
static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
{
return -EINVAL;
}
#endif
#ifdef CONFIG_ACPI_PCC
......
......@@ -68,7 +68,6 @@ struct cpu_topology {
int core_id;
int cluster_id;
int package_id;
int llc_id;
cpumask_t thread_sibling;
cpumask_t core_sibling;
cpumask_t cluster_sibling;
......
......@@ -82,6 +82,9 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
int init_cache_level(unsigned int cpu);
int populate_cache_leaves(unsigned int cpu);
int cache_setup_acpi(unsigned int cpu);
bool last_level_cache_is_valid(unsigned int cpu);
bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y);
int detect_cache_attributes(unsigned int cpu);
#ifndef CONFIG_ACPI_PPTT
/*
* acpi_find_last_cache_level is only called on ACPI enabled
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment