ktstr/
topology.rs

1//! CPU topology abstraction.
2//!
3//! [`TestTopology`] reads sysfs to discover CPUs, LLCs, and NUMA nodes.
4//! Provides cpuset generation methods used by
5//! [`CpusetSpec`](crate::scenario::ops::CpusetSpec).
6//!
7//! See the [Scenarios](https://ktstr.dev/guide/concepts/scenarios.html)
8//! chapter for how topology drives cpuset partitioning.
9
10use anyhow::{Context, Result, bail};
11use std::collections::{BTreeMap, BTreeSet};
12use std::fs;
13use std::path::Path;
14
/// A single last-level-cache (LLC) domain: the CPUs sharing it, the
/// NUMA node it is attributed to, its size when known, and the SMT
/// sibling groups inside it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LlcInfo {
    /// CPU IDs that share this LLC.
    cpus: Vec<usize>,
    /// NUMA node this LLC is attributed to.
    numa_node: usize,
    /// Cache size in KiB; `None` when no size was available.
    cache_size_kib: Option<u64>,
    /// core_id -> sorted list of CPU IDs (SMT siblings).
    cores: BTreeMap<usize, Vec<usize>>,
}

impl LlcInfo {
    /// CPU IDs belonging to this LLC domain, as a borrowed slice.
    pub fn cpus(&self) -> &[usize] {
        self.cpus.as_slice()
    }

    /// ID of the NUMA node this LLC is attributed to.
    pub fn numa_node(&self) -> usize {
        self.numa_node
    }

    /// Size of the LLC in KiB, or `None` when no size was recorded.
    pub fn cache_size_kib(&self) -> Option<u64> {
        self.cache_size_kib
    }

    /// Sibling map keyed by core ID; each value lists the CPU IDs
    /// (SMT siblings) that belong to that core.
    pub fn cores(&self) -> &BTreeMap<usize, Vec<usize>> {
        &self.cores
    }

    /// Physical core count for this LLC.
    ///
    /// When the sibling map is empty (no core grouping recorded) the
    /// CPU count is returned instead.
    pub fn num_cores(&self) -> usize {
        match self.cores.len() {
            0 => self.cpus.len(),
            core_count => core_count,
        }
    }
}
53
/// Memory totals for one NUMA node, both expressed in KiB.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct NodeMemInfo {
    /// Total memory in KiB.
    pub total_kib: u64,
    /// Free memory in KiB.
    pub free_kib: u64,
}

impl NodeMemInfo {
    /// Memory in use, in KiB: `total_kib - free_kib`, clamped at zero
    /// when `free_kib` exceeds `total_kib`.
    pub fn used_kib(&self) -> u64 {
        let Self { total_kib, free_kib } = *self;
        total_kib.checked_sub(free_kib).unwrap_or(0)
    }
}
69
70/// CPU topology abstraction for test configuration.
71///
72/// Provides LLC-aware CPU partitioning, cpuset generation, NUMA
73/// distance queries, and per-node memory introspection. Built from
74/// sysfs ([`from_system`](Self::from_system)), a VM spec
75/// ([`from_vm_topology`](Self::from_vm_topology) — takes a
76/// [`crate::vmm::topology::Topology`] built via
77/// `Topology::new(numa, llcs, cores, threads)`), or synthetic
78/// parameters (`synthetic`, test-only).
79#[derive(Debug, Clone, PartialEq, Eq)]
80pub struct TestTopology {
81    cpus: Vec<usize>,
82    llcs: Vec<LlcInfo>,
83    numa_nodes: BTreeSet<usize>,
84    /// Flat row-major NxN distance matrix. Dimension equals
85    /// `numa_nodes.len()`, rows ordered by ascending node ID.
86    /// Default 10/20 when sysfs distances are unavailable.
87    numa_distances: Vec<u8>,
88    /// Per-node memory info, keyed by NUMA node ID.
89    node_mem: BTreeMap<usize, NodeMemInfo>,
90    /// NUMA nodes that have memory but no CPUs (CXL memory-only).
91    memory_only_nodes: BTreeSet<usize>,
92}
93
94/// Parse a CPU list string (e.g., "0-3,5,7-9") into a sorted vec of CPU IDs.
95///
96/// Returns an error if any element is not a valid integer or range.
97/// For lenient parsing that skips invalid entries, use
98/// [`parse_cpu_list_lenient`].
99///
100/// # Why this is NOT [`crate::cpu_util::parse_cpu_list`]
101///
102/// `cpu_util` carries an `Option<Vec<u32>>`-returning variant
103/// with a range-expansion cap and dedup, intended for the
104/// per-task `/proc/<tid>/status:Cpus_allowed_list` capture path
105/// where the input is untrusted and a hostile range like
106/// `0-4294967295` would OOM the process. This parser ingests
107/// operator-supplied VM topology config (`anyhow::Error`
108/// propagation, `usize` interop with sysfs APIs, no DoS cap
109/// needed). See the module doc on [`crate::cpu_util`] for the
110/// full split rationale.
111pub fn parse_cpu_list(s: &str) -> Result<Vec<usize>> {
112    let mut cpus = Vec::new();
113    for part in s.trim().split(',') {
114        let part = part.trim();
115        if part.is_empty() {
116            continue;
117        }
118        if let Some((lo, hi)) = part.split_once('-') {
119            let lo: usize = lo.parse()?;
120            let hi: usize = hi.parse()?;
121            cpus.extend(lo..=hi);
122        } else {
123            cpus.push(part.parse()?);
124        }
125    }
126    cpus.sort();
127    Ok(cpus)
128}
129
/// Parse a CPU list string, silently skipping invalid entries.
///
/// This never fails: non-numeric elements, ranges with a non-numeric
/// endpoint, and reversed ranges (`lo > hi`) contribute nothing.
/// Empty elements and surrounding whitespace are ignored.
///
/// Returns a sorted ascending `Vec<usize>` with duplicates removed,
/// so overlapping elements such as `"0-3,2"` list each CPU once and
/// callers that do `iter().min()` / binary search / `len()` /
/// fold-into-BTreeSet all see a well-formed CPU-ID list.
pub fn parse_cpu_list_lenient(s: &str) -> Vec<usize> {
    let mut cpus: Vec<usize> = Vec::new();
    for part in s.split(',').map(str::trim).filter(|p| !p.is_empty()) {
        match part.split_once('-') {
            Some((lo, hi)) => {
                // Both endpoints must parse; a reversed range expands
                // to an empty iterator and is therefore dropped.
                if let (Ok(lo), Ok(hi)) = (lo.parse::<usize>(), hi.parse::<usize>()) {
                    cpus.extend(lo..=hi);
                }
            }
            None => cpus.extend(part.parse::<usize>().ok()),
        }
    }
    cpus.sort_unstable();
    cpus.dedup();
    cpus
}
155
156/// Find the sysfs index of the highest-level (last-level) cache for a CPU.
157///
158/// Iterates `/sys/devices/system/cpu/cpuN/cache/indexM/level` entries and
159/// returns the index with the largest level value.
160fn find_llc_index(cpu: usize) -> Result<usize> {
161    let cache_dir = format!("/sys/devices/system/cpu/cpu{cpu}/cache");
162    let mut max_level = 0usize;
163    let mut llc_index = 0usize;
164    for entry in fs::read_dir(&cache_dir).context("read cache dir")? {
165        let entry = entry?;
166        let name = entry.file_name();
167        let name = name.to_string_lossy();
168        if !name.starts_with("index") {
169            continue;
170        }
171        let level_path = entry.path().join("level");
172        if let Ok(level_str) = fs::read_to_string(&level_path)
173            && let Ok(level) = level_str.trim().parse::<usize>()
174            && level > max_level
175        {
176            let idx_str = name
177                .strip_prefix("index")
178                .expect("filtered by starts_with(\"index\") above");
179            match idx_str.parse::<usize>() {
180                Ok(idx) => {
181                    max_level = level;
182                    llc_index = idx;
183                }
184                Err(e) => {
185                    tracing::warn!(
186                        cache_dir = %cache_dir,
187                        entry = %name,
188                        err = %e,
189                        "malformed sysfs cache index name; skipping entry",
190                    );
191                }
192            }
193        }
194    }
195    Ok(llc_index)
196}
197
198/// Read the LLC cache ID for a CPU from sysfs.
199///
200/// Prefers the `id` file when available (x86_64 always has it).
201/// Falls back to the lowest CPU in the LLC's `shared_cpu_list`,
202/// which is unique per LLC group. The previous fallback used the
203/// cache index number, which is the same for every CPU and
204/// collapsed all LLCs into one group.
205fn read_llc_id(cpu: usize) -> Result<usize> {
206    let llc_index = find_llc_index(cpu)?;
207    let id_path = format!("/sys/devices/system/cpu/cpu{cpu}/cache/index{llc_index}/id");
208    if let Ok(id_str) = fs::read_to_string(&id_path)
209        && let Ok(id) = id_str.trim().parse::<usize>()
210    {
211        return Ok(id);
212    }
213    // Fallback: use the lowest CPU in shared_cpu_list as a stable
214    // group identifier. Each LLC group has a unique minimum CPU.
215    let shared_path =
216        format!("/sys/devices/system/cpu/cpu{cpu}/cache/index{llc_index}/shared_cpu_list");
217    if let Ok(shared_str) = fs::read_to_string(&shared_path) {
218        let siblings = parse_cpu_list_lenient(shared_str.trim());
219        if let Some(&min_cpu) = siblings.iter().min() {
220            return Ok(min_cpu);
221        }
222    }
223    Ok(0)
224}
225
226/// Read the NUMA node ID for a CPU from sysfs.
227fn read_numa_node(cpu: usize) -> Result<usize> {
228    let node_dir = format!("/sys/devices/system/cpu/cpu{cpu}");
229    for entry in fs::read_dir(&node_dir)? {
230        let entry = entry?;
231        let name = entry.file_name();
232        let name = name.to_string_lossy();
233        if name.starts_with("node")
234            && let Some(id_str) = name.strip_prefix("node")
235            && let Ok(id) = id_str.parse::<usize>()
236        {
237            return Ok(id);
238        }
239    }
240    Ok(0)
241}
242
243/// Read the LLC cache size in KiB for a CPU from sysfs.
244fn read_llc_cache_size(cpu: usize) -> Option<u64> {
245    let llc_index = find_llc_index(cpu).ok()?;
246    let size_path = format!("/sys/devices/system/cpu/cpu{cpu}/cache/index{llc_index}/size");
247    let size_str = fs::read_to_string(&size_path).ok()?;
248    parse_cache_size(size_str.trim())
249}
250
/// Parse a cache size string like "32768K" or "32M" into KiB.
///
/// The sysfs `cache/index*/size` files use binary multiples (K=KiB,
/// M=MiB) per Linux convention for cache sizes. Accepted forms:
///
/// - `<n>K` — `n` KiB, returned as-is.
/// - `<n>M` — `n` MiB, returned as `n * 1024` KiB.
/// - `<n>` — bare number, interpreted as bytes and converted with
///   ceiling division: a non-zero byte count below 1 KiB rounds up to
///   1 KiB instead of collapsing to 0 KiB (which a consumer would
///   read as "no cache"). Zero bytes still maps to 0 KiB.
///
/// Returns `None` when the numeric part does not parse as `u64`, or
/// when an `M` value is so large that its KiB equivalent does not fit
/// in `u64` (the multiplication is checked rather than allowed to
/// panic or wrap).
fn parse_cache_size(s: &str) -> Option<u64> {
    let s = s.trim();
    if let Some(kib) = s.strip_suffix('K') {
        kib.parse().ok()
    } else if let Some(mib) = s.strip_suffix('M') {
        // MiB -> KiB; overflow means the input is not a usable size.
        mib.parse::<u64>().ok()?.checked_mul(1024)
    } else {
        // Bare number: assume bytes, ceil-convert to KiB so sub-KiB
        // values don't collapse to 0.
        s.parse::<u64>().ok().map(|bytes| bytes.div_ceil(1024))
    }
}
272
/// Core ID of `cpu` as reported by
/// `/sys/devices/system/cpu/cpuN/topology/core_id`.
///
/// Returns `None` when the file cannot be read or its trimmed
/// contents do not parse as `usize`.
fn read_core_id(cpu: usize) -> Option<usize> {
    let path = format!("/sys/devices/system/cpu/cpu{cpu}/topology/core_id");
    let raw = fs::read_to_string(path).ok()?;
    raw.trim().parse().ok()
}
280
281/// Read per-node memory info from `/sys/devices/system/node/nodeN/meminfo`.
282///
283/// Parses `MemTotal` and `MemFree` lines; returns `None` if the file
284/// is missing or unparseable.
285fn read_node_meminfo(node: usize) -> Option<NodeMemInfo> {
286    let path = format!("/sys/devices/system/node/node{node}/meminfo");
287    let content = fs::read_to_string(path).ok()?;
288    let mut total_kib = None;
289    let mut free_kib = None;
290    for line in content.lines() {
291        if let Some(rest) = line.strip_suffix("kB").map(str::trim_end) {
292            if rest.contains("MemTotal") {
293                total_kib = rest
294                    .rsplit_once(char::is_whitespace)
295                    .and_then(|(_, v)| v.parse().ok());
296            } else if rest.contains("MemFree") {
297                free_kib = rest
298                    .rsplit_once(char::is_whitespace)
299                    .and_then(|(_, v)| v.parse().ok());
300            }
301        }
302    }
303    Some(NodeMemInfo {
304        total_kib: total_kib?,
305        free_kib: free_kib?,
306    })
307}
308
/// Load one row of NUMA distances from
/// `/sys/devices/system/node/nodeN/distance`.
///
/// The file's whitespace-separated tokens are parsed as `u8` values.
/// Returns `None` when the file is missing, when any token fails to
/// parse, or when the file holds no tokens at all — an empty row
/// must not be mistaken for a valid distance row of a zero-node
/// topology.
fn read_node_distances(node: usize) -> Option<Vec<u8>> {
    let path = format!("/sys/devices/system/node/node{node}/distance");
    let content = fs::read_to_string(path).ok()?;
    let mut row: Vec<u8> = Vec::new();
    for token in content.split_whitespace() {
        row.push(token.parse::<u8>().ok()?);
    }
    if row.is_empty() { None } else { Some(row) }
}
326
/// Report whether NUMA node `node` is memory-only (e.g. CXL): its
/// `/sys/devices/system/node/nodeN/cpulist` is readable and empty
/// after trimming.
///
/// An unreadable `cpulist` yields `false`.
fn is_node_memory_only(node: usize) -> bool {
    let path = format!("/sys/devices/system/node/node{node}/cpulist");
    fs::read_to_string(path)
        .map(|list| list.trim().is_empty())
        .unwrap_or(false)
}
336
337/// Build a synthetic single-LLC [`LlcInfo`] covering every online
338/// CPU. Used by [`TestTopology::from_system`] when sysfs reports
339/// online CPUs but no cache topology — the fallback path keeps
340/// downstream LLC-aware accessors non-empty.
341///
342/// Each CPU becomes its own core (no SMT sibling data available),
343/// so `num_cores()` equals the CPU count and the per-core sibling
344/// map is non-empty.
345fn synthesize_fallback_llc(cpus: &[usize], numa_node: usize) -> LlcInfo {
346    // Core map: with sysfs unavailable we can't reconstruct SMT
347    // sibling groupings, so assume no SMT — treat each CPU as its
348    // own core. Keeps `num_cores()` equal to the physical CPU count
349    // and `cores()` non-empty so consumers iterating sibling groups
350    // always see at least one entry per CPU.
351    let cores: BTreeMap<usize, Vec<usize>> = cpus.iter().map(|&c| (c, vec![c])).collect();
352    LlcInfo {
353        cpus: cpus.to_vec(),
354        numa_node,
355        cache_size_kib: None,
356        cores,
357    }
358}
359
360/// Intersect a sysfs-online CPU list with an optional
361/// `sched_getaffinity` result. Pure-fn carve-out of
362/// `TestTopology::from_system`'s cross-check so the 3 paths
363/// (narrowed + warn, disjoint + bail, no-affinity-fallback) are
364/// unit-testable without syscall mocking.
365///
366/// - `online_sysfs` — sorted, non-empty CPU list from
367///   `/sys/devices/system/cpu/online`.
368/// - `allowed` — `Some(set)` when [`crate::cpu_util::read_affinity`]
369///   succeeded; `None` on EPERM / ESRCH / syscall failure OR when
370///   the host's CPU count exceeded `AFFINITY_MAX_BITS = 262144`
371///   (cpu_util's ceiling). All None cases fall back to the full
372///   sysfs set (status quo) so a permission failure doesn't drop
373///   real CPUs.
374///
375/// Returns:
376/// - `Ok(intersect)` when the cross-check produced at least one
377///   usable CPU. Emits a `tracing::warn!` when the intersection
378///   was a strict subset (operator running inside a
379///   cgroup-cpuset-namespaced container or under a taskset-
380///   narrowed parent).
381/// - `Err(_)` when `allowed` is `Some` and disjoint from
382///   `online_sysfs` — no CPU is both kernel-online AND
383///   affinity-allowed, so no topology can be built. The
384///   diagnostic includes both the sysfs set and the disjoint
385///   allowed set so the operator can correlate against their
386///   container / taskset config.
387fn intersect_online_with_affinity(
388    online_sysfs: &[usize],
389    allowed: Option<BTreeSet<usize>>,
390) -> Result<Vec<usize>> {
391    let Some(allowed_set) = allowed else {
392        return Ok(online_sysfs.to_vec());
393    };
394    let intersect: Vec<usize> = online_sysfs
395        .iter()
396        .copied()
397        .filter(|c| allowed_set.contains(c))
398        .collect();
399    if intersect.is_empty() {
400        bail!(
401            "sched_getaffinity(0) cpuset is disjoint from \
402             /sys/devices/system/cpu/online (sysfs={online_sysfs:?}, \
403             allowed={allowed_set:?}); no usable CPUs to build a topology \
404             against — operator likely running inside a cpuset cgroup \
405             whose `cpuset.cpus` names CPUs not on this host"
406        );
407    }
408    if intersect.len() < online_sysfs.len() {
409        let dropped: Vec<usize> = online_sysfs
410            .iter()
411            .copied()
412            .filter(|c| !allowed_set.contains(c))
413            .collect();
414        tracing::warn!(
415            sysfs_online_count = online_sysfs.len(),
416            allowed_count = intersect.len(),
417            dropped_cpus = ?dropped,
418            "TestTopology::from_system: sched_getaffinity(0) is \
419             narrower than /sys/devices/system/cpu/online — running \
420             inside a cgroup-cpuset-namespaced container or with a \
421             taskset-restricted parent. Dropped CPUs would EPERM at \
422             sched_setaffinity time; topology now reflects only the \
423             actually-usable CPUs."
424        );
425    }
426    Ok(intersect)
427}
428
429/// First pass over the usable online CPUs: build the CPU set, the
430/// per-LLC [`LlcInfo`] map, and the NUMA node set. Carve-out of
431/// [`TestTopology::from_system`]'s per-CPU scan so the loop stays
432/// under the source-function size guard.
433///
434/// `online_cpus` — the sysfs-online set already intersected with the
435/// calling task's `sched_getaffinity(0)` cpuset (sorted ascending).
436///
437/// Returns `(cpus, llc_map, numa_nodes)`:
438/// - `cpus` — CPUs whose `/sys/devices/system/cpu/cpuN/` directory
439///   exists; CPUs listed in `online` but missing the per-CPU dir are
440///   warned and skipped.
441/// - `llc_map` — `llc_id -> LlcInfo`, each `info.cpus` and per-core
442///   sibling list sorted ascending.
443/// - `numa_nodes` — node IDs seen on the scanned CPUs (memory-only
444///   nodes are added later by the caller).
445fn scan_online_cpus(
446    online_cpus: &[usize],
447) -> (BTreeSet<usize>, BTreeMap<usize, LlcInfo>, BTreeSet<usize>) {
448    let mut cpus = BTreeSet::new();
449    let mut llc_map: BTreeMap<usize, LlcInfo> = BTreeMap::new();
450    let mut numa_nodes = BTreeSet::new();
451
452    // First pass: collect cache size per LLC (read once per LLC, not per CPU).
453    let mut llc_cache_sizes: BTreeMap<usize, Option<u64>> = BTreeMap::new();
454
455    for &cpu_id in online_cpus {
456        let cpu_path = format!("/sys/devices/system/cpu/cpu{cpu_id}");
457        if !Path::new(&cpu_path).exists() {
458            tracing::warn!(
459                cpu = cpu_id,
460                path = %cpu_path,
461                "/sys/devices/system/cpu/online listed this CPU but \
462                 /sys/devices/system/cpu/cpuN/ is absent; skipping — \
463                 the CPU will not appear in TestTopology.all_cpus()"
464            );
465            continue;
466        }
467        cpus.insert(cpu_id);
468        let llc_id = match read_llc_id(cpu_id) {
469            Ok(id) => id,
470            Err(e) => {
471                tracing::warn!(
472                    cpu = cpu_id,
473                    error = %e,
474                    "LLC id unreadable from sysfs; bucketing CPU into fallback LLC 0 — \
475                     LlcAligned affinity will merge this CPU with any other unreadable CPUs"
476                );
477                0
478            }
479        };
480        let node_id = match read_numa_node(cpu_id) {
481            Ok(id) => id,
482            Err(e) => {
483                tracing::warn!(
484                    cpu = cpu_id,
485                    error = %e,
486                    "NUMA node unreadable from sysfs; bucketing CPU into fallback node 0 — \
487                     NUMA-aware placement may be incorrect for this CPU"
488                );
489                0
490            }
491        };
492        // core_id unreadable = synthesize a singleton core using the
493        // CPU id. Without this, CPUs with missing core_id are added
494        // to `info.cpus` but excluded from `info.cores`, so per-core
495        // iterators silently drop them. Using cpu_id as the core id
496        // guarantees uniqueness; a degenerate topology (no SMT
497        // sibling info) is better than a CPU invisible to core-aware
498        // consumers.
499        let core_id = read_core_id(cpu_id).unwrap_or_else(|| {
500            tracing::warn!(
501                cpu = cpu_id,
502                "core_id unreadable from sysfs; synthesizing singleton core entry \
503                 using cpu_id as the core id — SMT sibling grouping unavailable for this CPU"
504            );
505            cpu_id
506        });
507        numa_nodes.insert(node_id);
508        llc_cache_sizes
509            .entry(llc_id)
510            .or_insert_with(|| read_llc_cache_size(cpu_id));
511        llc_map
512            .entry(llc_id)
513            .and_modify(|info| {
514                info.cpus.push(cpu_id);
515                info.cores.entry(core_id).or_default().push(cpu_id);
516            })
517            .or_insert_with(|| {
518                let mut cores = BTreeMap::new();
519                cores.insert(core_id, vec![cpu_id]);
520                LlcInfo {
521                    cpus: vec![cpu_id],
522                    numa_node: node_id,
523                    cache_size_kib: llc_cache_sizes.get(&llc_id).copied().flatten(),
524                    cores,
525                }
526            });
527    }
528    for info in llc_map.values_mut() {
529        info.cpus.sort();
530        for siblings in info.cores.values_mut() {
531            siblings.sort();
532        }
533    }
534    (cpus, llc_map, numa_nodes)
535}
536
537/// Build the flat row-major NxN NUMA distance matrix from sysfs.
538/// Carve-out of [`TestTopology::from_system`]'s distance block so
539/// the source function stays under the size guard.
540///
541/// `node_ids` — sorted NUMA node IDs; `n = node_ids.len()` is the
542/// matrix dimension. Reads `/sys/devices/system/node/nodeN/distance`
543/// for each node in order. Falls back to a uniform 10 (intra-node) /
544/// 20 (inter-node) matrix and warns when any row is missing,
545/// unparseable, the wrong length, or the assembled matrix is not
546/// exactly `n * n`.
547fn build_distance_matrix(node_ids: &[usize]) -> Vec<u8> {
548    let n = node_ids.len();
549    let mut matrix = Vec::with_capacity(n * n);
550    let mut fallback_reason: Option<String> = None;
551    for &nid in node_ids {
552        match read_node_distances(nid) {
553            Some(row) if row.len() == n => matrix.extend_from_slice(&row),
554            Some(row) => {
555                fallback_reason = Some(format!(
556                    "node{nid}/distance has {} entries, expected {n}",
557                    row.len()
558                ));
559                break;
560            }
561            None => {
562                fallback_reason = Some(format!("node{nid}/distance missing or unparseable"));
563                break;
564            }
565        }
566    }
567    if fallback_reason.is_some() || matrix.len() != n * n {
568        let reason = fallback_reason
569            .unwrap_or_else(|| format!("distance matrix length {} != {}", matrix.len(), n * n));
570        tracing::warn!(
571            reason = %reason,
572            numa_nodes = n,
573            "NUMA distance matrix unavailable from /sys/devices/system/node/*/distance; \
574             falling back to 10 (intra-node) / 20 (inter-node) — \
575             NUMA-aware placement decisions will use uniform distances"
576        );
577        matrix.clear();
578        matrix.resize(n * n, 0);
579        for i in 0..n {
580            for j in 0..n {
581                matrix[i * n + j] = if i == j { 10 } else { 20 };
582            }
583        }
584    }
585    matrix
586}
587
588impl TestTopology {
589    /// Discover topology from sysfs (reads `/sys/devices/system/cpu/`).
590    ///
591    /// Intersects sysfs's online-CPU set with the calling task's
592    /// `sched_getaffinity(0)` cpuset so the resulting `TestTopology`
593    /// only enumerates CPUs the process can actually run on. In a
594    /// cgroup-cpuset-namespaced container `/sys/devices/system/cpu`
595    /// reports the full host CPU set (not cgroup-filtered per the
596    /// kernel's `drivers/base/cpu.c` registration model), but
597    /// `sched_setaffinity` to CPUs outside the cgroup-allowed set
598    /// later fails with `EPERM`. Without this intersection an operator
599    /// running ktstr inside such a container sees confusing
600    /// affinity-EPERM errors far from the topology read; with it the
601    /// restriction surfaces at construction with a warn that names
602    /// the dropped CPUs.
603    pub fn from_system() -> Result<Self> {
604        let online_str =
605            fs::read_to_string("/sys/devices/system/cpu/online").context("read online cpus")?;
606        let online_cpus_sysfs = parse_cpu_list(&online_str)?;
607        if online_cpus_sysfs.is_empty() {
608            bail!("no online CPUs found");
609        }
610
611        // Cross-check against sched_getaffinity. read_affinity returns
612        // None on EPERM / ESRCH / syscall failure OR when the host's
613        // CPU count exceeds cpu_util's AFFINITY_MAX_BITS = 262144
614        // ceiling — both cases fall back to the full sysfs set
615        // (status quo). Returns u32 ids; widen to usize for the
616        // intersection.
617        let allowed: Option<BTreeSet<usize>> =
618            crate::cpu_util::read_affinity(0).map(|v| v.into_iter().map(|c| c as usize).collect());
619        let online_cpus = intersect_online_with_affinity(&online_cpus_sysfs, allowed)?;
620
621        let (cpus, llc_map, mut numa_nodes) = scan_online_cpus(&online_cpus);
622
623        // Discover additional NUMA nodes from /sys/devices/system/node/
624        // (catches memory-only nodes that have no CPUs).
625        if let Ok(entries) = fs::read_dir("/sys/devices/system/node") {
626            for entry in entries.flatten() {
627                let name = entry.file_name();
628                let name = name.to_string_lossy();
629                if let Some(id_str) = name.strip_prefix("node")
630                    && let Ok(id) = id_str.parse::<usize>()
631                {
632                    numa_nodes.insert(id);
633                }
634            }
635        }
636
637        let node_ids: Vec<usize> = numa_nodes.iter().copied().collect();
638
639        // Read per-node memory info.
640        let mut node_mem = BTreeMap::new();
641        for &nid in &node_ids {
642            if let Some(mi) = read_node_meminfo(nid) {
643                node_mem.insert(nid, mi);
644            }
645        }
646
647        // Identify memory-only nodes.
648        let mut memory_only_nodes = BTreeSet::new();
649        for &nid in &node_ids {
650            if is_node_memory_only(nid) {
651                memory_only_nodes.insert(nid);
652            }
653        }
654
655        // Build distance matrix. Try sysfs first, fall back to 10/20.
656        let numa_distances = build_distance_matrix(&node_ids);
657
658        let llcs: Vec<LlcInfo> = llc_map.into_values().collect();
659        // Construction-time invariant: every TestTopology has at
660        // least one LLC. If sysfs reports online CPUs but no LLC
661        // info (pathological kernel — missing
662        // `/sys/devices/system/cpu/*/cache/` entries, unreadable
663        // `shared_cpu_list`, or a cgroup-restricted view that hides
664        // the per-cpu cache topology), synthesize a single LLC
665        // covering all online CPUs so downstream accessors
666        // (`llc_aligned_cpuset`, `cpus_in_llc`, LlcAligned affinity
667        // resolution) always have something to return.
668        let llcs = if llcs.is_empty() {
669            let fallback_cpus: Vec<usize> = cpus.iter().copied().collect();
670            let fallback_node = *numa_nodes.iter().next().unwrap_or(&0);
671            tracing::warn!(
672                cpu_count = fallback_cpus.len(),
673                fallback_numa_node = fallback_node,
674                "LLC discovery empty from /sys/devices/system/cpu/*/cache/; \
675                 synthesizing a single fallback LLC covering all online CPUs — \
676                 LlcAligned affinity will pin to the entire machine"
677            );
678            vec![synthesize_fallback_llc(&fallback_cpus, fallback_node)]
679        } else {
680            llcs
681        };
682        // NUMA node set must be non-empty too (every online CPU has
683        // a NUMA node, so this is a belt-and-suspenders guard).
684        let numa_nodes = if numa_nodes.is_empty() {
685            tracing::warn!(
686                "NUMA node set empty after sysfs discovery (no nodeN entries and \
687                 no per-CPU node ids); synthesizing a fallback {{0}} — \
688                 NUMA-aware placement will treat the machine as single-node"
689            );
690            let mut s = BTreeSet::new();
691            s.insert(0);
692            s
693        } else {
694            numa_nodes
695        };
696        Ok(Self {
697            cpus: cpus.into_iter().collect(),
698            llcs,
699            numa_nodes,
700            numa_distances,
701            node_mem,
702            memory_only_nodes,
703        })
704    }
705
706    /// Total number of CPUs.
707    pub fn total_cpus(&self) -> usize {
708        self.cpus.len()
709    }
710    /// Number of last-level caches.
711    pub fn num_llcs(&self) -> usize {
712        self.llcs.len()
713    }
714    /// Number of NUMA nodes.
715    pub fn num_numa_nodes(&self) -> usize {
716        self.numa_nodes.len()
717    }
718    /// NUMA node IDs as a `BTreeSet`.
719    pub fn numa_node_ids(&self) -> &BTreeSet<usize> {
720        &self.numa_nodes
721    }
722    /// All LLC domains.
723    ///
724    /// # Ordering
725    ///
726    /// Returned slice is ordered by **LLC id** (ascending), not by
727    /// first-CPU. Both [`from_system`](Self::from_system) and
728    /// [`from_vm_topology`](Self::from_vm_topology) build the LLC
729    /// list by iterating a `BTreeMap<llc_id, LlcInfo>::into_values()`,
730    /// so the result is deterministic and stable across runs. When
731    /// sysfs assigns non-contiguous LLC ids (cache `id` file, or
732    /// `shared_cpu_list.min()` fallback), the slice order can differ
733    /// from CPU order — callers that need CPU-sorted LLCs must
734    /// sort by `llc.cpus()[0]` themselves.
735    pub fn llcs(&self) -> &[LlcInfo] {
736        &self.llcs
737    }
738    /// All CPU IDs, sorted.
739    pub fn all_cpus(&self) -> &[usize] {
740        &self.cpus
741    }
742    /// All CPU IDs as a `BTreeSet`.
743    pub fn all_cpuset(&self) -> BTreeSet<usize> {
744        self.cpus.iter().copied().collect()
745    }
746
747    /// CPUs available for workload placement. When the topology has
748    /// more than 2 CPUs, the last CPU is reserved for the root cgroup
749    /// (cgroup 0); with 2 or fewer CPUs, every CPU is returned.
750    pub fn usable_cpus(&self) -> &[usize] {
751        if self.cpus.len() > 2 {
752            &self.cpus[..self.cpus.len() - 1]
753        } else {
754            &self.cpus
755        }
756    }
757    /// Usable CPUs as a `BTreeSet`.
758    pub fn usable_cpuset(&self) -> BTreeSet<usize> {
759        self.usable_cpus().iter().copied().collect()
760    }
761    /// CPUs belonging to LLC at index `idx`.
762    ///
763    /// Out-of-range indices return an empty slice rather than
764    /// panicking. Construction guarantees at least one LLC (see
765    /// [`TestTopology::from_vm_topology_with_memory`]), so the only
766    /// way to hit the out-of-range branch is passing an index larger
767    /// than [`num_llcs`](Self::num_llcs) — a caller bug that used to
768    /// crash the whole scheduler test run.
769    pub fn cpus_in_llc(&self, idx: usize) -> &[usize] {
770        match self.llcs.get(idx) {
771            Some(llc) => &llc.cpus,
772            None => &[],
773        }
774    }
775    /// CPUs in LLC `idx` as a `BTreeSet`. See [`cpus_in_llc`](Self::cpus_in_llc)
776    /// for the out-of-range behavior (returns an empty set).
777    pub fn llc_aligned_cpuset(&self, idx: usize) -> BTreeSet<usize> {
778        match self.llcs.get(idx) {
779            Some(llc) => llc.cpus.iter().copied().collect(),
780            None => BTreeSet::new(),
781        }
782    }
783    /// CPUs in all LLCs belonging to NUMA node `node` as a `BTreeSet`.
784    pub fn numa_aligned_cpuset(&self, node: usize) -> BTreeSet<usize> {
785        self.llcs
786            .iter()
787            .filter(|llc| llc.numa_node() == node)
788            .flat_map(|llc| llc.cpus())
789            .copied()
790            .collect()
791    }
792
793    /// NUMA nodes covered by the given CPU set.
794    pub fn numa_nodes_for_cpuset(&self, cpus: &BTreeSet<usize>) -> BTreeSet<usize> {
795        self.llcs
796            .iter()
797            .filter(|llc| llc.cpus.iter().any(|c| cpus.contains(c)))
798            .map(|llc| llc.numa_node)
799            .collect()
800    }
801
802    /// Per-node memory info. Returns `None` when the node ID is not
803    /// present or meminfo is unavailable.
804    pub fn node_meminfo(&self, node_id: usize) -> Option<&NodeMemInfo> {
805        self.node_mem.get(&node_id)
806    }
807
808    /// Inter-node NUMA distance. Returns 255 when either node ID is
809    /// not present, matching the kernel's unreachable distance.
810    pub fn numa_distance(&self, from: usize, to: usize) -> u8 {
811        let n = self.numa_nodes.len();
812        let Some(from_idx) = self.numa_nodes.iter().position(|&id| id == from) else {
813            return 255;
814        };
815        let Some(to_idx) = self.numa_nodes.iter().position(|&id| id == to) else {
816            return 255;
817        };
818        self.numa_distances[from_idx * n + to_idx]
819    }
820
821    /// Whether the node is memory-only (has RAM but no CPUs). Typical
822    /// for CXL-attached memory tiers.
823    pub fn is_memory_only(&self, node_id: usize) -> bool {
824        self.memory_only_nodes.contains(&node_id)
825    }
826
827    /// One `BTreeSet` of CPUs per LLC.
828    pub fn split_by_llc(&self) -> Vec<BTreeSet<usize>> {
829        self.llcs
830            .iter()
831            .map(|l| l.cpus.iter().copied().collect())
832            .collect()
833    }
834
835    /// Generate `n` cpusets with `overlap_frac` overlap between adjacent sets.
836    pub fn overlapping_cpusets(&self, n: usize, overlap_frac: f64) -> Vec<BTreeSet<usize>> {
837        let total = self.cpus.len();
838        if n == 0 || total == 0 {
839            return vec![];
840        }
841        let base = total / n;
842        let overlap = ((base as f64) * overlap_frac).ceil() as usize;
843        let stride = if base > overlap { base - overlap } else { 1 };
844        (0..n)
845            .map(|i| {
846                let start = (i * stride) % total;
847                (0..base.max(1))
848                    .map(|j| self.cpus[(start + j) % total])
849                    .collect()
850            })
851            .collect()
852    }
853
/// Render a CPU set as a compact, comma-separated range list, e.g.
/// `"0-3,5,7-9"`.
///
/// Consecutive IDs collapse into `start-end`; isolated IDs are
/// printed on their own. An empty set yields an empty string.
///
/// The set is walked in place, relying on `BTreeSet`'s ascending
/// iteration order, and the output is appended to a single `String`.
pub fn cpuset_string(cpus: &BTreeSet<usize>) -> String {
    /// Append one finished run, comma-separating it from earlier runs.
    fn push_run(out: &mut String, start: usize, end: usize) {
        if !out.is_empty() {
            out.push(',');
        }
        out.push_str(&start.to_string());
        if end != start {
            out.push('-');
            out.push_str(&end.to_string());
        }
    }

    let mut ids = cpus.iter().copied();
    let Some(first) = ids.next() else {
        return String::new();
    };

    let mut out = String::new();
    let (mut start, mut end) = (first, first);
    for cpu in ids {
        if cpu == end + 1 {
            // Still inside the current run; just extend it.
            end = cpu;
        } else {
            push_run(&mut out, start, end);
            start = cpu;
            end = cpu;
        }
    }
    // The last run is never closed inside the loop.
    push_run(&mut out, start, end);
    out
}
882
883    /// Build a [`TestTopology`] from a [`Topology`](crate::vmm::topology::Topology).
884    ///
885    /// Populates LLCs, NUMA nodes, distances, per-node memory info,
886    /// and memory-only node flags from the VM spec. Handles both
887    /// uniform and explicit-node topologies. For uniform topologies,
888    /// pass `total_memory_mib` to populate per-node memory info; when
889    /// `None`, memory info is omitted.
890    ///
891    /// # Signature asymmetry with [`from_system`](Self::from_system)
892    ///
893    /// `from_system` returns `Result` because sysfs I/O is a
894    /// runtime-failable operation (unreadable files, cgroup-restricted
895    /// views, non-Linux hosts). `from_vm_topology` infallibly returns
896    /// `Self` because its input is already validated: every
897    /// [`Topology`](crate::vmm::topology::Topology) reaches this
898    /// function via `Topology::new`, which asserts `llcs > 0`,
899    /// `cores_per_llc > 0`, `threads_per_core > 0`, and
900    /// `numa_nodes > 0` at construction time. The remaining asserts
901    /// inside this function guard against hand-constructed `Topology`
902    /// struct literals that bypass `Topology::new`; they never fire
903    /// for any `Topology` obtained through the normal constructor.
904    ///
905    /// # Panics
906    ///
907    /// Panics if `topo` was constructed via struct literal
908    /// (bypassing `Topology::new`) with `llcs == 0`,
909    /// `cores_per_llc == 0`, `threads_per_core == 0`, or
910    /// `numa_nodes == 0`. Inputs obtained through `Topology::new`
911    /// satisfy the invariants and cannot trigger these asserts.
912    pub fn from_vm_topology(topo: &crate::vmm::topology::Topology) -> Self {
913        Self::from_vm_topology_with_memory(topo, None)
914    }
915
916    /// Build a [`TestTopology`] with optional total memory for uniform topologies.
917    ///
918    /// # Panics
919    ///
920    /// Panics if `topo` was constructed via struct literal
921    /// (bypassing `Topology::new`) with `llcs == 0`,
922    /// `cores_per_llc == 0`, `threads_per_core == 0`, or
923    /// `numa_nodes == 0`. Inputs obtained through `Topology::new`
924    /// satisfy the invariants and cannot trigger these asserts.
925    pub fn from_vm_topology_with_memory(
926        topo: &crate::vmm::topology::Topology,
927        total_memory_mib: Option<u32>,
928    ) -> Self {
929        // Construction-time invariant: every TestTopology has at
930        // least one LLC, core, and thread. Downstream code
931        // (`llc_aligned_cpuset`, `resolve_affinity_for_cgroup`'s
932        // LlcAligned branch, cpuset resolution) assumes this.
933        assert!(
934            topo.llcs > 0 && topo.cores_per_llc > 0 && topo.threads_per_core > 0,
935            "TestTopology requires non-zero llcs/cores/threads; got llcs={}, cores={}, threads={}",
936            topo.llcs,
937            topo.cores_per_llc,
938            topo.threads_per_core,
939        );
940        assert!(
941            topo.numa_nodes > 0,
942            "TestTopology requires at least one NUMA node; got {}",
943            topo.numa_nodes,
944        );
945        let llcs = topo.llcs;
946        let cores = topo.cores_per_llc;
947        let threads = topo.threads_per_core;
948        let numa_nodes = topo.numa_nodes;
949
950        let total = (llcs * cores * threads) as usize;
951        let cpus_per_llc = (cores * threads) as usize;
952        let cpus: Vec<usize> = (0..total).collect();
953
954        let llc_infos: Vec<LlcInfo> = (0..llcs as usize)
955            .map(|l| {
956                let start = l * cpus_per_llc;
957                let end = start + cpus_per_llc;
958                let mut core_map = BTreeMap::new();
959                for c in 0..cores as usize {
960                    let base = start + c * threads as usize;
961                    let siblings: Vec<usize> = (base..base + threads as usize).collect();
962                    core_map.insert(c, siblings);
963                }
964                LlcInfo {
965                    cpus: (start..end).collect(),
966                    numa_node: topo.numa_node_of(l as u32) as usize,
967                    cache_size_kib: None,
968                    cores: core_map,
969                }
970            })
971            .collect();
972
973        let n = numa_nodes as usize;
974        let numa_node_set: BTreeSet<usize> = (0..n).collect();
975
976        let mut distances = vec![0u8; n * n];
977        for i in 0..n {
978            for j in 0..n {
979                distances[i * n + j] = topo.distance(i as u32, j as u32);
980            }
981        }
982
983        let mut node_mem = BTreeMap::new();
984        let mut memory_only_nodes = BTreeSet::new();
985        match topo.nodes {
986            Some(nodes) => {
987                for (i, node) in nodes.iter().enumerate() {
988                    if node.memory_mib > 0 {
989                        node_mem.insert(
990                            i,
991                            NodeMemInfo {
992                                total_kib: (node.memory_mib as u64) * 1024,
993                                free_kib: (node.memory_mib as u64) * 1024,
994                            },
995                        );
996                    }
997                    if node.is_memory_only() {
998                        memory_only_nodes.insert(i);
999                    }
1000                }
1001            }
1002            None => {
1003                if let Some(total_mib) = total_memory_mib {
1004                    let per_node_mib = total_mib / numa_nodes;
1005                    for i in 0..n {
1006                        let mib = if i == n - 1 {
1007                            total_mib - per_node_mib * (numa_nodes - 1)
1008                        } else {
1009                            per_node_mib
1010                        };
1011                        node_mem.insert(
1012                            i,
1013                            NodeMemInfo {
1014                                total_kib: (mib as u64) * 1024,
1015                                free_kib: (mib as u64) * 1024,
1016                            },
1017                        );
1018                    }
1019                }
1020            }
1021        }
1022
1023        Self {
1024            cpus,
1025            llcs: llc_infos,
1026            numa_nodes: numa_node_set,
1027            numa_distances: distances,
1028            node_mem,
1029            memory_only_nodes,
1030        }
1031    }
1032
/// Test-only constructor: `num_cpus` CPUs numbered from 0, split
/// across `num_llcs` LLCs, with one NUMA node per LLC.
///
/// Every LLC gets `num_cpus / num_llcs` consecutive CPUs. The last
/// LLC also absorbs the remainder. Distances are 10 on the diagonal
/// and 20 elsewhere. No core map, cache size, or memory info is
/// populated.
///
/// # Panics
///
/// Panics when `num_llcs == 0`, `num_cpus == 0`, or
/// `num_cpus < num_llcs`.
#[cfg(test)]
pub fn synthetic(num_cpus: usize, num_llcs: usize) -> Self {
    // Construction-time invariant: at least one LLC and one CPU.
    // `llc_aligned_cpuset`, `cpus_in_llc`, and affinity resolution
    // all rely on it.
    assert!(
        num_llcs > 0,
        "TestTopology::synthetic requires num_llcs > 0; got 0"
    );
    assert!(
        num_cpus > 0,
        "TestTopology::synthetic requires num_cpus > 0; got 0"
    );
    assert!(
        num_cpus >= num_llcs,
        "TestTopology::synthetic requires num_cpus ({num_cpus}) >= num_llcs ({num_llcs})",
    );

    let per_llc = num_cpus / num_llcs;
    let last_llc = num_llcs - 1;
    let mut llcs = Vec::with_capacity(num_llcs);
    for id in 0..num_llcs {
        let lo = id * per_llc;
        // The final LLC runs to the end so no CPU is left unowned.
        let hi = if id == last_llc {
            num_cpus
        } else {
            lo + per_llc
        };
        llcs.push(LlcInfo {
            cpus: (lo..hi).collect(),
            numa_node: id,
            cache_size_kib: None,
            cores: BTreeMap::new(),
        });
    }

    // Row-major matrix: 10 for a node to itself, 20 to any other.
    let numa_distances: Vec<u8> = (0..num_llcs)
        .flat_map(|i| (0..num_llcs).map(move |j| if i == j { 10 } else { 20 }))
        .collect();

    Self {
        cpus: (0..num_cpus).collect(),
        llcs,
        numa_nodes: (0..num_llcs).collect(),
        numa_distances,
        node_mem: BTreeMap::new(),
        memory_only_nodes: BTreeSet::new(),
    }
}
1085}
1086
1087#[cfg(test)]
1088mod tests {
1089    use super::*;
1090
/// Narrowing path of `intersect_online_with_affinity`: when an
/// allowed set is supplied, CPUs outside it are removed and the
/// survivors keep their sysfs order. Only the return value is
/// pinned here, not the warning the narrowing path logs.
#[test]
fn intersect_drops_cpus_outside_allowed_set() {
    let sysfs_online = vec![0, 1, 2, 3, 4];
    let permitted: BTreeSet<usize> = BTreeSet::from([0, 2, 4]);
    let narrowed = intersect_online_with_affinity(&sysfs_online, Some(permitted))
        .expect("non-disjoint must Ok");
    assert_eq!(
        narrowed,
        vec![0, 2, 4],
        "intersection must preserve sysfs order and drop forbidden CPUs"
    );
}
1108
/// An allowed set sharing no CPU with the sysfs set must produce an
/// error whose text names BOTH sets, so an operator can match it
/// against the container / taskset configuration that imposed the
/// restriction.
#[test]
fn intersect_disjoint_allowed_set_bails_with_both_sets() {
    let sysfs_online = vec![0, 1, 2, 3];
    let permitted: BTreeSet<usize> = BTreeSet::from([4, 5, 6]);
    let failure = intersect_online_with_affinity(&sysfs_online, Some(permitted))
        .expect_err("disjoint must Err");
    // `{:#}` renders the whole context chain on one line.
    let msg = format!("{failure:#}");

    let uses_phrasing = msg.contains("disjoint from");
    assert!(
        uses_phrasing,
        "bail must use the 'disjoint from' phrasing: {msg}"
    );

    let cites_source = msg.contains("/sys/devices/system/cpu/online");
    assert!(cites_source, "bail must cite the sysfs source: {msg}");

    let cites_sysfs_set = msg.contains("[0, 1, 2, 3]") || msg.contains("sysfs=[0, 1, 2, 3]");
    assert!(
        cites_sysfs_set,
        "bail must include the sysfs CPU set verbatim: {msg}"
    );

    let cites_allowed_set = ["4", "5", "6"].iter().all(|cpu| msg.contains(*cpu));
    assert!(
        cites_allowed_set,
        "bail must include the disjoint allowed-set CPUs per FAF1: {msg}"
    );
}
1137
/// With no affinity mask available (EPERM / ESRCH / syscall failure
/// / >262144-CPU ceiling) the sysfs set must come back untouched.
/// The fallback keeps the behaviour from before the
/// `sched_getaffinity` cross-check existed, so a permission failure
/// cannot silently drop real CPUs.
#[test]
fn intersect_none_affinity_returns_full_sysfs_set() {
    let sysfs_online = vec![0, 1, 2, 3];
    let unchanged = intersect_online_with_affinity(&sysfs_online, None)
        .expect("None-affinity must Ok-fallback");
    assert_eq!(
        unchanged, sysfs_online,
        "None-affinity must return sysfs set verbatim, not narrow"
    );
}
1152
/// An empty CPU set renders as the empty string.
#[test]
fn cpuset_string_empty() {
    let no_cpus = BTreeSet::new();
    let rendered = TestTopology::cpuset_string(&no_cpus);
    assert_eq!(rendered, "");
}
1157
/// A one-element set renders as the bare CPU number.
#[test]
fn cpuset_string_single() {
    let lone_cpu: BTreeSet<usize> = BTreeSet::from([3]);
    let rendered = TestTopology::cpuset_string(&lone_cpu);
    assert_eq!(rendered, "3");
}
1162
/// A fully contiguous set collapses into one `start-end` range.
#[test]
fn cpuset_string_range() {
    let contiguous: BTreeSet<usize> = BTreeSet::from([0, 1, 2, 3]);
    let rendered = TestTopology::cpuset_string(&contiguous);
    assert_eq!(rendered, "0-3");
}
1170
/// Ranges and isolated CPUs can be mixed in one output string.
#[test]
fn cpuset_string_gaps() {
    let gappy: BTreeSet<usize> = BTreeSet::from([0, 1, 3, 5, 6, 7]);
    let rendered = TestTopology::cpuset_string(&gappy);
    assert_eq!(rendered, "0-1,3,5-7");
}
1178
/// `synthetic(8, 2)` yields 8 CPUs split evenly over 2 LLCs.
#[test]
fn synthetic_topology() {
    let topo = TestTopology::synthetic(8, 2);
    assert_eq!(topo.total_cpus(), 8);
    assert_eq!(topo.num_llcs(), 2);

    let first_llc = topo.cpus_in_llc(0);
    let second_llc = topo.cpus_in_llc(1);
    assert_eq!(first_llc, &[0, 1, 2, 3]);
    assert_eq!(second_llc, &[4, 5, 6, 7]);
}
1187
/// Two sets at 50% overlap over 8 CPUs: each has 4 CPUs and the pair
/// shares at least one CPU.
#[test]
fn overlapping_cpusets_basic() {
    let topo = TestTopology::synthetic(8, 1);
    let sets = topo.overlapping_cpusets(2, 0.5);
    assert_eq!(sets.len(), 2);
    for set in sets.iter() {
        assert_eq!(set.len(), 4);
    }
    let (left, right) = (&sets[0], &sets[1]);
    assert!(!left.is_disjoint(right));
}
1199
/// With `overlap_frac == 0.0` the two sets share no CPU.
#[test]
fn overlapping_cpusets_no_overlap() {
    let topo = TestTopology::synthetic(8, 1);
    let sets = topo.overlapping_cpusets(2, 0.0);
    assert_eq!(sets.len(), 2);
    let (left, right) = (&sets[0], &sets[1]);
    assert!(left.is_disjoint(right));
}
1208
/// `split_by_llc` returns one CPU set per LLC, in LLC order.
#[test]
fn split_by_llc() {
    let topo = TestTopology::synthetic(8, 2);
    let per_llc = topo.split_by_llc();
    assert_eq!(per_llc.len(), 2);
    assert_eq!(per_llc[0], BTreeSet::from([0, 1, 2, 3]));
    assert_eq!(per_llc[1], BTreeSet::from([4, 5, 6, 7]));
}
1217
/// `llc_aligned_cpuset(i)` is exactly the CPU set of LLC `i`.
#[test]
fn llc_aligned_cpuset() {
    let topo = TestTopology::synthetic(8, 2);
    let first = topo.llc_aligned_cpuset(0);
    let second = topo.llc_aligned_cpuset(1);
    assert_eq!(first, BTreeSet::from([0, 1, 2, 3]));
    assert_eq!(second, BTreeSet::from([4, 5, 6, 7]));
}
1224
/// 1 NUMA node, 1 LLC, 4 cores, 2 threads: all 8 CPUs share the
/// single LLC.
#[test]
fn from_vm_topology_single_llc() {
    let spec = crate::vmm::topology::Topology::new(1, 1, 4, 2);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 8);
    assert_eq!(topo.num_llcs(), 1);
    assert_eq!(topo.num_numa_nodes(), 1);
    let every_cpu = &[0, 1, 2, 3, 4, 5, 6, 7];
    assert_eq!(topo.all_cpus(), every_cpu);
    assert_eq!(topo.cpus_in_llc(0), every_cpu);
}
1234
/// 1 NUMA node, 2 LLCs, 4 cores, 2 threads: 16 CPUs in two
/// contiguous blocks of 8.
#[test]
fn from_vm_topology_multi_llc() {
    let spec = crate::vmm::topology::Topology::new(1, 2, 4, 2);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 16);
    assert_eq!(topo.num_llcs(), 2);
    assert_eq!(topo.num_numa_nodes(), 1);
    let lower = topo.cpus_in_llc(0);
    let upper = topo.cpus_in_llc(1);
    assert_eq!(lower, &[0, 1, 2, 3, 4, 5, 6, 7]);
    assert_eq!(upper, &[8, 9, 10, 11, 12, 13, 14, 15]);
}
1244
/// Without SMT (1 thread per core) each LLC holds one CPU per core.
#[test]
fn from_vm_topology_no_smt() {
    let spec = crate::vmm::topology::Topology::new(1, 2, 2, 1);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 4);
    assert_eq!(topo.num_llcs(), 2);
    let lower = topo.cpus_in_llc(0);
    let upper = topo.cpus_in_llc(1);
    assert_eq!(lower, &[0, 1]);
    assert_eq!(upper, &[2, 3]);
}
1253
/// Smallest legal spec: one node, one LLC, one core, one thread.
#[test]
fn from_vm_topology_minimal() {
    let spec = crate::vmm::topology::Topology::new(1, 1, 1, 1);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 1);
    assert_eq!(topo.num_llcs(), 1);
    let only_cpu = &[0];
    assert_eq!(topo.all_cpus(), only_cpu);
}
1261
/// 2 NUMA nodes, 4 LLCs: the first two LLCs land on node 0 and the
/// last two on node 1.
#[test]
fn from_vm_topology_multi_numa() {
    let spec = crate::vmm::topology::Topology::new(2, 4, 4, 2);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 32);
    assert_eq!(topo.num_llcs(), 4);
    assert_eq!(topo.num_numa_nodes(), 2);

    let domains = topo.llcs();
    // LLCs 0,1 -> NUMA node 0
    assert_eq!(domains[0].numa_node(), 0);
    assert_eq!(domains[1].numa_node(), 0);
    // LLCs 2,3 -> NUMA node 1
    assert_eq!(domains[2].numa_node(), 1);
    assert_eq!(domains[3].numa_node(), 1);
}
1274
/// Asking for zero sets returns an empty vector.
#[test]
fn overlapping_cpusets_zero_n() {
    let topo = TestTopology::synthetic(8, 1);
    let sets = topo.overlapping_cpusets(0, 0.5);
    assert!(sets.is_empty());
}
1280
/// A single-LLC synthetic topology has one NUMA node and CPUs 0..4.
#[test]
fn synthetic_single_llc() {
    let topo = TestTopology::synthetic(4, 1);
    assert_eq!(topo.num_llcs(), 1);
    assert_eq!(topo.total_cpus(), 4);
    assert_eq!(topo.num_numa_nodes(), 1);
    let every_cpu = &[0, 1, 2, 3];
    assert_eq!(topo.all_cpus(), every_cpu);
}
1289
/// 16 CPUs over 4 LLCs: every LLC holds exactly 4 CPUs.
#[test]
fn synthetic_many_llcs() {
    let topo = TestTopology::synthetic(16, 4);
    assert_eq!(topo.num_llcs(), 4);
    for idx in 0..4 {
        let members = topo.cpus_in_llc(idx);
        assert_eq!(members.len(), 4);
    }
}
1298
/// Two separate contiguous runs render as two ranges.
#[test]
fn cpuset_string_two_ranges() {
    let two_runs: BTreeSet<usize> = BTreeSet::from([0, 1, 2, 5, 6, 7]);
    let rendered = TestTopology::cpuset_string(&two_runs);
    assert_eq!(rendered, "0-2,5-7");
}
1306
/// With no adjacent CPUs, every ID is printed on its own.
#[test]
fn cpuset_string_all_isolated() {
    let scattered: BTreeSet<usize> = BTreeSet::from([1, 3, 5]);
    let rendered = TestTopology::cpuset_string(&scattered);
    assert_eq!(rendered, "1,3,5");
}
1314
/// A long contiguous run still collapses to a single range.
#[test]
fn cpuset_string_large_range() {
    let wide: BTreeSet<usize> = (0..128).collect();
    let rendered = TestTopology::cpuset_string(&wide);
    assert_eq!(rendered, "0-127");
}
1320
/// With `n == 1` the one set spans all 8 CPUs.
#[test]
fn overlapping_cpusets_single_set() {
    let topo = TestTopology::synthetic(8, 1);
    let sets = topo.overlapping_cpusets(1, 0.5);
    assert_eq!(sets.len(), 1);
    let only = &sets[0];
    assert_eq!(only.len(), 8);
}
1328
/// A single-LLC topology splits into one set holding every CPU.
#[test]
fn split_by_llc_single() {
    let topo = TestTopology::synthetic(4, 1);
    let per_llc = topo.split_by_llc();
    assert_eq!(per_llc.len(), 1);
    let only = &per_llc[0];
    assert_eq!(only.len(), 4);
}
1336
/// Regression test for the `split_by_llc` bug. A spec with 2 LLCs of
/// 4 single-threaded cores (`Topology::new(1, 2, 4, 1)`) must yield
/// 2 disjoint LLC sets that together cover all 8 CPUs. Before the
/// fix, `from_system()` on AMD hosts returned 1 LLC because CPUID
/// leaf 0x8000001D was not patched, and the test panicked indexing
/// `llc_sets[1]`.
#[test]
fn split_by_llc_two_llc_regression() {
    let spec = crate::vmm::topology::Topology::new(1, 2, 4, 1);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 8);
    assert_eq!(topo.num_llcs(), 2);

    let per_llc = topo.split_by_llc();
    assert_eq!(per_llc.len(), 2, "2-LLC topology must produce 2 LLC sets");
    let (first, second) = (&per_llc[0], &per_llc[1]);

    // No CPU may belong to both LLCs.
    let overlap: BTreeSet<usize> = first.intersection(second).copied().collect();
    assert!(
        overlap.is_empty(),
        "LLC sets must be disjoint: overlap={overlap:?}"
    );

    // Together the two LLCs must account for every CPU.
    let covered: BTreeSet<usize> = first.union(second).copied().collect();
    assert_eq!(covered, topo.all_cpuset(), "LLC sets must cover all CPUs");

    // 4 cores per LLC at 1 thread each -> 4 CPUs per set.
    assert_eq!(first.len(), 4);
    assert_eq!(second.len(), 4);

    // Exact membership.
    assert_eq!(*first, BTreeSet::from([0, 1, 2, 3]));
    assert_eq!(*second, BTreeSet::from([4, 5, 6, 7]));
}
1370
/// With more than 2 CPUs, the last CPU is withheld from the usable
/// list.
#[test]
fn usable_cpus_reserves_last() {
    let topo = TestTopology::synthetic(8, 2);
    let usable = topo.usable_cpus();
    assert_eq!(usable.len(), 7);
    assert!(!usable.contains(&7));
}
1377
/// With exactly 2 CPUs nothing is reserved.
#[test]
fn usable_cpus_small_no_reserve() {
    let topo = TestTopology::synthetic(2, 1);
    let usable = topo.usable_cpus();
    assert_eq!(usable.len(), 2);
}
1383
/// A single-CPU topology keeps its only CPU usable.
#[test]
fn usable_cpus_single_cpu() {
    let topo = TestTopology::synthetic(1, 1);
    let usable = topo.usable_cpus();
    assert_eq!(usable.len(), 1);
}
1389
/// A plain comma-separated list parses element by element.
#[test]
fn parse_cpu_list_simple() {
    let parsed = parse_cpu_list("0,1,2,3").unwrap();
    assert_eq!(parsed, vec![0, 1, 2, 3]);
}
1394
/// A `lo-hi` range expands inclusively.
#[test]
fn parse_cpu_list_range() {
    let parsed = parse_cpu_list("0-3").unwrap();
    assert_eq!(parsed, vec![0, 1, 2, 3]);
}
1399
/// Ranges and single IDs can be combined in one list.
#[test]
fn parse_cpu_list_mixed() {
    let parsed = parse_cpu_list("0-2,5,7-9").unwrap();
    let expected = vec![0, 1, 2, 5, 7, 8, 9];
    assert_eq!(parsed, expected);
}
1407
/// The empty string parses successfully to an empty list.
#[test]
fn parse_cpu_list_empty() {
    let parsed = parse_cpu_list("").unwrap();
    assert!(parsed.is_empty());
}
1412
/// Whitespace around entries and separators is tolerated.
#[test]
fn parse_cpu_list_whitespace() {
    let parsed = parse_cpu_list("  0 , 1 , 2  ").unwrap();
    assert_eq!(parsed, vec![0, 1, 2]);
}
1417
/// 4 LLCs x 8 cores x 2 threads on one node gives 64 CPUs.
#[test]
fn from_vm_topology_large() {
    let spec = crate::vmm::topology::Topology::new(1, 4, 8, 2);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 64);
    assert_eq!(topo.num_llcs(), 4);
    assert_eq!(topo.num_numa_nodes(), 1);
}
1425
/// `LlcInfo::cpus` and `LlcInfo::numa_node` report what `synthetic`
/// stored: one NUMA node per LLC, contiguous CPU blocks.
#[test]
fn llc_info_accessors() {
    let topo = TestTopology::synthetic(8, 2);
    let domains = topo.llcs();
    assert_eq!(domains.len(), 2);

    let (first, second) = (&domains[0], &domains[1]);
    assert_eq!(first.cpus(), &[0, 1, 2, 3]);
    assert_eq!(first.numa_node(), 0);
    assert_eq!(second.cpus(), &[4, 5, 6, 7]);
    assert_eq!(second.numa_node(), 1);
}
1436
/// With 2 threads per core, each core maps to a pair of consecutive
/// CPUs, and the second LLC's cores start where the first LLC ends.
#[test]
fn from_vm_topology_cores_populated() {
    let spec = crate::vmm::topology::Topology::new(1, 2, 4, 2);
    let topo = TestTopology::from_vm_topology(&spec);

    let first = &topo.llcs()[0];
    assert_eq!(first.num_cores(), 4);
    assert_eq!(first.cores().len(), 4);
    let expected_siblings = [
        (0, vec![0, 1]),
        (1, vec![2, 3]),
        (2, vec![4, 5]),
        (3, vec![6, 7]),
    ];
    for (core, siblings) in expected_siblings {
        assert_eq!(first.cores()[&core], siblings);
    }

    let second = &topo.llcs()[1];
    assert_eq!(second.cores()[&0], vec![8, 9]);
}
1450
/// Without SMT every core maps to exactly one CPU.
#[test]
fn from_vm_topology_no_smt_cores() {
    let spec = crate::vmm::topology::Topology::new(1, 1, 4, 1);
    let topo = TestTopology::from_vm_topology(&spec);
    let only_llc = &topo.llcs()[0];
    assert_eq!(only_llc.num_cores(), 4);
    let core_map = only_llc.cores();
    assert_eq!(core_map[&0], vec![0]);
    assert_eq!(core_map[&3], vec![3]);
}
1459
/// `parse_cache_size` reports KiB for `K`, `M`, and bare-byte
/// inputs. Bare byte counts are rounded up to the next KiB.
#[test]
fn parse_cache_size_formats() {
    let cases = [
        ("32768K", Some(32768)),
        ("32M", Some(32768)),
        // 65536 bytes = 64 KiB exactly.
        ("65536", Some(64)),
        // Sub-KiB bare-byte values round up to 1 KiB instead of 0 KiB.
        // Consumers treat 0 as "no cache"; a 500-byte cache is not
        // "no cache", it's "small cache."
        ("500", Some(1)),
        ("1", Some(1)),
        ("1023", Some(1)),
        // Just over 1 KiB still ceils to 2 KiB.
        ("1025", Some(2)),
        // Exact zero bytes maps to zero KiB.
        ("0", Some(0)),
    ];
    for (raw, expected_kib) in cases {
        assert_eq!(parse_cache_size(raw), expected_kib);
    }
}
1477
/// When the core map is populated, `num_cores` counts its entries
/// rather than the CPUs.
#[test]
fn num_cores_from_cores_map() {
    let mut cores = BTreeMap::new();
    cores.insert(0, vec![0, 1]);
    cores.insert(1, vec![2, 3]);
    let two_smt_cores = LlcInfo {
        cpus: vec![0, 1, 2, 3],
        numa_node: 0,
        cache_size_kib: None,
        cores,
    };
    assert_eq!(two_smt_cores.num_cores(), 2);
}
1488
/// With an empty core map, `num_cores` falls back to the CPU count.
#[test]
fn num_cores_fallback_to_cpus() {
    let no_core_data = LlcInfo {
        cpus: vec![0, 1, 2, 3],
        numa_node: 0,
        cache_size_kib: None,
        cores: BTreeMap::new(),
    };
    let counted = no_core_data.num_cores();
    assert_eq!(counted, 4);
}
1499
/// Lenient parser: a plain comma-separated list.
#[test]
fn parse_cpu_list_lenient_simple() {
    let parsed = parse_cpu_list_lenient("0,1,2,3");
    assert_eq!(parsed, vec![0, 1, 2, 3]);
}
1504
/// Lenient parser: a `lo-hi` range expands inclusively.
#[test]
fn parse_cpu_list_lenient_range() {
    let parsed = parse_cpu_list_lenient("0-3");
    assert_eq!(parsed, vec![0, 1, 2, 3]);
}
1509
/// Lenient parser: ranges and single IDs combined.
#[test]
fn parse_cpu_list_lenient_mixed() {
    let parsed = parse_cpu_list_lenient("0-2,5,7-9");
    let expected = vec![0, 1, 2, 5, 7, 8, 9];
    assert_eq!(parsed, expected);
}
1517
/// Lenient parser: the empty string yields an empty list.
#[test]
fn parse_cpu_list_lenient_empty() {
    let parsed = parse_cpu_list_lenient("");
    assert!(parsed.is_empty());
}
1522
/// Lenient parser: unparseable entries (`abc`, `xyz-3`) are dropped
/// while the valid ones around them are kept.
#[test]
fn parse_cpu_list_lenient_skips_garbage() {
    let parsed = parse_cpu_list_lenient("0,abc,2,xyz-3,4");
    assert_eq!(parsed, vec![0, 2, 4]);
}
1527
/// Lenient parser: whitespace around entries is tolerated.
#[test]
fn parse_cpu_list_lenient_whitespace() {
    let parsed = parse_cpu_list_lenient("  0 , 1 , 2  ");
    assert_eq!(parsed, vec![0, 1, 2]);
}
1532
/// A suffix-less number is a byte count and is reported in KiB.
#[test]
fn cache_size_bare_number() {
    // 1024 bytes is exactly 1 KiB.
    let kib = parse_cache_size("1024");
    assert_eq!(kib, Some(1));
}
1538
/// An empty string carries no size and parses to `None`.
#[test]
fn cache_size_empty_string() {
    let kib = parse_cache_size("");
    assert_eq!(kib, None);
}
1543
/// A whitespace-only string parses to `None`.
#[test]
fn cache_size_whitespace_only() {
    let kib = parse_cache_size("   ");
    assert_eq!(kib, None);
}
1548
/// `numa_aligned_cpuset` returns the union of the CPUs of every LLC
/// on the requested node.
#[test]
fn numa_aligned_cpuset_two_nodes() {
    // Topology::new(2, 4, 4, 1): 2 NUMA nodes, 4 LLCs, 4 cores per
    // LLC, 1 thread per core -> 4 x 4 x 1 = 16 CPUs, 4 per LLC.
    //   NUMA 0: LLCs 0,1 -> CPUs 0-3 and 4-7   = 0-7
    //   NUMA 1: LLCs 2,3 -> CPUs 8-11 and 12-15 = 8-15
    let spec = crate::vmm::topology::Topology::new(2, 4, 4, 1);
    let topo = TestTopology::from_vm_topology(&spec);
    assert_eq!(topo.total_cpus(), 16);
    assert_eq!(topo.num_numa_nodes(), 2);
    assert_eq!(topo.num_llcs(), 4);

    let on_node0: BTreeSet<usize> = topo.numa_aligned_cpuset(0);
    let on_node1: BTreeSet<usize> = topo.numa_aligned_cpuset(1);

    // NUMA 0 owns the lower half of the CPU range.
    let lower_half: BTreeSet<usize> = (0..8).collect();
    assert_eq!(on_node0, lower_half);

    // NUMA 1 owns the upper half of the CPU range.
    let upper_half: BTreeSet<usize> = (8..16).collect();
    assert_eq!(on_node1, upper_half);
}
1574
1575    // -- proptest --
1576
1577    use proptest::prop_assert;
1578
1579    proptest::proptest! {
1580        /// Any arbitrary input must either succeed and return a
1581        /// sorted Vec whose elements all came from the input, or
1582        /// fail without panicking. Broadened from 30 to 120
1583        /// characters to exercise long lists and pathological
1584        /// range/comma mixes.
1585        #[test]
1586        fn prop_parse_cpu_list_never_panics(s in "\\PC{0,120}") {
1587            if let Ok(cpus) = parse_cpu_list(&s) {
1588                for w in cpus.windows(2) {
1589                    prop_assert!(w[0] <= w[1], "parse_cpu_list not sorted: {cpus:?}");
1590                }
1591            }
1592        }
1593
1594        #[test]
1595        fn prop_parse_cpu_list_single_cpu(cpu in 0usize..256) {
1596            let result = parse_cpu_list(&cpu.to_string()).unwrap();
1597            assert_eq!(result, vec![cpu]);
1598        }
1599
1600        #[test]
1601        fn prop_parse_cpu_list_range_sorted(lo in 0usize..128, span in 1usize..64) {
1602            let hi = lo + span;
1603            let result = parse_cpu_list(&format!("{lo}-{hi}")).unwrap();
1604            assert_eq!(result.len(), span + 1);
1605            assert_eq!(*result.first().unwrap(), lo);
1606            assert_eq!(*result.last().unwrap(), hi);
1607            // Must be sorted.
1608            for w in result.windows(2) {
1609                assert!(w[0] <= w[1]);
1610            }
1611        }
1612
1613        /// Lenient parser must never panic AND its output must stay
1614        /// sorted — the strict parser's contract carries over.
1615        /// Broadened range from 30 to 120 characters.
1616        #[test]
1617        fn prop_parse_cpu_list_lenient_never_panics(s in "\\PC{0,120}") {
1618            let cpus = parse_cpu_list_lenient(&s);
1619            for w in cpus.windows(2) {
1620                prop_assert!(w[0] <= w[1], "parse_cpu_list_lenient not sorted: {cpus:?}");
1621            }
1622        }
1623
1624        #[test]
1625        fn prop_parse_cpu_list_lenient_superset_of_strict(
1626            lo in 0usize..64,
1627            hi in 64usize..128,
1628        ) {
1629            let s = format!("{lo}-{hi}");
1630            let strict = parse_cpu_list(&s).unwrap();
1631            let lenient = parse_cpu_list_lenient(&s);
1632            assert_eq!(strict, lenient);
1633        }
1634
1635        #[test]
1636        fn prop_parse_cpu_list_roundtrip(
1637            cpus in proptest::collection::btree_set(0usize..256, 1..16),
1638        ) {
1639            // Format as comma-separated list, parse back, compare.
1640            let s: String = cpus.iter().map(|c| c.to_string()).collect::<Vec<_>>().join(",");
1641            let parsed = parse_cpu_list(&s).unwrap();
1642            let roundtrip: std::collections::BTreeSet<usize> = parsed.into_iter().collect();
1643            assert_eq!(cpus, roundtrip);
1644        }
1645    }
1646
1647    #[test]
1648    fn numa_node_ids_synthetic() {
1649        let t = TestTopology::synthetic(8, 2);
1650        assert_eq!(*t.numa_node_ids(), [0, 1].into_iter().collect());
1651    }
1652
1653    #[test]
1654    fn numa_nodes_for_cpuset_single_node() {
1655        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1656        let cpuset: BTreeSet<usize> = (0..4).collect(); // LLC 0, NUMA 0
1657        assert_eq!(t.numa_nodes_for_cpuset(&cpuset), [0].into_iter().collect());
1658    }
1659
1660    #[test]
1661    fn numa_nodes_for_cpuset_both_nodes() {
1662        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1663        let cpuset: BTreeSet<usize> = [0, 8].into_iter().collect(); // NUMA 0 + NUMA 1
1664        assert_eq!(
1665            t.numa_nodes_for_cpuset(&cpuset),
1666            [0, 1].into_iter().collect()
1667        );
1668    }
1669
1670    #[test]
1671    fn numa_nodes_for_cpuset_empty() {
1672        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1673        assert!(t.numa_nodes_for_cpuset(&BTreeSet::new()).is_empty());
1674    }
1675
1676    // -- NUMA distance tests --
1677
1678    #[test]
1679    fn from_vm_topology_numa_distance_local() {
1680        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1681        assert_eq!(t.numa_distance(0, 0), 10);
1682        assert_eq!(t.numa_distance(1, 1), 10);
1683    }
1684
1685    #[test]
1686    fn from_vm_topology_numa_distance_remote() {
1687        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1688        assert_eq!(t.numa_distance(0, 1), 20);
1689        assert_eq!(t.numa_distance(1, 0), 20);
1690    }
1691
1692    #[test]
1693    fn from_vm_topology_numa_distance_single_node() {
1694        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 2, 4, 1));
1695        assert_eq!(t.numa_distance(0, 0), 10);
1696    }
1697
1698    #[test]
1699    fn numa_distance_invalid_node() {
1700        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1701        assert_eq!(t.numa_distance(0, 99), 255);
1702        assert_eq!(t.numa_distance(99, 0), 255);
1703    }
1704
1705    #[test]
1706    fn synthetic_distances_default() {
1707        let t = TestTopology::synthetic(8, 2);
1708        assert_eq!(t.numa_distance(0, 0), 10);
1709        assert_eq!(t.numa_distance(0, 1), 20);
1710        assert_eq!(t.numa_distance(1, 0), 20);
1711    }
1712
1713    // -- node_meminfo tests --
1714
1715    #[test]
1716    fn node_meminfo_used_kib() {
1717        let mi = NodeMemInfo {
1718            total_kib: 1024,
1719            free_kib: 256,
1720        };
1721        assert_eq!(mi.used_kib(), 768);
1722    }
1723
1724    #[test]
1725    fn node_meminfo_used_kib_saturates() {
1726        let mi = NodeMemInfo {
1727            total_kib: 0,
1728            free_kib: 100,
1729        };
1730        assert_eq!(mi.used_kib(), 0);
1731    }
1732
1733    #[test]
1734    fn from_vm_topology_no_meminfo() {
1735        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1736        assert!(t.node_meminfo(0).is_none());
1737        assert!(t.node_meminfo(1).is_none());
1738    }
1739
1740    #[test]
1741    fn synthetic_no_meminfo() {
1742        let t = TestTopology::synthetic(8, 2);
1743        assert!(t.node_meminfo(0).is_none());
1744    }
1745
1746    // -- is_memory_only tests --
1747
1748    #[test]
1749    fn from_vm_topology_not_memory_only() {
1750        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1751        assert!(!t.is_memory_only(0));
1752        assert!(!t.is_memory_only(1));
1753    }
1754
1755    #[test]
1756    fn is_memory_only_nonexistent_node() {
1757        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(2, 4, 4, 1));
1758        assert!(!t.is_memory_only(99));
1759    }
1760
1761    /// Regression for the unchecked-index panic in `llc_aligned_cpuset`:
1762    /// an out-of-range index used to panic at `self.llcs[idx]`. Now
1763    /// it returns an empty BTreeSet so a caller bug degrades rather
1764    /// than crashing the test run.
1765    #[test]
1766    fn llc_aligned_cpuset_out_of_range_returns_empty() {
1767        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 2, 4, 1));
1768        assert_eq!(t.num_llcs(), 2);
1769        let empty = t.llc_aligned_cpuset(99);
1770        assert!(
1771            empty.is_empty(),
1772            "out-of-range LLC idx must return empty, got {empty:?}"
1773        );
1774    }
1775
1776    /// Companion for `cpus_in_llc` — same out-of-range handling.
1777    #[test]
1778    fn cpus_in_llc_out_of_range_returns_empty_slice() {
1779        let t = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 2, 4, 1));
1780        assert_eq!(t.cpus_in_llc(99), &[] as &[usize]);
1781    }
1782
1783    /// Regression for `AffinityIntent::LlcAligned` panic when the
1784    /// topology has zero LLCs: construction now asserts non-zero,
1785    /// so the path that used to hit `self.llcs[0]` on empty is
1786    /// unreachable.
1787    #[test]
1788    #[should_panic(expected = "non-zero llcs")]
1789    fn from_vm_topology_rejects_zero_llcs() {
1790        let bad = crate::vmm::topology::Topology {
1791            llcs: 0,
1792            cores_per_llc: 2,
1793            threads_per_core: 1,
1794            numa_nodes: 1,
1795            nodes: None,
1796            distances: None,
1797        };
1798        let _ = TestTopology::from_vm_topology(&bad);
1799    }
1800
1801    #[test]
1802    #[should_panic(expected = "num_llcs > 0")]
1803    fn synthetic_rejects_zero_llcs() {
1804        let _ = TestTopology::synthetic(4, 0);
1805    }
1806
1807    #[test]
1808    #[should_panic(expected = "num_cpus > 0")]
1809    fn synthetic_rejects_zero_cpus() {
1810        let _ = TestTopology::synthetic(0, 1);
1811    }
1812
1813    #[test]
1814    #[should_panic(expected = ">= num_llcs")]
1815    fn synthetic_rejects_more_llcs_than_cpus() {
1816        let _ = TestTopology::synthetic(2, 4);
1817    }
1818
1819    /// Every constructor must land a topology with at least one LLC
1820    /// so `llc_aligned_cpuset(0)` always returns a non-empty set.
1821    #[test]
1822    fn every_constructor_produces_nonzero_llcs() {
1823        let a = TestTopology::synthetic(8, 2);
1824        assert!(a.num_llcs() >= 1);
1825        let b = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 2, 4, 1));
1826        assert!(b.num_llcs() >= 1);
1827        // `from_system` depends on /sys; skip when unavailable.
1828        if let Ok(c) = TestTopology::from_system() {
1829            assert!(
1830                c.num_llcs() >= 1,
1831                "from_system must always yield at least one LLC",
1832            );
1833        }
1834    }
1835
1836    /// Direct test of the fallback-LLC synthesis path. The only way
1837    /// `from_system` itself can reach the fallback is a pathological
1838    /// sysfs (online CPUs present, cache topology empty), which is
1839    /// impossible to inject reliably from a unit test. Extracting
1840    /// `synthesize_fallback_llc` to an independent helper lets us
1841    /// exercise the shape contract the fallback must satisfy
1842    /// downstream.
1843    #[test]
1844    fn synthesize_fallback_llc_populates_cpus_node_and_cores() {
1845        let cpus = [0, 1, 3, 7];
1846        let llc = synthesize_fallback_llc(&cpus, 2);
1847
1848        // Covers every input CPU.
1849        assert_eq!(llc.cpus(), &cpus);
1850
1851        // NUMA node faithfully carried through.
1852        assert_eq!(llc.numa_node(), 2);
1853
1854        // Cache size unknown (we had no sysfs entries to read).
1855        assert!(llc.cache_size_kib().is_none());
1856
1857        // One core per CPU (no SMT sibling reconstruction possible).
1858        assert_eq!(llc.cores().len(), cpus.len());
1859        for &c in &cpus {
1860            assert_eq!(
1861                llc.cores().get(&c).map(|v| v.as_slice()),
1862                Some(&[c][..]),
1863                "each CPU must appear as its own single-sibling core",
1864            );
1865        }
1866        assert_eq!(llc.num_cores(), cpus.len());
1867    }
1868
1869    /// Zero CPUs is legal input (`from_system` would bail earlier
1870    /// with "no online CPUs found", but the helper itself must not
1871    /// panic): an empty LlcInfo with empty cores map.
1872    #[test]
1873    fn synthesize_fallback_llc_empty_cpus_returns_empty_llc() {
1874        let llc = synthesize_fallback_llc(&[], 0);
1875        assert!(llc.cpus().is_empty());
1876        assert_eq!(llc.numa_node(), 0);
1877        assert!(llc.cores().is_empty());
1878        // With empty cores map, num_cores falls back to cpus.len() == 0.
1879        assert_eq!(llc.num_cores(), 0);
1880    }
1881
1882    // -- TestTopology + LlcInfo PartialEq + Eq --
1883
1884    #[test]
1885    fn test_topology_partial_eq_equal_for_same_spec() {
1886        let spec = crate::vmm::topology::Topology::new(1, 2, 4, 2);
1887        let a = TestTopology::from_vm_topology(&spec);
1888        let b = TestTopology::from_vm_topology(&spec);
1889        assert_eq!(a, b);
1890    }
1891
1892    #[test]
1893    fn test_topology_partial_eq_differs_for_different_spec() {
1894        let spec_a = crate::vmm::topology::Topology::new(1, 2, 4, 2);
1895        let spec_b = crate::vmm::topology::Topology::new(2, 4, 4, 2);
1896        let a = TestTopology::from_vm_topology(&spec_a);
1897        let b = TestTopology::from_vm_topology(&spec_b);
1898        assert_ne!(a, b);
1899    }
1900
1901    #[test]
1902    fn llc_info_partial_eq_equal_for_same_fields() {
1903        let a = synthesize_fallback_llc(&[0, 1], 0);
1904        let b = synthesize_fallback_llc(&[0, 1], 0);
1905        assert_eq!(a, b);
1906    }
1907
1908    #[test]
1909    fn llc_info_partial_eq_differs_for_different_cpus() {
1910        let a = synthesize_fallback_llc(&[0, 1], 0);
1911        let b = synthesize_fallback_llc(&[2, 3], 0);
1912        assert_ne!(a, b);
1913    }
1914
1915    #[test]
1916    fn mem_policy_partial_eq_default_equals_default() {
1917        use crate::workload::MemPolicy;
1918        assert_eq!(MemPolicy::default(), MemPolicy::Default);
1919        assert_ne!(MemPolicy::default(), MemPolicy::Local);
1920    }
1921}