ktstr/scenario/snapshot/
view.rs

1//! [`Snapshot`] is the entry point for a captured
2//! [`FailureDumpReport`], plus [`SnapshotMap`] for typed traversal of
3//! one map and the per-CPU resolver helpers it uses to project
4//! per-CPU array / hash entries down to a single slot.
5//!
6//! [`render_entry_key`] formats a [`SnapshotEntry`] key for the
7//! `NoMatch` diagnostic; lives here because it walks the same
8//! `SnapshotMap` entry shapes the type uses internally.
9
10use crate::monitor::arena::ArenaSnapshot;
11use crate::monitor::bpf_prog::ProgRuntimeStats;
12use crate::monitor::btf_render::RenderedValue;
13use crate::monitor::dump::{
14    EventCounterSample, FailureDumpFdArray, FailureDumpMap, FailureDumpPercpuEntry,
15    FailureDumpPercpuHashEntry, FailureDumpReport, FailureDumpRingbuf, FailureDumpStackTrace,
16    PerCpuTimeStats, PerNodeNumaStats, ProbeBssCounters,
17};
18use crate::monitor::scx_walker::{DsqState, RqScxState, ScxSchedState};
19use crate::monitor::task_enrichment::TaskEnrichment;
20
21use super::field::lookup_member;
22use super::{
23    ExcludedMap, HEX_KEY_PREFIX, NO_MATCH_KEY_CHAR_CAP, NO_MATCH_KEY_SAMPLE, SnapshotEntry,
24    SnapshotError, SnapshotField, SnapshotResult,
25};
26
/// Borrowed view over a captured [`FailureDumpReport`] for typed
/// traversal of BTF-rendered map values, per-CPU entries, and
/// scalar variables.
///
/// Constructed from a [`FailureDumpReport`] reference (typically
/// obtained via [`super::SnapshotBridge::drain`]); the view is cheap to
/// build — it does not copy the underlying report. Accessor
/// methods all return further borrowed views that walk the report
/// in place.
#[derive(Debug, Clone)]
#[must_use = "Snapshot is a borrowed view; bind or chain accessors"]
#[non_exhaustive]
pub struct Snapshot<'a> {
    /// The captured report this view borrows. Never copied; every
    /// accessor walks it in place.
    report: &'a FailureDumpReport,
    /// When `Some`, every map-walking accessor filters
    /// [`FailureDumpReport::maps`] to maps whose `name` begins with
    /// `<obj>.`. Populated by [`Self::active`], either from the
    /// report's `active_obj_name` (when it names an obj that owns a
    /// captured global-section map) or from the obj-prefix scan over
    /// the captured global-section maps; `None` when the snapshot was
    /// constructed via [`Self::new`] (unfiltered).
    active_obj: Option<&'a str>,
    /// Optional kernel-map-KVA whitelist used alongside
    /// [`Self::active_obj`] to defend against the same-binary case
    /// (two scheduler instances loaded from the same binary, e.g.
    /// MITOSIS_FIXED + MITOSIS_ADAPTIVE both loading `scx_mitosis`,
    /// where the obj prefix matches both copies' bss/data/rodata
    /// maps). When set + non-empty, a map is "active" only if BOTH
    /// `active_obj` matches its prefix AND its
    /// [`FailureDumpMap::map_kva`] is non-zero and appears in the
    /// whitelist.
    ///
    /// `&[]` (empty) when the view came from [`Self::new`], when
    /// [`Self::active`] resolved a prefix via the Phase-1 name path
    /// (no walker run → no KVA set captured), OR when the snapshot
    /// pre-dates the walker plumbing. In the latter two cases
    /// `Snapshot::active`'s filter degrades to obj-prefix
    /// matching only — still correct for the different-binary case;
    /// loses the same-binary disambiguation guarantee.
    active_map_kvas: &'a [u64],
}
64
65impl<'a> Snapshot<'a> {
    /// Build a borrowed view over `report` with no active-scheduler
    /// filter. Every map-walking accessor sees every captured map.
    ///
    /// Both filter fields start disabled: `active_obj` is `None` and
    /// the KVA whitelist is empty. [`Self::active`] is the method that
    /// produces a filtered view.
    pub fn new(report: &'a FailureDumpReport) -> Self {
        Self {
            report,
            // No obj-prefix filter: `maps_iter` admits every map.
            active_obj: None,
            // Empty whitelist = no KVA filtering.
            active_map_kvas: &[],
        }
    }
75
76    /// Iterate maps the current view exposes — every captured map
77    /// when `active_obj` is None; only maps whose name shares the
78    /// `<obj>.` prefix when [`Self::active`] populated the filter.
79    /// When [`Self::active_map_kvas`] is also populated, additionally
80    /// require the map's [`FailureDumpMap::map_kva`] to be in the
81    /// whitelist — this catches the same-binary case where two
82    /// scheduler instances' bss maps share an obj prefix but live at
83    /// distinct kernel addresses.
84    fn maps_iter(&self) -> impl Iterator<Item = &'a FailureDumpMap> + '_ {
85        let active = self.active_obj;
86        let kva_filter = self.active_map_kvas;
87        self.report.maps.iter().filter(move |m| match active {
88            None => true,
89            Some(obj) => {
90                if !map_belongs_to_obj(&m.name, obj) {
91                    return false;
92                }
93                // Empty whitelist = no KVA filter (phase-1 name path
94                // OR pre-walker snapshot). Non-empty = require the
95                // map's KVA to appear; defends against KVA aliasing
96                // and same-binary post-swap ambiguity per the
97                // FailureDumpReport::active_map_kvas doc.
98                if kva_filter.is_empty() {
99                    return true;
100                }
101                m.map_kva != 0 && kva_filter.contains(&m.map_kva)
102            }
103        })
104    }
105
106    /// Construct [`SnapshotError::ActiveFilterExcludedMaps`] for the
107    /// caller IFF the active KVA filter rejected EVERY captured
108    /// `<active_obj>.*` map. Returns `None` in every other case:
109    ///
110    /// - the view is not active-filtered (`active_obj` is `None`),
111    /// - the KVA whitelist is empty (no filter active),
112    /// - no map shares the active obj prefix at all (the standard
113    ///   `MapNotFound` / `VarNotFound` diagnostic carries it),
114    /// - at least one captured `<active_obj>.*` map passed the KVA
115    ///   whitelist (the admitted set is non-empty, so a lookup miss
116    ///   is a real typo / absent symbol — fall through to the
117    ///   standard diagnostic, do not falsely steer the operator at
118    ///   the filter).
119    ///
120    /// Only the "admitted set genuinely empty" case fires the rich
121    /// diagnostic. Caller is responsible for the `requested` field;
122    /// every other field is populated from the snapshot.
123    fn excluded_filter_err(&self, requested: String) -> Option<SnapshotError> {
124        let obj = self.active_obj?;
125        if self.active_map_kvas.is_empty() {
126            return None;
127        }
128        let mut excluded: Vec<ExcludedMap> = Vec::new();
129        let mut any_admitted = false;
130        for m in &self.report.maps {
131            if !map_belongs_to_obj(&m.name, obj) {
132                continue;
133            }
134            if m.map_kva != 0 && self.active_map_kvas.contains(&m.map_kva) {
135                any_admitted = true;
136                continue;
137            }
138            excluded.push(ExcludedMap {
139                name: m.name.clone(),
140                map_kva: m.map_kva,
141            });
142        }
143        if excluded.is_empty() || any_admitted {
144            return None;
145        }
146        Some(SnapshotError::ActiveFilterExcludedMaps {
147            requested,
148            active_obj: obj.to_string(),
149            excluded_maps: excluded,
150            whitelist_kvas: self.active_map_kvas.to_vec(),
151        })
152    }
153
    /// Underlying [`FailureDumpReport`] borrowed back to the caller.
    ///
    /// The returned reference carries the report's own lifetime `'a`,
    /// not the lifetime of this view, so it may outlive the
    /// [`Snapshot`] it was obtained from.
    ///
    /// **Escape hatch.** Most consumers should reach for the typed
    /// accessors on [`Snapshot`] / [`SnapshotMap`] / [`SnapshotEntry`]
    /// / [`SnapshotField`], which route through [`SnapshotError`] and
    /// compose with the [`crate::assert::temporal`] patterns via
    /// [`SeriesField`](crate::assert::temporal::SeriesField). Use
    /// `report()` only when a [`FailureDumpReport`] field has no
    /// typed accessor yet:
    ///
    /// - `vcpu_regs` — per-vCPU register snapshot captured at the
    ///   freeze instant.
    /// - `vcpu_perf_at_freeze` — per-vCPU hardware perf counter
    ///   snapshot captured at the freeze instant.
    /// - `dump_truncated_at_us` — microseconds-into-the-dump at
    ///   which the soft deadline tripped.
    /// - `sdt_allocations`, `scx_static_ranges` — SDT allocator and
    ///   scx static memory layout snapshots used by the arena /
    ///   pointer-renderer pipelines.
    /// - `schema` — wire-format metadata
    ///   ([`Self::is_placeholder`] already wraps the boolean form).
    ///
    /// All other fields that [`FailureDumpReport`]'s own documentation
    /// describes as escape-only now have first-class accessors on
    /// [`Snapshot`] (`event_counter_timeline`, `rq_scx_states`,
    /// `dsq_states`, `scx_sched_state`, `per_cpu_time`,
    /// `per_node_numa`, `task_enrichments`, `prog_runtime_stats`,
    /// `probe_counters`) and on [`SnapshotMap`] (`ringbuf`,
    /// `arena`, `fd_array`, `stack_trace`, `map_error`).
    ///
    /// Five `*_unavailable` diagnostic accessors cover the subset of
    /// walker-backed fields the dump pipeline writes a reason string
    /// for: [`Self::scx_walker_unavailable`] (shared by
    /// rq_scx_states / dsq_states / scx_sched_state — the scx
    /// walker writes one reason for the whole group),
    /// [`Self::task_enrichments_unavailable`],
    /// [`Self::prog_runtime_stats_unavailable`],
    /// [`Self::per_node_numa_unavailable`], and
    /// [`Self::sdt_alloc_unavailable`] (for the still-escape-only
    /// `sdt_allocations` field listed earlier). The remaining accessors
    /// (`event_counter_timeline`, `per_cpu_time`, `probe_counters`)
    /// have no companion diagnostic — empty / None is their only
    /// "no capture" signal.
    ///
    /// **Caveats of the bypass:**
    /// - No [`SnapshotError`] routing — the call site is on its own to
    ///   handle missing fields / type mismatches / per-CPU
    ///   narrowing.
    /// - No [`SeriesField`](crate::assert::temporal::SeriesField)
    ///   integration — temporal patterns
    ///   ([`nondecreasing`](crate::assert::temporal::SeriesField::nondecreasing),
    ///   [`rate_within`](crate::assert::temporal::SeriesField::rate_within),
    ///   etc.) cannot consume raw `FailureDumpReport` field values.
    /// - No placeholder-sample short-circuit
    ///   (the [`Self::is_placeholder`] check is the caller's
    ///   responsibility).
    /// - No active-scheduler filtering — the raw report is returned
    ///   as-is even when this view was produced by [`Self::active`].
    pub fn report(&self) -> &'a FailureDumpReport {
        self.report
    }
213
214    /// Look up a BPF map by exact name. Respects the
215    /// [`Self::active`] filter when set — only maps the filter
216    /// admits are considered. Returns [`SnapshotError::MapNotFound`]
217    /// (with the captured map names in `available`) when no match
218    /// is found among the admitted maps, or
219    /// [`SnapshotError::PlaceholderSnapshot`] when the snapshot's
220    /// underlying `FailureDumpReport` is a placeholder (freeze
221    /// rendezvous failed; no maps to walk).
222    pub fn map(&self, name: &str) -> SnapshotResult<SnapshotMap<'a>> {
223        if self.report.is_placeholder {
224            return Err(SnapshotError::PlaceholderSnapshot { tag: None });
225        }
226        for m in self.maps_iter() {
227            if m.name == name {
228                return Ok(SnapshotMap { map: m, cpu: None });
229            }
230        }
231        if let Some(err) = self.excluded_filter_err(name.to_string()) {
232            return Err(err);
233        }
234        Err(SnapshotError::MapNotFound {
235            requested: name.to_string(),
236            available: self.maps_iter().map(|m| m.name.clone()).collect(),
237        })
238    }
239
240    /// Walk the BTF-rendered fields of every `*.bss` / `*.data` /
241    /// `*.rodata` global-section map for a top-level variable
242    /// named `name`. Convenience for `.var("nr_cpus_onln")` style
243    /// scalar reads without naming the section explicitly.
244    ///
245    /// Returns [`SnapshotField::Value`] on a unique match;
246    /// [`SnapshotField::Missing`] with
247    /// [`SnapshotError::VarNotFound`] (and the union of every
248    /// global-section map's top-level member names in `available`)
249    /// when no map exposes the name; OR — when more than one
250    /// global-section map exposes the name — auto-falls-back to
251    /// [`Self::live_var`] semantics (delegates to
252    /// [`Self::active`] and re-projects) before yielding
253    /// [`SnapshotError::AmbiguousVar`].
254    ///
255    /// # Auto-fallback contract
256    ///
257    /// When the raw scan finds 2+ hits AND the snapshot is not
258    /// already narrowed by [`Self::active`] (i.e.
259    /// `self.active_obj` is `None`), `var()` calls
260    /// [`Self::active`]: on `Ok` it returns `active.var(name)`
261    /// directly — whether [`SnapshotField::Value`],
262    /// [`SnapshotError::VarNotFound`], or
263    /// [`SnapshotError::AmbiguousVar`] persisting after the
264    /// live filter narrowed; on `Err` it falls through to the
265    /// pre-filter [`SnapshotError::AmbiguousVar`] (see next
266    /// section). The fallback exists so post-
267    /// [`crate::scenario::ops::Op::ReplaceScheduler`] callers
268    /// who name a global by string don't have to know about
269    /// [`Self::live_var`] explicitly — the principled
270    /// active-scheduler walker is consulted automatically when
271    /// the raw lookup is ambiguous. [`Self::live_var`] remains
272    /// the explicit-opt-in form for callers who want the live
273    /// filter unconditionally (skip the raw-scan path).
274    ///
275    /// # When `AmbiguousVar` STILL fires
276    ///
277    /// After the auto-fallback. The raw scan found 2+ hits AND
278    /// `active()` failed (no scheduler attached, multi-obj
279    /// without principled walker resolution, etc.). The
280    /// `found_in` list names every map the raw scan saw — the
281    /// operator needs all of them to reason about which obj
282    /// they want to address via [`Self::map`].
283    pub fn var(&self, name: &str) -> SnapshotField<'a> {
284        if self.report.is_placeholder {
285            return SnapshotField::Missing(SnapshotError::PlaceholderSnapshot { tag: None });
286        }
287        let mut hits: Vec<(&'a str, &'a RenderedValue)> = Vec::new();
288        for m in self.maps_iter() {
289            if !is_global_section_map(&m.name) {
290                continue;
291            }
292            if let Some(v) = m.value.as_ref()
293                && let Some(found) = lookup_member(v, name)
294            {
295                hits.push((m.name.as_str(), found));
296            }
297        }
298        match hits.len() {
299            1 => SnapshotField::Value(hits[0].1),
300            n if n > 1 => {
301                // Ambiguous at the raw-`var` layer — try the
302                // principled active-scheduler resolution before
303                // giving up. When `Snapshot::active()` succeeds it
304                // restricts the projection to the live scheduler's
305                // maps (and, when the walker populated the KVA
306                // whitelist, the live scheduler's specific map
307                // instances even in the same-binary case). If
308                // active() resolves to a Snapshot whose filtered
309                // maps_iter yields exactly one hit, return that.
310                // When the live filter ALSO can't narrow (e.g.,
311                // KVA whitelist excluded every match → narrows to
312                // zero, or live obj has 2+ copies of the same
313                // global — unusual but possible), surface THE
314                // LIVE-FILTERED diagnostic rather than the
315                // pre-filter AmbiguousVar list. The operator who
316                // hits ambiguity post-disambiguation needs to know the
317                // filter ran and what it admitted, not see the
318                // raw all-maps "ambiguous between OLD + NEW bss"
319                // list that misleads them into reaching for a
320                // picker the framework already obviated.
321                if self.active_obj.is_none()
322                    && let Ok(active) = self.active()
323                {
324                    return active.var(name);
325                }
326                SnapshotField::Missing(SnapshotError::AmbiguousVar {
327                    requested: name.to_string(),
328                    found_in: hits.iter().map(|(name, _)| (*name).to_string()).collect(),
329                })
330            }
331            _ => {
332                if let Some(err) = self.excluded_filter_err(name.to_string()) {
333                    return SnapshotField::Missing(err);
334                }
335                // No global-section map yielded the var. If a
336                // global-section map's contents failed to render
337                // (value absent, `error` set), the search could not
338                // confirm the var's absence — that map might hold it.
339                // Surface the render failure rather than a false
340                // VarNotFound that reads as "the symbol doesn't exist".
341                if let Some(m) = self.maps_iter().find(|m| {
342                    is_global_section_map(&m.name) && m.value.is_none() && m.error.is_some()
343                }) {
344                    return SnapshotField::Missing(SnapshotError::MapRenderIncomplete {
345                        map: m.name.clone(),
346                        error: m.error.clone().unwrap_or_default(),
347                    });
348                }
349                let mut available: Vec<String> = Vec::new();
350                for m in self.maps_iter() {
351                    if !is_global_section_map(&m.name) {
352                        continue;
353                    }
354                    if let Some(RenderedValue::Struct { members, .. }) = m.value.as_ref() {
355                        for member in members {
356                            available.push(member.name.clone());
357                        }
358                    }
359                }
360                available.sort();
361                available.dedup();
362                SnapshotField::Missing(SnapshotError::VarNotFound {
363                    requested: name.to_string(),
364                    available,
365                })
366            }
367        }
368    }
369
370    /// Iterate every global-section copy that carries a top-level
371    /// member named `name`. Yields `(owning_map_name, field)` pairs
372    /// in capture order. Use when [`Self::var`] errors
373    /// [`SnapshotError::AmbiguousVar`] and the caller needs to
374    /// reason across every observed copy explicitly (e.g. summing
375    /// counter deltas across two scheduler instances loaded
376    /// back-to-back in the same scenario).
377    ///
378    /// Respects the [`Self::active`] filter when set, so chained
379    /// `snapshot.active()?.vars(name)` is well-defined — it iterates
380    /// only the active scheduler's copies (typically exactly one,
381    /// since active() filters to one obj_name).
382    ///
383    /// Yields nothing on placeholder snapshots (the underlying
384    /// `report.maps` is empty by construction so nothing matches
385    /// anyway — callers needing "is this a placeholder?" use the
386    /// `Snapshot::is_placeholder` accessor explicitly).
387    pub fn vars(&self, name: &str) -> impl Iterator<Item = (&'a str, SnapshotField<'a>)> + '_ {
388        let needle = name.to_string();
389        self.maps_iter().filter_map(move |m| {
390            if !is_global_section_map(&m.name) {
391                return None;
392            }
393            let v = m.value.as_ref()?;
394            let found = lookup_member(v, &needle)?;
395            Some((m.name.as_str(), SnapshotField::Value(found)))
396        })
397    }
398
399    /// Project the snapshot to the currently-active scheduler's
400    /// maps. Returns a filtered [`Snapshot`] whose [`Self::map`] /
401    /// [`Self::var`] / [`Self::vars`] see only the maps whose name
402    /// shares the `<obj>.` prefix of the active scheduler's BPF
403    /// object. Composable: `snapshot.active()?.var(name)`.
404    ///
405    /// # When to use
406    ///
407    /// Tests that swap schedulers mid-scenario (via
408    /// [`crate::scenario::ops::Op::ReplaceScheduler`]) reach for
409    /// `.active()` after the swap so the per-phase post-swap
410    /// snapshots resolve the live scheduler's bss without hitting
411    /// [`SnapshotError::AmbiguousVar`] across both schedulers'
412    /// captured copies. Single-scheduler tests never need
413    /// `.active()` — there is no ambiguity to resolve.
414    ///
415    /// # Signal source
416    ///
417    /// "Active" comes from two fields the freeze coordinator
418    /// populates at capture time:
419    /// - [`crate::monitor::dump::FailureDumpReport::active_obj_name`]
420    ///   -- set by the target-free `prog_idr` walker (no `scx_root`
421    ///   dependency; works pre-6.16) that finds the live
422    ///   struct_ops prog's obj prefix (see `monitor/dump/mod.rs`
423    ///   `identify_active_obj_from_struct_ops`).
424    /// - [`crate::monitor::dump::FailureDumpReport::active_map_kvas`]
425    ///   -- the live scheduler's `prog.aux->used_maps` KVA set that
426    ///   the same walker publishes. Non-empty iff the walker resolved
427    ///   a global-section-bearing prog (the same-binary
428    ///   disambiguation case).
429    ///
430    /// When the walker resolved both fields, `active()` uses them
431    /// directly and the obj-prefix scan below is a sanity cross-
432    /// check against the captured map set. When the walker was
433    /// unavailable (placeholder dump, transient swap window before
434    /// the accessor-init worker republished, or kernel built
435    /// without struct_ops support), the obj-prefix scan with
436    /// per-section count fallback decides.
437    ///
438    /// # Failure cases
439    ///
440    /// - [`SnapshotError::PlaceholderSnapshot`]: the snapshot is a
441    ///   freeze-rendezvous-failure placeholder.
442    /// - [`SnapshotError::NoActiveScheduler`] (no global-section
443    ///   maps): the snapshot has no `<obj>.bss/.data/.rodata` —
444    ///   either no scheduler is attached, or the capture missed
445    ///   the global sections entirely.
446    /// - [`SnapshotError::NoActiveScheduler`] (multiple distinct
447    ///   obj prefixes, walker unavailable): two scheduler instances
448    ///   with DIFFERENT obj names coexist (back-to-back load of
449    ///   distinct binaries, or one scheduler composed of multiple
450    ///   BPF objects) AND the walker did not publish
451    ///   `active_obj_name`. Use [`Self::vars`] to enumerate every
452    ///   copy or [`Self::map`] to address a specific scheduler's
453    ///   bss directly.
454    /// - [`SnapshotError::NoActiveScheduler`] (multi-copy
455    ///   same-prefix, walker unavailable): an
456    ///   [`crate::scenario::ops::Op::ReplaceScheduler`] swap
457    ///   between two builds of the SAME binary left two
458    ///   `<obj>.bss` (or `.data` / `.rodata`) copies with
459    ///   identical names AND the walker did not publish
460    ///   `active_map_kvas` to disambiguate. The obj-prefix filter
461    ///   alone cannot pick the live copy without admitting both.
462    ///   Use [`Self::live_var_via`] / [`Self::live_vars_via`] with
463    ///   `crate::scenario::snapshot::pickers::max_by_sum_u64` to
464    ///   pick by counter activity.
465    ///
466    /// # Lifetime
467    ///
468    /// Pure projection over the frozen `FailureDumpReport`;
469    /// multiple calls return equivalent views. Caching the result
470    /// in a `let active = snapshot.active()?;` binding is fine but
471    /// not required.
472    pub fn active(&self) -> SnapshotResult<Snapshot<'a>> {
473        if self.report.is_placeholder {
474            return Err(SnapshotError::PlaceholderSnapshot { tag: None });
475        }
476        // Scan global-section maps to collect:
477        //   1. The distinct set of obj_name prefixes (used by the
478        //      multi-obj failure diagnostic).
479        //   2. Per-(prefix, section) counts (used to detect the
480        //      same-binary multi-copy case: two `<prefix>.bss` maps
481        //      coexist with identical names but distinct map KVAs).
482        // The producer-side helper in
483        // `monitor/dump/mod.rs` `count_global_sections_for_prefix`
484        // performs the same count; both sites use strict full-name
485        // equality to stay in lockstep.
486        let mut obj_names: Vec<&'a str> = Vec::new();
487        let mut counts: Vec<(&'a str, usize, usize, usize)> = Vec::new();
488        for m in &self.report.maps {
489            if !is_global_section_map(&m.name) {
490                continue;
491            }
492            let Some(obj) = m.name.split('.').next() else {
493                continue;
494            };
495            if obj.is_empty() {
496                continue;
497            }
498            if !obj_names.contains(&obj) {
499                obj_names.push(obj);
500                counts.push((obj, 0, 0, 0));
501            }
502            let entry = counts
503                .iter_mut()
504                .find(|(o, _, _, _)| *o == obj)
505                .expect("obj just pushed");
506            // Strict section suffix match — `<obj>.bss` exactly,
507            // not `<obj>.bss.shared` or other multi-segment names.
508            let section = m.name.split('.').nth(1).unwrap_or("");
509            match section {
510                "bss" if m.name == format!("{obj}.bss") => entry.1 += 1,
511                "data" if m.name == format!("{obj}.data") => entry.2 += 1,
512                "rodata" if m.name == format!("{obj}.rodata") => entry.3 += 1,
513                _ => {}
514            }
515        }
516        // Principled fast path: when the freeze-coord captured a
517        // non-None `active_obj_name` via the target-free prog_idr
518        // walker (`prog_idr → prog aux->used_maps → global-section
519        // sibling map`; no scx_root), prefer that even if multiple obj
520        // prefixes show up in `obj_names`. The KVA whitelist
521        // (`active_map_kvas`) pairs with the obj-name filter in
522        // `maps_iter` — when populated, same-binary multi-copy
523        // resolves to the live copy. When empty AND the matched
524        // prefix has any multi-copy section, the obj-prefix filter
525        // alone would admit both copies → fail loudly with a
526        // multi-copy diagnostic instead of silently surfacing
527        // AmbiguousVar at the var lookup.
528        if let Some(active_name) = self.report.active_obj_name.as_deref()
529            && let Some(matched) = obj_names.iter().find(|obj| **obj == active_name).copied()
530        {
531            if !self.report.active_map_kvas.is_empty() {
532                return Ok(Snapshot {
533                    report: self.report,
534                    active_obj: Some(matched),
535                    active_map_kvas: &self.report.active_map_kvas,
536                });
537            }
538            // Walker did not publish a whitelist. Check the matched
539            // prefix's section counts; if any multi-copy, bail.
540            if let Some(&(_, b, d, r)) = counts.iter().find(|(o, _, _, _)| *o == matched)
541                && (b > 1 || d > 1 || r > 1)
542            {
543                return Err(SnapshotError::NoActiveScheduler {
544                    reason: format_multi_copy_reason(matched, b, d, r),
545                });
546            }
547            return Ok(Snapshot {
548                report: self.report,
549                active_obj: Some(matched),
550                active_map_kvas: &[],
551            });
552        }
553        match (obj_names.as_slice(), counts.as_slice()) {
554            ([], _) => Err(SnapshotError::NoActiveScheduler {
555                reason: "snapshot has no global-section BPF maps (no scheduler \
556                         attached, or capture did not include bss/data/rodata)"
557                    .to_string(),
558            }),
559            ([only], [(_, b, d, r)]) if *b <= 1 && *d <= 1 && *r <= 1 => Ok(Snapshot {
560                report: self.report,
561                active_obj: Some(*only),
562                // Only one obj prefix in the snapshot AND no
563                // section has more than one copy — obj-prefix
564                // matching uniquely picks the scheduler's maps.
565                active_map_kvas: &[],
566            }),
567            ([only], [(_, b, d, r)]) => Err(SnapshotError::NoActiveScheduler {
568                reason: format_multi_copy_reason(only, *b, *d, *r),
569            }),
570            (multiple, _) => Err(SnapshotError::NoActiveScheduler {
571                reason: format!(
572                    "snapshot has {} BPF objects with global-section maps \
573                     ({:?}) and the principled target-free prog_idr walker \
574                     could not identify the active obj at capture time (no \
575                     alive struct_ops prog with a `<obj>.bss/.data/.rodata` \
576                     sibling in used_maps, or an empty used_maps whitelist) — \
577                     use \
578                     Snapshot::vars(name) to enumerate every copy or \
579                     Snapshot::map(\"<obj>.<section>\") to address a specific \
580                     scheduler's bss directly",
581                    multiple.len(),
582                    multiple
583                ),
584            }),
585        }
586    }
587
588    /// Read a single live counter from the active scheduler — the
589    /// **default** for single-variable reads. Convenience for
590    /// `self.active()?.var(name)`.
591    ///
592    /// **For multi-variable arithmetic on multiple counters** —
593    /// fractions, ratios, deltas computed across more than one
594    /// named field — use [`Self::live_vars_via`] instead.
595    /// `live_vars_via` resolves the picker ONCE across a name set
596    /// so independent per-name picks cannot corrupt the
597    /// cross-variable computation by selecting different bss
598    /// copies for different names. Repeatedly calling `live_var`
599    /// for two counters from the same scheduler is correct in the
600    /// walker-resolved case (both reads land in the same scheduler's
601    /// bss) but loses that guarantee on the picker-fallback path
602    /// — silent corruption of ratios.
603    ///
604    /// Returns a [`SnapshotField`] carrying either
605    /// [`SnapshotError::NoActiveScheduler`] (no scheduler
606    /// identifiable) or the standard [`Self::var`] error variants
607    /// ([`SnapshotError::VarNotFound`] / [`SnapshotError::TypeMismatch`]
608    /// from the inner var lookup).
609    pub fn live_var(&self, name: &str) -> SnapshotField<'a> {
610        match self.active() {
611            Ok(snap) => snap.var(name),
612            Err(err) => SnapshotField::Missing(err),
613        }
614    }
615
616    /// Caller-supplied disambiguator for the multi-bss case where
617    /// [`Self::live_var`] cannot resolve a single live copy by itself.
618    ///
619    /// [`Self::live_var`] delegates to [`Self::active`] to filter the
620    /// snapshot to one scheduler's maps. When [`Self::active`] cannot
621    /// pick a single scheduler — multiple BPF objects with
622    /// global-section maps are present AND the principled
623    /// `prog_idr → prog aux->used_maps → global-section map → obj prefix`
624    /// walker did not
625    /// identify the live one — it errors with
626    /// [`SnapshotError::NoActiveScheduler`] (the exact `reason` field
627    /// is the long-form message constructed at the bail site listing
628    /// the observed obj_names + the walker's failure cause), and
629    /// [`Self::live_var`] propagates that as [`SnapshotField::Missing`].
630    ///
631    /// `live_var_via` is the escape hatch: it skips the [`Self::active`]
632    /// filter entirely, enumerates every observed copy of `name` via
633    /// [`Self::vars`], and hands the slice to the caller-supplied
634    /// `picker` to pick one by index. Common case: an
635    /// `Op::ReplaceScheduler` swap between two builds of the same
636    /// scheduler that leaves two `<obj>.bss` maps in the snapshot
637    /// sharing one obj_name prefix.
638    ///
639    /// **For multi-variable arithmetic** (ratios, fractions, deltas
640    /// computed across more than one named field), use
641    /// [`Self::live_vars_via`] instead — it resolves the picker once
642    /// across a name set so independent per-name picks cannot
643    /// corrupt the cross-variable computation by selecting different
644    /// bss copies for different names.
645    ///
646    /// `picker` receives every observed copy of the named variable
647    /// (one entry per `<obj>.bss/.data/.rodata` map carrying it,
648    /// per [`Self::vars`]) and returns the index the caller wants
649    /// (typically chosen by inspecting each candidate's value via
650    /// `SnapshotField::as_u64` / `as_str` and applying a liveness
651    /// or activity fingerprint — see
652    /// [`crate::scenario::snapshot::pickers`] for predefined
653    /// pickers such as `max_by_counter_value`).
654    ///
655    /// Returns [`SnapshotField::Missing`] when:
656    /// - the snapshot's underlying `FailureDumpReport` is a
657    ///   placeholder (carrying
658    ///   [`SnapshotError::PlaceholderSnapshot`] — matches the
659    ///   sibling [`Self::var`] / [`Self::map`] placeholder-first
660    ///   contract so callers pattern-matching on the error variant
661    ///   distinguish "freeze rendezvous failed" from "name absent
662    ///   from a real capture"),
663    /// - the snapshot has no copies of `name` (carrying
664    ///   [`SnapshotError::VarNotFound`] with the list of available
665    ///   global-section maps),
666    /// - `picker` returns `None` (carrying
667    ///   [`SnapshotError::ProjectionFailed`] naming the picker as
668    ///   the source), OR
669    /// - `picker` returns `Some(idx)` outside the candidate range
670    ///   (carrying [`SnapshotError::ProjectionFailed`] with the bad
671    ///   index and the candidate count).
672    pub fn live_var_via(
673        &self,
674        name: &str,
675        picker: impl FnOnce(&[(&'a str, SnapshotField<'a>)]) -> Option<usize>,
676    ) -> SnapshotField<'a> {
677        if self.report.is_placeholder {
678            return SnapshotField::Missing(
679                crate::scenario::snapshot::SnapshotError::PlaceholderSnapshot { tag: None },
680            );
681        }
682        let candidates: Vec<(&'a str, SnapshotField<'a>)> = self.vars(name).collect();
683        if candidates.is_empty() {
684            if let Some(err) = self.excluded_filter_err(name.to_string()) {
685                return SnapshotField::Missing(err);
686            }
687            let available: Vec<String> = self
688                .report
689                .maps
690                .iter()
691                .filter(|m| is_global_section_map(&m.name))
692                .map(|m| m.name.clone())
693                .collect();
694            return SnapshotField::Missing(crate::scenario::snapshot::SnapshotError::VarNotFound {
695                requested: name.to_string(),
696                available,
697            });
698        }
699        match picker(&candidates) {
700            Some(idx) if idx < candidates.len() => {
701                let (_obj, field) = candidates.into_iter().nth(idx).unwrap();
702                field
703            }
704            Some(idx) => {
705                SnapshotField::Missing(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
706                    reason: format!(
707                        "live_var_via picker returned index {idx} out of range \
708                         (candidate count = {})",
709                        candidates.len()
710                    ),
711                })
712            }
713            None => {
714                SnapshotField::Missing(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
715                    reason: format!(
716                        "live_var_via picker for '{name}' returned None (no candidate \
717                         matched the supplied disambiguator)"
718                    ),
719                })
720            }
721        }
722    }
723
724    /// Caller-supplied disambiguator for the multi-bss case where
725    /// **multiple variables from the same scheduler instance** must
726    /// be read consistently — e.g. computing
727    /// `nr_mig_cross_dispatch / (nr_mig_same_dispatch + nr_mig_cross_dispatch)`
728    /// as a cross-LLC dispatch fraction from one scheduler's BPF
729    /// counters.
730    ///
731    /// # Why a separate primitive
732    ///
733    /// Calling [`Self::live_var_via`] N times independently risks
734    /// picking a DIFFERENT bss copy per call: the picker resolves
735    /// each name's candidate set independently, so two consecutive
736    /// `live_var_via("a", picker)` + `live_var_via("b", picker)`
737    /// calls can land on bss copy A for `a` and bss copy B for `b`,
738    /// corrupting any cross-variable arithmetic (ratio, fraction,
739    /// delta). `live_vars_via` resolves the picker ONCE across the
740    /// candidate set for all N names jointly so every returned
741    /// [`SnapshotField`] reads from the same source map.
742    ///
743    /// # Mechanism
744    ///
745    /// Per global-section map, look up each name in input order;
746    /// keep the map as a candidate row iff it has ALL the names
747    /// (intersection semantics — partial-coverage maps are absent
748    /// from the picker's input). The picker receives
749    /// `&[(map_name, fields_in_input_order)]` and returns the
750    /// chosen row's index. The returned `Vec<SnapshotField>` is
751    /// positional, keyed by the input `names` order — `result[0]`
752    /// is `names[0]`'s field from the picked map, `result[1]` is
753    /// `names[1]`'s field, etc.
754    ///
755    /// **Single-section constraint.** All `names` must reside in
756    /// the SAME global-section map — typically the scheduler's
757    /// `<obj>.bss`. A `bss` counter co-picked with a `data`
758    /// constant from the same scheduler obj lands in DIFFERENT
759    /// candidate rows (the obj's `.bss` map carries the first
760    /// name, its `.data` map carries the second, neither row has
761    /// both), the intersection collapses to empty, and the helper
762    /// returns [`SnapshotError::VarNotFound`]. If the test reads
763    /// from multiple sections, issue separate `live_vars_via`
764    /// calls (one per section's name group) and compose the
765    /// per-call results caller-side.
766    ///
767    /// # See also
768    ///
769    /// - [`Self::live_var_via`] for single-variable disambiguation.
770    /// - [`crate::scenario::snapshot::pickers::max_by_sum_u64`] for
771    ///   the "max-activity bss" heuristic over co-picked u64
772    ///   counters.
773    ///
774    /// # Errors
775    ///
776    /// - [`SnapshotError::PlaceholderSnapshot`] — the underlying
777    ///   `FailureDumpReport` is a placeholder; matches the sibling
778    ///   [`Self::live_var_via`] / [`Self::var`] / [`Self::map`]
779    ///   placeholder-first contract.
780    /// - [`SnapshotError::ProjectionFailed`] — `names` is empty
781    ///   (caller bug: nothing to co-pick), `picker` returns `None`
782    ///   (no candidate matched), or `picker` returns an
783    ///   out-of-range index.
784    /// - [`SnapshotError::VarNotFound`] — no global-section map
785    ///   has ALL the requested names. `requested` carries the
786    ///   joined name list, `available` carries the global-section
787    ///   map names that were scanned.
788    pub fn live_vars_via<P>(
789        &self,
790        names: &[&str],
791        picker: P,
792    ) -> crate::scenario::snapshot::SnapshotResult<Vec<SnapshotField<'a>>>
793    where
794        P: FnOnce(&[(&'a str, Vec<SnapshotField<'a>>)]) -> Option<usize>,
795    {
796        if self.report.is_placeholder {
797            return Err(
798                crate::scenario::snapshot::SnapshotError::PlaceholderSnapshot { tag: None },
799            );
800        }
801        if names.is_empty() {
802            return Err(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
803                reason: "live_vars_via called with an empty names slice — \
804                         co-pick requires at least one name"
805                    .to_string(),
806            });
807        }
808        // Group by MAP: each global-section map becomes a candidate
809        // row IFF it has ALL the requested names. Partial-coverage
810        // maps are dropped from the picker's input — they cannot
811        // answer the co-pick.
812        let mut candidates: Vec<(&'a str, Vec<SnapshotField<'a>>)> = Vec::new();
813        for m in self.maps_iter() {
814            if !is_global_section_map(&m.name) {
815                continue;
816            }
817            let Some(value) = m.value.as_ref() else {
818                continue;
819            };
820            let mut row: Vec<SnapshotField<'a>> = Vec::with_capacity(names.len());
821            let mut all_present = true;
822            for name in names {
823                if let Some(found) = lookup_member(value, name) {
824                    row.push(SnapshotField::Value(found));
825                } else {
826                    all_present = false;
827                    break;
828                }
829            }
830            if all_present {
831                candidates.push((m.name.as_str(), row));
832            }
833        }
834        if candidates.is_empty() {
835            let requested = format!("[{}]", names.join(", "));
836            if let Some(err) = self.excluded_filter_err(requested.clone()) {
837                return Err(err);
838            }
839            let available: Vec<String> = self
840                .report
841                .maps
842                .iter()
843                .filter(|m| is_global_section_map(&m.name))
844                .map(|m| m.name.clone())
845                .collect();
846            return Err(crate::scenario::snapshot::SnapshotError::VarNotFound {
847                requested,
848                available,
849            });
850        }
851        match picker(&candidates) {
852            Some(idx) if idx < candidates.len() => {
853                let (_obj, fields) = candidates.into_iter().nth(idx).unwrap();
854                Ok(fields)
855            }
856            Some(idx) => Err(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
857                reason: format!(
858                    "live_vars_via picker returned index {idx} out of range \
859                     (candidate count = {})",
860                    candidates.len()
861                ),
862            }),
863            None => Err(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
864                reason: format!(
865                    "live_vars_via picker for [{}] returned None (no candidate \
866                     matched the supplied disambiguator)",
867                    names.join(", ")
868                ),
869            }),
870        }
871    }
872
873    /// Number of maps the current view exposes — every captured
874    /// map when unfiltered; only maps the [`Self::active`] filter
875    /// admits when set.
876    pub fn map_count(&self) -> usize {
877        self.maps_iter().count()
878    }
879
880    /// True when the underlying [`FailureDumpReport`] is a
881    /// placeholder produced by [`FailureDumpReport::placeholder`]
882    /// — i.e. the freeze-rendezvous capture pipeline could not
883    /// produce real data. Periodic-sample temporal patterns use
884    /// this to skip the BPF axis on a placeholder sample (the
885    /// stats axis, when present, may still be valid). Bypassing
886    /// the projection-error path keeps the sample's diagnostic
887    /// distinct from "field missing on a real capture".
888    pub fn is_placeholder(&self) -> bool {
889        self.report.is_placeholder
890    }
891
892    // -----------------------------------------------------------------
893    // First-class accessors for fields the freeze-coordinator pipeline
894    // populates on `FailureDumpReport` outside the BPF-map axis. Each
895    // accessor returns either a borrowed slice (whole-vec views) or an
896    // `Option<&T>` keyed by the natural identifier. Empty vec is the
897    // normal state when the corresponding walker did not run — callers
898    // check the companion `*_unavailable` field on the raw report for
899    // the diagnostic reason. None on a keyed lookup means "the dump
900    // did not capture an entry for that key"; it is not an error.
901    //
902    // **Keyed-lookup naming convention.** `<base>_at(<key>)` is used
903    // when the key is a topology position (CPU index, NUMA node id)
904    // that the kernel allocates densely from 0; the `_at` mirrors
905    // `Vec::get(idx)` and reads naturally as "the row at this
906    // position". `<base>_by_<field>(<value>)` is used when the key is
907    // a sparse identifier (pid, program name) — the `_by_<field>`
908    // names which field the lookup compares against and reads
909    // naturally as "the entry whose <field> matches". The `<base>` is
910    // normally the singular form of the plural-vec accessor (e.g.
911    // `task_enrichments` → `task_enrichment_by_pid`), but stays
912    // plural when the singular reads unnaturally (e.g.
913    // `prog_runtime_stats` → `prog_runtime_stats_by_name` — the
914    // singular `prog_runtime_stat` would be awkward English; the
915    // `Stats` suffix is part of the canonical noun). Each keyed
916    // accessor returns the first match in walker enumeration order;
917    // production captures do not duplicate keys (kernel walker
918    // invariants), but the contract is left first-match-wins so a
919    // future duplicate-key scenario surfaces only one row without
920    // panicking.
921    // -----------------------------------------------------------------
922
923    /// Per-monitor-tick SCX_EV_* event counter samples. Each entry is
924    /// the cross-CPU sum of the 13 SCX event counters at one monitor
925    /// tick. Empty when no `EventCounterCapture` ran, or every sample
926    /// was suppressed (event-stat offsets unresolved, scx_root unset).
927    ///
928    /// Unlike the walker-backed accessors below, this field carries
929    /// no `*_unavailable` companion: an empty timeline is the only
930    /// signal for "no capture / no events".
931    pub fn event_counter_timeline(&self) -> &'a [EventCounterSample] {
932        &self.report.event_counter_timeline
933    }
934
935    /// Per-CPU `rq->scx` snapshots — one per CPU walked by
936    /// `crate::monitor::scx_walker`. Empty when the
937    /// `ScxWalkerCapture` was absent or every CPU's translate
938    /// failed (see `FailureDumpReport::scx_walker_unavailable`).
939    pub fn rq_scx_states(&self) -> &'a [RqScxState] {
940        &self.report.rq_scx_states
941    }
942
943    /// Per-DSQ snapshots — local, bypass, global, and user DSQs
944    /// reachable from `*scx_root`. Each entry carries `nr` (depth),
945    /// `seq` (BPF-iter counter), and the queued task KVAs. Empty
946    /// when the `ScxWalkerCapture` was absent (see
947    /// `FailureDumpReport::scx_walker_unavailable`).
948    pub fn dsq_states(&self) -> &'a [DsqState] {
949        &self.report.dsq_states
950    }
951
952    /// Top-level `scx_sched` state captured from `*scx_root`:
953    /// aborting flag, bypass_depth, exit_kind. `None` when no
954    /// scheduler is attached or `*scx_root` was unreadable (see
955    /// `FailureDumpReport::scx_walker_unavailable`).
956    pub fn scx_sched_state(&self) -> Option<&'a ScxSchedState> {
957        self.report.scx_sched_state.as_ref()
958    }
959
960    /// Per-CPU CPU-time / softirq / IRQ counter rows. One row per
961    /// CPU enumerated by `crate::monitor::dump::CpuTimeCapture`.
962    /// Empty when the capture was not wired or symbol/BTF
963    /// resolution failed.
964    pub fn per_cpu_time(&self) -> &'a [PerCpuTimeStats] {
965        &self.report.per_cpu_time
966    }
967
968    /// Per-CPU CPU-time row for CPU `cpu`, looked up by the `cpu`
969    /// field on each [`PerCpuTimeStats`] (not by vec position).
970    /// Returns `None` when no row matches — typical when the
971    /// walker skipped that CPU, the capture didn't run, or `cpu`
972    /// exceeded the topology. Returns the first match in walker
973    /// enumeration order if `cpu` appears more than once.
974    pub fn per_cpu_time_at(&self, cpu: u32) -> Option<&'a PerCpuTimeStats> {
975        self.report.per_cpu_time.iter().find(|c| c.cpu == cpu)
976    }
977
978    /// Per-cgroup PSI-irq rows for the test's workload cgroups, host-walked
979    /// from the cgroup hierarchy at this freeze (Phase A). One row per
980    /// workload-root leaf cgroup with per-cgroup PSI accounting enabled. Empty
981    /// when the capture was not wired, the workload root isn't present yet, or
982    /// `psi_cgroups_enabled` is off — loud-absent. RAW values; decoded + folded
983    /// at the metric layer (see
984    /// `crate::monitor::cgroup_walk::CgroupPsiStat`).
985    pub fn cgroup_psi(&self) -> &'a [crate::monitor::cgroup_walk::CgroupPsiStat] {
986        &self.report.cgroup_psi
987    }
988
989    /// Per-NUMA-node event counter rows captured from
990    /// `pglist_data->node_zones[]->vm_numa_event[]`. Empty until
991    /// the host-side NUMA walker lands (see
992    /// `FailureDumpReport::per_node_numa_unavailable`).
993    pub fn per_node_numa(&self) -> &'a [PerNodeNumaStats] {
994        &self.report.per_node_numa
995    }
996
997    /// Per-NUMA-node event-counter row for `node`, looked up by
998    /// the `node` field on each [`PerNodeNumaStats`]. Returns
999    /// `None` when no row matches. Returns the first match in
1000    /// walker enumeration order if `node` appears more than once.
1001    pub fn per_node_numa_at(&self, node: u32) -> Option<&'a PerNodeNumaStats> {
1002        self.report.per_node_numa.iter().find(|n| n.node == node)
1003    }
1004
1005    /// Per-task failure-dump enrichments — identity (pid, tgid,
1006    /// comm), process tree, scheduling priority, sched_class name,
1007    /// context-switch counters, watchdog disambiguation, lock
1008    /// slowpath stack matches. Empty when no task walker ran (see
1009    /// `FailureDumpReport::task_enrichments_unavailable`).
1010    pub fn task_enrichments(&self) -> &'a [TaskEnrichment] {
1011        &self.report.task_enrichments
1012    }
1013
1014    /// Look up the enrichment for `pid`. The returned reference
1015    /// matches the first task whose `task_struct.pid` equals `pid`
1016    /// in walker enumeration order. Returns `None` when no task with
1017    /// that pid was captured. Production captures dedupe by task_kva
1018    /// before push, so duplicate-pid rows do not occur in real
1019    /// dumps.
1020    pub fn task_enrichment_by_pid(&self, pid: i32) -> Option<&'a TaskEnrichment> {
1021        self.report.task_enrichments.iter().find(|t| t.pid == pid)
1022    }
1023
1024    /// Per-program BPF runtime stats — invocation count, total ns,
1025    /// recursion misses. One entry per struct_ops program reached
1026    /// by the prog walker. Empty when no struct_ops programs are
1027    /// loaded or the prog accessor was unavailable (see
1028    /// `FailureDumpReport::prog_runtime_stats_unavailable`).
1029    pub fn prog_runtime_stats(&self) -> &'a [ProgRuntimeStats] {
1030        &self.report.prog_runtime_stats
1031    }
1032
1033    /// Look up the runtime stats for the program registered with
1034    /// `name` (kernel-side `bpf_prog->aux->name`). Returns `None`
1035    /// when no program with that name was captured. Returns the
1036    /// first match in walker enumeration order if `name` appears
1037    /// more than once — struct_ops programs in real captures use
1038    /// distinct callback names (`select_cpu`, `enqueue`, etc.) so
1039    /// duplicates do not occur in production.
1040    pub fn prog_runtime_stats_by_name(&self, name: &str) -> Option<&'a ProgRuntimeStats> {
1041        self.report
1042            .prog_runtime_stats
1043            .iter()
1044            .find(|p| p.name == name)
1045    }
1046
1047    /// Probe BPF program's per-CPU diagnostic counter snapshot.
1048    /// `None` when the probe's `.bss` map isn't enumerated (probe
1049    /// not loaded), the program BTF can't be parsed, or the
1050    /// array's offset doesn't resolve. A populated
1051    /// `trigger_count > 0` is the structural signal that the
1052    /// `tp_btf/sched_ext_exit` handler fired during the run.
1053    pub fn probe_counters(&self) -> Option<&'a ProbeBssCounters> {
1054        self.report.probe_counters.as_ref()
1055    }
1056
1057    // -----------------------------------------------------------------
1058    // Companion `*_unavailable` diagnostic accessors. Each accessor
1059    // pairs with the walker-backed slice/option accessor above:
1060    // when the slice is empty (or the option is None), the matching
1061    // `*_unavailable()` returns `Some(reason)` if the walker
1062    // recorded one. `None` from the unavailable accessor means
1063    // either the walker ran normally (slice populated) or the field
1064    // is simply absent from the wire format (no reason recorded).
1065    // -----------------------------------------------------------------
1066
1067    /// Diagnostic reason recorded when [`Self::rq_scx_states`] /
1068    /// [`Self::dsq_states`] / [`Self::scx_sched_state`] could not
1069    /// be populated. `None` when the walker fully succeeded;
1070    /// otherwise `Some(reason)` (e.g. `"scx_root null"`,
1071    /// `"no scx walker"`, or a partial-degradation string from the
1072    /// dump pipeline).
1073    pub fn scx_walker_unavailable(&self) -> Option<&'a str> {
1074        self.report.scx_walker_unavailable.as_deref()
1075    }
1076
1077    /// Diagnostic reason recorded when [`Self::task_enrichments`]
1078    /// could not be populated. `None` when the walker yielded at
1079    /// least one enrichment; otherwise `Some(reason)`
1080    /// (e.g. `"no task walker available"`,
1081    /// `"task walker yielded zero tasks"`).
1082    pub fn task_enrichments_unavailable(&self) -> Option<&'a str> {
1083        self.report.task_enrichments_unavailable.as_deref()
1084    }
1085
1086    /// Diagnostic reason recorded when [`Self::prog_runtime_stats`]
1087    /// could not be populated. `None` when the walker yielded at
1088    /// least one program; otherwise `Some(reason)`
1089    /// (e.g. `"prog accessor unavailable"`,
1090    /// `"no struct_ops programs loaded"`).
1091    pub fn prog_runtime_stats_unavailable(&self) -> Option<&'a str> {
1092        self.report.prog_runtime_stats_unavailable.as_deref()
1093    }
1094
1095    /// Diagnostic reason recorded when [`Self::per_node_numa`]
1096    /// could not be populated — typically `"no NUMA walker"` until
1097    /// the host-side walker lands.
1098    pub fn per_node_numa_unavailable(&self) -> Option<&'a str> {
1099        self.report.per_node_numa_unavailable.as_deref()
1100    }
1101
1102    /// Diagnostic reason recorded when the SDT allocator snapshot
1103    /// (still escape-only via [`Self::report`]) could not be
1104    /// populated.
1105    pub fn sdt_alloc_unavailable(&self) -> Option<&'a str> {
1106        self.report.sdt_alloc_unavailable.as_deref()
1107    }
1108}
1109
/// Report whether `name` ends in one of the global-section suffixes
/// (`.bss`, `.data`, `.rodata`) of the libbpf-composed
/// `<obj>.<section>` map naming.
fn is_global_section_map(name: &str) -> bool {
    const GLOBAL_SECTION_SUFFIXES: [&str; 3] = [".bss", ".data", ".rodata"];
    GLOBAL_SECTION_SUFFIXES
        .iter()
        .any(|suffix| name.ends_with(*suffix))
}
1115
/// Report whether `map_name`'s obj prefix — the text before its
/// first `.` — equals `obj`. A name without any `.` has no prefix
/// and never matches. Used by [`Snapshot::maps_iter`] when an
/// active-scheduler filter is set.
fn map_belongs_to_obj(map_name: &str, obj: &str) -> bool {
    match map_name.split_once('.') {
        Some((owner, _section)) => owner == obj,
        None => false,
    }
}
1125
/// Build the diagnostic [`Snapshot::active`] reports when one obj
/// prefix owns several same-name copies of a global-section map.
///
/// `bss`, `data`, and `rodata` are full-name equality counts for
/// `<prefix>.bss` / `<prefix>.data` / `<prefix>.rodata`; only the
/// sections whose count exceeds 1 are listed in the message. The
/// typical cause is an `Op::ReplaceScheduler` swap between two
/// builds of the same binary, which leaves the dying instance's
/// globals adjacent to the new instance's. The message closes by
/// steering the operator at the picker-based disambiguators.
fn format_multi_copy_reason(prefix: &str, bss: usize, data: usize, rodata: usize) -> String {
    // Keep the sections in fixed bss / data / rodata order and drop
    // the ones that are not duplicated.
    let detail = [("bss", bss), ("data", data), ("rodata", rodata)]
        .iter()
        .filter(|(_, copies)| *copies > 1)
        .map(|(section, copies)| format!("{prefix}.{section} × {copies}"))
        .collect::<Vec<String>>()
        .join(", ");
    format!(
        "snapshot has multiple same-name copies of {prefix}'s global-section maps \
         ({detail}) and the principled target-free prog_idr walker did not \
         publish an active_map_kvas whitelist to disambiguate (transient swap \
         window where \
         the accessor-init worker has not yet republished, or the walker is \
         unavailable on this kernel build) — use \
         `series.live_bpf_vars_via([\"name\"], pickers::max_by_sum_u64)` for \
         multi-variable counter co-pick, or \
         `Snapshot::live_var_via(name, pickers::max_by_counter_value)` for a \
         single-counter pick, to pick by counter activity"
    )
}
1160
1161// ---------------------------------------------------------------------------
1162// SnapshotMap
1163// ---------------------------------------------------------------------------
1164
/// One map's view, possibly narrowed to a specific per-CPU slot via
/// [`Self::cpu`]. Returned by [`Snapshot::map`].
///
/// The view holds only a borrow of the captured map plus the
/// optional CPU selection; no map data is copied into it.
#[derive(Debug)]
#[must_use = "SnapshotMap is a borrowed view; chain accessors"]
#[non_exhaustive]
pub struct SnapshotMap<'a> {
    /// Captured map this view reads from, borrowed for `'a`.
    map: &'a FailureDumpMap,
    /// When `Some(cpu)`, subsequent [`Self::at`] /
    /// [`Self::find`] calls walk only the per-CPU slot for that
    /// CPU; `None` walks the natural (non-per-CPU) entry list.
    cpu: Option<usize>,
}
1177
1178impl<'a> SnapshotMap<'a> {
1179    /// Map name as captured.
1180    pub fn name(&self) -> &'a str {
1181        &self.map.name
1182    }
1183
1184    /// When this map's contents failed to render at capture time
1185    /// (`FailureDumpMap::error` is set), produce the
1186    /// [`SnapshotError::MapRenderIncomplete`] that callers should
1187    /// surface in place of an "empty map" verdict. `None` when the
1188    /// map rendered successfully — the absence is then genuine and
1189    /// the caller's own "not found" / "out of range" error applies.
1190    fn render_incomplete_err(&self) -> Option<SnapshotError> {
1191        self.map
1192            .error
1193            .as_ref()
1194            .map(|error| SnapshotError::MapRenderIncomplete {
1195                map: self.map.name.clone(),
1196                error: error.clone(),
1197            })
1198    }
1199
1200    /// Underlying [`FailureDumpMap`].
1201    pub fn raw(&self) -> &'a FailureDumpMap {
1202        self.map
1203    }
1204
1205    /// Ringbuf occupancy snapshot for `BPF_MAP_TYPE_RINGBUF` /
1206    /// `BPF_MAP_TYPE_USER_RINGBUF` maps — capacity, consumer /
1207    /// producer / pending positions, and the cumulative
1208    /// `pending_bytes` gap. `None` for non-ringbuf maps or when
1209    /// the BTF offsets for `bpf_ringbuf_map` / `bpf_ringbuf`
1210    /// weren't resolvable at capture time.
1211    pub fn ringbuf(&self) -> Option<&'a FailureDumpRingbuf> {
1212        self.map.ringbuf.as_ref()
1213    }
1214
1215    /// Mapped-page snapshot for `BPF_MAP_TYPE_ARENA` maps. Borrows
1216    /// the per-page `(user_addr, bytes)` records plus the declared
1217    /// span / truncation flags. `None` for non-arena maps or when
1218    /// the arena walker failed to translate the user_vm window.
1219    pub fn arena(&self) -> Option<&'a ArenaSnapshot> {
1220        self.map.arena.as_ref()
1221    }
1222
1223    /// Populated-slot summary for FD-array families (`PROG_ARRAY`,
1224    /// `PERF_EVENT_ARRAY`, `ARRAY_OF_MAPS`, `SOCKMAP*`, etc.).
1225    /// `None` for non-FD-array maps. Surfaces the populated count,
1226    /// scanned slot count, populated-index list, and the two
1227    /// truncation flags ([`FailureDumpFdArray::truncated`] for the
1228    /// scan limit, [`FailureDumpFdArray::indices_truncated`] for the
1229    /// index list limit).
1230    pub fn fd_array(&self) -> Option<&'a FailureDumpFdArray> {
1231        self.map.fd_array.as_ref()
1232    }
1233
1234    /// Per-bucket summary for `BPF_MAP_TYPE_STACK_TRACE` maps.
1235    /// `None` for non-STACK_TRACE maps or when the BTF offsets for
1236    /// `bpf_stack_map` / `stack_map_bucket` weren't resolvable.
1237    pub fn stack_trace(&self) -> Option<&'a FailureDumpStackTrace> {
1238        self.map.stack_trace.as_ref()
1239    }
1240
1241    /// Per-map decode-error string set by the freeze coordinator
1242    /// when this map's contents are missing or partial. `None` on a
1243    /// successful render. Distinct from [`SnapshotError`] (which
1244    /// flows through the accessor API) — `map_error` surfaces the
1245    /// capture-side diagnostic the kernel-walker recorded before
1246    /// the snapshot was handed to test code.
1247    pub fn map_error(&self) -> Option<&'a str> {
1248        self.map.error.as_deref()
1249    }
1250
    /// Narrow this map view to a specific per-CPU slot. On a map
    /// whose underlying entries are not per-CPU, the CPU index is
    /// recorded but ignored. Use on
1254    /// `BPF_MAP_TYPE_PERCPU_ARRAY` / `BPF_MAP_TYPE_PERCPU_HASH` /
1255    /// `BPF_MAP_TYPE_LRU_PERCPU_HASH`.
1256    pub fn cpu(self, n: usize) -> SnapshotMap<'a> {
1257        SnapshotMap {
1258            map: self.map,
1259            cpu: Some(n),
1260        }
1261    }
1262
1263    /// Get an entry by ordinal index.
1264    ///
1265    /// For HASH-style entry lists, returns the `n`-th
1266    /// `crate::monitor::dump::FailureDumpEntry` in the captured order. For per-CPU
1267    /// array maps narrowed via [`Self::cpu`], returns the entry
1268    /// at key `n` with its per-CPU slot pre-resolved. For ARRAY
1269    /// maps with a single value, `n == 0` returns the value.
1270    pub fn at(&self, n: usize) -> SnapshotEntry<'a> {
1271        let resolved = self.entry_at(n);
1272        match resolved {
1273            Ok(e) => e,
1274            Err(err) => SnapshotEntry::Missing(err),
1275        }
1276    }
1277
1278    /// Find the first entry matching `predicate`. Returns
1279    /// [`SnapshotEntry::Missing`] with [`SnapshotError::NoMatch`]
1280    /// when no entry matches. The NoMatch payload carries the
1281    /// total entry count traversed and a small sample of rendered
1282    /// keys so the failure message can tell `empty map` apart from
1283    /// `populated map, predicate never matched`.
1284    pub fn find(&self, predicate: impl Fn(&SnapshotEntry<'a>) -> bool) -> SnapshotEntry<'a> {
1285        let mut len = 0usize;
1286        let mut available_keys: Vec<String> = Vec::with_capacity(NO_MATCH_KEY_SAMPLE);
1287        for entry in self.iter_entries() {
1288            if predicate(&entry) {
1289                return entry;
1290            }
1291            if available_keys.len() < NO_MATCH_KEY_SAMPLE
1292                && let Some(k) = render_entry_key(&entry)
1293            {
1294                available_keys.push(k);
1295            }
1296            len += 1;
1297        }
1298        // An empty traversal over a map whose contents failed to
1299        // render is a capture gap, not "predicate never matched".
1300        if len == 0
1301            && let Some(err) = self.render_incomplete_err()
1302        {
1303            return SnapshotEntry::Missing(err);
1304        }
1305        SnapshotEntry::Missing(SnapshotError::NoMatch {
1306            map: self.map.name.clone(),
1307            op: "find".to_string(),
1308            len,
1309            available_keys,
1310        })
1311    }
1312
1313    /// Collect every entry matching `predicate` into a Vec.
1314    pub fn filter(&self, predicate: impl Fn(&SnapshotEntry<'a>) -> bool) -> Vec<SnapshotEntry<'a>> {
1315        self.iter_entries().filter(|e| predicate(e)).collect()
1316    }
1317
1318    /// Find the entry whose `key_fn` produces the maximum u64.
1319    /// Returns [`SnapshotEntry::Missing`] when the map has no
1320    /// entries. The NoMatch payload's `len` is 0 in that case;
1321    /// `available_keys` is empty (the map has no keys to sample).
1322    pub fn max_by(&self, key_fn: impl Fn(&SnapshotEntry<'a>) -> u64) -> SnapshotEntry<'a> {
1323        let mut best: Option<(u64, SnapshotEntry<'a>)> = None;
1324        for entry in self.iter_entries() {
1325            let k = key_fn(&entry);
1326            let beats = best.as_ref().is_none_or(|(prev, _)| k > *prev);
1327            if beats {
1328                best = Some((k, entry));
1329            }
1330        }
1331        match best {
1332            Some((_, e)) => e,
1333            None => {
1334                // No entries to compare. A render failure (contents
1335                // unreadable at capture) is distinct from an
1336                // empty map; surface it so the gap is visible.
1337                if let Some(err) = self.render_incomplete_err() {
1338                    return SnapshotEntry::Missing(err);
1339                }
1340                SnapshotEntry::Missing(SnapshotError::NoMatch {
1341                    map: self.map.name.clone(),
1342                    op: "max_by".to_string(),
1343                    len: 0,
1344                    available_keys: Vec::new(),
1345                })
1346            }
1347        }
1348    }
1349
1350    /// Iterator over every entry under this view. Used by
1351    /// [`Self::find`] / [`Self::filter`] / [`Self::max_by`].
1352    fn iter_entries(&self) -> Box<dyn Iterator<Item = SnapshotEntry<'a>> + 'a> {
1353        if !self.map.percpu_entries.is_empty() {
1354            let cpu = self.cpu;
1355            let map = self.map;
1356            return Box::new(
1357                map.percpu_entries
1358                    .iter()
1359                    .map(move |e| resolve_percpu_entry(map, e, cpu)),
1360            );
1361        }
1362        if !self.map.percpu_hash_entries.is_empty() {
1363            let cpu = self.cpu;
1364            let map = self.map;
1365            return Box::new(
1366                map.percpu_hash_entries
1367                    .iter()
1368                    .map(move |e| resolve_percpu_hash_entry(map, e, cpu)),
1369            );
1370        }
1371        if !self.map.entries.is_empty() {
1372            return Box::new(self.map.entries.iter().map(SnapshotEntry::Hash));
1373        }
1374        if let Some(v) = self.map.value.as_ref() {
1375            return Box::new(std::iter::once(SnapshotEntry::Value(v)));
1376        }
1377        Box::new(std::iter::empty())
1378    }
1379
1380    /// Internal entry-by-index resolver returning a structured
1381    /// error for the surrounding [`Self::at`] arm.
1382    fn entry_at(&self, n: usize) -> SnapshotResult<SnapshotEntry<'a>> {
1383        if !self.map.percpu_entries.is_empty() {
1384            return resolve_percpu_entry_at(self.map, n, self.cpu);
1385        }
1386        if !self.map.percpu_hash_entries.is_empty() {
1387            return resolve_percpu_hash_entry_at(self.map, n, self.cpu);
1388        }
1389        if !self.map.entries.is_empty() {
1390            if n < self.map.entries.len() {
1391                return Ok(SnapshotEntry::Hash(&self.map.entries[n]));
1392            }
1393            return Err(SnapshotError::IndexOutOfRange {
1394                map: self.map.name.clone(),
1395                index: n,
1396                len: self.map.entries.len(),
1397            });
1398        }
1399        if let Some(v) = self.map.value.as_ref() {
1400            if n == 0 {
1401                return Ok(SnapshotEntry::Value(v));
1402            }
1403            return Err(SnapshotError::IndexOutOfRange {
1404                map: self.map.name.clone(),
1405                index: n,
1406                len: 1,
1407            });
1408        }
1409        // Nothing to walk. Distinguish a render failure (contents
1410        // could not be read at capture, `error` set) from a
1411        // genuinely-empty map so the operator sees the capture gap
1412        // rather than a misleading "index out of range, len 0".
1413        if let Some(err) = self.render_incomplete_err() {
1414            return Err(err);
1415        }
1416        Err(SnapshotError::IndexOutOfRange {
1417            map: self.map.name.clone(),
1418            index: n,
1419            len: 0,
1420        })
1421    }
1422}
1423
1424fn resolve_percpu_entry_at<'a>(
1425    map: &'a FailureDumpMap,
1426    n: usize,
1427    cpu: Option<usize>,
1428) -> SnapshotResult<SnapshotEntry<'a>> {
1429    if n >= map.percpu_entries.len() {
1430        return Err(SnapshotError::IndexOutOfRange {
1431            map: map.name.clone(),
1432            index: n,
1433            len: map.percpu_entries.len(),
1434        });
1435    }
1436    Ok(resolve_percpu_entry(map, &map.percpu_entries[n], cpu))
1437}
1438
1439fn resolve_percpu_entry<'a>(
1440    map: &'a FailureDumpMap,
1441    entry: &'a FailureDumpPercpuEntry,
1442    cpu: Option<usize>,
1443) -> SnapshotEntry<'a> {
1444    let Some(c) = cpu else {
1445        return SnapshotEntry::Percpu(entry);
1446    };
1447    if c >= entry.per_cpu.len() {
1448        return SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1449            map: map.name.clone(),
1450            cpu: u32::try_from(c).unwrap_or(u32::MAX),
1451            len: entry.per_cpu.len(),
1452            unmapped: false,
1453        });
1454    }
1455    match entry.per_cpu[c].as_ref() {
1456        Some(v) => SnapshotEntry::Value(v),
1457        None => SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1458            map: map.name.clone(),
1459            cpu: u32::try_from(c).unwrap_or(u32::MAX),
1460            len: entry.per_cpu.len(),
1461            unmapped: true,
1462        }),
1463    }
1464}
1465
1466fn resolve_percpu_hash_entry_at<'a>(
1467    map: &'a FailureDumpMap,
1468    n: usize,
1469    cpu: Option<usize>,
1470) -> SnapshotResult<SnapshotEntry<'a>> {
1471    if n >= map.percpu_hash_entries.len() {
1472        return Err(SnapshotError::IndexOutOfRange {
1473            map: map.name.clone(),
1474            index: n,
1475            len: map.percpu_hash_entries.len(),
1476        });
1477    }
1478    Ok(resolve_percpu_hash_entry(
1479        map,
1480        &map.percpu_hash_entries[n],
1481        cpu,
1482    ))
1483}
1484
1485fn resolve_percpu_hash_entry<'a>(
1486    map: &'a FailureDumpMap,
1487    entry: &'a FailureDumpPercpuHashEntry,
1488    cpu: Option<usize>,
1489) -> SnapshotEntry<'a> {
1490    let Some(c) = cpu else {
1491        return SnapshotEntry::PercpuHash(entry);
1492    };
1493    if c >= entry.per_cpu.len() {
1494        return SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1495            map: map.name.clone(),
1496            cpu: u32::try_from(c).unwrap_or(u32::MAX),
1497            len: entry.per_cpu.len(),
1498            unmapped: false,
1499        });
1500    }
1501    match entry.per_cpu[c].as_ref() {
1502        Some(v) => SnapshotEntry::Value(v),
1503        None => SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1504            map: map.name.clone(),
1505            cpu: u32::try_from(c).unwrap_or(u32::MAX),
1506            len: entry.per_cpu.len(),
1507            unmapped: true,
1508        }),
1509    }
1510}
1511
1512/// Render a [`SnapshotEntry`]'s key into a bounded `String` suitable
1513/// for the [`SnapshotError::NoMatch::available_keys`] sample.
1514///
1515/// Returns `None` for [`SnapshotEntry::Value`] (single-value ARRAY
1516/// maps have no key surface) and [`SnapshotEntry::Missing`] (no
1517/// entry was produced). Hash / per-CPU-hash entries fall back to
1518/// the hex-encoded raw key bytes via the `hex:` prefix when BTF
1519/// rendering was absent at capture time. The result is truncated
1520/// to [`NO_MATCH_KEY_CHAR_CAP`] chars with a trailing `…` to keep
1521/// wide struct keys from overrunning failure-message lines.
1522pub(super) fn render_entry_key(entry: &SnapshotEntry<'_>) -> Option<String> {
1523    let key = match entry {
1524        SnapshotEntry::Hash(e) => match e.key.as_ref() {
1525            Some(rv) => rv.to_string(),
1526            None => format!("{HEX_KEY_PREFIX}{}", e.key_hex),
1527        },
1528        SnapshotEntry::PercpuHash(e) => match e.key.as_ref() {
1529            Some(rv) => rv.to_string(),
1530            None => format!("{HEX_KEY_PREFIX}{}", e.key_hex),
1531        },
1532        SnapshotEntry::Percpu(e) => e.key.to_string(),
1533        SnapshotEntry::Value(_) | SnapshotEntry::Missing(_) => return None,
1534    };
1535    // Bytes-per-char is >= 1 in UTF-8, so byte-length <= char-cap implies
1536    // char-length <= char-cap — short-circuit the O(n) chars().count()
1537    // walk on the common ASCII case.
1538    if key.len() <= NO_MATCH_KEY_CHAR_CAP {
1539        return Some(key);
1540    }
1541    if key.chars().count() > NO_MATCH_KEY_CHAR_CAP {
1542        let mut truncated: String = key
1543            .chars()
1544            .take(NO_MATCH_KEY_CHAR_CAP.saturating_sub(1))
1545            .collect();
1546        truncated.push('…');
1547        Some(truncated)
1548    } else {
1549        Some(key)
1550    }
1551}