ktstr/scenario/snapshot/view.rs
1//! [`Snapshot`] is the entry point for a captured
2//! [`FailureDumpReport`], plus [`SnapshotMap`] for typed traversal of
3//! one map and the per-CPU resolver helpers it uses to project
4//! per-CPU array / hash entries down to a single slot.
5//!
6//! [`render_entry_key`] formats a [`SnapshotEntry`] key for the
7//! `NoMatch` diagnostic; lives here because it walks the same
8//! `SnapshotMap` entry shapes the type uses internally.
9
10use crate::monitor::arena::ArenaSnapshot;
11use crate::monitor::bpf_prog::ProgRuntimeStats;
12use crate::monitor::btf_render::RenderedValue;
13use crate::monitor::dump::{
14 EventCounterSample, FailureDumpFdArray, FailureDumpMap, FailureDumpPercpuEntry,
15 FailureDumpPercpuHashEntry, FailureDumpReport, FailureDumpRingbuf, FailureDumpStackTrace,
16 PerCpuTimeStats, PerNodeNumaStats, ProbeBssCounters,
17};
18use crate::monitor::scx_walker::{DsqState, RqScxState, ScxSchedState};
19use crate::monitor::task_enrichment::TaskEnrichment;
20
21use super::field::lookup_member;
22use super::{
23 ExcludedMap, HEX_KEY_PREFIX, NO_MATCH_KEY_CHAR_CAP, NO_MATCH_KEY_SAMPLE, SnapshotEntry,
24 SnapshotError, SnapshotField, SnapshotResult,
25};
26
/// Borrowed view over a captured [`FailureDumpReport`] for typed
/// traversal of BTF-rendered map values, per-CPU entries, and
/// scalar variables.
///
/// Constructed from a [`FailureDumpReport`] reference (typically
/// obtained via [`super::SnapshotBridge::drain`]); the view is cheap to
/// build — it does not copy the underlying report. Accessor
/// methods all return further borrowed views that walk the report
/// in place.
#[derive(Debug, Clone)]
#[must_use = "Snapshot is a borrowed view; bind or chain accessors"]
#[non_exhaustive]
pub struct Snapshot<'a> {
    /// The captured report every accessor reads from. Borrowed for
    /// `'a`, never copied.
    report: &'a FailureDumpReport,
    /// When `Some`, every map-walking accessor filters
    /// [`FailureDumpReport::maps`] to maps whose `name` begins with
    /// `<obj>.`. Populated by [`Self::active`], which takes the
    /// prefix from the report's `active_obj_name` when that names an
    /// obj with captured global-section maps, and otherwise from the
    /// obj prefix shared by the captured global-section maps. `None`
    /// when the snapshot was constructed via [`Self::new`]
    /// (unfiltered).
    active_obj: Option<&'a str>,
    /// Optional kernel-map-KVA whitelist used alongside
    /// [`Self::active_obj`] to defend against the same-binary case
    /// (two scheduler instances loaded from the same binary, e.g.
    /// MITOSIS_FIXED + MITOSIS_ADAPTIVE both loading `scx_mitosis`,
    /// where the obj prefix matches both copies' bss/data/rodata
    /// maps). When set + non-empty, a map is "active" only if BOTH
    /// `active_obj` matches its prefix AND its
    /// [`FailureDumpMap::map_kva`] appears in the whitelist.
    ///
    /// `&[]` (empty) when the view came from [`Self::new`], when
    /// [`Self::active`] resolved a prefix via the Phase-1 name path
    /// (no walker run → no KVA set captured), OR when the snapshot
    /// pre-dates the walker plumbing. With an empty whitelist the
    /// filter degrades to obj-prefix matching only — still correct
    /// for the different-binary case; loses the same-binary
    /// disambiguation guarantee.
    active_map_kvas: &'a [u64],
}
64
65impl<'a> Snapshot<'a> {
66 /// Build a borrowed view over `report` with no active-scheduler
67 /// filter. Every map-walking accessor sees every captured map.
68 pub fn new(report: &'a FailureDumpReport) -> Self {
69 Self {
70 report,
71 active_obj: None,
72 active_map_kvas: &[],
73 }
74 }
75
76 /// Iterate maps the current view exposes — every captured map
77 /// when `active_obj` is None; only maps whose name shares the
78 /// `<obj>.` prefix when [`Self::active`] populated the filter.
79 /// When [`Self::active_map_kvas`] is also populated, additionally
80 /// require the map's [`FailureDumpMap::map_kva`] to be in the
81 /// whitelist — this catches the same-binary case where two
82 /// scheduler instances' bss maps share an obj prefix but live at
83 /// distinct kernel addresses.
84 fn maps_iter(&self) -> impl Iterator<Item = &'a FailureDumpMap> + '_ {
85 let active = self.active_obj;
86 let kva_filter = self.active_map_kvas;
87 self.report.maps.iter().filter(move |m| match active {
88 None => true,
89 Some(obj) => {
90 if !map_belongs_to_obj(&m.name, obj) {
91 return false;
92 }
93 // Empty whitelist = no KVA filter (phase-1 name path
94 // OR pre-walker snapshot). Non-empty = require the
95 // map's KVA to appear; defends against KVA aliasing
96 // and same-binary post-swap ambiguity per the
97 // FailureDumpReport::active_map_kvas doc.
98 if kva_filter.is_empty() {
99 return true;
100 }
101 m.map_kva != 0 && kva_filter.contains(&m.map_kva)
102 }
103 })
104 }
105
106 /// Construct [`SnapshotError::ActiveFilterExcludedMaps`] for the
107 /// caller IFF the active KVA filter rejected EVERY captured
108 /// `<active_obj>.*` map. Returns `None` in every other case:
109 ///
110 /// - the view is not active-filtered (`active_obj` is `None`),
111 /// - the KVA whitelist is empty (no filter active),
112 /// - no map shares the active obj prefix at all (the standard
113 /// `MapNotFound` / `VarNotFound` diagnostic carries it),
114 /// - at least one captured `<active_obj>.*` map passed the KVA
115 /// whitelist (the admitted set is non-empty, so a lookup miss
116 /// is a real typo / absent symbol — fall through to the
117 /// standard diagnostic, do not falsely steer the operator at
118 /// the filter).
119 ///
120 /// Only the "admitted set genuinely empty" case fires the rich
121 /// diagnostic. Caller is responsible for the `requested` field;
122 /// every other field is populated from the snapshot.
123 fn excluded_filter_err(&self, requested: String) -> Option<SnapshotError> {
124 let obj = self.active_obj?;
125 if self.active_map_kvas.is_empty() {
126 return None;
127 }
128 let mut excluded: Vec<ExcludedMap> = Vec::new();
129 let mut any_admitted = false;
130 for m in &self.report.maps {
131 if !map_belongs_to_obj(&m.name, obj) {
132 continue;
133 }
134 if m.map_kva != 0 && self.active_map_kvas.contains(&m.map_kva) {
135 any_admitted = true;
136 continue;
137 }
138 excluded.push(ExcludedMap {
139 name: m.name.clone(),
140 map_kva: m.map_kva,
141 });
142 }
143 if excluded.is_empty() || any_admitted {
144 return None;
145 }
146 Some(SnapshotError::ActiveFilterExcludedMaps {
147 requested,
148 active_obj: obj.to_string(),
149 excluded_maps: excluded,
150 whitelist_kvas: self.active_map_kvas.to_vec(),
151 })
152 }
153
154 /// Underlying [`FailureDumpReport`] borrowed back to the caller.
155 ///
156 /// **Escape hatch.** Most consumers should reach for the typed
157 /// accessors on [`Snapshot`] / [`SnapshotMap`] / [`SnapshotEntry`]
158 /// / [`SnapshotField`], which route through [`SnapshotError`] and
159 /// compose with the [`crate::assert::temporal`] patterns via
160 /// [`SeriesField`](crate::assert::temporal::SeriesField). Use
161 /// `report()` only when a [`FailureDumpReport`] field has no
162 /// typed accessor yet:
163 ///
164 /// - `vcpu_regs` — per-vCPU register snapshot captured at the
165 /// freeze instant.
166 /// - `vcpu_perf_at_freeze` — per-vCPU hardware perf counter
167 /// snapshot captured at the freeze instant.
168 /// - `dump_truncated_at_us` — microseconds-into-the-dump at
169 /// which the soft deadline tripped.
170 /// - `sdt_allocations`, `scx_static_ranges` — SDT allocator and
171 /// scx static memory layout snapshots used by the arena /
172 /// pointer-renderer pipelines.
173 /// - `schema` — wire-format metadata
174 /// ([`Self::is_placeholder`] already wraps the boolean form).
175 ///
176 /// All other fields documented as escape-only on
177 /// [`FailureDumpReport`] above now have first-class accessors on
178 /// [`Snapshot`] (`event_counter_timeline`, `rq_scx_states`,
179 /// `dsq_states`, `scx_sched_state`, `per_cpu_time`,
180 /// `per_node_numa`, `task_enrichments`, `prog_runtime_stats`,
181 /// `probe_counters`) and on [`SnapshotMap`] (`ringbuf`,
182 /// `arena`, `fd_array`, `stack_trace`, `map_error`).
183 ///
184 /// Five `*_unavailable` diagnostic accessors cover the subset of
185 /// walker-backed fields the dump pipeline writes a reason string
186 /// for: [`Self::scx_walker_unavailable`] (shared by
187 /// rq_scx_states / dsq_states / scx_sched_state — the scx
188 /// walker writes one reason for the whole group),
189 /// [`Self::task_enrichments_unavailable`],
190 /// [`Self::prog_runtime_stats_unavailable`],
191 /// [`Self::per_node_numa_unavailable`], and
192 /// [`Self::sdt_alloc_unavailable`] (for the still-escape-only
193 /// `sdt_allocations` field above). The remaining accessors
194 /// (`event_counter_timeline`, `per_cpu_time`, `probe_counters`)
195 /// have no companion diagnostic — empty / None is their only
196 /// "no capture" signal.
197 ///
198 /// **Caveats of the bypass:**
199 /// - No [`SnapshotError`] routing — call-site is on its own to
200 /// handle missing fields / type mismatches / per-CPU
201 /// narrowing.
202 /// - No [`SeriesField`](crate::assert::temporal::SeriesField)
203 /// integration — temporal patterns
204 /// ([`nondecreasing`](crate::assert::temporal::SeriesField::nondecreasing),
205 /// [`rate_within`](crate::assert::temporal::SeriesField::rate_within),
206 /// etc.) cannot consume raw `FailureDumpReport` field values.
207 /// - No placeholder-sample short-circuit
208 /// ([`Self::is_placeholder`] check is the caller's
209 /// responsibility).
210 pub fn report(&self) -> &'a FailureDumpReport {
211 self.report
212 }
213
214 /// Look up a BPF map by exact name. Respects the
215 /// [`Self::active`] filter when set — only maps the filter
216 /// admits are considered. Returns [`SnapshotError::MapNotFound`]
217 /// (with the captured map names in `available`) when no match
218 /// is found among the admitted maps, or
219 /// [`SnapshotError::PlaceholderSnapshot`] when the snapshot's
220 /// underlying `FailureDumpReport` is a placeholder (freeze
221 /// rendezvous failed; no maps to walk).
222 pub fn map(&self, name: &str) -> SnapshotResult<SnapshotMap<'a>> {
223 if self.report.is_placeholder {
224 return Err(SnapshotError::PlaceholderSnapshot { tag: None });
225 }
226 for m in self.maps_iter() {
227 if m.name == name {
228 return Ok(SnapshotMap { map: m, cpu: None });
229 }
230 }
231 if let Some(err) = self.excluded_filter_err(name.to_string()) {
232 return Err(err);
233 }
234 Err(SnapshotError::MapNotFound {
235 requested: name.to_string(),
236 available: self.maps_iter().map(|m| m.name.clone()).collect(),
237 })
238 }
239
240 /// Walk the BTF-rendered fields of every `*.bss` / `*.data` /
241 /// `*.rodata` global-section map for a top-level variable
242 /// named `name`. Convenience for `.var("nr_cpus_onln")` style
243 /// scalar reads without naming the section explicitly.
244 ///
245 /// Returns [`SnapshotField::Value`] on a unique match;
246 /// [`SnapshotField::Missing`] with
247 /// [`SnapshotError::VarNotFound`] (and the union of every
248 /// global-section map's top-level member names in `available`)
249 /// when no map exposes the name; OR — when more than one
250 /// global-section map exposes the name — auto-falls-back to
251 /// [`Self::live_var`] semantics (delegates to
252 /// [`Self::active`] and re-projects) before yielding
253 /// [`SnapshotError::AmbiguousVar`].
254 ///
255 /// # Auto-fallback contract
256 ///
257 /// When the raw scan finds 2+ hits AND the snapshot is not
258 /// already narrowed by [`Self::active`] (i.e.
259 /// `self.active_obj` is `None`), `var()` calls
260 /// [`Self::active`]: on `Ok` it returns `active.var(name)`
261 /// directly — whether [`SnapshotField::Value`],
262 /// [`SnapshotError::VarNotFound`], or
263 /// [`SnapshotError::AmbiguousVar`] persisting after the
264 /// live filter narrowed; on `Err` it falls through to the
265 /// pre-filter [`SnapshotError::AmbiguousVar`] (see next
266 /// section). The fallback exists so post-
267 /// [`crate::scenario::ops::Op::ReplaceScheduler`] callers
268 /// who name a global by string don't have to know about
269 /// [`Self::live_var`] explicitly — the principled
270 /// active-scheduler walker is consulted automatically when
271 /// the raw lookup is ambiguous. [`Self::live_var`] remains
272 /// the explicit-opt-in form for callers who want the live
273 /// filter unconditionally (skip the raw-scan path).
274 ///
275 /// # When `AmbiguousVar` STILL fires
276 ///
277 /// After the auto-fallback. The raw scan found 2+ hits AND
278 /// `active()` failed (no scheduler attached, multi-obj
279 /// without principled walker resolution, etc.). The
280 /// `found_in` list names every map the raw scan saw — the
281 /// operator needs all of them to reason about which obj
282 /// they want to address via [`Self::map`].
283 pub fn var(&self, name: &str) -> SnapshotField<'a> {
284 if self.report.is_placeholder {
285 return SnapshotField::Missing(SnapshotError::PlaceholderSnapshot { tag: None });
286 }
287 let mut hits: Vec<(&'a str, &'a RenderedValue)> = Vec::new();
288 for m in self.maps_iter() {
289 if !is_global_section_map(&m.name) {
290 continue;
291 }
292 if let Some(v) = m.value.as_ref()
293 && let Some(found) = lookup_member(v, name)
294 {
295 hits.push((m.name.as_str(), found));
296 }
297 }
298 match hits.len() {
299 1 => SnapshotField::Value(hits[0].1),
300 n if n > 1 => {
301 // Ambiguous at the raw-`var` layer — try the
302 // principled active-scheduler resolution before
303 // giving up. When `Snapshot::active()` succeeds it
304 // restricts the projection to the live scheduler's
305 // maps (and, when the walker populated the KVA
306 // whitelist, the live scheduler's specific map
307 // instances even in the same-binary case). If
308 // active() resolves to a Snapshot whose filtered
309 // maps_iter yields exactly one hit, return that.
310 // When the live filter ALSO can't narrow (e.g.,
311 // KVA whitelist excluded every match → narrows to
312 // zero, or live obj has 2+ copies of the same
313 // global — unusual but possible), surface THE
314 // LIVE-FILTERED diagnostic rather than the
315 // pre-filter AmbiguousVar list. The operator who
316 // hits ambiguity post-disambiguation needs to know the
317 // filter ran and what it admitted, not see the
318 // raw all-maps "ambiguous between OLD + NEW bss"
319 // list that misleads them into reaching for a
320 // picker the framework already obviated.
321 if self.active_obj.is_none()
322 && let Ok(active) = self.active()
323 {
324 return active.var(name);
325 }
326 SnapshotField::Missing(SnapshotError::AmbiguousVar {
327 requested: name.to_string(),
328 found_in: hits.iter().map(|(name, _)| (*name).to_string()).collect(),
329 })
330 }
331 _ => {
332 if let Some(err) = self.excluded_filter_err(name.to_string()) {
333 return SnapshotField::Missing(err);
334 }
335 // No global-section map yielded the var. If a
336 // global-section map's contents failed to render
337 // (value absent, `error` set), the search could not
338 // confirm the var's absence — that map might hold it.
339 // Surface the render failure rather than a false
340 // VarNotFound that reads as "the symbol doesn't exist".
341 if let Some(m) = self.maps_iter().find(|m| {
342 is_global_section_map(&m.name) && m.value.is_none() && m.error.is_some()
343 }) {
344 return SnapshotField::Missing(SnapshotError::MapRenderIncomplete {
345 map: m.name.clone(),
346 error: m.error.clone().unwrap_or_default(),
347 });
348 }
349 let mut available: Vec<String> = Vec::new();
350 for m in self.maps_iter() {
351 if !is_global_section_map(&m.name) {
352 continue;
353 }
354 if let Some(RenderedValue::Struct { members, .. }) = m.value.as_ref() {
355 for member in members {
356 available.push(member.name.clone());
357 }
358 }
359 }
360 available.sort();
361 available.dedup();
362 SnapshotField::Missing(SnapshotError::VarNotFound {
363 requested: name.to_string(),
364 available,
365 })
366 }
367 }
368 }
369
370 /// Iterate every global-section copy that carries a top-level
371 /// member named `name`. Yields `(owning_map_name, field)` pairs
372 /// in capture order. Use when [`Self::var`] errors
373 /// [`SnapshotError::AmbiguousVar`] and the caller needs to
374 /// reason across every observed copy explicitly (e.g. summing
375 /// counter deltas across two scheduler instances loaded
376 /// back-to-back in the same scenario).
377 ///
378 /// Respects the [`Self::active`] filter when set, so chained
379 /// `snapshot.active()?.vars(name)` is well-defined — it iterates
380 /// only the active scheduler's copies (typically exactly one,
381 /// since active() filters to one obj_name).
382 ///
383 /// Yields nothing on placeholder snapshots (the underlying
384 /// `report.maps` is empty by construction so nothing matches
385 /// anyway — callers needing "is this a placeholder?" use the
386 /// `Snapshot::is_placeholder` accessor explicitly).
387 pub fn vars(&self, name: &str) -> impl Iterator<Item = (&'a str, SnapshotField<'a>)> + '_ {
388 let needle = name.to_string();
389 self.maps_iter().filter_map(move |m| {
390 if !is_global_section_map(&m.name) {
391 return None;
392 }
393 let v = m.value.as_ref()?;
394 let found = lookup_member(v, &needle)?;
395 Some((m.name.as_str(), SnapshotField::Value(found)))
396 })
397 }
398
399 /// Project the snapshot to the currently-active scheduler's
400 /// maps. Returns a filtered [`Snapshot`] whose [`Self::map`] /
401 /// [`Self::var`] / [`Self::vars`] see only the maps whose name
402 /// shares the `<obj>.` prefix of the active scheduler's BPF
403 /// object. Composable: `snapshot.active()?.var(name)`.
404 ///
405 /// # When to use
406 ///
407 /// Tests that swap schedulers mid-scenario (via
408 /// [`crate::scenario::ops::Op::ReplaceScheduler`]) reach for
409 /// `.active()` after the swap so the per-phase post-swap
410 /// snapshots resolve the live scheduler's bss without hitting
411 /// [`SnapshotError::AmbiguousVar`] across both schedulers'
412 /// captured copies. Single-scheduler tests never need
413 /// `.active()` — there is no ambiguity to resolve.
414 ///
415 /// # Signal source
416 ///
417 /// "Active" comes from two fields the freeze coordinator
418 /// populates at capture time:
419 /// - [`crate::monitor::dump::FailureDumpReport::active_obj_name`]
420 /// -- set by the target-free `prog_idr` walker (no `scx_root`
421 /// dependency; works pre-6.16) that finds the live
422 /// struct_ops prog's obj prefix (see `monitor/dump/mod.rs`
423 /// `identify_active_obj_from_struct_ops`).
424 /// - [`crate::monitor::dump::FailureDumpReport::active_map_kvas`]
425 /// -- the live scheduler's `prog.aux->used_maps` KVA set that
426 /// the same walker publishes. Non-empty iff the walker resolved
427 /// a global-section-bearing prog (the same-binary
428 /// disambiguation case).
429 ///
430 /// When the walker resolved both fields, `active()` uses them
431 /// directly and the obj-prefix scan below is a sanity cross-
432 /// check against the captured map set. When the walker was
433 /// unavailable (placeholder dump, transient swap window before
434 /// the accessor-init worker republished, or kernel built
435 /// without struct_ops support), the obj-prefix scan with
436 /// per-section count fallback decides.
437 ///
438 /// # Failure cases
439 ///
440 /// - [`SnapshotError::PlaceholderSnapshot`]: the snapshot is a
441 /// freeze-rendezvous-failure placeholder.
442 /// - [`SnapshotError::NoActiveScheduler`] (no global-section
443 /// maps): the snapshot has no `<obj>.bss/.data/.rodata` —
444 /// either no scheduler is attached, or the capture missed
445 /// the global sections entirely.
446 /// - [`SnapshotError::NoActiveScheduler`] (multiple distinct
447 /// obj prefixes, walker unavailable): two scheduler instances
448 /// with DIFFERENT obj names coexist (back-to-back load of
449 /// distinct binaries, or one scheduler composed of multiple
450 /// BPF objects) AND the walker did not publish
451 /// `active_obj_name`. Use [`Self::vars`] to enumerate every
452 /// copy or [`Self::map`] to address a specific scheduler's
453 /// bss directly.
454 /// - [`SnapshotError::NoActiveScheduler`] (multi-copy
455 /// same-prefix, walker unavailable): an
456 /// [`crate::scenario::ops::Op::ReplaceScheduler`] swap
457 /// between two builds of the SAME binary left two
458 /// `<obj>.bss` (or `.data` / `.rodata`) copies with
459 /// identical names AND the walker did not publish
460 /// `active_map_kvas` to disambiguate. The obj-prefix filter
461 /// alone cannot pick the live copy without admitting both.
462 /// Use [`Self::live_var_via`] / [`Self::live_vars_via`] with
463 /// `crate::scenario::snapshot::pickers::max_by_sum_u64` to
464 /// pick by counter activity.
465 ///
466 /// # Lifetime
467 ///
468 /// Pure projection over the frozen `FailureDumpReport`;
469 /// multiple calls return equivalent views. Caching the result
470 /// in a `let active = snapshot.active()?;` binding is fine but
471 /// not required.
472 pub fn active(&self) -> SnapshotResult<Snapshot<'a>> {
473 if self.report.is_placeholder {
474 return Err(SnapshotError::PlaceholderSnapshot { tag: None });
475 }
476 // Scan global-section maps to collect:
477 // 1. The distinct set of obj_name prefixes (used by the
478 // multi-obj failure diagnostic).
479 // 2. Per-(prefix, section) counts (used to detect the
480 // same-binary multi-copy case: two `<prefix>.bss` maps
481 // coexist with identical names but distinct map KVAs).
482 // The producer-side helper in
483 // `monitor/dump/mod.rs` `count_global_sections_for_prefix`
484 // performs the same count; both sites use strict full-name
485 // equality to stay in lockstep.
486 let mut obj_names: Vec<&'a str> = Vec::new();
487 let mut counts: Vec<(&'a str, usize, usize, usize)> = Vec::new();
488 for m in &self.report.maps {
489 if !is_global_section_map(&m.name) {
490 continue;
491 }
492 let Some(obj) = m.name.split('.').next() else {
493 continue;
494 };
495 if obj.is_empty() {
496 continue;
497 }
498 if !obj_names.contains(&obj) {
499 obj_names.push(obj);
500 counts.push((obj, 0, 0, 0));
501 }
502 let entry = counts
503 .iter_mut()
504 .find(|(o, _, _, _)| *o == obj)
505 .expect("obj just pushed");
506 // Strict section suffix match — `<obj>.bss` exactly,
507 // not `<obj>.bss.shared` or other multi-segment names.
508 let section = m.name.split('.').nth(1).unwrap_or("");
509 match section {
510 "bss" if m.name == format!("{obj}.bss") => entry.1 += 1,
511 "data" if m.name == format!("{obj}.data") => entry.2 += 1,
512 "rodata" if m.name == format!("{obj}.rodata") => entry.3 += 1,
513 _ => {}
514 }
515 }
516 // Principled fast path: when the freeze-coord captured a
517 // non-None `active_obj_name` via the target-free prog_idr
518 // walker (`prog_idr → prog aux->used_maps → global-section
519 // sibling map`; no scx_root), prefer that even if multiple obj
520 // prefixes show up in `obj_names`. The KVA whitelist
521 // (`active_map_kvas`) pairs with the obj-name filter in
522 // `maps_iter` — when populated, same-binary multi-copy
523 // resolves to the live copy. When empty AND the matched
524 // prefix has any multi-copy section, the obj-prefix filter
525 // alone would admit both copies → fail loudly with a
526 // multi-copy diagnostic instead of silently surfacing
527 // AmbiguousVar at the var lookup.
528 if let Some(active_name) = self.report.active_obj_name.as_deref()
529 && let Some(matched) = obj_names.iter().find(|obj| **obj == active_name).copied()
530 {
531 if !self.report.active_map_kvas.is_empty() {
532 return Ok(Snapshot {
533 report: self.report,
534 active_obj: Some(matched),
535 active_map_kvas: &self.report.active_map_kvas,
536 });
537 }
538 // Walker did not publish a whitelist. Check the matched
539 // prefix's section counts; if any multi-copy, bail.
540 if let Some(&(_, b, d, r)) = counts.iter().find(|(o, _, _, _)| *o == matched)
541 && (b > 1 || d > 1 || r > 1)
542 {
543 return Err(SnapshotError::NoActiveScheduler {
544 reason: format_multi_copy_reason(matched, b, d, r),
545 });
546 }
547 return Ok(Snapshot {
548 report: self.report,
549 active_obj: Some(matched),
550 active_map_kvas: &[],
551 });
552 }
553 match (obj_names.as_slice(), counts.as_slice()) {
554 ([], _) => Err(SnapshotError::NoActiveScheduler {
555 reason: "snapshot has no global-section BPF maps (no scheduler \
556 attached, or capture did not include bss/data/rodata)"
557 .to_string(),
558 }),
559 ([only], [(_, b, d, r)]) if *b <= 1 && *d <= 1 && *r <= 1 => Ok(Snapshot {
560 report: self.report,
561 active_obj: Some(*only),
562 // Only one obj prefix in the snapshot AND no
563 // section has more than one copy — obj-prefix
564 // matching uniquely picks the scheduler's maps.
565 active_map_kvas: &[],
566 }),
567 ([only], [(_, b, d, r)]) => Err(SnapshotError::NoActiveScheduler {
568 reason: format_multi_copy_reason(only, *b, *d, *r),
569 }),
570 (multiple, _) => Err(SnapshotError::NoActiveScheduler {
571 reason: format!(
572 "snapshot has {} BPF objects with global-section maps \
573 ({:?}) and the principled target-free prog_idr walker \
574 could not identify the active obj at capture time (no \
575 alive struct_ops prog with a `<obj>.bss/.data/.rodata` \
576 sibling in used_maps, or an empty used_maps whitelist) — \
577 use \
578 Snapshot::vars(name) to enumerate every copy or \
579 Snapshot::map(\"<obj>.<section>\") to address a specific \
580 scheduler's bss directly",
581 multiple.len(),
582 multiple
583 ),
584 }),
585 }
586 }
587
588 /// Read a single live counter from the active scheduler — the
589 /// **default** for single-variable reads. Convenience for
590 /// `self.active()?.var(name)`.
591 ///
592 /// **For multi-variable arithmetic on multiple counters** —
593 /// fractions, ratios, deltas computed across more than one
594 /// named field — use [`Self::live_vars_via`] instead.
595 /// `live_vars_via` resolves the picker ONCE across a name set
596 /// so independent per-name picks cannot corrupt the
597 /// cross-variable computation by selecting different bss
598 /// copies for different names. Repeatedly calling `live_var`
599 /// for two counters from the same scheduler is correct in the
600 /// walker-resolved case (both reads land in the same scheduler's
601 /// bss) but loses that guarantee on the picker-fallback path
602 /// — silent corruption of ratios.
603 ///
604 /// Returns a [`SnapshotField`] carrying either
605 /// [`SnapshotError::NoActiveScheduler`] (no scheduler
606 /// identifiable) or the standard [`Self::var`] error variants
607 /// ([`SnapshotError::VarNotFound`] / [`SnapshotError::TypeMismatch`]
608 /// from the inner var lookup).
609 pub fn live_var(&self, name: &str) -> SnapshotField<'a> {
610 match self.active() {
611 Ok(snap) => snap.var(name),
612 Err(err) => SnapshotField::Missing(err),
613 }
614 }
615
616 /// Caller-supplied disambiguator for the multi-bss case where
617 /// [`Self::live_var`] cannot resolve a single live copy by itself.
618 ///
619 /// [`Self::live_var`] delegates to [`Self::active`] to filter the
620 /// snapshot to one scheduler's maps. When [`Self::active`] cannot
621 /// pick a single scheduler — multiple BPF objects with
622 /// global-section maps are present AND the principled
623 /// `prog_idr → prog aux->used_maps → global-section map → obj prefix`
624 /// walker did not
625 /// identify the live one — it errors with
626 /// [`SnapshotError::NoActiveScheduler`] (the exact `reason` field
627 /// is the long-form message constructed at the bail site listing
628 /// the observed obj_names + the walker's failure cause), and
629 /// [`Self::live_var`] propagates that as [`SnapshotField::Missing`].
630 ///
631 /// `live_var_via` is the escape hatch: it skips the [`Self::active`]
632 /// filter entirely, enumerates every observed copy of `name` via
633 /// [`Self::vars`], and hands the slice to the caller-supplied
634 /// `picker` to pick one by index. Common case: an
635 /// `Op::ReplaceScheduler` swap between two builds of the same
636 /// scheduler that leaves two `<obj>.bss` maps in the snapshot
637 /// sharing one obj_name prefix.
638 ///
639 /// **For multi-variable arithmetic** (ratios, fractions, deltas
640 /// computed across more than one named field), use
641 /// [`Self::live_vars_via`] instead — it resolves the picker once
642 /// across a name set so independent per-name picks cannot
643 /// corrupt the cross-variable computation by selecting different
644 /// bss copies for different names.
645 ///
646 /// `picker` receives every observed copy of the named variable
647 /// (one entry per `<obj>.bss/.data/.rodata` map carrying it,
648 /// per [`Self::vars`]) and returns the index the caller wants
649 /// (typically chosen by inspecting each candidate's value via
650 /// `SnapshotField::as_u64` / `as_str` and applying a liveness
651 /// or activity fingerprint — see
652 /// [`crate::scenario::snapshot::pickers`] for predefined
653 /// pickers such as `max_by_counter_value`).
654 ///
655 /// Returns [`SnapshotField::Missing`] when:
656 /// - the snapshot's underlying `FailureDumpReport` is a
657 /// placeholder (carrying
658 /// [`SnapshotError::PlaceholderSnapshot`] — matches the
659 /// sibling [`Self::var`] / [`Self::map`] placeholder-first
660 /// contract so callers pattern-matching on the error variant
661 /// distinguish "freeze rendezvous failed" from "name absent
662 /// from a real capture"),
663 /// - the snapshot has no copies of `name` (carrying
664 /// [`SnapshotError::VarNotFound`] with the list of available
665 /// global-section maps),
666 /// - `picker` returns `None` (carrying
667 /// [`SnapshotError::ProjectionFailed`] naming the picker as
668 /// the source), OR
669 /// - `picker` returns `Some(idx)` outside the candidate range
670 /// (carrying [`SnapshotError::ProjectionFailed`] with the bad
671 /// index and the candidate count).
672 pub fn live_var_via(
673 &self,
674 name: &str,
675 picker: impl FnOnce(&[(&'a str, SnapshotField<'a>)]) -> Option<usize>,
676 ) -> SnapshotField<'a> {
677 if self.report.is_placeholder {
678 return SnapshotField::Missing(
679 crate::scenario::snapshot::SnapshotError::PlaceholderSnapshot { tag: None },
680 );
681 }
682 let candidates: Vec<(&'a str, SnapshotField<'a>)> = self.vars(name).collect();
683 if candidates.is_empty() {
684 if let Some(err) = self.excluded_filter_err(name.to_string()) {
685 return SnapshotField::Missing(err);
686 }
687 let available: Vec<String> = self
688 .report
689 .maps
690 .iter()
691 .filter(|m| is_global_section_map(&m.name))
692 .map(|m| m.name.clone())
693 .collect();
694 return SnapshotField::Missing(crate::scenario::snapshot::SnapshotError::VarNotFound {
695 requested: name.to_string(),
696 available,
697 });
698 }
699 match picker(&candidates) {
700 Some(idx) if idx < candidates.len() => {
701 let (_obj, field) = candidates.into_iter().nth(idx).unwrap();
702 field
703 }
704 Some(idx) => {
705 SnapshotField::Missing(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
706 reason: format!(
707 "live_var_via picker returned index {idx} out of range \
708 (candidate count = {})",
709 candidates.len()
710 ),
711 })
712 }
713 None => {
714 SnapshotField::Missing(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
715 reason: format!(
716 "live_var_via picker for '{name}' returned None (no candidate \
717 matched the supplied disambiguator)"
718 ),
719 })
720 }
721 }
722 }
723
724 /// Caller-supplied disambiguator for the multi-bss case where
725 /// **multiple variables from the same scheduler instance** must
726 /// be read consistently — e.g. computing
727 /// `nr_mig_cross_dispatch / (nr_mig_same_dispatch + nr_mig_cross_dispatch)`
728 /// as a cross-LLC dispatch fraction from one scheduler's BPF
729 /// counters.
730 ///
731 /// # Why a separate primitive
732 ///
733 /// Calling [`Self::live_var_via`] N times independently risks
734 /// picking a DIFFERENT bss copy per call: the picker resolves
735 /// each name's candidate set independently, so two consecutive
736 /// `live_var_via("a", picker)` + `live_var_via("b", picker)`
737 /// calls can land on bss copy A for `a` and bss copy B for `b`,
738 /// corrupting any cross-variable arithmetic (ratio, fraction,
739 /// delta). `live_vars_via` resolves the picker ONCE across the
740 /// candidate set for all N names jointly so every returned
741 /// [`SnapshotField`] reads from the same source map.
742 ///
743 /// # Mechanism
744 ///
745 /// Per global-section map, look up each name in input order;
746 /// keep the map as a candidate row iff it has ALL the names
747 /// (intersection semantics — partial-coverage maps are absent
748 /// from the picker's input). The picker receives
749 /// `&[(map_name, fields_in_input_order)]` and returns the
750 /// chosen row's index. The returned `Vec<SnapshotField>` is
751 /// positional, keyed by the input `names` order — `result[0]`
752 /// is `names[0]`'s field from the picked map, `result[1]` is
753 /// `names[1]`'s field, etc.
754 ///
755 /// **Single-section constraint.** All `names` must reside in
756 /// the SAME global-section map — typically the scheduler's
757 /// `<obj>.bss`. A `bss` counter co-picked with a `data`
758 /// constant from the same scheduler obj lands in DIFFERENT
759 /// candidate rows (the obj's `.bss` map carries the first
760 /// name, its `.data` map carries the second, neither row has
761 /// both), the intersection collapses to empty, and the helper
762 /// returns [`SnapshotError::VarNotFound`]. If the test reads
763 /// from multiple sections, issue separate `live_vars_via`
764 /// calls (one per section's name group) and compose the
765 /// per-call results caller-side.
766 ///
767 /// # See also
768 ///
769 /// - [`Self::live_var_via`] for single-variable disambiguation.
770 /// - [`crate::scenario::snapshot::pickers::max_by_sum_u64`] for
771 /// the "max-activity bss" heuristic over co-picked u64
772 /// counters.
773 ///
774 /// # Errors
775 ///
776 /// - [`SnapshotError::PlaceholderSnapshot`] — the underlying
777 /// `FailureDumpReport` is a placeholder; matches the sibling
778 /// [`Self::live_var_via`] / [`Self::var`] / [`Self::map`]
779 /// placeholder-first contract.
780 /// - [`SnapshotError::ProjectionFailed`] — `names` is empty
781 /// (caller bug: nothing to co-pick), `picker` returns `None`
782 /// (no candidate matched), or `picker` returns an
783 /// out-of-range index.
784 /// - [`SnapshotError::VarNotFound`] — no global-section map
785 /// has ALL the requested names. `requested` carries the
786 /// joined name list, `available` carries the global-section
787 /// map names that were scanned.
788 pub fn live_vars_via<P>(
789 &self,
790 names: &[&str],
791 picker: P,
792 ) -> crate::scenario::snapshot::SnapshotResult<Vec<SnapshotField<'a>>>
793 where
794 P: FnOnce(&[(&'a str, Vec<SnapshotField<'a>>)]) -> Option<usize>,
795 {
796 if self.report.is_placeholder {
797 return Err(
798 crate::scenario::snapshot::SnapshotError::PlaceholderSnapshot { tag: None },
799 );
800 }
801 if names.is_empty() {
802 return Err(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
803 reason: "live_vars_via called with an empty names slice — \
804 co-pick requires at least one name"
805 .to_string(),
806 });
807 }
808 // Group by MAP: each global-section map becomes a candidate
809 // row IFF it has ALL the requested names. Partial-coverage
810 // maps are dropped from the picker's input — they cannot
811 // answer the co-pick.
812 let mut candidates: Vec<(&'a str, Vec<SnapshotField<'a>>)> = Vec::new();
813 for m in self.maps_iter() {
814 if !is_global_section_map(&m.name) {
815 continue;
816 }
817 let Some(value) = m.value.as_ref() else {
818 continue;
819 };
820 let mut row: Vec<SnapshotField<'a>> = Vec::with_capacity(names.len());
821 let mut all_present = true;
822 for name in names {
823 if let Some(found) = lookup_member(value, name) {
824 row.push(SnapshotField::Value(found));
825 } else {
826 all_present = false;
827 break;
828 }
829 }
830 if all_present {
831 candidates.push((m.name.as_str(), row));
832 }
833 }
834 if candidates.is_empty() {
835 let requested = format!("[{}]", names.join(", "));
836 if let Some(err) = self.excluded_filter_err(requested.clone()) {
837 return Err(err);
838 }
839 let available: Vec<String> = self
840 .report
841 .maps
842 .iter()
843 .filter(|m| is_global_section_map(&m.name))
844 .map(|m| m.name.clone())
845 .collect();
846 return Err(crate::scenario::snapshot::SnapshotError::VarNotFound {
847 requested,
848 available,
849 });
850 }
851 match picker(&candidates) {
852 Some(idx) if idx < candidates.len() => {
853 let (_obj, fields) = candidates.into_iter().nth(idx).unwrap();
854 Ok(fields)
855 }
856 Some(idx) => Err(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
857 reason: format!(
858 "live_vars_via picker returned index {idx} out of range \
859 (candidate count = {})",
860 candidates.len()
861 ),
862 }),
863 None => Err(crate::scenario::snapshot::SnapshotError::ProjectionFailed {
864 reason: format!(
865 "live_vars_via picker for [{}] returned None (no candidate \
866 matched the supplied disambiguator)",
867 names.join(", ")
868 ),
869 }),
870 }
871 }
872
873 /// Number of maps the current view exposes — every captured
874 /// map when unfiltered; only maps the [`Self::active`] filter
875 /// admits when set.
876 pub fn map_count(&self) -> usize {
877 self.maps_iter().count()
878 }
879
880 /// True when the underlying [`FailureDumpReport`] is a
881 /// placeholder produced by [`FailureDumpReport::placeholder`]
882 /// — i.e. the freeze-rendezvous capture pipeline could not
883 /// produce real data. Periodic-sample temporal patterns use
884 /// this to skip the BPF axis on a placeholder sample (the
885 /// stats axis, when present, may still be valid). Bypassing
886 /// the projection-error path keeps the sample's diagnostic
887 /// distinct from "field missing on a real capture".
888 pub fn is_placeholder(&self) -> bool {
889 self.report.is_placeholder
890 }
891
892 // -----------------------------------------------------------------
893 // First-class accessors for fields the freeze-coordinator pipeline
894 // populates on `FailureDumpReport` outside the BPF-map axis. Each
895 // accessor returns either a borrowed slice (whole-vec views) or an
896 // `Option<&T>` keyed by the natural identifier. Empty vec is the
897 // normal state when the corresponding walker did not run — callers
898 // check the companion `*_unavailable` field on the raw report for
899 // the diagnostic reason. None on a keyed lookup means "the dump
900 // did not capture an entry for that key"; it is not an error.
901 //
902 // **Keyed-lookup naming convention.** `<base>_at(<key>)` is used
903 // when the key is a topology position (CPU index, NUMA node id)
904 // that the kernel allocates densely from 0; the `_at` mirrors
905 // `Vec::get(idx)` and reads naturally as "the row at this
906 // position". `<base>_by_<field>(<value>)` is used when the key is
907 // a sparse identifier (pid, program name) — the `_by_<field>`
908 // names which field the lookup compares against and reads
909 // naturally as "the entry whose <field> matches". The `<base>` is
910 // normally the singular form of the plural-vec accessor (e.g.
911 // `task_enrichments` → `task_enrichment_by_pid`), but stays
912 // plural when the singular reads unnaturally (e.g.
913 // `prog_runtime_stats` → `prog_runtime_stats_by_name` — the
914 // singular `prog_runtime_stat` would be awkward English; the
915 // `Stats` suffix is part of the canonical noun). Each keyed
916 // accessor returns the first match in walker enumeration order;
917 // production captures do not duplicate keys (kernel walker
918 // invariants), but the contract is left first-match-wins so a
919 // future duplicate-key scenario surfaces only one row without
920 // panicking.
921 // -----------------------------------------------------------------
922
923 /// Per-monitor-tick SCX_EV_* event counter samples. Each entry is
924 /// the cross-CPU sum of the 13 SCX event counters at one monitor
925 /// tick. Empty when no `EventCounterCapture` ran, or every sample
926 /// was suppressed (event-stat offsets unresolved, scx_root unset).
927 ///
928 /// Unlike the walker-backed accessors below, this field carries
929 /// no `*_unavailable` companion: an empty timeline is the only
930 /// signal for "no capture / no events".
931 pub fn event_counter_timeline(&self) -> &'a [EventCounterSample] {
932 &self.report.event_counter_timeline
933 }
934
935 /// Per-CPU `rq->scx` snapshots — one per CPU walked by
936 /// `crate::monitor::scx_walker`. Empty when the
937 /// `ScxWalkerCapture` was absent or every CPU's translate
938 /// failed (see `FailureDumpReport::scx_walker_unavailable`).
939 pub fn rq_scx_states(&self) -> &'a [RqScxState] {
940 &self.report.rq_scx_states
941 }
942
943 /// Per-DSQ snapshots — local, bypass, global, and user DSQs
944 /// reachable from `*scx_root`. Each entry carries `nr` (depth),
945 /// `seq` (BPF-iter counter), and the queued task KVAs. Empty
946 /// when the `ScxWalkerCapture` was absent (see
947 /// `FailureDumpReport::scx_walker_unavailable`).
948 pub fn dsq_states(&self) -> &'a [DsqState] {
949 &self.report.dsq_states
950 }
951
952 /// Top-level `scx_sched` state captured from `*scx_root`:
953 /// aborting flag, bypass_depth, exit_kind. `None` when no
954 /// scheduler is attached or `*scx_root` was unreadable (see
955 /// `FailureDumpReport::scx_walker_unavailable`).
956 pub fn scx_sched_state(&self) -> Option<&'a ScxSchedState> {
957 self.report.scx_sched_state.as_ref()
958 }
959
960 /// Per-CPU CPU-time / softirq / IRQ counter rows. One row per
961 /// CPU enumerated by `crate::monitor::dump::CpuTimeCapture`.
962 /// Empty when the capture was not wired or symbol/BTF
963 /// resolution failed.
964 pub fn per_cpu_time(&self) -> &'a [PerCpuTimeStats] {
965 &self.report.per_cpu_time
966 }
967
968 /// Per-CPU CPU-time row for CPU `cpu`, looked up by the `cpu`
969 /// field on each [`PerCpuTimeStats`] (not by vec position).
970 /// Returns `None` when no row matches — typical when the
971 /// walker skipped that CPU, the capture didn't run, or `cpu`
972 /// exceeded the topology. Returns the first match in walker
973 /// enumeration order if `cpu` appears more than once.
974 pub fn per_cpu_time_at(&self, cpu: u32) -> Option<&'a PerCpuTimeStats> {
975 self.report.per_cpu_time.iter().find(|c| c.cpu == cpu)
976 }
977
978 /// Per-cgroup PSI-irq rows for the test's workload cgroups, host-walked
979 /// from the cgroup hierarchy at this freeze (Phase A). One row per
980 /// workload-root leaf cgroup with per-cgroup PSI accounting enabled. Empty
981 /// when the capture was not wired, the workload root isn't present yet, or
982 /// `psi_cgroups_enabled` is off — loud-absent. RAW values; decoded + folded
983 /// at the metric layer (see
984 /// `crate::monitor::cgroup_walk::CgroupPsiStat`).
985 pub fn cgroup_psi(&self) -> &'a [crate::monitor::cgroup_walk::CgroupPsiStat] {
986 &self.report.cgroup_psi
987 }
988
989 /// Per-NUMA-node event counter rows captured from
990 /// `pglist_data->node_zones[]->vm_numa_event[]`. Empty until
991 /// the host-side NUMA walker lands (see
992 /// `FailureDumpReport::per_node_numa_unavailable`).
993 pub fn per_node_numa(&self) -> &'a [PerNodeNumaStats] {
994 &self.report.per_node_numa
995 }
996
997 /// Per-NUMA-node event-counter row for `node`, looked up by
998 /// the `node` field on each [`PerNodeNumaStats`]. Returns
999 /// `None` when no row matches. Returns the first match in
1000 /// walker enumeration order if `node` appears more than once.
1001 pub fn per_node_numa_at(&self, node: u32) -> Option<&'a PerNodeNumaStats> {
1002 self.report.per_node_numa.iter().find(|n| n.node == node)
1003 }
1004
1005 /// Per-task failure-dump enrichments — identity (pid, tgid,
1006 /// comm), process tree, scheduling priority, sched_class name,
1007 /// context-switch counters, watchdog disambiguation, lock
1008 /// slowpath stack matches. Empty when no task walker ran (see
1009 /// `FailureDumpReport::task_enrichments_unavailable`).
1010 pub fn task_enrichments(&self) -> &'a [TaskEnrichment] {
1011 &self.report.task_enrichments
1012 }
1013
1014 /// Look up the enrichment for `pid`. The returned reference
1015 /// matches the first task whose `task_struct.pid` equals `pid`
1016 /// in walker enumeration order. Returns `None` when no task with
1017 /// that pid was captured. Production captures dedupe by task_kva
1018 /// before push, so duplicate-pid rows do not occur in real
1019 /// dumps.
1020 pub fn task_enrichment_by_pid(&self, pid: i32) -> Option<&'a TaskEnrichment> {
1021 self.report.task_enrichments.iter().find(|t| t.pid == pid)
1022 }
1023
1024 /// Per-program BPF runtime stats — invocation count, total ns,
1025 /// recursion misses. One entry per struct_ops program reached
1026 /// by the prog walker. Empty when no struct_ops programs are
1027 /// loaded or the prog accessor was unavailable (see
1028 /// `FailureDumpReport::prog_runtime_stats_unavailable`).
1029 pub fn prog_runtime_stats(&self) -> &'a [ProgRuntimeStats] {
1030 &self.report.prog_runtime_stats
1031 }
1032
1033 /// Look up the runtime stats for the program registered with
1034 /// `name` (kernel-side `bpf_prog->aux->name`). Returns `None`
1035 /// when no program with that name was captured. Returns the
1036 /// first match in walker enumeration order if `name` appears
1037 /// more than once — struct_ops programs in real captures use
1038 /// distinct callback names (`select_cpu`, `enqueue`, etc.) so
1039 /// duplicates do not occur in production.
1040 pub fn prog_runtime_stats_by_name(&self, name: &str) -> Option<&'a ProgRuntimeStats> {
1041 self.report
1042 .prog_runtime_stats
1043 .iter()
1044 .find(|p| p.name == name)
1045 }
1046
1047 /// Probe BPF program's per-CPU diagnostic counter snapshot.
1048 /// `None` when the probe's `.bss` map isn't enumerated (probe
1049 /// not loaded), the program BTF can't be parsed, or the
1050 /// array's offset doesn't resolve. A populated
1051 /// `trigger_count > 0` is the structural signal that the
1052 /// `tp_btf/sched_ext_exit` handler fired during the run.
1053 pub fn probe_counters(&self) -> Option<&'a ProbeBssCounters> {
1054 self.report.probe_counters.as_ref()
1055 }
1056
1057 // -----------------------------------------------------------------
1058 // Companion `*_unavailable` diagnostic accessors. Each accessor
1059 // pairs with the walker-backed slice/option accessor above:
1060 // when the slice is empty (or the option is None), the matching
1061 // `*_unavailable()` returns `Some(reason)` if the walker
1062 // recorded one. `None` from the unavailable accessor means
1063 // either the walker ran normally (slice populated) or the field
1064 // is simply absent from the wire format (no reason recorded).
1065 // -----------------------------------------------------------------
1066
1067 /// Diagnostic reason recorded when [`Self::rq_scx_states`] /
1068 /// [`Self::dsq_states`] / [`Self::scx_sched_state`] could not
1069 /// be populated. `None` when the walker fully succeeded;
1070 /// otherwise `Some(reason)` (e.g. `"scx_root null"`,
1071 /// `"no scx walker"`, or a partial-degradation string from the
1072 /// dump pipeline).
1073 pub fn scx_walker_unavailable(&self) -> Option<&'a str> {
1074 self.report.scx_walker_unavailable.as_deref()
1075 }
1076
1077 /// Diagnostic reason recorded when [`Self::task_enrichments`]
1078 /// could not be populated. `None` when the walker yielded at
1079 /// least one enrichment; otherwise `Some(reason)`
1080 /// (e.g. `"no task walker available"`,
1081 /// `"task walker yielded zero tasks"`).
1082 pub fn task_enrichments_unavailable(&self) -> Option<&'a str> {
1083 self.report.task_enrichments_unavailable.as_deref()
1084 }
1085
1086 /// Diagnostic reason recorded when [`Self::prog_runtime_stats`]
1087 /// could not be populated. `None` when the walker yielded at
1088 /// least one program; otherwise `Some(reason)`
1089 /// (e.g. `"prog accessor unavailable"`,
1090 /// `"no struct_ops programs loaded"`).
1091 pub fn prog_runtime_stats_unavailable(&self) -> Option<&'a str> {
1092 self.report.prog_runtime_stats_unavailable.as_deref()
1093 }
1094
1095 /// Diagnostic reason recorded when [`Self::per_node_numa`]
1096 /// could not be populated — typically `"no NUMA walker"` until
1097 /// the host-side walker lands.
1098 pub fn per_node_numa_unavailable(&self) -> Option<&'a str> {
1099 self.report.per_node_numa_unavailable.as_deref()
1100 }
1101
1102 /// Diagnostic reason recorded when the SDT allocator snapshot
1103 /// (still escape-only via [`Self::report`]) could not be
1104 /// populated.
1105 pub fn sdt_alloc_unavailable(&self) -> Option<&'a str> {
1106 self.report.sdt_alloc_unavailable.as_deref()
1107 }
1108}
1109
/// Reports whether `name` follows the libbpf-composed
/// `<obj>.<section>` naming of a global-section map, i.e. ends in
/// `.bss`, `.data`, or `.rodata`.
fn is_global_section_map(name: &str) -> bool {
    const GLOBAL_SECTION_SUFFIXES: [&str; 3] = [".bss", ".data", ".rodata"];
    GLOBAL_SECTION_SUFFIXES
        .iter()
        .any(|suffix| name.ends_with(*suffix))
}
1115
/// Reports whether the obj prefix of `map_name` (the text before
/// its first `.`) is exactly `obj`. A name without any `.` has no
/// obj prefix and never matches. Used by [`Snapshot::maps_iter`]
/// when an active-scheduler filter is set.
fn map_belongs_to_obj(map_name: &str, obj: &str) -> bool {
    matches!(map_name.split_once('.'), Some((prefix, _)) if prefix == obj)
}
1125
/// Build the multi-copy-same-prefix diagnostic used by
/// [`Snapshot::active`]. `bss`, `data`, and `rodata` are full-name
/// equality counts; a count above 1 means the prefix has several
/// copies of that section type in the captured `maps[]` (typical
/// cause: an `Op::ReplaceScheduler` swap between two builds of the
/// same binary leaves the dying instance's globals adjacent to the
/// new instance's). The message lists the multi-copy section(s)
/// and steers the operator at the picker-based disambiguators.
fn format_multi_copy_reason(prefix: &str, bss: usize, data: usize, rodata: usize) -> String {
    // Only sections with more than one copy are listed, always in
    // bss, data, rodata order.
    let detail = [("bss", bss), ("data", data), ("rodata", rodata)]
        .iter()
        .filter(|(_, copies)| *copies > 1)
        .map(|(section, copies)| format!("{prefix}.{section} × {copies}"))
        .collect::<Vec<String>>()
        .join(", ");
    format!(
        "snapshot has multiple same-name copies of {prefix}'s global-section maps \
         ({detail}) and the principled target-free prog_idr walker did not \
         publish an active_map_kvas whitelist to disambiguate (transient swap \
         window where \
         the accessor-init worker has not yet republished, or the walker is \
         unavailable on this kernel build) — use \
         `series.live_bpf_vars_via([\"name\"], pickers::max_by_sum_u64)` for \
         multi-variable counter co-pick, or \
         `Snapshot::live_var_via(name, pickers::max_by_counter_value)` for a \
         single-counter pick, to pick by counter activity"
    )
}
1160
1161// ---------------------------------------------------------------------------
1162// SnapshotMap
1163// ---------------------------------------------------------------------------
1164
1165/// One map's view, possibly narrowed to a specific per-CPU slot via
1166/// [`Self::cpu`]. Returned by [`Snapshot::map`].
1167#[derive(Debug)]
1168#[must_use = "SnapshotMap is a borrowed view; chain accessors"]
1169#[non_exhaustive]
1170pub struct SnapshotMap<'a> {
1171 map: &'a FailureDumpMap,
1172 /// When `Some(cpu)`, subsequent [`Self::at`] /
1173 /// [`Self::find`] calls walk only the per-CPU slot for that
1174 /// CPU; `None` walks the natural (non-per-CPU) entry list.
1175 cpu: Option<usize>,
1176}
1177
1178impl<'a> SnapshotMap<'a> {
1179 /// Map name as captured.
1180 pub fn name(&self) -> &'a str {
1181 &self.map.name
1182 }
1183
1184 /// When this map's contents failed to render at capture time
1185 /// (`FailureDumpMap::error` is set), produce the
1186 /// [`SnapshotError::MapRenderIncomplete`] that callers should
1187 /// surface in place of an "empty map" verdict. `None` when the
1188 /// map rendered successfully — the absence is then genuine and
1189 /// the caller's own "not found" / "out of range" error applies.
1190 fn render_incomplete_err(&self) -> Option<SnapshotError> {
1191 self.map
1192 .error
1193 .as_ref()
1194 .map(|error| SnapshotError::MapRenderIncomplete {
1195 map: self.map.name.clone(),
1196 error: error.clone(),
1197 })
1198 }
1199
1200 /// Underlying [`FailureDumpMap`].
1201 pub fn raw(&self) -> &'a FailureDumpMap {
1202 self.map
1203 }
1204
1205 /// Ringbuf occupancy snapshot for `BPF_MAP_TYPE_RINGBUF` /
1206 /// `BPF_MAP_TYPE_USER_RINGBUF` maps — capacity, consumer /
1207 /// producer / pending positions, and the cumulative
1208 /// `pending_bytes` gap. `None` for non-ringbuf maps or when
1209 /// the BTF offsets for `bpf_ringbuf_map` / `bpf_ringbuf`
1210 /// weren't resolvable at capture time.
1211 pub fn ringbuf(&self) -> Option<&'a FailureDumpRingbuf> {
1212 self.map.ringbuf.as_ref()
1213 }
1214
1215 /// Mapped-page snapshot for `BPF_MAP_TYPE_ARENA` maps. Borrows
1216 /// the per-page `(user_addr, bytes)` records plus the declared
1217 /// span / truncation flags. `None` for non-arena maps or when
1218 /// the arena walker failed to translate the user_vm window.
1219 pub fn arena(&self) -> Option<&'a ArenaSnapshot> {
1220 self.map.arena.as_ref()
1221 }
1222
1223 /// Populated-slot summary for FD-array families (`PROG_ARRAY`,
1224 /// `PERF_EVENT_ARRAY`, `ARRAY_OF_MAPS`, `SOCKMAP*`, etc.).
1225 /// `None` for non-FD-array maps. Surfaces the populated count,
1226 /// scanned slot count, populated-index list, and the two
1227 /// truncation flags ([`FailureDumpFdArray::truncated`] for the
1228 /// scan limit, [`FailureDumpFdArray::indices_truncated`] for the
1229 /// index list limit).
1230 pub fn fd_array(&self) -> Option<&'a FailureDumpFdArray> {
1231 self.map.fd_array.as_ref()
1232 }
1233
1234 /// Per-bucket summary for `BPF_MAP_TYPE_STACK_TRACE` maps.
1235 /// `None` for non-STACK_TRACE maps or when the BTF offsets for
1236 /// `bpf_stack_map` / `stack_map_bucket` weren't resolvable.
1237 pub fn stack_trace(&self) -> Option<&'a FailureDumpStackTrace> {
1238 self.map.stack_trace.as_ref()
1239 }
1240
1241 /// Per-map decode-error string set by the freeze coordinator
1242 /// when this map's contents are missing or partial. `None` on a
1243 /// successful render. Distinct from [`SnapshotError`] (which
1244 /// flows through the accessor API) — `map_error` surfaces the
1245 /// capture-side diagnostic the kernel-walker recorded before
1246 /// the snapshot was handed to test code.
1247 pub fn map_error(&self) -> Option<&'a str> {
1248 self.map.error.as_deref()
1249 }
1250
1251 /// Narrow this map view to a specific per-CPU slot. On a
1252 /// non-per-CPU map this is recorded but ignored when the
1253 /// underlying entries are not per-CPU. Use on
1254 /// `BPF_MAP_TYPE_PERCPU_ARRAY` / `BPF_MAP_TYPE_PERCPU_HASH` /
1255 /// `BPF_MAP_TYPE_LRU_PERCPU_HASH`.
1256 pub fn cpu(self, n: usize) -> SnapshotMap<'a> {
1257 SnapshotMap {
1258 map: self.map,
1259 cpu: Some(n),
1260 }
1261 }
1262
1263 /// Get an entry by ordinal index.
1264 ///
1265 /// For HASH-style entry lists, returns the `n`-th
1266 /// `crate::monitor::dump::FailureDumpEntry` in the captured order. For per-CPU
1267 /// array maps narrowed via [`Self::cpu`], returns the entry
1268 /// at key `n` with its per-CPU slot pre-resolved. For ARRAY
1269 /// maps with a single value, `n == 0` returns the value.
1270 pub fn at(&self, n: usize) -> SnapshotEntry<'a> {
1271 let resolved = self.entry_at(n);
1272 match resolved {
1273 Ok(e) => e,
1274 Err(err) => SnapshotEntry::Missing(err),
1275 }
1276 }
1277
1278 /// Find the first entry matching `predicate`. Returns
1279 /// [`SnapshotEntry::Missing`] with [`SnapshotError::NoMatch`]
1280 /// when no entry matches. The NoMatch payload carries the
1281 /// total entry count traversed and a small sample of rendered
1282 /// keys so the failure message can tell `empty map` apart from
1283 /// `populated map, predicate never matched`.
1284 pub fn find(&self, predicate: impl Fn(&SnapshotEntry<'a>) -> bool) -> SnapshotEntry<'a> {
1285 let mut len = 0usize;
1286 let mut available_keys: Vec<String> = Vec::with_capacity(NO_MATCH_KEY_SAMPLE);
1287 for entry in self.iter_entries() {
1288 if predicate(&entry) {
1289 return entry;
1290 }
1291 if available_keys.len() < NO_MATCH_KEY_SAMPLE
1292 && let Some(k) = render_entry_key(&entry)
1293 {
1294 available_keys.push(k);
1295 }
1296 len += 1;
1297 }
1298 // An empty traversal over a map whose contents failed to
1299 // render is a capture gap, not "predicate never matched".
1300 if len == 0
1301 && let Some(err) = self.render_incomplete_err()
1302 {
1303 return SnapshotEntry::Missing(err);
1304 }
1305 SnapshotEntry::Missing(SnapshotError::NoMatch {
1306 map: self.map.name.clone(),
1307 op: "find".to_string(),
1308 len,
1309 available_keys,
1310 })
1311 }
1312
1313 /// Collect every entry matching `predicate` into a Vec.
1314 pub fn filter(&self, predicate: impl Fn(&SnapshotEntry<'a>) -> bool) -> Vec<SnapshotEntry<'a>> {
1315 self.iter_entries().filter(|e| predicate(e)).collect()
1316 }
1317
1318 /// Find the entry whose `key_fn` produces the maximum u64.
1319 /// Returns [`SnapshotEntry::Missing`] when the map has no
1320 /// entries. The NoMatch payload's `len` is 0 in that case;
1321 /// `available_keys` is empty (the map has no keys to sample).
1322 pub fn max_by(&self, key_fn: impl Fn(&SnapshotEntry<'a>) -> u64) -> SnapshotEntry<'a> {
1323 let mut best: Option<(u64, SnapshotEntry<'a>)> = None;
1324 for entry in self.iter_entries() {
1325 let k = key_fn(&entry);
1326 let beats = best.as_ref().is_none_or(|(prev, _)| k > *prev);
1327 if beats {
1328 best = Some((k, entry));
1329 }
1330 }
1331 match best {
1332 Some((_, e)) => e,
1333 None => {
1334 // No entries to compare. A render failure (contents
1335 // unreadable at capture) is distinct from an
1336 // empty map; surface it so the gap is visible.
1337 if let Some(err) = self.render_incomplete_err() {
1338 return SnapshotEntry::Missing(err);
1339 }
1340 SnapshotEntry::Missing(SnapshotError::NoMatch {
1341 map: self.map.name.clone(),
1342 op: "max_by".to_string(),
1343 len: 0,
1344 available_keys: Vec::new(),
1345 })
1346 }
1347 }
1348 }
1349
1350 /// Iterator over every entry under this view. Used by
1351 /// [`Self::find`] / [`Self::filter`] / [`Self::max_by`].
1352 fn iter_entries(&self) -> Box<dyn Iterator<Item = SnapshotEntry<'a>> + 'a> {
1353 if !self.map.percpu_entries.is_empty() {
1354 let cpu = self.cpu;
1355 let map = self.map;
1356 return Box::new(
1357 map.percpu_entries
1358 .iter()
1359 .map(move |e| resolve_percpu_entry(map, e, cpu)),
1360 );
1361 }
1362 if !self.map.percpu_hash_entries.is_empty() {
1363 let cpu = self.cpu;
1364 let map = self.map;
1365 return Box::new(
1366 map.percpu_hash_entries
1367 .iter()
1368 .map(move |e| resolve_percpu_hash_entry(map, e, cpu)),
1369 );
1370 }
1371 if !self.map.entries.is_empty() {
1372 return Box::new(self.map.entries.iter().map(SnapshotEntry::Hash));
1373 }
1374 if let Some(v) = self.map.value.as_ref() {
1375 return Box::new(std::iter::once(SnapshotEntry::Value(v)));
1376 }
1377 Box::new(std::iter::empty())
1378 }
1379
1380 /// Internal entry-by-index resolver returning a structured
1381 /// error for the surrounding [`Self::at`] arm.
1382 fn entry_at(&self, n: usize) -> SnapshotResult<SnapshotEntry<'a>> {
1383 if !self.map.percpu_entries.is_empty() {
1384 return resolve_percpu_entry_at(self.map, n, self.cpu);
1385 }
1386 if !self.map.percpu_hash_entries.is_empty() {
1387 return resolve_percpu_hash_entry_at(self.map, n, self.cpu);
1388 }
1389 if !self.map.entries.is_empty() {
1390 if n < self.map.entries.len() {
1391 return Ok(SnapshotEntry::Hash(&self.map.entries[n]));
1392 }
1393 return Err(SnapshotError::IndexOutOfRange {
1394 map: self.map.name.clone(),
1395 index: n,
1396 len: self.map.entries.len(),
1397 });
1398 }
1399 if let Some(v) = self.map.value.as_ref() {
1400 if n == 0 {
1401 return Ok(SnapshotEntry::Value(v));
1402 }
1403 return Err(SnapshotError::IndexOutOfRange {
1404 map: self.map.name.clone(),
1405 index: n,
1406 len: 1,
1407 });
1408 }
1409 // Nothing to walk. Distinguish a render failure (contents
1410 // could not be read at capture, `error` set) from a
1411 // genuinely-empty map so the operator sees the capture gap
1412 // rather than a misleading "index out of range, len 0".
1413 if let Some(err) = self.render_incomplete_err() {
1414 return Err(err);
1415 }
1416 Err(SnapshotError::IndexOutOfRange {
1417 map: self.map.name.clone(),
1418 index: n,
1419 len: 0,
1420 })
1421 }
1422}
1423
1424fn resolve_percpu_entry_at<'a>(
1425 map: &'a FailureDumpMap,
1426 n: usize,
1427 cpu: Option<usize>,
1428) -> SnapshotResult<SnapshotEntry<'a>> {
1429 if n >= map.percpu_entries.len() {
1430 return Err(SnapshotError::IndexOutOfRange {
1431 map: map.name.clone(),
1432 index: n,
1433 len: map.percpu_entries.len(),
1434 });
1435 }
1436 Ok(resolve_percpu_entry(map, &map.percpu_entries[n], cpu))
1437}
1438
1439fn resolve_percpu_entry<'a>(
1440 map: &'a FailureDumpMap,
1441 entry: &'a FailureDumpPercpuEntry,
1442 cpu: Option<usize>,
1443) -> SnapshotEntry<'a> {
1444 let Some(c) = cpu else {
1445 return SnapshotEntry::Percpu(entry);
1446 };
1447 if c >= entry.per_cpu.len() {
1448 return SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1449 map: map.name.clone(),
1450 cpu: u32::try_from(c).unwrap_or(u32::MAX),
1451 len: entry.per_cpu.len(),
1452 unmapped: false,
1453 });
1454 }
1455 match entry.per_cpu[c].as_ref() {
1456 Some(v) => SnapshotEntry::Value(v),
1457 None => SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1458 map: map.name.clone(),
1459 cpu: u32::try_from(c).unwrap_or(u32::MAX),
1460 len: entry.per_cpu.len(),
1461 unmapped: true,
1462 }),
1463 }
1464}
1465
1466fn resolve_percpu_hash_entry_at<'a>(
1467 map: &'a FailureDumpMap,
1468 n: usize,
1469 cpu: Option<usize>,
1470) -> SnapshotResult<SnapshotEntry<'a>> {
1471 if n >= map.percpu_hash_entries.len() {
1472 return Err(SnapshotError::IndexOutOfRange {
1473 map: map.name.clone(),
1474 index: n,
1475 len: map.percpu_hash_entries.len(),
1476 });
1477 }
1478 Ok(resolve_percpu_hash_entry(
1479 map,
1480 &map.percpu_hash_entries[n],
1481 cpu,
1482 ))
1483}
1484
1485fn resolve_percpu_hash_entry<'a>(
1486 map: &'a FailureDumpMap,
1487 entry: &'a FailureDumpPercpuHashEntry,
1488 cpu: Option<usize>,
1489) -> SnapshotEntry<'a> {
1490 let Some(c) = cpu else {
1491 return SnapshotEntry::PercpuHash(entry);
1492 };
1493 if c >= entry.per_cpu.len() {
1494 return SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1495 map: map.name.clone(),
1496 cpu: u32::try_from(c).unwrap_or(u32::MAX),
1497 len: entry.per_cpu.len(),
1498 unmapped: false,
1499 });
1500 }
1501 match entry.per_cpu[c].as_ref() {
1502 Some(v) => SnapshotEntry::Value(v),
1503 None => SnapshotEntry::Missing(SnapshotError::PerCpuSlot {
1504 map: map.name.clone(),
1505 cpu: u32::try_from(c).unwrap_or(u32::MAX),
1506 len: entry.per_cpu.len(),
1507 unmapped: true,
1508 }),
1509 }
1510}
1511
1512/// Render a [`SnapshotEntry`]'s key into a bounded `String` suitable
1513/// for the [`SnapshotError::NoMatch::available_keys`] sample.
1514///
1515/// Returns `None` for [`SnapshotEntry::Value`] (single-value ARRAY
1516/// maps have no key surface) and [`SnapshotEntry::Missing`] (no
1517/// entry was produced). Hash / per-CPU-hash entries fall back to
1518/// the hex-encoded raw key bytes via the `hex:` prefix when BTF
1519/// rendering was absent at capture time. The result is truncated
1520/// to [`NO_MATCH_KEY_CHAR_CAP`] chars with a trailing `…` to keep
1521/// wide struct keys from overrunning failure-message lines.
1522pub(super) fn render_entry_key(entry: &SnapshotEntry<'_>) -> Option<String> {
1523 let key = match entry {
1524 SnapshotEntry::Hash(e) => match e.key.as_ref() {
1525 Some(rv) => rv.to_string(),
1526 None => format!("{HEX_KEY_PREFIX}{}", e.key_hex),
1527 },
1528 SnapshotEntry::PercpuHash(e) => match e.key.as_ref() {
1529 Some(rv) => rv.to_string(),
1530 None => format!("{HEX_KEY_PREFIX}{}", e.key_hex),
1531 },
1532 SnapshotEntry::Percpu(e) => e.key.to_string(),
1533 SnapshotEntry::Value(_) | SnapshotEntry::Missing(_) => return None,
1534 };
1535 // Bytes-per-char is >= 1 in UTF-8, so byte-length <= char-cap implies
1536 // char-length <= char-cap — short-circuit the O(n) chars().count()
1537 // walk on the common ASCII case.
1538 if key.len() <= NO_MATCH_KEY_CHAR_CAP {
1539 return Some(key);
1540 }
1541 if key.chars().count() > NO_MATCH_KEY_CHAR_CAP {
1542 let mut truncated: String = key
1543 .chars()
1544 .take(NO_MATCH_KEY_CHAR_CAP.saturating_sub(1))
1545 .collect();
1546 truncated.push('…');
1547 Some(truncated)
1548 } else {
1549 Some(key)
1550 }
1551}