ktstr/cli/stats_cmds/
explain_sidecar.rs

1//! Per-sidecar `Option`-field absence diagnostic surface.
2//!
3//! Holds [`explain_sidecar`] (the `cargo ktstr stats explain-sidecar` entry
4//! point), the static catalog ([`SIDECAR_NONE_CATALOG`]) cataloging
5//! every `Option<T>` field on
6//! [`crate::test_support::SidecarResult`] with cause prose +
7//! actionable fix wording, the [`NoneClassification`] tag splitting
8//! "expected" from "actionable" gaps, the run-directory walker
9//! ([`walk_run_with_stats`]) plus its file-count helper
10//! ([`count_sidecar_files`]) and per-walk [`WalkStats`], and both
11//! renderers ([`render_explain_sidecar_text`] /
12//! [`render_explain_sidecar_json`] with the schema-versioned
13//! [`ExplainOutput`] / [`WalkStatsJson`] / [`WalkError`] /
14//! [`WalkIoError`] / [`FieldDiagnostic`] shapes).
15
16use std::path::Path;
17
18use anyhow::{Result, bail};
19
20use super::dispatch::suggest_closest_run_key;
21
/// Tags what an absent (`None`) `Option` field on a
/// [`crate::test_support::SidecarResult`] means for the operator.
///
/// Some absences are the normal steady-state shape (e.g. `payload`
/// for a scheduler-only test); others signal a recoverable gap an
/// operator can remediate (e.g. `kernel_commit` from a tarball-cache
/// kernel that has no on-disk source tree to probe).
///
/// [`explain_sidecar`] labels every diagnostic block with this tag.
/// The JSON shape exposes it as a per-field `"classification"`
/// string so dashboards can color-code "expected" vs "actionable"
/// blocks without re-deriving the rule from the causes prose.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NoneClassification {
    /// Steady state: the source data does not exist or has not been
    /// wired yet, so no operator action recovers the field.
    Expected,
    /// Recoverable gap: re-running the test in a different
    /// environment (in-repo cwd, non-tarball kernel, non-host-only
    /// test) would populate the field.
    Actionable,
}

impl NoneClassification {
    /// Returns the stable string token for this classification.
    ///
    /// The token is the value of the JSON `classification` key on
    /// [`explain_sidecar`]'s machine-readable output, and the text
    /// renderer prints the same token, so the label a human reads on
    /// the terminal is the one a script scrapes from JSON.
    fn as_str(self) -> &'static str {
        // Exhaustive on purpose: a new variant must pick its token here.
        match self {
            NoneClassification::Expected => "expected",
            NoneClassification::Actionable => "actionable",
        }
    }
}
57
/// Catalog entry for one [`crate::test_support::SidecarResult`]
/// `Option` field — wired into [`SIDECAR_NONE_CATALOG`] so the
/// diagnostic surface stays in lockstep with the on-disk schema.
///
/// Each entry was derived from the rustdoc on the corresponding
/// field in `src/test_support/sidecar/mod.rs`, which remains the
/// source of truth for what a `None` means. The catalog is static
/// (no live probing): explain-sidecar's purpose is post-hoc archive
/// diagnosis, not live debugging, and dynamic probes against an
/// absent host (e.g. checking `KTSTR_KERNEL` on a CI runner that
/// produced an archived sidecar) would return nonsense.
struct NoneCatalogEntry {
    /// Field name as serialized on disk. Matches the struct field
    /// identifier verbatim — serde uses the field name without
    /// rename attributes for [`crate::test_support::SidecarResult`].
    field: &'static str,
    /// How to read a `None` in this field: steady state or
    /// recoverable gap. See [`NoneClassification`].
    classification: NoneClassification,
    /// Human-readable cause prose, one element per documented cause
    /// from the field's rustdoc. The text renderer prints each cause
    /// on its own bulleted line; the JSON shape emits the slice
    /// verbatim as a JSON string array.
    causes: &'static [&'static str],
    /// Operator-actionable remediation, when one applies.
    ///
    /// `Some(...)` for fields where re-running in a different
    /// configuration would populate the value (e.g. `kernel_commit`
    /// recovers when `KTSTR_KERNEL` points at a local source tree);
    /// `None` for fields whose absence is the steady-state shape with
    /// no recourse (e.g. `scheduler_commit` is reserved on the schema
    /// for future enrichment).
    ///
    /// There is exactly one fix per entry: the most-common-case
    /// remediation, chosen when a field's causes all converge on the
    /// same operator action. That covers the typical case without a
    /// deeper data-model change; a per-cause split remains possible
    /// if the catalog grows fields whose causes legitimately diverge
    /// in their remediation.
    fix: Option<&'static str>,
}
101
/// Static catalog covering every `Option<T>` field on
/// [`crate::test_support::SidecarResult`]. Order matches the on-disk
/// schema declaration order so a human diff against `SidecarResult`
/// reads top-to-bottom.
///
/// Causes prose is sourced from the per-field rustdoc on
/// `SidecarResult` — see `src/test_support/sidecar/mod.rs` for the
/// single source of truth on what each `None` means.
///
/// A schema change that adds, removes, or renames an `Option` field
/// MUST update this catalog. The
/// `none_catalog_covers_every_option_field` test enforces
/// `SIDECAR_NONE_CATALOG.len() == EXPECTED_OPTION_FIELD_COUNT`
/// (a hand-coded `11`) and asserts the projection helper enumerates
/// the same field names in the same order. Adding a new `Option`
/// field on `SidecarResult` therefore requires three co-ordinated
/// changes:
/// 1. bump the constant in that test,
/// 2. extend [`project_optional_fields`]'s array (its length `11`
///    is compile-checked, so a missing entry fails to compile),
/// 3. add a catalog row here.
///
/// An `Actionable` row may still carry `fix: None`: per the
/// `none_catalog_fix_assignments_match_policy` test, a fix is
/// withheld when the cause set spans multiple unrelated remedies and
/// no single operator action covers them.
const SIDECAR_NONE_CATALOG: &[NoneCatalogEntry] = &[
    NoneCatalogEntry {
        field: "scheduler_commit",
        classification: NoneClassification::Expected,
        causes: &["no SchedulerSpec variant currently exposes a reliable \
             commit source — reserved on the schema for future \
             enrichment (e.g. --version probe or ELF-note read on \
             the resolved scheduler binary)"],
        fix: None,
    },
    NoneCatalogEntry {
        field: "resolve_source",
        classification: NoneClassification::Actionable,
        causes: &[
            "the sidecar predates the `resolve_source` field (added when \
             scheduler-resolution provenance began being recorded), so it \
             falls back to None via serde's tolerate-absence rule",
            "a pre-VM-boot skip: no scheduler binary was resolved, so there \
             is no discovery path to record",
        ],
        fix: Some(
            "re-run the test to regenerate the sidecar — a real run records \
             the scheduler's discovery path",
        ),
    },
    NoneCatalogEntry {
        field: "project_commit",
        classification: NoneClassification::Actionable,
        causes: &[
            "current_dir() could not be resolved at sidecar-write \
             time (process cwd was rmdir'd while alive)",
            "test process cwd was not inside any git repository",
            "HEAD could not be read (unborn HEAD on a fresh \
             `git init` with zero commits, or a corrupt repository)",
        ],
        fix: Some(
            "run from inside a git-tracked source tree with at \
             least one commit",
        ),
    },
    NoneCatalogEntry {
        field: "payload",
        classification: NoneClassification::Expected,
        causes: &["test declared no binary payload (scheduler-only test \
             or pure-scenario test that never invokes \
             ctx.payload(...))"],
        fix: None,
    },
    NoneCatalogEntry {
        field: "monitor",
        classification: NoneClassification::Actionable,
        causes: &[
            "host-only test path: monitor loop never started",
            "early VM failure: monitor loop terminated before \
             producing samples",
            "sample collection produced no valid data",
        ],
        fix: None,
    },
    NoneCatalogEntry {
        field: "kvm_stats",
        classification: NoneClassification::Actionable,
        causes: &[
            "host-only test path: VM did not run",
            "KVM stats were unavailable on this host (e.g. KVM \
             module not loaded, /dev/kvm permissions, or kernel \
             missing the stats interface)",
        ],
        fix: None,
    },
    NoneCatalogEntry {
        field: "kernel_version",
        classification: NoneClassification::Actionable,
        causes: &[
            "host-only test path: no kernel under test",
            "neither cache metadata nor `include/config/kernel.release` \
             yielded a version string",
        ],
        fix: None,
    },
    NoneCatalogEntry {
        field: "kernel_commit",
        classification: NoneClassification::Actionable,
        causes: &[
            "KTSTR_KERNEL is unset or empty",
            "kernel source is a Tarball or Git transient cache \
             entry (no on-disk source tree to probe)",
            "resolved kernel directory is not a git repository \
             (gix::open failed)",
            "HEAD cannot be read (unborn HEAD on a fresh `git init` \
             with zero commits)",
            "gix probe failed for another reason — metadata, not \
             a gate",
        ],
        fix: Some(
            "set KTSTR_KERNEL to a local kernel source tree that \
             is a git repository (e.g. a git clone of the kernel)",
        ),
    },
    NoneCatalogEntry {
        field: "host",
        classification: NoneClassification::Actionable,
        causes: &[
            "test-fixture path: not the production sidecar \
             writer (production writers always populate `host`)",
            "pre-enrichment archive: sidecar predates the \
             host-context landing — re-run the test to \
             regenerate under the current schema",
        ],
        fix: Some(
            "for pre-enrichment archives, re-run the test to \
             regenerate under the current schema; test-fixture \
             sidecars are not production runs and cannot be \
             recovered by re-running",
        ),
    },
    NoneCatalogEntry {
        field: "cleanup_duration_ms",
        classification: NoneClassification::Actionable,
        causes: &[
            "host-only / host-only-stub test path: no VM teardown \
             window to time",
            "run was killed by the watchdog before \
             `KtstrVm::collect_results` returned",
        ],
        fix: None,
    },
    NoneCatalogEntry {
        field: "run_source",
        classification: NoneClassification::Actionable,
        causes: &["pre-rename archive: sidecar carries the old `source` \
             key which the current schema drops as an unknown \
             field, leaving `run_source` to fall back to None via \
             serde's tolerate-absence rule. Re-run the test to \
             regenerate under the new schema, or rename the key \
             in-place before deserialize"],
        fix: Some(
            "re-run the test to regenerate, or rename the on-disk \
             `source` key to `run_source`",
        ),
    },
];
262
263/// Project one [`crate::test_support::SidecarResult`] onto its
264/// `Option` fields, returning `(field_name, is_some)` pairs in the
265/// same order as [`SIDECAR_NONE_CATALOG`].
266///
267/// Hand-written rather than derived because:
268/// - Only the 11 `Option<T>` fields are diagnostic surface; the
269///   non-`Option` fields (`test_name`, `passed`, `stats`, etc.)
270///   are always populated by deserialize and would clutter the
271///   output without adding signal.
272/// - The order MUST match the catalog so the field-by-field
273///   lookup in [`render_explain_sidecar_text`] resolves
274///   correctly.
275///
276/// A future schema addition that introduces a new `Option<T>`
277/// field on `SidecarResult` MUST update this projection; the
278/// `[(_, _); 11]` array literal makes the length compile-checked
279/// — adding an entry without updating the length is a compile
280/// error — and the `none_catalog_covers_every_option_field` test
281/// asserts the catalog and projection enumerate the same names
282/// in the same order.
283fn project_optional_fields(sc: &crate::test_support::SidecarResult) -> [(&'static str, bool); 11] {
284    [
285        ("scheduler_commit", sc.scheduler_commit.is_some()),
286        ("resolve_source", sc.resolve_source.is_some()),
287        ("project_commit", sc.project_commit.is_some()),
288        ("payload", sc.payload.is_some()),
289        ("monitor", sc.monitor.is_some()),
290        ("kvm_stats", sc.kvm_stats.is_some()),
291        ("kernel_version", sc.kernel_version.is_some()),
292        ("kernel_commit", sc.kernel_commit.is_some()),
293        ("host", sc.host.is_some()),
294        ("cleanup_duration_ms", sc.cleanup_duration_ms.is_some()),
295        ("run_source", sc.run_source.is_some()),
296    ]
297}
298
/// File-walk statistics for the run directory under
/// [`explain_sidecar`]. Used to drive the `walked / valid` header
/// and the corrupt-sidecars footer; the steady-state invariant is
/// `walked == valid + errors.len() + io_errors.len()`.
///
/// Built by [`walk_run_with_stats`]. `walked` comes from a separate
/// counting pass ([`count_sidecar_files`]) while the other three
/// fields come from the parsing pass, which is why the relation
/// above is an invariant of the steady state rather than a
/// guarantee by construction.
struct WalkStats {
    /// Number of sidecar files found by [`count_sidecar_files`].
    walked: usize,
    /// Number of sidecars that loaded successfully (the length of
    /// the sidecar list returned alongside these stats).
    valid: usize,
    /// Parse failures reported by
    /// `crate::test_support::collect_sidecars_with_errors`; rendered
    /// as the "corrupt sidecars" footer in text output.
    errors: Vec<crate::test_support::SidecarParseError>,
    /// I/O failures reported by the same collection pass; rendered
    /// as the "io errors" footer in text output.
    io_errors: Vec<crate::test_support::SidecarIoError>,
}
309
310/// Count `.ktstr.json` files under `run_dir` using
311/// [`crate::test_support::collect_sidecars`]'s walk shape (flat
312/// files plus one level of subdirectories). Pure file-existence
313/// pass — no parsing, no `serde_json` work — used purely to
314/// derive the `walked` count for [`WalkStats`].
315fn count_sidecar_files(run_dir: &Path) -> usize {
316    let mut count = 0usize;
317    let entries = match std::fs::read_dir(run_dir) {
318        Ok(e) => e,
319        Err(_) => return 0,
320    };
321    let mut subdirs = Vec::new();
322    for entry in entries.flatten() {
323        let path = entry.path();
324        if path.is_dir() {
325            subdirs.push(path);
326            continue;
327        }
328        if crate::test_support::is_sidecar_filename(&path) {
329            count += 1;
330        }
331    }
332    for sub in subdirs {
333        if let Ok(entries) = std::fs::read_dir(&sub) {
334            for entry in entries.flatten() {
335                if crate::test_support::is_sidecar_filename(&entry.path()) {
336                    count += 1;
337                }
338            }
339        }
340    }
341    count
342}
343
344/// Load sidecars under `run_dir` and report file-walk statistics.
345fn walk_run_with_stats(run_dir: &Path) -> (Vec<crate::test_support::SidecarResult>, WalkStats) {
346    let walked = count_sidecar_files(run_dir);
347    let (sidecars, errors, io_errors) = crate::test_support::collect_sidecars_with_errors(run_dir);
348    let valid = sidecars.len();
349    (
350        sidecars,
351        WalkStats {
352            walked,
353            valid,
354            errors,
355            io_errors,
356        },
357    )
358}
359
360/// Diagnose `Option`-field absences for a run's sidecars. Mirrors
361/// `show_run_host`'s shape (`--run` + optional `--dir`,
362/// printable string return). See the original rustdoc on the
363/// public surface for the full JSON / text contract and exit-code
364/// policy.
365pub fn explain_sidecar(run: &str, dir: Option<&Path>, json: bool) -> Result<String> {
366    if run.is_empty() {
367        bail!(
368            "run argument must not be empty. The run argument is \
369             joined onto the run-root via `Path::join` and must \
370             contain at least one `Normal` path component — i.e. \
371             must not be empty, `.`, `..`, or absolute (e.g. a \
372             typical run key shape: `6.14-abc1234` or \
373             `6.14-abc1234-dirty`). To point at a different pool \
374             root, use `--dir`. Run `cargo ktstr stats list` to \
375             enumerate available run keys.",
376        );
377    }
378    for component in std::path::Path::new(run).components() {
379        match component {
380            std::path::Component::CurDir
381            | std::path::Component::ParentDir
382            | std::path::Component::RootDir
383            | std::path::Component::Prefix(_) => {
384                bail!(
385                    "run '{run}' contains pool-root-aliasing or \
386                     path-traversal components (`.`, `..`, or absolute \
387                     path). The run argument is joined onto the \
388                     run-root via `Path::join` and must contain only \
389                     `Normal` path components — no `.`, `..`, or \
390                     absolute prefix (e.g. a typical run key shape: \
391                     `6.14-abc1234` or `6.14-abc1234-dirty`; \
392                     multi-component paths like `gauntlet/job-1` are \
393                     also accepted). To point at a different pool \
394                     root, use `--dir`. Run `cargo ktstr stats list` \
395                     to enumerate available run keys.",
396                );
397            }
398            std::path::Component::Normal(_) => {}
399        }
400    }
401    let root: std::path::PathBuf = match dir {
402        Some(d) => d.to_path_buf(),
403        None => crate::test_support::runs_root(),
404    };
405    let run_dir = root.join(run);
406    if !run_dir.exists() {
407        let suggestion = suggest_closest_run_key(run, &root)
408            .map(|name| format!(" Did you mean `{name}`?"))
409            .unwrap_or_default();
410        bail!(
411            "run '{run}' not found under {}.{suggestion} \
412             Run `cargo ktstr stats list` to enumerate available run keys.",
413            root.display(),
414        );
415    }
416    let (sidecars, walk_stats) = walk_run_with_stats(&run_dir);
417    if walk_stats.walked == 0 {
418        bail!(
419            "run '{run}' has no sidecar data (searched {})",
420            run_dir.display(),
421        );
422    }
423    if json {
424        Ok(render_explain_sidecar_json(&sidecars, &walk_stats))
425    } else {
426        Ok(render_explain_sidecar_text(&sidecars, &walk_stats))
427    }
428}
429
430/// Render the per-sidecar text block for [`explain_sidecar`].
431fn render_explain_sidecar_text(
432    sidecars: &[crate::test_support::SidecarResult],
433    walk_stats: &WalkStats,
434) -> String {
435    use std::fmt::Write as _;
436    let mut sorted: Vec<&crate::test_support::SidecarResult> = sidecars.iter().collect();
437    sorted.sort_by(|a, b| {
438        a.test_name
439            .cmp(&b.test_name)
440            .then_with(|| a.run_id.cmp(&b.run_id))
441    });
442    let mut out = String::new();
443    let _ = writeln!(
444        out,
445        "walked {} sidecar file(s), parsed {} valid\n",
446        walk_stats.walked, walk_stats.valid,
447    );
448    for sc in &sorted {
449        let _ = writeln!(out, "test: {}", sc.test_name);
450        let _ = writeln!(out, "  topology: {}", sc.topology);
451        let _ = writeln!(out, "  scheduler: {}", sc.scheduler);
452        let _ = writeln!(out, "  run_id: {}", sc.run_id);
453        let arch = sc
454            .host
455            .as_ref()
456            .and_then(|h| h.arch.as_deref())
457            .unwrap_or("-");
458        let _ = writeln!(out, "  arch: {arch}");
459        // vCPU count + effective host-CPU budget, with the overcommit
460        // marker: cpu_budget < vcpus means the host time-sliced the guest's
461        // vCPU threads, so wake-latency / off-CPU / run-delay timing metrics
462        // are host-contention-confounded. schedstat run_delay accumulates
463        // `rq_clock` deltas (kernel/sched/stats.h sched_info_arrive), and
464        // rq->clock tracks the guest TSC and is NEVER steal-adjusted — the
465        // steal subtraction hits only rq->clock_task (kernel/sched/core.c
466        // update_rq_clock_task) — so the off-host deschedule window INFLATES
467        // run_delay for guest tasks enqueued-and-waiting across it, just
468        // like wake-latency and off-CPU, not "freezes" it. Matches
469        // host_topology::overcommit_warning. The raw metrics are
470        // KEPT (never None'd); this annotates them. The branch order is
471        // skip -> malformed -> overcommit -> plain: cpu_budget == 0 renders
472        // skip irrespective of vcpus (the writer pairs 0/0), and vcpus == 0
473        // with a nonzero budget is a malformed on-disk sidecar the writer
474        // never emits.
475        if sc.cpu_budget == 0 {
476            let _ = writeln!(out, "  cpu_budget: - (skip; VM not booted)");
477        } else if sc.vcpus == 0 {
478            let _ = writeln!(
479                out,
480                "  cpu_budget: {} / 0 vcpus  [malformed: vcpus=0 with a nonzero \
481                 budget; the writer never emits this]",
482                sc.cpu_budget,
483            );
484        } else if sc.cpu_budget < sc.vcpus {
485            let _ = writeln!(
486                out,
487                "  cpu_budget: {} / {} vcpus  [OVERCOMMIT: host time-slices the \
488                 guest vCPUs -> wake-latency / off-CPU / run-delay timing metrics \
489                 are host-contention-confounded; compare the overcommit-invariant \
490                 worst_iterations_per_cpu_sec, not raw timing]",
491                sc.cpu_budget, sc.vcpus,
492            );
493        } else {
494            let _ = writeln!(out, "  cpu_budget: {} / {} vcpus", sc.cpu_budget, sc.vcpus);
495        }
496        // Per-cgroup CPU placement — which host/guest CPUs each cgroup's
497        // workers actually ran on. Surfaced on EVERY run (the scheduler
498        // failure dump only fires on failure); reads the always-written
499        // ScenarioStats.cgroups. Cgroups with no recorded CPU (no worker
500        // reported one) are skipped.
501        for cg in sc.stats.cgroups.iter().filter(|c| !c.cpus_used.is_empty()) {
502            let label = if cg.cgroup_name.is_empty() {
503                "(unnamed)"
504            } else {
505                cg.cgroup_name.as_str()
506            };
507            let cpus = cg
508                .cpus_used
509                .iter()
510                .map(|c| c.to_string())
511                .collect::<Vec<_>>()
512                .join(",");
513            let _ = writeln!(out, "  cgroup {label}: ran on cpus [{cpus}]");
514        }
515        let projected = project_optional_fields(sc);
516        let populated: Vec<&'static str> = projected
517            .iter()
518            .filter(|(_, b)| *b)
519            .map(|(n, _)| *n)
520            .collect();
521        let none_fields: Vec<&'static str> = projected
522            .iter()
523            .filter(|(_, b)| !*b)
524            .map(|(n, _)| *n)
525            .collect();
526        let populated_text = if populated.is_empty() {
527            "<none>".to_string()
528        } else {
529            populated.join(", ")
530        };
531        let _ = writeln!(
532            out,
533            "  populated optional fields ({}): {populated_text}",
534            populated.len(),
535        );
536        if none_fields.is_empty() {
537            let _ = writeln!(out, "  none fields: <all populated>\n");
538            continue;
539        }
540        let _ = writeln!(out, "  none fields ({}):", none_fields.len());
541        for field in none_fields {
542            let entry = SIDECAR_NONE_CATALOG
543                .iter()
544                .find(|e| e.field == field)
545                .expect(
546                    "catalog must cover every projected field — \
547                     guarded by none_catalog_covers_every_option_field",
548                );
549            let _ = writeln!(
550                out,
551                "    {} [{}]",
552                entry.field,
553                entry.classification.as_str(),
554            );
555            for cause in entry.causes {
556                let _ = writeln!(out, "      - {cause}");
557            }
558            if let Some(fix) = entry.fix {
559                let _ = writeln!(out, "      fix: {fix}");
560            }
561        }
562        out.push('\n');
563    }
564    if !walk_stats.errors.is_empty() {
565        let _ = writeln!(out, "corrupt sidecars ({}):", walk_stats.errors.len());
566        for err in &walk_stats.errors {
567            let _ = writeln!(out, "  {}", err.path.display());
568            let _ = writeln!(out, "    error: {}", err.raw_error);
569            if let Some(prose) = &err.enriched_message {
570                let _ = writeln!(out, "    enriched: {prose}");
571            }
572        }
573        out.push('\n');
574    }
575    if !walk_stats.io_errors.is_empty() {
576        let _ = writeln!(out, "io errors ({}):", walk_stats.io_errors.len());
577        for err in &walk_stats.io_errors {
578            let _ = writeln!(out, "  {}", err.path.display());
579            let _ = writeln!(out, "    error: {}", err.raw_error);
580        }
581        out.push('\n');
582    }
583    out
584}
585
/// JSON schema version stamp emitted on
/// [`ExplainOutput::_schema_version`]. Bumped on any incompatible
/// shape change.
///
/// Version history:
/// - `"2"`: added `overcommit_runs` (the count of pooled sidecars
///   whose host CPU budget was below their vCPU count, so their
///   timing metrics are host-contention-confounded — the JSON-path
///   mirror of the text per-sidecar OVERCOMMIT marker, for scripted
///   consumers that gate on data quality).
const EXPLAIN_SIDECAR_SCHEMA_VERSION: &str = "2";
595
/// Top-level JSON document produced by
/// [`render_explain_sidecar_json`]: a schema stamp, the walk
/// statistics, the overcommit count, and one aggregate diagnostic
/// per catalogued optional field.
#[derive(serde::Serialize)]
struct ExplainOutput<'a> {
    /// Schema version stamp; set from
    /// [`EXPLAIN_SIDECAR_SCHEMA_VERSION`].
    _schema_version: &'a str,
    /// File-walk statistics, including parse and I/O failures.
    _walk: WalkStatsJson<'a>,
    /// Count of pooled sidecars with `cpu_budget != 0 && cpu_budget <
    /// vcpus` — host-overcommitted runs whose wake-latency / off-CPU /
    /// run-delay timing metrics are host-contention-confounded. Skip
    /// rows (`cpu_budget == 0`) never booted and are excluded.
    overcommit_runs: usize,
    /// Aggregate diagnostic per optional field, keyed by the field
    /// name from [`SIDECAR_NONE_CATALOG`]. A `BTreeMap`, so entries
    /// are ordered by field name rather than by catalog order.
    fields: std::collections::BTreeMap<&'a str, FieldDiagnostic<'a>>,
}
607
/// JSON projection of [`WalkStats`], emitted under the `_walk` key
/// of [`ExplainOutput`].
#[derive(serde::Serialize)]
struct WalkStatsJson<'a> {
    /// Number of sidecar files found under the run directory.
    walked: usize,
    /// Number of sidecars that parsed successfully.
    valid: usize,
    /// One record per sidecar that failed to parse.
    errors: Vec<WalkError<'a>>,
    /// One record per sidecar that failed with an I/O error.
    io_errors: Vec<WalkIoError<'a>>,
}
615
/// JSON record for one sidecar that failed to parse; built from a
/// `crate::test_support::SidecarParseError` by
/// [`render_explain_sidecar_json`].
#[derive(serde::Serialize)]
struct WalkError<'a> {
    /// Path of the offending file, rendered with `Path::display`.
    path: String,
    /// The parse error's `raw_error` text.
    error: &'a str,
    /// The parse error's `enriched_message`, when it carries one.
    enriched_message: Option<&'a str>,
}
622
/// JSON record for one sidecar that failed with an I/O error; built
/// from a `crate::test_support::SidecarIoError` by
/// [`render_explain_sidecar_json`].
#[derive(serde::Serialize)]
struct WalkIoError<'a> {
    /// Path of the offending file, rendered with `Path::display`.
    path: String,
    /// The I/O error's `raw_error` text.
    error: &'a str,
}
628
/// Aggregate JSON diagnostic for one optional field across every
/// parsed sidecar of the run: how often it was absent or present,
/// plus the static catalog prose for the absent case.
#[derive(serde::Serialize)]
struct FieldDiagnostic<'a> {
    /// Number of parsed sidecars in which the field is `None`.
    none_count: usize,
    /// Number of parsed sidecars in which the field is populated
    /// (parsed sidecar count minus `none_count`).
    some_count: usize,
    /// Token from [`NoneClassification::as_str`] for this field.
    classification: &'a str,
    /// Catalog cause prose, emitted verbatim as a string array.
    causes: &'a [&'a str],
    /// Catalog remediation, when the catalog entry carries one.
    fix: Option<&'a str>,
}
637
638/// Render the aggregate JSON shape for [`explain_sidecar`].
639fn render_explain_sidecar_json(
640    sidecars: &[crate::test_support::SidecarResult],
641    walk_stats: &WalkStats,
642) -> String {
643    let fields: std::collections::BTreeMap<&str, FieldDiagnostic<'_>> = SIDECAR_NONE_CATALOG
644        .iter()
645        .map(|entry| {
646            let none_count = sidecars
647                .iter()
648                .filter(|sc| {
649                    project_optional_fields(sc)
650                        .iter()
651                        .any(|(n, b)| *n == entry.field && !*b)
652                })
653                .count();
654            let some_count = sidecars.len().saturating_sub(none_count);
655            (
656                entry.field,
657                FieldDiagnostic {
658                    none_count,
659                    some_count,
660                    classification: entry.classification.as_str(),
661                    causes: entry.causes,
662                    fix: entry.fix,
663                },
664            )
665        })
666        .collect();
667    let errors: Vec<WalkError<'_>> = walk_stats
668        .errors
669        .iter()
670        .map(|err| WalkError {
671            path: err.path.display().to_string(),
672            error: &err.raw_error,
673            enriched_message: err.enriched_message.as_deref(),
674        })
675        .collect();
676    let io_errors: Vec<WalkIoError<'_>> = walk_stats
677        .io_errors
678        .iter()
679        .map(|err| WalkIoError {
680            path: err.path.display().to_string(),
681            error: &err.raw_error,
682        })
683        .collect();
684    let overcommit_runs = sidecars
685        .iter()
686        .filter(|sc| sc.cpu_budget != 0 && sc.cpu_budget < sc.vcpus)
687        .count();
688    let output = ExplainOutput {
689        _schema_version: EXPLAIN_SIDECAR_SCHEMA_VERSION,
690        _walk: WalkStatsJson {
691            walked: walk_stats.walked,
692            valid: walk_stats.valid,
693            errors,
694            io_errors,
695        },
696        overcommit_runs,
697        fields,
698    };
699    serde_json::to_string_pretty(&output).expect(
700        "static-shape JSON serialization is infallible — every \
701         field in ExplainOutput / WalkStatsJson / WalkError / WalkIoError / \
702         FieldDiagnostic is a primitive, &str, or Vec/BTreeMap \
703         of those — no NaN, no non-string keys, no unsupported \
704         types",
705    )
706}
707
708#[cfg(test)]
709mod tests {
710    use super::super::super::testing::{
711        SIDECAR_VEC_FIELDS, make_test_run, write_corrupt_sidecar, write_sidecar,
712    };
713    use super::*;
714
715    /// Drift guard: every `Option<T>` field on `SidecarResult` must
716    /// have a matching catalog entry in `SIDECAR_NONE_CATALOG`, and
717    /// the projected-fields helper must enumerate the same set.
718    #[test]
719    fn none_catalog_covers_every_option_field() {
720        const EXPECTED_OPTION_FIELD_COUNT: usize = 11;
721        assert_eq!(
722            SIDECAR_NONE_CATALOG.len(),
723            EXPECTED_OPTION_FIELD_COUNT,
724            "SIDECAR_NONE_CATALOG must cover every Option<T> field on \
725             SidecarResult; expected {EXPECTED_OPTION_FIELD_COUNT}, got \
726             {}. A schema change must update the catalog in lockstep.",
727            SIDECAR_NONE_CATALOG.len(),
728        );
729        let sc = crate::test_support::SidecarResult::test_fixture();
730        let projected = project_optional_fields(&sc);
731        assert_eq!(
732            projected.len(),
733            EXPECTED_OPTION_FIELD_COUNT,
734            "project_optional_fields must enumerate every Option<T> \
735             field; expected {EXPECTED_OPTION_FIELD_COUNT}, got {}. Co-update \
736             with the catalog when adding a new Option field.",
737            projected.len(),
738        );
739        for (i, (name, _)) in projected.iter().enumerate() {
740            let catalog = &SIDECAR_NONE_CATALOG[i];
741            assert_eq!(
742                *name, catalog.field,
743                "projected field {i} ({name:?}) must match catalog \
744                 entry at the same index ({:?}) — order drift breaks \
745                 the renderer's catalog-lookup expectation",
746                catalog.field,
747            );
748        }
749    }
750
751    /// Catalog `causes` arrays must be non-empty for every entry.
752    #[test]
753    fn none_catalog_every_entry_has_causes() {
754        for entry in SIDECAR_NONE_CATALOG {
755            assert!(
756                !entry.causes.is_empty(),
757                "catalog entry for {} has no causes — every field's \
758                 None case must document at least one cause",
759                entry.field,
760            );
761        }
762    }
763
764    /// Expected-classified entries (steady-state None) must NOT
765    /// carry a `fix:` — there is no operator action that recovers
766    /// an Expected None, so emitting one would mislead.
767    #[test]
768    fn none_catalog_expected_entries_have_no_fix() {
769        for entry in SIDECAR_NONE_CATALOG {
770            if matches!(entry.classification, NoneClassification::Expected) {
771                assert!(
772                    entry.fix.is_none(),
773                    "Expected-classified field {} must not carry a `fix:` \
774                     — there is no operator action that recovers a \
775                     steady-state None",
776                    entry.field,
777                );
778            }
779        }
780    }
781
782    /// `fix:` assignment policy: must-fix for fields with a single
783    /// concrete recovery action; must-not-fix for Actionable fields
784    /// whose cause set spans multiple unrelated remedies (no single
785    /// operator action covers them).
786    #[test]
787    fn none_catalog_fix_assignments_match_policy() {
788        let by_field: std::collections::HashMap<&'static str, Option<&'static str>> =
789            SIDECAR_NONE_CATALOG
790                .iter()
791                .map(|e| (e.field, e.fix))
792                .collect();
793        let must_fix = [
794            "project_commit",
795            "kernel_commit",
796            "host",
797            "run_source",
798            "resolve_source",
799        ];
800        let must_not_fix = [
801            "scheduler_commit",
802            "payload",
803            "monitor",
804            "kvm_stats",
805            "kernel_version",
806            "cleanup_duration_ms",
807        ];
808        assert_eq!(
809            must_fix.len() + must_not_fix.len(),
810            SIDECAR_NONE_CATALOG.len(),
811            "every catalog entry must be classified as either \
812             must-fix or must-not-fix; expected sum = catalog len \
813             ({}), got must_fix={} + must_not_fix={}",
814            SIDECAR_NONE_CATALOG.len(),
815            must_fix.len(),
816            must_not_fix.len(),
817        );
818        for field in &must_fix {
819            let fix = by_field.get(field).copied().flatten();
820            assert!(
821                fix.is_some(),
822                "field {field} has a single concrete recovery action and must carry a `fix:`",
823            );
824        }
825        for field in &must_not_fix {
826            let fix = by_field.get(field).copied().flatten();
827            assert!(
828                fix.is_none(),
829                "field {field} must NOT carry a `fix:` (multi-cause or \
830                 steady-state None) — got: {fix:?}",
831            );
832        }
833    }
834
835    /// Error path: the named run directory does not exist.
836    #[test]
837    fn explain_sidecar_missing_run_returns_error() {
838        let tmp = tempfile::tempdir().unwrap();
839        let err = explain_sidecar("nonexistent-run", Some(tmp.path()), false).unwrap_err();
840        let msg = format!("{err:#}");
841        assert!(
842            msg.contains("run 'nonexistent-run' not found"),
843            "missing-run error must name the run: {msg}",
844        );
845        assert!(
846            msg.contains("cargo ktstr stats list"),
847            "missing-run error must name the discovery command: {msg}",
848        );
849    }
850
851    /// Error path: run directory exists but is empty.
852    #[test]
853    fn explain_sidecar_empty_run_returns_error() {
854        let tmp = tempfile::tempdir().unwrap();
855        let run_dir = tmp.path().join("run-empty");
856        std::fs::create_dir(&run_dir).unwrap();
857        let err = explain_sidecar("run-empty", Some(tmp.path()), false).unwrap_err();
858        let msg = format!("{err:#}");
859        assert!(
860            msg.contains("no sidecar data"),
861            "empty-run error must use the canonical message: {msg}",
862        );
863        assert!(
864            msg.contains("searched"),
865            "empty-run error must name the searched directory: {msg}",
866        );
867        assert!(
868            msg.contains(&run_dir.display().to_string()),
869            "empty-run error must include the resolved run_dir path \
870             ({}): {msg}",
871            run_dir.display(),
872        );
873    }
874
875    /// All-corrupt run is NOT a hard error — text rendering surfaces
876    /// every parse failure under the trailing `corrupt sidecars`
877    /// block; per-sidecar `test:` blocks must NOT appear.
878    #[test]
879    fn explain_sidecar_all_corrupt_renders_structured_diagnostic() {
880        let tmp = tempfile::tempdir().unwrap();
881        let run_dir = tmp.path().join("run-corrupt");
882        std::fs::create_dir(&run_dir).unwrap();
883        std::fs::write(run_dir.join("a-0000000000000000.ktstr.json"), "not json {").unwrap();
884        std::fs::write(
885            run_dir.join("b-0000000000000000.ktstr.json"),
886            "{\"missing\": \"required-fields\"}",
887        )
888        .unwrap();
889        let out = explain_sidecar("run-corrupt", Some(tmp.path()), false)
890            .expect("all-corrupt is no longer a hard error — must render");
891        assert!(
892            out.contains("walked 2"),
893            "header must name the walked count: {out}",
894        );
895        assert!(
896            out.contains("parsed 0 valid"),
897            "header must distinguish walked-vs-parsed (zero valid): {out}",
898        );
899        assert!(
900            out.contains("corrupt sidecars (2):"),
901            "all-corrupt run must surface the corrupt-sidecars \
902             block listing every parse failure: {out}",
903        );
904        assert!(
905            !out.contains("test:"),
906            "no sidecar parsed — must not emit any per-sidecar \
907             block: {out}",
908        );
909    }
910
911    /// Happy path: one fixture sidecar (every Option None). Text
912    /// output must list ALL eleven fields under "none fields" with
913    /// classifications + at least one cause string per entry.
914    #[test]
915    fn explain_sidecar_text_lists_all_none_fields_for_fixture() {
916        let tmp = tempfile::tempdir().unwrap();
917        let run_dir = tmp.path().join("run-all-none");
918        std::fs::create_dir(&run_dir).unwrap();
919        let sc = crate::test_support::SidecarResult::test_fixture();
920        std::fs::write(
921            run_dir.join("t-0000000000000000.ktstr.json"),
922            serde_json::to_string(&sc).unwrap(),
923        )
924        .unwrap();
925        let out = explain_sidecar("run-all-none", Some(tmp.path()), false).unwrap();
926        assert!(out.contains("walked 1"), "header must report walked: {out}");
927        assert!(out.contains("parsed 1"), "header must report parsed: {out}");
928        assert!(
929            out.contains("none fields (11)"),
930            "fixture has every Option as None — count must be 11: {out}",
931        );
932        for entry in SIDECAR_NONE_CATALOG {
933            assert!(
934                out.contains(entry.field),
935                "output must mention field {}: {out}",
936                entry.field,
937            );
938        }
939        assert!(
940            out.contains("[expected]"),
941            "expected-class fields must surface their tag: {out}",
942        );
943        assert!(
944            out.contains("[actionable]"),
945            "actionable-class fields must surface their tag: {out}",
946        );
947        let project_commit_fix = SIDECAR_NONE_CATALOG
948            .iter()
949            .find(|e| e.field == "project_commit")
950            .and_then(|e| e.fix)
951            .expect("project_commit has a single concrete recovery action and must carry a fix");
952        assert!(
953            out.contains(&format!("fix: {project_commit_fix}")),
954            "project_commit's fix: line must render its catalog \
955             prose verbatim ({project_commit_fix:?}): {out}",
956        );
957        let fix_line_count = out.matches("\n      fix:").count();
958        let expected_fix_count = SIDECAR_NONE_CATALOG
959            .iter()
960            .filter(|e| e.fix.is_some())
961            .count();
962        assert_eq!(
963            fix_line_count, expected_fix_count,
964            "exactly {expected_fix_count} entries carry a fix: in \
965             the catalog; output emitted {fix_line_count}: {out}",
966        );
967    }
968
969    /// JSON shape: aggregate per-field with `none_count`,
970    /// `some_count`, `classification`, `causes`, `fix`. With one
971    /// fixture sidecar (every Option None), every field reports
972    /// none_count=1, some_count=0, and the two sum to _walk.valid.
973    #[test]
974    fn explain_sidecar_json_shape_aggregates_none_counts() {
975        let tmp = tempfile::tempdir().unwrap();
976        let run_dir = tmp.path().join("run-json");
977        std::fs::create_dir(&run_dir).unwrap();
978        let sc = crate::test_support::SidecarResult::test_fixture();
979        std::fs::write(
980            run_dir.join("t-0000000000000000.ktstr.json"),
981            serde_json::to_string(&sc).unwrap(),
982        )
983        .unwrap();
984        let out = explain_sidecar("run-json", Some(tmp.path()), true).unwrap();
985        let parsed: serde_json::Value =
986            serde_json::from_str(&out).expect("json output must round-trip parse");
987        let walk = parsed.get("_walk").expect("must have _walk key");
988        assert_eq!(walk.get("walked").and_then(|v| v.as_u64()), Some(1));
989        assert_eq!(walk.get("valid").and_then(|v| v.as_u64()), Some(1));
990        let fields = parsed.get("fields").expect("must have fields key");
991        for entry in SIDECAR_NONE_CATALOG {
992            let f = fields
993                .get(entry.field)
994                .unwrap_or_else(|| panic!("missing field {}", entry.field));
995            let none_count = f
996                .get("none_count")
997                .and_then(|v| v.as_u64())
998                .unwrap_or_else(|| panic!("missing none_count for {}", entry.field));
999            let some_count = f
1000                .get("some_count")
1001                .and_then(|v| v.as_u64())
1002                .unwrap_or_else(|| panic!("missing some_count for {}", entry.field));
1003            assert_eq!(
1004                none_count, 1,
1005                "fixture: none_count must be 1 for {}",
1006                entry.field
1007            );
1008            assert_eq!(
1009                some_count, 0,
1010                "fixture: some_count must be 0 for {}",
1011                entry.field
1012            );
1013            assert_eq!(
1014                none_count + some_count,
1015                1,
1016                "sum invariant for {}",
1017                entry.field
1018            );
1019            assert_eq!(
1020                f.get("classification").and_then(|v| v.as_str()),
1021                Some(entry.classification.as_str()),
1022                "classification must round-trip for {}",
1023                entry.field,
1024            );
1025            let causes = f
1026                .get("causes")
1027                .and_then(|v| v.as_array())
1028                .unwrap_or_else(|| panic!("missing causes for {}", entry.field));
1029            assert_eq!(
1030                causes.len(),
1031                entry.causes.len(),
1032                "causes array length must match catalog for {}",
1033                entry.field,
1034            );
1035            let fix_value = f
1036                .get("fix")
1037                .unwrap_or_else(|| panic!("missing fix for {}", entry.field));
1038            match entry.fix {
1039                Some(expected) => {
1040                    assert_eq!(
1041                        fix_value.as_str(),
1042                        Some(expected),
1043                        "fix string must round-trip for {}",
1044                        entry.field,
1045                    );
1046                }
1047                None => {
1048                    assert!(
1049                        fix_value.is_null(),
1050                        "fix must be JSON null for fix=None entry {}: \
1051                         got {fix_value:?}",
1052                        entry.field,
1053                    );
1054                }
1055            }
1056        }
1057    }
1058
1059    /// JSON `overcommit_runs` counts only sidecars whose host CPU
1060    /// budget was below their vCPU count; skip rows (budget 0) and
1061    /// roomy/exact runs are excluded — the JSON-path mirror of the
1062    /// text OVERCOMMIT marker.
1063    #[test]
1064    fn explain_sidecar_json_counts_overcommitted_runs() {
1065        let tmp = tempfile::tempdir().unwrap();
1066        let run_dir = tmp.path().join("run-budget");
1067        std::fs::create_dir(&run_dir).unwrap();
1068        let mk = |name: &str, prefix: &str, budget: u32, vcpus: u32| {
1069            let mut sc = crate::test_support::SidecarResult::test_fixture();
1070            sc.test_name = name.to_string();
1071            sc.cpu_budget = budget;
1072            sc.vcpus = vcpus;
1073            std::fs::write(
1074                run_dir.join(format!("{prefix}-0000000000000000.ktstr.json")),
1075                serde_json::to_string(&sc).unwrap(),
1076            )
1077            .unwrap();
1078        };
1079        mk("over", "a", 4, 16); // overcommit -> counted
1080        mk("exact", "b", 16, 16); // boundary -> not counted
1081        mk("roomy", "c", 32, 16); // roomy -> not counted
1082        mk("skip", "d", 0, 0); // skip -> not counted
1083        let out = explain_sidecar("run-budget", Some(tmp.path()), true).unwrap();
1084        let parsed: serde_json::Value =
1085            serde_json::from_str(&out).expect("json output must round-trip parse");
1086        assert_eq!(
1087            parsed.get("overcommit_runs").and_then(|v| v.as_u64()),
1088            Some(1),
1089            "only the 4/16 run is overcommitted: {out}",
1090        );
1091    }
1092
1093    /// Mixed populated/None: text output splits "populated optional
1094    /// fields (N)" from "none fields (M)" with the right counts.
1095    #[test]
1096    fn explain_sidecar_text_distinguishes_populated_from_none() {
1097        let tmp = tempfile::tempdir().unwrap();
1098        let run_dir = tmp.path().join("run-mixed");
1099        std::fs::create_dir(&run_dir).unwrap();
1100        let mut sc = crate::test_support::SidecarResult::test_fixture();
1101        sc.payload = Some("ipc_pingpong".to_string());
1102        sc.kernel_version = Some("6.14.2".to_string());
1103        sc.run_source = Some("local".to_string());
1104        std::fs::write(
1105            run_dir.join("t-0000000000000000.ktstr.json"),
1106            serde_json::to_string(&sc).unwrap(),
1107        )
1108        .unwrap();
1109        let out = explain_sidecar("run-mixed", Some(tmp.path()), false).unwrap();
1110        assert!(
1111            out.contains("populated optional fields (3)"),
1112            "must report 3 populated: {out}",
1113        );
1114        assert!(
1115            out.contains("payload"),
1116            "populated `payload` must appear: {out}",
1117        );
1118        assert!(out.contains("none fields (8)"), "must report 8 None: {out}",);
1119    }
1120
1121    /// Per-sidecar text output: `arch:` line surfaces under each
1122    /// sidecar's block, sourced from `host.arch`.
1123    #[test]
1124    fn explain_sidecar_text_renders_arch_line() {
1125        let tmp = tempfile::tempdir().unwrap();
1126        let run_dir = tmp.path().join("run-arch");
1127        std::fs::create_dir(&run_dir).unwrap();
1128        let mut sc = crate::test_support::SidecarResult::test_fixture();
1129        sc.host = Some(crate::host_context::HostContext::test_fixture());
1130        std::fs::write(
1131            run_dir.join("t-0000000000000000.ktstr.json"),
1132            serde_json::to_string(&sc).unwrap(),
1133        )
1134        .unwrap();
1135        let out = explain_sidecar("run-arch", Some(tmp.path()), false).unwrap();
1136        assert!(
1137            out.contains("arch: x86_64"),
1138            "host-populated sidecar must surface `arch: x86_64`: {out}",
1139        );
1140    }
1141
1142    /// Per-sidecar text output: when `host` is `None`, `arch:` line
1143    /// still emits with `-` sentinel for uniform shape.
1144    #[test]
1145    fn explain_sidecar_text_arch_line_falls_back_to_dash_when_host_none() {
1146        let tmp = tempfile::tempdir().unwrap();
1147        let run_dir = tmp.path().join("run-arch-none");
1148        std::fs::create_dir(&run_dir).unwrap();
1149        let sc = crate::test_support::SidecarResult::test_fixture();
1150        std::fs::write(
1151            run_dir.join("t-0000000000000000.ktstr.json"),
1152            serde_json::to_string(&sc).unwrap(),
1153        )
1154        .unwrap();
1155        let out = explain_sidecar("run-arch-none", Some(tmp.path()), false).unwrap();
1156        assert!(
1157            out.contains("arch: -"),
1158            "host-None sidecar must surface `arch: -`: {out}",
1159        );
1160    }
1161
1162    /// Per-sidecar text output: the cpu_budget line renders all four
1163    /// branches — skip (budget 0), overcommit (budget < vcpus), normal
1164    /// (budget >= vcpus), and the malformed budget>0/vcpus=0 case — and
1165    /// the overcommit marker names run-delay as confounded (locks in
1166    /// the kernel-grounded "run_delay is steal-inflated" semantics so a
1167    /// regression that dropped it back to "only partial" is caught).
1168    #[test]
1169    fn explain_sidecar_text_renders_cpu_budget_marker_branches() {
1170        let tmp = tempfile::tempdir().unwrap();
1171        let render = |name: &str, budget: u32, vcpus: u32| -> String {
1172            let run_dir = tmp.path().join(name);
1173            std::fs::create_dir(&run_dir).unwrap();
1174            let mut sc = crate::test_support::SidecarResult::test_fixture();
1175            sc.cpu_budget = budget;
1176            sc.vcpus = vcpus;
1177            std::fs::write(
1178                run_dir.join("t-0000000000000000.ktstr.json"),
1179                serde_json::to_string(&sc).unwrap(),
1180            )
1181            .unwrap();
1182            explain_sidecar(name, Some(tmp.path()), false).unwrap()
1183        };
1184
1185        let skip = render("run-skip", 0, 0);
1186        assert!(
1187            skip.contains("cpu_budget: - (skip; VM not booted)"),
1188            "budget 0 must render the skip sentinel: {skip}",
1189        );
1190        assert!(
1191            !skip.contains("OVERCOMMIT"),
1192            "skip row must not be flagged overcommit: {skip}",
1193        );
1194
1195        let over = render("run-over", 4, 16);
1196        assert!(
1197            over.contains("cpu_budget: 4 / 16 vcpus") && over.contains("[OVERCOMMIT"),
1198            "budget < vcpus must render the raw values AND the OVERCOMMIT marker: {over}",
1199        );
1200        assert!(
1201            over.contains("run-delay"),
1202            "OVERCOMMIT marker must list run-delay as confounded (steal-inflated): {over}",
1203        );
1204
1205        let exact = render("run-exact", 16, 16);
1206        assert!(
1207            exact.contains("cpu_budget: 16 / 16 vcpus") && !exact.contains("OVERCOMMIT"),
1208            "budget == vcpus is the boundary: plain line, no marker: {exact}",
1209        );
1210
1211        let roomy = render("run-roomy", 32, 16);
1212        assert!(
1213            roomy.contains("cpu_budget: 32 / 16 vcpus") && !roomy.contains("OVERCOMMIT"),
1214            "budget > vcpus must render plain, no marker: {roomy}",
1215        );
1216
1217        let malformed = render("run-malformed", 8, 0);
1218        assert!(
1219            malformed.contains("[malformed") && !malformed.contains("OVERCOMMIT"),
1220            "budget>0 with vcpus==0 must render the malformed note, not overcommit: {malformed}",
1221        );
1222    }
1223
1224    /// Per-cgroup CPU placement renders on the text path for any run.
1225    /// A labeled cgroup with a non-empty cpus_used prints a
1226    /// "cgroup <name>: ran on cpus [..]" line; a cgroup that recorded no
1227    /// CPU (empty cpus_used) is skipped.
1228    #[test]
1229    fn explain_sidecar_text_renders_per_cgroup_cpus() {
1230        let tmp = tempfile::tempdir().unwrap();
1231        let run_dir = tmp.path().join("run-cgcpus");
1232        std::fs::create_dir(&run_dir).unwrap();
1233        let mut sc = crate::test_support::SidecarResult::test_fixture();
1234        sc.stats.cgroups = vec![
1235            crate::assert::CgroupStats {
1236                cgroup_name: "cg_a".to_string(),
1237                cpus_used: [0usize, 1].into_iter().collect(),
1238                num_cpus: 2,
1239                ..Default::default()
1240            },
1241            crate::assert::CgroupStats {
1242                cgroup_name: "cg_idle".to_string(),
1243                cpus_used: std::collections::BTreeSet::new(),
1244                ..Default::default()
1245            },
1246        ];
1247        std::fs::write(
1248            run_dir.join("t-0000000000000000.ktstr.json"),
1249            serde_json::to_string(&sc).unwrap(),
1250        )
1251        .unwrap();
1252        let out = explain_sidecar("run-cgcpus", Some(tmp.path()), false).unwrap();
1253        assert!(
1254            out.contains("cgroup cg_a: ran on cpus [0,1]"),
1255            "labeled cgroup must render its cpus_used: {out}",
1256        );
1257        assert!(
1258            !out.contains("cg_idle"),
1259            "a cgroup with empty cpus_used must be skipped: {out}",
1260        );
1261    }
1262
1263    /// Per-sidecar text output: two sidecars in the same run with
1264    /// different None patterns must each get their own block.
1265    #[test]
1266    fn explain_sidecar_text_emits_one_block_per_sidecar() {
1267        let tmp = tempfile::tempdir().unwrap();
1268        let run_dir = tmp.path().join("run-two");
1269        std::fs::create_dir(&run_dir).unwrap();
1270        let mut a = crate::test_support::SidecarResult::test_fixture();
1271        a.test_name = "test_a".to_string();
1272        let mut b = crate::test_support::SidecarResult::test_fixture();
1273        b.test_name = "test_b".to_string();
1274        b.payload = Some("ipc_pingpong".to_string());
1275        std::fs::write(
1276            run_dir.join("a-0000000000000000.ktstr.json"),
1277            serde_json::to_string(&a).unwrap(),
1278        )
1279        .unwrap();
1280        std::fs::write(
1281            run_dir.join("b-0000000000000000.ktstr.json"),
1282            serde_json::to_string(&b).unwrap(),
1283        )
1284        .unwrap();
1285        let out = explain_sidecar("run-two", Some(tmp.path()), false).unwrap();
1286        assert!(out.contains("test: test_a"), "test_a block missing: {out}");
1287        assert!(out.contains("test: test_b"), "test_b block missing: {out}");
1288        assert!(out.contains("walked 2"), "walked count must be 2: {out}");
1289        assert!(out.contains("parsed 2"), "parsed count must be 2: {out}");
1290    }
1291
1292    /// JSON aggregation across multiple sidecars: partial None on a
1293    /// per-field basis surfaces both none_count and some_count.
1294    #[test]
1295    fn explain_sidecar_json_aggregates_partial_none_correctly() {
1296        let tmp = tempfile::tempdir().unwrap();
1297        let run_dir = tmp.path().join("run-partial");
1298        std::fs::create_dir(&run_dir).unwrap();
1299        let a = crate::test_support::SidecarResult::test_fixture();
1300        let mut b = crate::test_support::SidecarResult::test_fixture();
1301        b.payload = Some("ipc_pingpong".to_string());
1302        std::fs::write(
1303            run_dir.join("a-0000000000000000.ktstr.json"),
1304            serde_json::to_string(&a).unwrap(),
1305        )
1306        .unwrap();
1307        std::fs::write(
1308            run_dir.join("b-0000000000000000.ktstr.json"),
1309            serde_json::to_string(&b).unwrap(),
1310        )
1311        .unwrap();
1312        let out = explain_sidecar("run-partial", Some(tmp.path()), true).unwrap();
1313        let parsed: serde_json::Value = serde_json::from_str(&out).unwrap();
1314        let payload = parsed
1315            .get("fields")
1316            .and_then(|f| f.get("payload"))
1317            .expect("payload field must be present");
1318        assert_eq!(payload.get("none_count").and_then(|v| v.as_u64()), Some(1));
1319        assert_eq!(payload.get("some_count").and_then(|v| v.as_u64()), Some(1));
1320        let host = parsed
1321            .get("fields")
1322            .and_then(|f| f.get("host"))
1323            .expect("host field must be present");
1324        assert_eq!(host.get("none_count").and_then(|v| v.as_u64()), Some(2));
1325        assert_eq!(host.get("some_count").and_then(|v| v.as_u64()), Some(0));
1326    }
1327
1328    /// Walker counts both valid and corrupt `.ktstr.json` files.
1329    #[test]
1330    fn explain_sidecar_walks_corrupt_files_into_count() {
1331        let tmp = tempfile::tempdir().unwrap();
1332        let run_dir = tmp.path().join("run-mixed-parse");
1333        std::fs::create_dir(&run_dir).unwrap();
1334        let valid = crate::test_support::SidecarResult::test_fixture();
1335        std::fs::write(
1336            run_dir.join("a-0000000000000000.ktstr.json"),
1337            serde_json::to_string(&valid).unwrap(),
1338        )
1339        .unwrap();
1340        std::fs::write(run_dir.join("b-0000000000000000.ktstr.json"), "garbage{").unwrap();
1341        let out = explain_sidecar("run-mixed-parse", Some(tmp.path()), false).unwrap();
1342        assert!(
1343            out.contains("walked 2"),
1344            "walker must visit both files: {out}"
1345        );
1346        assert!(
1347            out.contains("parsed 1"),
1348            "only the valid file parses: {out}"
1349        );
1350    }
1351
1352    /// Walker recurses one level into subdirectories.
1353    #[test]
1354    fn explain_sidecar_walks_one_level_subdirectory() {
1355        let tmp = tempfile::tempdir().unwrap();
1356        let run_dir = tmp.path().join("run-sub");
1357        let sub = run_dir.join("job-x");
1358        std::fs::create_dir_all(&sub).unwrap();
1359        let sc = crate::test_support::SidecarResult::test_fixture();
1360        std::fs::write(
1361            sub.join("t-0000000000000000.ktstr.json"),
1362            serde_json::to_string(&sc).unwrap(),
1363        )
1364        .unwrap();
1365        let out = explain_sidecar("run-sub", Some(tmp.path()), false).unwrap();
1366        assert!(out.contains("walked 1"), "must walk into job-x: {out}");
1367        assert!(
1368            out.contains("parsed 1"),
1369            "must parse the nested file: {out}"
1370        );
1371    }
1372
1373    /// Walker MUST ignore non-`.ktstr.json` files.
1374    #[test]
1375    fn explain_sidecar_ignores_non_ktstr_json() {
1376        let tmp = tempfile::tempdir().unwrap();
1377        let run_dir = tmp.path().join("run-with-other-json");
1378        std::fs::create_dir(&run_dir).unwrap();
1379        let sc = crate::test_support::SidecarResult::test_fixture();
1380        std::fs::write(
1381            run_dir.join("t-0000000000000000.ktstr.json"),
1382            serde_json::to_string(&sc).unwrap(),
1383        )
1384        .unwrap();
1385        std::fs::write(run_dir.join("metadata.json"), "{}").unwrap();
1386        let out = explain_sidecar("run-with-other-json", Some(tmp.path()), false).unwrap();
1387        assert!(
1388            out.contains("walked 1"),
1389            "non-ktstr JSON must not inflate the walked count: {out}",
1390        );
1391    }
1392
1393    /// JSON output must be a single valid JSON document.
1394    #[test]
1395    fn explain_sidecar_json_is_valid_document() {
1396        let tmp = tempfile::tempdir().unwrap();
1397        let run_dir = tmp.path().join("run-roundtrip");
1398        std::fs::create_dir(&run_dir).unwrap();
1399        let sc = crate::test_support::SidecarResult::test_fixture();
1400        std::fs::write(
1401            run_dir.join("t-0000000000000000.ktstr.json"),
1402            serde_json::to_string(&sc).unwrap(),
1403        )
1404        .unwrap();
1405        let out = explain_sidecar("run-roundtrip", Some(tmp.path()), true).unwrap();
1406        let _: serde_json::Value = serde_json::from_str(&out).expect("output must be valid JSON");
1407    }
1408
1409    /// Partial population: 7 of 11 Options populated; report shows
1410    /// "populated optional fields (7)" + "none fields (4)".
1411    #[test]
1412    fn explain_sidecar_text_handles_partial_population() {
1413        let tmp = tempfile::tempdir().unwrap();
1414        let run_dir = tmp.path().join("run-partial-pop");
1415        std::fs::create_dir(&run_dir).unwrap();
1416        let mut sc = crate::test_support::SidecarResult::test_fixture();
1417        sc.scheduler_commit = Some("aaaa111".to_string());
1418        sc.project_commit = Some("bbbb222".to_string());
1419        sc.payload = Some("payload".to_string());
1420        sc.kernel_version = Some("6.14.2".to_string());
1421        sc.kernel_commit = Some("cccc333".to_string());
1422        sc.cleanup_duration_ms = Some(123);
1423        sc.run_source = Some("local".to_string());
1424        std::fs::write(
1425            run_dir.join("t-0000000000000000.ktstr.json"),
1426            serde_json::to_string(&sc).unwrap(),
1427        )
1428        .unwrap();
1429        let out = explain_sidecar("run-partial-pop", Some(tmp.path()), false).unwrap();
1430        assert!(
1431            out.contains("populated optional fields (7)"),
1432            "7 of 11 Options populated must be reflected in the count: {out}",
1433        );
1434        assert!(
1435            out.contains("none fields (4)"),
1436            "4 of 11 Options remain None — must report (4): {out}",
1437        );
1438    }
1439
1440    /// Classification labels are stable strings.
1441    #[test]
1442    fn none_classification_as_str_returns_stable_tokens() {
1443        assert_eq!(NoneClassification::Expected.as_str(), "expected");
1444        assert_eq!(NoneClassification::Actionable.as_str(), "actionable");
1445    }
1446
1447    /// `kernel_commit` rustdoc enumerates 5 None causes; catalog
1448    /// must mirror that.
1449    #[test]
1450    fn kernel_commit_catalog_lists_five_causes() {
1451        let entry = SIDECAR_NONE_CATALOG
1452            .iter()
1453            .find(|e| e.field == "kernel_commit")
1454            .expect("kernel_commit must be in the catalog");
1455        assert_eq!(
1456            entry.causes.len(),
1457            5,
1458            "kernel_commit rustdoc enumerates 5 None causes; catalog \
1459             must mirror that",
1460        );
1461    }
1462
1463    /// Schema version stamp is "2".
1464    #[test]
1465    fn explain_sidecar_schema_version_constant_is_two() {
1466        assert_eq!(EXPLAIN_SIDECAR_SCHEMA_VERSION, "2");
1467    }
1468
1469    /// JSON output stamps `_schema_version` at top level.
1470    #[test]
1471    fn explain_sidecar_json_includes_schema_version() {
1472        let tmp = tempfile::tempdir().unwrap();
1473        let run_dir = tmp.path().join("run-schema");
1474        std::fs::create_dir(&run_dir).unwrap();
1475        let sc = crate::test_support::SidecarResult::test_fixture();
1476        std::fs::write(
1477            run_dir.join("t-0000000000000000.ktstr.json"),
1478            serde_json::to_string(&sc).unwrap(),
1479        )
1480        .unwrap();
1481        let out = explain_sidecar("run-schema", Some(tmp.path()), true).unwrap();
1482        let parsed: serde_json::Value =
1483            serde_json::from_str(&out).expect("json output must round-trip parse");
1484        assert_eq!(
1485            parsed.get("_schema_version").and_then(|v| v.as_str()),
1486            Some(EXPLAIN_SIDECAR_SCHEMA_VERSION),
1487            "JSON output must stamp _schema_version: {out}",
1488        );
1489    }
1490
1491    /// JSON `_walk.errors` is empty array on the all-clean path.
1492    #[test]
1493    fn explain_sidecar_json_walk_errors_empty_when_all_valid() {
1494        let tmp = tempfile::tempdir().unwrap();
1495        let run_dir = tmp.path().join("run-clean-walk");
1496        std::fs::create_dir(&run_dir).unwrap();
1497        let sc = crate::test_support::SidecarResult::test_fixture();
1498        std::fs::write(
1499            run_dir.join("t-0000000000000000.ktstr.json"),
1500            serde_json::to_string(&sc).unwrap(),
1501        )
1502        .unwrap();
1503        let out = explain_sidecar("run-clean-walk", Some(tmp.path()), true).unwrap();
1504        let parsed: serde_json::Value =
1505            serde_json::from_str(&out).expect("json output must round-trip parse");
1506        let errors = parsed
1507            .get("_walk")
1508            .and_then(|w| w.get("errors"))
1509            .and_then(|e| e.as_array())
1510            .expect("_walk.errors must be a JSON array");
1511        assert!(
1512            errors.is_empty(),
1513            "no parse failures — _walk.errors must be empty: {out}",
1514        );
1515    }
1516
1517    /// JSON `_walk.errors` lists `{path, error, enriched_message}`
1518    /// triples for every parse failure. enriched_message is null
1519    /// for generic parse failures.
1520    #[test]
1521    fn explain_sidecar_json_walk_errors_lists_corrupt_files() {
1522        let tmp = tempfile::tempdir().unwrap();
1523        let run_dir = tmp.path().join("run-mixed-errs-json");
1524        std::fs::create_dir(&run_dir).unwrap();
1525        let valid = crate::test_support::SidecarResult::test_fixture();
1526        std::fs::write(
1527            run_dir.join("a-0000000000000000.ktstr.json"),
1528            serde_json::to_string(&valid).unwrap(),
1529        )
1530        .unwrap();
1531        let corrupt_path = run_dir.join("b-0000000000000000.ktstr.json");
1532        std::fs::write(&corrupt_path, "garbage{").unwrap();
1533        let out = explain_sidecar("run-mixed-errs-json", Some(tmp.path()), true).unwrap();
1534        let parsed: serde_json::Value =
1535            serde_json::from_str(&out).expect("json output must round-trip parse");
1536        let walk = parsed.get("_walk").expect("must have _walk key");
1537        assert_eq!(walk.get("walked").and_then(|v| v.as_u64()), Some(2));
1538        assert_eq!(walk.get("valid").and_then(|v| v.as_u64()), Some(1));
1539        let errors = walk
1540            .get("errors")
1541            .and_then(|e| e.as_array())
1542            .expect("_walk.errors must be a JSON array");
1543        assert_eq!(errors.len(), 1);
1544        let entry = &errors[0];
1545        let path = entry.get("path").and_then(|v| v.as_str()).expect("path");
1546        assert_eq!(path, corrupt_path.display().to_string());
1547        let error = entry.get("error").and_then(|v| v.as_str()).expect("error");
1548        assert!(!error.is_empty());
1549        let enriched = entry
1550            .get("enriched_message")
1551            .expect("each error entry must carry an enriched_message key");
1552        assert!(
1553            enriched.is_null(),
1554            "generic parse failure has no schema-drift remediation; \
1555             enriched_message must be JSON null: {enriched:?}",
1556        );
1557    }
1558
1559    /// `enriched_parse_error_message` returns operator-facing prose
1560    /// for the host-missing schema-drift pattern.
1561    #[test]
1562    fn enriched_parse_error_message_returns_prose_for_host_missing_pattern() {
1563        let raw = "missing field `host` at line 1 column 100";
1564        let path = std::path::Path::new("/tmp/example-run/sidecar.ktstr.json");
1565        let enriched = crate::test_support::enriched_parse_error_message_for_test(path, raw)
1566            .expect("host-missing pattern must produce enrichment prose");
1567        assert!(
1568            enriched.contains("host"),
1569            "enrichment must mention host: {enriched}"
1570        );
1571        assert!(
1572            enriched.contains("re-run"),
1573            "enrichment must point at the re-run remediation: {enriched}",
1574        );
1575        assert!(
1576            enriched.contains("disposable-sidecar"),
1577            "enrichment must reference the pre-1.0 disposable-sidecar \
1578             policy: {enriched}",
1579        );
1580        let raw_generic = "expected ident at line 1 column 2";
1581        let no_enrichment =
1582            crate::test_support::enriched_parse_error_message_for_test(path, raw_generic);
1583        assert!(
1584            no_enrichment.is_none(),
1585            "generic parse error must produce no enrichment"
1586        );
1587    }
1588
1589    /// All-corrupt run renders structured JSON with `valid: 0`,
1590    /// every parse failure under `_walk.errors`, and every field's
1591    /// counts at zero.
1592    #[test]
1593    fn explain_sidecar_all_corrupt_json_renders_structured_diagnostic() {
1594        let tmp = tempfile::tempdir().unwrap();
1595        let run_dir = tmp.path().join("run-all-corrupt-json");
1596        std::fs::create_dir(&run_dir).unwrap();
1597        std::fs::write(run_dir.join("a-0000000000000000.ktstr.json"), "{").unwrap();
1598        std::fs::write(run_dir.join("b-0000000000000000.ktstr.json"), "garbage{").unwrap();
1599        let out = explain_sidecar("run-all-corrupt-json", Some(tmp.path()), true)
1600            .expect("all-corrupt JSON must render, not bail");
1601        let parsed: serde_json::Value =
1602            serde_json::from_str(&out).expect("json output must round-trip parse");
1603        let walk = parsed.get("_walk").expect("must have _walk key");
1604        assert_eq!(walk.get("walked").and_then(|v| v.as_u64()), Some(2));
1605        assert_eq!(
1606            walk.get("valid").and_then(|v| v.as_u64()),
1607            Some(0),
1608            "all-corrupt run must report valid=0: {out}",
1609        );
1610        let errors = walk
1611            .get("errors")
1612            .and_then(|e| e.as_array())
1613            .expect("_walk.errors must be present");
1614        assert_eq!(errors.len(), 2);
1615        let fields = parsed
1616            .get("fields")
1617            .and_then(|f| f.as_object())
1618            .expect("fields must be present");
1619        for entry in SIDECAR_NONE_CATALOG {
1620            let f = fields
1621                .get(entry.field)
1622                .unwrap_or_else(|| panic!("field {} must be present", entry.field));
1623            assert_eq!(f.get("none_count").and_then(|v| v.as_u64()), Some(0));
1624            assert_eq!(f.get("some_count").and_then(|v| v.as_u64()), Some(0));
1625        }
1626        assert_eq!(
1627            parsed.get("_schema_version").and_then(|v| v.as_str()),
1628            Some(EXPLAIN_SIDECAR_SCHEMA_VERSION),
1629        );
1630    }
1631
1632    /// Generic parse failures emit `error:` line in corrupt block
1633    /// but NOT `enriched:` line.
1634    #[test]
1635    fn explain_sidecar_text_omits_enriched_line_for_generic_failure() {
1636        let (tmp, run_dir) = make_test_run("run-generic-fail-text");
1637        write_corrupt_sidecar(&run_dir, "a-0000000000000000", "garbage{");
1638        let out = explain_sidecar("run-generic-fail-text", Some(tmp.path()), false).unwrap();
1639        assert!(
1640            out.contains("corrupt sidecars (1):"),
1641            "generic parse failure must surface in the corrupt block: {out}",
1642        );
1643        assert!(
1644            out.contains("    error:"),
1645            "generic parse failure must emit raw `error:` line: {out}",
1646        );
1647        assert!(
1648            !out.contains("    enriched:"),
1649            "generic parse failure has no enrichment — `enriched:` \
1650             line must NOT appear: {out}",
1651        );
1652    }
1653
1654    /// Text output appends trailing `corrupt sidecars (N):` block
1655    /// with positional invariant: header → per-sidecar blocks →
1656    /// trailing corrupt block.
1657    #[test]
1658    fn explain_sidecar_text_appends_corrupt_sidecars_block() {
1659        let tmp = tempfile::tempdir().unwrap();
1660        let run_dir = tmp.path().join("run-text-corrupt");
1661        std::fs::create_dir(&run_dir).unwrap();
1662        let mut valid = crate::test_support::SidecarResult::test_fixture();
1663        valid.test_name = "valid_test".to_string();
1664        std::fs::write(
1665            run_dir.join("a-0000000000000000.ktstr.json"),
1666            serde_json::to_string(&valid).unwrap(),
1667        )
1668        .unwrap();
1669        let corrupt_path = run_dir.join("b-0000000000000000.ktstr.json");
1670        std::fs::write(&corrupt_path, "garbage{").unwrap();
1671        let out = explain_sidecar("run-text-corrupt", Some(tmp.path()), false).unwrap();
1672        assert!(out.contains("corrupt sidecars (1):"));
1673        assert!(out.contains(&corrupt_path.display().to_string()));
1674        assert!(out.contains("    error:"));
1675        let header_pos = out.find("walked 2 sidecar file(s)").unwrap();
1676        let test_block_pos = out.find("test: valid_test").unwrap();
1677        let corrupt_pos = out.find("corrupt sidecars (1):").unwrap();
1678        assert!(
1679            header_pos < test_block_pos,
1680            "header must precede per-sidecar blocks"
1681        );
1682        assert!(
1683            test_block_pos < corrupt_pos,
1684            "per-sidecar blocks must precede trailing corrupt block"
1685        );
1686    }
1687
1688    /// Corrupt-sidecars block is suppressed when zero parse failures.
1689    #[test]
1690    fn explain_sidecar_text_omits_corrupt_block_when_no_errors() {
1691        let (tmp, run_dir) = make_test_run("run-text-clean");
1692        let sc = crate::test_support::SidecarResult::test_fixture();
1693        write_sidecar(&run_dir, "t-0000000000000000", &sc);
1694        let out = explain_sidecar("run-text-clean", Some(tmp.path()), false).unwrap();
1695        assert!(
1696            !out.contains("corrupt sidecars"),
1697            "no parse failures — corrupt-sidecars block must be \
1698             suppressed: {out}",
1699        );
1700    }
1701
1702    /// Vec fields on `SidecarResult` (metrics, stimulus_events, etc.)
1703    /// are hard-required, NOT Option<T>. Catalog must NEVER name a
1704    /// Vec field as None.
1705    #[test]
1706    fn explain_sidecar_does_not_flag_empty_vec_fields_as_none() {
1707        let tmp = tempfile::tempdir().unwrap();
1708        let run_dir = tmp.path().join("run-vecs");
1709        std::fs::create_dir(&run_dir).unwrap();
1710        let mut sc = crate::test_support::SidecarResult::test_fixture();
1711        sc.scheduler_commit = Some("aaaa111".to_string());
1712        sc.project_commit = Some("bbbb222".to_string());
1713        sc.payload = Some("payload".to_string());
1714        sc.kernel_version = Some("6.14.2".to_string());
1715        sc.kernel_commit = Some("cccc333".to_string());
1716        sc.cleanup_duration_ms = Some(123);
1717        sc.run_source = Some("local".to_string());
1718        sc.resolve_source = Some("auto_built".to_string());
1719        sc.monitor = Some(crate::monitor::MonitorSummary::default());
1720        sc.kvm_stats = Some(crate::vmm::KvmStatsTotals::default());
1721        sc.host = Some(crate::host_context::HostContext::test_fixture());
1722        std::fs::write(
1723            run_dir.join("t-0000000000000000.ktstr.json"),
1724            serde_json::to_string(&sc).unwrap(),
1725        )
1726        .unwrap();
1727        let out = explain_sidecar("run-vecs", Some(tmp.path()), false).unwrap();
1728        assert!(
1729            out.contains("none fields: <all populated>"),
1730            "all Options populated — must report no None fields: {out}",
1731        );
1732        for vec_field in SIDECAR_VEC_FIELDS {
1733            assert!(
1734                !out.contains(vec_field),
1735                "Vec field '{vec_field}' is hard-required (not Option) and \
1736                 must never appear in explain-sidecar output: {out}",
1737            );
1738        }
1739    }
1740
1741    /// Pre-rename archive: on-disk `source` key → `run_source` None
1742    /// via serde tolerate-absence; diagnostic must surface with
1743    /// "rename" cause prose.
1744    #[test]
1745    fn explain_sidecar_handles_old_source_key_sidecar() {
1746        let tmp = tempfile::tempdir().unwrap();
1747        let run_dir = tmp.path().join("run-old-source-key");
1748        std::fs::create_dir(&run_dir).unwrap();
1749        let sc = crate::test_support::SidecarResult::test_fixture();
1750        let mut value = serde_json::to_value(&sc).expect("fixture must serialize");
1751        let obj = value.as_object_mut().expect("fixture is an Object");
1752        obj.remove("run_source");
1753        obj.insert(
1754            "source".to_string(),
1755            serde_json::Value::String("archive".to_string()),
1756        );
1757        std::fs::write(
1758            run_dir.join("t-0000000000000000.ktstr.json"),
1759            serde_json::to_string(&value).unwrap(),
1760        )
1761        .unwrap();
1762        let out = explain_sidecar("run-old-source-key", Some(tmp.path()), false).unwrap();
1763        assert!(
1764            out.contains("run_source"),
1765            "explain-sidecar must surface run_source as None for \
1766             pre-rename archive: {out}",
1767        );
1768        assert!(
1769            out.contains("rename"),
1770            "run_source None cause must mention the rename: {out}",
1771        );
1772    }
1773
1774    /// `dir=None` defaults to `runs_root` derived from `CARGO_TARGET_DIR`.
1775    #[test]
1776    fn explain_sidecar_resolves_dir_default_to_runs_root() {
1777        use crate::test_support::test_helpers::{EnvVarGuard, lock_env};
1778        let _lock = lock_env();
1779        let tmp = tempfile::tempdir().unwrap();
1780        let _env_target = EnvVarGuard::set("CARGO_TARGET_DIR", tmp.path());
1781        // Clear the orchestrator's KTSTR_RUNS_ROOT so runs_root() resolves
1782        // via CARGO_TARGET_DIR (the default this test pins).
1783        let _env_runs_root = EnvVarGuard::remove(crate::KTSTR_RUNS_ROOT_ENV);
1784        let _env_sidecar = EnvVarGuard::remove(crate::KTSTR_SIDECAR_DIR_ENV);
1785        let runs_root = tmp.path().join("ktstr");
1786        let run_dir = runs_root.join("run-default-root");
1787        std::fs::create_dir_all(&run_dir).unwrap();
1788        let sc = crate::test_support::SidecarResult::test_fixture();
1789        std::fs::write(
1790            run_dir.join("t-0000000000000000.ktstr.json"),
1791            serde_json::to_string(&sc).unwrap(),
1792        )
1793        .unwrap();
1794        let out = explain_sidecar("run-default-root", None, false)
1795            .expect("dir=None must resolve via runs_root() and succeed");
1796        assert!(out.contains("walked 1"));
1797        assert!(out.contains("parsed 1 valid"));
1798    }
1799
1800    /// 0-byte `.ktstr.json` is a parse failure (serde_json rejects
1801    /// empty input); walker counts it in walked + emits parse error.
1802    #[test]
1803    fn explain_sidecar_handles_zero_byte_file() {
1804        let tmp = tempfile::tempdir().unwrap();
1805        let run_dir = tmp.path().join("run-zero-byte");
1806        std::fs::create_dir(&run_dir).unwrap();
1807        let valid = crate::test_support::SidecarResult::test_fixture();
1808        std::fs::write(
1809            run_dir.join("a-0000000000000000.ktstr.json"),
1810            serde_json::to_string(&valid).unwrap(),
1811        )
1812        .unwrap();
1813        std::fs::write(run_dir.join("b-0000000000000000.ktstr.json"), "").unwrap();
1814        let out = explain_sidecar("run-zero-byte", Some(tmp.path()), false).unwrap();
1815        assert!(out.contains("walked 2"));
1816        assert!(out.contains("parsed 1"));
1817        assert!(
1818            out.contains("corrupt sidecars (1):"),
1819            "zero-byte file must surface in the corrupt-sidecars \
1820             block as a parse failure, not be silently dropped: {out}",
1821        );
1822    }
1823
1824    /// `SidecarResult` does NOT set `deny_unknown_fields`, so a
1825    /// future-schema sidecar must still deserialize cleanly.
1826    #[test]
1827    fn explain_sidecar_tolerates_unknown_extra_fields() {
1828        let tmp = tempfile::tempdir().unwrap();
1829        let run_dir = tmp.path().join("run-extra-fields");
1830        std::fs::create_dir(&run_dir).unwrap();
1831        let sc = crate::test_support::SidecarResult::test_fixture();
1832        let mut value = serde_json::to_value(&sc).expect("fixture must serialize");
1833        let obj = value.as_object_mut().expect("fixture is an Object");
1834        obj.insert(
1835            "future_field".to_string(),
1836            serde_json::Value::String("hypothetical".to_string()),
1837        );
1838        std::fs::write(
1839            run_dir.join("t-0000000000000000.ktstr.json"),
1840            serde_json::to_string(&value).unwrap(),
1841        )
1842        .unwrap();
1843        let out = explain_sidecar("run-extra-fields", Some(tmp.path()), false).unwrap();
1844        assert!(out.contains("walked 1"));
1845        assert!(out.contains("parsed 1 valid"));
1846        assert!(out.contains("test: t"));
1847    }
1848
1849    /// Per-field classification mapping is operator-visible as a
1850    /// stable tag. HashMap dedup guard catches catalog duplicate
1851    /// field names.
1852    #[test]
1853    fn explain_sidecar_classification_accuracy_per_field() {
1854        let by_field: std::collections::HashMap<&'static str, NoneClassification> =
1855            SIDECAR_NONE_CATALOG
1856                .iter()
1857                .map(|e| (e.field, e.classification))
1858                .collect();
1859        assert_eq!(
1860            by_field.len(),
1861            SIDECAR_NONE_CATALOG.len(),
1862            "SIDECAR_NONE_CATALOG must have unique `field` values \
1863             — HashMap collected {} entries, catalog has {}.",
1864            by_field.len(),
1865            SIDECAR_NONE_CATALOG.len(),
1866        );
1867        let expected_pairs: &[(&str, NoneClassification)] = &[
1868            ("scheduler_commit", NoneClassification::Expected),
1869            ("payload", NoneClassification::Expected),
1870            ("project_commit", NoneClassification::Actionable),
1871            ("monitor", NoneClassification::Actionable),
1872            ("kvm_stats", NoneClassification::Actionable),
1873            ("kernel_version", NoneClassification::Actionable),
1874            ("kernel_commit", NoneClassification::Actionable),
1875            ("host", NoneClassification::Actionable),
1876            ("cleanup_duration_ms", NoneClassification::Actionable),
1877            ("run_source", NoneClassification::Actionable),
1878            ("resolve_source", NoneClassification::Actionable),
1879        ];
1880        assert_eq!(
1881            expected_pairs.len(),
1882            SIDECAR_NONE_CATALOG.len(),
1883            "every catalog entry must have a pinned classification",
1884        );
1885        for (field, expected) in expected_pairs {
1886            let actual = by_field
1887                .get(field)
1888                .copied()
1889                .unwrap_or_else(|| panic!("catalog must contain field {field}"));
1890            assert_eq!(
1891                actual, *expected,
1892                "field {field}: classification mismatch — expected \
1893                 {expected:?}, got {actual:?}",
1894            );
1895        }
1896    }
1897
1898    /// IO failures (sidecar predicate matched but read_to_string
1899    /// failed) surface in the trailing `io errors` text block AND
1900    /// `_walk.io_errors` JSON array. Trigger via a directory named
1901    /// like a sidecar (read returns EISDIR).
1902    #[test]
1903    fn explain_sidecar_io_errors_surface_in_text_block_and_json() {
1904        let tmp = tempfile::tempdir().unwrap();
1905        let run_dir = tmp.path().join("run-io-err");
1906        std::fs::create_dir(&run_dir).unwrap();
1907        let sub = run_dir.join("sub");
1908        std::fs::create_dir(&sub).unwrap();
1909        std::fs::create_dir(sub.join("eisdir.ktstr.json")).unwrap();
1910
1911        let text_out = explain_sidecar("run-io-err", Some(tmp.path()), false).unwrap();
1912        assert!(text_out.contains("walked 1"));
1913        assert!(text_out.contains("parsed 0 valid"));
1914        assert!(text_out.contains("io errors (1):"));
1915        assert!(text_out.contains("eisdir.ktstr.json"));
1916        assert!(!text_out.contains("corrupt sidecars"));
1917
1918        let json_out = explain_sidecar("run-io-err", Some(tmp.path()), true).unwrap();
1919        let parsed: serde_json::Value =
1920            serde_json::from_str(&json_out).expect("json output must round-trip parse");
1921        let walk = parsed.get("_walk").expect("must have _walk");
1922        assert_eq!(walk.get("walked").and_then(|v| v.as_u64()), Some(1));
1923        assert_eq!(walk.get("valid").and_then(|v| v.as_u64()), Some(0));
1924        let parse_errs = walk.get("errors").and_then(|e| e.as_array()).unwrap();
1925        assert!(parse_errs.is_empty());
1926        let io_errs = walk.get("io_errors").and_then(|e| e.as_array()).unwrap();
1927        assert_eq!(io_errs.len(), 1);
1928        let entry = &io_errs[0];
1929        let path = entry.get("path").and_then(|v| v.as_str()).unwrap();
1930        assert!(path.ends_with("eisdir.ktstr.json"));
1931        let error = entry.get("error").and_then(|v| v.as_str()).unwrap();
1932        assert!(!error.is_empty());
1933        assert!(
1934            entry.get("enriched_message").is_none(),
1935            "io-error entries must NOT have enriched_message: {json_out}",
1936        );
1937    }
1938
1939    /// `walked == valid + errors.len() + io_errors.len()` — every
1940    /// predicate-matching file lands in exactly one bucket.
1941    #[test]
1942    fn explain_sidecar_walk_counts_reconcile_across_outcomes() {
1943        let tmp = tempfile::tempdir().unwrap();
1944        let run_dir = tmp.path().join("run-mixed-outcomes");
1945        std::fs::create_dir(&run_dir).unwrap();
1946        let valid = crate::test_support::SidecarResult::test_fixture();
1947        std::fs::write(
1948            run_dir.join("a-0000000000000000.ktstr.json"),
1949            serde_json::to_string(&valid).unwrap(),
1950        )
1951        .unwrap();
1952        std::fs::write(run_dir.join("b-0000000000000000.ktstr.json"), "garbage{").unwrap();
1953        let sub = run_dir.join("sub");
1954        std::fs::create_dir(&sub).unwrap();
1955        std::fs::create_dir(sub.join("c-0000000000000000.ktstr.json")).unwrap();
1956
1957        let json_out = explain_sidecar("run-mixed-outcomes", Some(tmp.path()), true).unwrap();
1958        let parsed: serde_json::Value = serde_json::from_str(&json_out).unwrap();
1959        let walk = parsed.get("_walk").unwrap();
1960        let walked = walk.get("walked").and_then(|v| v.as_u64()).unwrap();
1961        let valid_n = walk.get("valid").and_then(|v| v.as_u64()).unwrap();
1962        let parse_errs = walk.get("errors").and_then(|e| e.as_array()).unwrap().len() as u64;
1963        let io_errs = walk
1964            .get("io_errors")
1965            .and_then(|e| e.as_array())
1966            .unwrap()
1967            .len() as u64;
1968        assert_eq!(
1969            walked,
1970            valid_n + parse_errs + io_errs,
1971            "walked must equal valid + errors + io_errors. \
1972             walked={walked}, valid={valid_n}, errors={parse_errs}, \
1973             io_errors={io_errs}",
1974        );
1975        assert_eq!(walked, 3);
1976        assert_eq!(valid_n, 1);
1977        assert_eq!(parse_errs, 1);
1978        assert_eq!(io_errs, 1);
1979    }
1980
1981    /// `_walk.io_errors` is empty array on the all-clean happy path.
1982    #[test]
1983    fn explain_sidecar_json_walk_io_errors_empty_when_no_io_failures() {
1984        let tmp = tempfile::tempdir().unwrap();
1985        let run_dir = tmp.path().join("run-clean-io");
1986        std::fs::create_dir(&run_dir).unwrap();
1987        let sc = crate::test_support::SidecarResult::test_fixture();
1988        std::fs::write(
1989            run_dir.join("t-0000000000000000.ktstr.json"),
1990            serde_json::to_string(&sc).unwrap(),
1991        )
1992        .unwrap();
1993        let out = explain_sidecar("run-clean-io", Some(tmp.path()), true).unwrap();
1994        let parsed: serde_json::Value = serde_json::from_str(&out).unwrap();
1995        let io_errs = parsed
1996            .get("_walk")
1997            .and_then(|w| w.get("io_errors"))
1998            .and_then(|e| e.as_array())
1999            .unwrap();
2000        assert!(io_errs.is_empty());
2001    }
2002
2003    /// E2E renderer test: synthetic [`WalkStats`] with one enriched
2004    /// parse error renders both `error:` and `enriched:` lines in
2005    /// the corrupt block, in that order.
2006    #[test]
2007    fn explain_sidecar_text_e2e_enrichment_renders_in_corrupt_block() {
2008        let parse_err = crate::test_support::SidecarParseError {
2009            path: std::path::PathBuf::from("/tmp/example-run/sidecar.ktstr.json"),
2010            raw_error: "missing field `host` at line 1 column 100".to_string(),
2011            enriched_message: Some(
2012                "ktstr_test: skipping /tmp/example-run/sidecar.ktstr.json: \
2013                 missing field `host` ... — re-run the test"
2014                    .to_string(),
2015            ),
2016        };
2017        let walk = WalkStats {
2018            walked: 1,
2019            valid: 0,
2020            errors: vec![parse_err],
2021            io_errors: Vec::new(),
2022        };
2023        let out = render_explain_sidecar_text(&[], &walk);
2024        assert!(out.contains("corrupt sidecars (1):"));
2025        assert!(out.contains("    error: missing field `host`"));
2026        assert!(out.contains("    enriched: "));
2027        let error_pos = out.find("    error: ").unwrap();
2028        let enriched_pos = out.find("    enriched: ").unwrap();
2029        assert!(
2030            error_pos < enriched_pos,
2031            "raw `error:` line must precede `enriched:` line",
2032        );
2033    }
2034
2035    /// JSON-channel mirror: synthetic [`WalkStats`] with one enriched
2036    /// parse error renders enriched_message as JSON string (not null).
2037    #[test]
2038    fn explain_sidecar_json_e2e_enrichment_renders_in_walk_errors() {
2039        let prose = "ktstr_test: skipping path: missing field `host` \
2040                     — re-run the test to regenerate";
2041        let parse_err = crate::test_support::SidecarParseError {
2042            path: std::path::PathBuf::from("/tmp/example-run/sidecar.ktstr.json"),
2043            raw_error: "missing field `host` at line 1 column 100".to_string(),
2044            enriched_message: Some(prose.to_string()),
2045        };
2046        let walk = WalkStats {
2047            walked: 1,
2048            valid: 0,
2049            errors: vec![parse_err],
2050            io_errors: Vec::new(),
2051        };
2052        let out = render_explain_sidecar_json(&[], &walk);
2053        let parsed: serde_json::Value = serde_json::from_str(&out).unwrap();
2054        let errors = parsed
2055            .get("_walk")
2056            .and_then(|w| w.get("errors"))
2057            .and_then(|e| e.as_array())
2058            .unwrap();
2059        assert_eq!(errors.len(), 1);
2060        let entry = &errors[0];
2061        let enriched = entry
2062            .get("enriched_message")
2063            .and_then(|v| v.as_str())
2064            .expect("enriched_message must be a JSON string");
2065        assert_eq!(enriched, prose);
2066        let raw = entry.get("error").and_then(|v| v.as_str()).unwrap();
2067        assert!(raw.contains("missing field"));
2068    }
2069
2070    /// `--run` with `..` segments must bail before path resolution.
2071    #[test]
2072    fn explain_sidecar_rejects_parent_dir_traversal_in_run() {
2073        let tmp = tempfile::tempdir().unwrap();
2074        for traversal in ["../escape", "subdir/../../escape"] {
2075            let err = explain_sidecar(traversal, Some(tmp.path()), false)
2076                .expect_err("path-traversal `..` in --run must be rejected");
2077            let msg = format!("{err:#}");
2078            assert!(
2079                msg.contains("path-traversal"),
2080                "rejection message must name the cause for {traversal}: \
2081                 {msg}",
2082            );
2083            assert!(msg.contains(traversal));
2084        }
2085    }
2086
2087    /// Absolute paths in `--run` must bail.
2088    #[test]
2089    fn explain_sidecar_rejects_absolute_path_in_run() {
2090        let tmp = tempfile::tempdir().unwrap();
2091        let err = explain_sidecar("/etc/passwd", Some(tmp.path()), false)
2092            .expect_err("absolute path in --run must be rejected");
2093        let msg = format!("{err:#}");
2094        assert!(msg.contains("path-traversal"));
2095    }
2096
2097    /// Empty `--run` must bail.
2098    #[test]
2099    fn explain_sidecar_rejects_empty_run() {
2100        let tmp = tempfile::tempdir().unwrap();
2101        let err =
2102            explain_sidecar("", Some(tmp.path()), false).expect_err("empty --run must be rejected");
2103        let msg = format!("{err:#}");
2104        assert!(msg.contains("must not be empty"));
2105    }
2106
2107    /// `--run .` must bail (CurDir aliases pool root).
2108    #[test]
2109    fn explain_sidecar_rejects_curdir_run() {
2110        let tmp = tempfile::tempdir().unwrap();
2111        let err =
2112            explain_sidecar(".", Some(tmp.path()), false).expect_err("`.` --run must be rejected");
2113        let msg = format!("{err:#}");
2114        assert!(msg.contains("path-traversal"));
2115    }
2116
2117    /// Bare run keys with Normal-only components pass the traversal
2118    /// validator and reach the not-found gate.
2119    #[test]
2120    fn explain_sidecar_accepts_bare_run_key_after_traversal_check() {
2121        let tmp = tempfile::tempdir().unwrap();
2122        let err = explain_sidecar("6.14-abc1234", Some(tmp.path()), false)
2123            .expect_err("non-existent run must surface the not-found error");
2124        let msg = format!("{err:#}");
2125        assert!(msg.contains("not found"));
2126        assert!(!msg.contains("path-traversal"));
2127    }
2128}