ktstr/cli/stats_cmds/
dispatch.rs

//! `stats` subcommand dispatch: thin wrappers over the
//! [`crate::stats`] surface plus host-context render entry points.
//!
//! Holds [`print_stats_report`], [`list_runs`], [`list_metrics`],
//! [`list_values`], [`compare_partitions`],
//! [`compare_partitions_noise`], [`show_host`], [`show_run_host`],
//! [`show_thresholds`] and the per-test / per-run-key fuzzy-match
//! helpers ([`suggest_closest_test_name`],
//! [`suggest_closest_run_key`]).
9
10use std::path::Path;
11
12use anyhow::{Result, anyhow, bail};
13
14use crate::stats::{ComparisonPolicy, RowFilter};
15
16/// Read sidecar JSON files and return the gauntlet analysis report.
17///
18/// Source directory:
19/// - `KTSTR_SIDECAR_DIR` if set, else
20/// - the most recently modified subdirectory under
21///   `{CARGO_TARGET_DIR or "target"}/ktstr/`.
22///
23/// `cargo ktstr stats` doesn't itself run a kernel, so it can't
24/// reconstruct the `{kernel}-{project_commit}` key the test process
25/// used; the mtime fallback mirrors "show me the report from my
26/// last test run."
27///
28/// Returns `None` with a warning on stderr when no sidecars are found.
29/// This is not an error -- regular test runs that skip gauntlet tests
30/// produce no sidecar files.
31pub fn print_stats_report() -> Option<String> {
32    let dir = match std::env::var(crate::KTSTR_SIDECAR_DIR_ENV) {
33        Ok(d) if !d.is_empty() => Some(std::path::PathBuf::from(d)),
34        _ => crate::test_support::newest_run_dir(),
35    };
36    let report = dir
37        .as_deref()
38        .map(|d| crate::test_support::analyze_sidecars(Some(d)))
39        .filter(|r| !r.is_empty());
40    if report.is_none() {
41        eprintln!("cargo ktstr: no sidecar data found (skipped)");
42    }
43    report
44}
45
46/// List test runs under `{CARGO_TARGET_DIR or "target"}/ktstr/`.
47pub fn list_runs() -> Result<()> {
48    crate::stats::list_runs()
49}
50
51/// Render the metric registry for `cargo ktstr stats list-metrics`.
52///
53/// Thin wrapper over `crate::stats::list_metrics` — exposed through
54/// `cli::` to match the `list_runs` / `compare_partitions` / `show_host`
55/// convention where every stats-subcommand dispatch arm lands on a
56/// `cli::*` helper before reaching the private `stats` module. The
57/// returned `String` is printed verbatim by the dispatch site.
58pub fn list_metrics(json: bool) -> Result<String> {
59    crate::stats::list_metrics(json)
60}
61
62/// Render the distinct-value catalogue for the sidecar pool, for
63/// `cargo ktstr stats list-values`.
64///
65/// Thin wrapper over `crate::stats::list_values` — exposed
66/// through `cli::` for the same surface-stability reason as
67/// [`list_metrics`]. The returned `String` is printed verbatim by
68/// the dispatch site.
69pub fn list_values(json: bool, dir: Option<&Path>) -> Result<String> {
70    crate::stats::list_values(json, dir)
71}
72
73/// Compare two filter-defined partitions of the sidecar pool and
74/// report regressions across slicing dimensions. See
75/// `crate::stats::compare_partitions` for the full contract.
76pub fn compare_partitions(
77    filter_a: &RowFilter,
78    filter_b: &RowFilter,
79    filter: Option<&str>,
80    policy: &ComparisonPolicy,
81    dir: Option<&Path>,
82    gate: &crate::stats::GateOptions,
83) -> Result<i32> {
84    crate::stats::compare_partitions(filter_a, filter_b, filter, policy, dir, gate)
85}
86
87/// Noise-adjusted variant of [`compare_partitions`]: keeps every per-run row and
88/// gates a confident regression on the two sides being SEPARATED (a two-sided
89/// Welch t-test, or fully disjoint `[min, max]` bands) AND the mean delta being
90/// MATERIAL (the registry `default_abs` + `default_rel` dual-gate), instead of a
91/// fixed single-run threshold. When the scenarios carry phases it also renders a
92/// per-phase spread + coverage block (render-only, honoring `phase_opts`;
93/// per-phase never affects the exit). See
94/// `crate::stats::compare_partitions_noise` for the full contract.
95pub fn compare_partitions_noise(
96    filter_a: &RowFilter,
97    filter_b: &RowFilter,
98    dir: Option<&Path>,
99    spread_threshold_pct: f64,
100    phase_opts: &crate::stats::PhaseDisplayOptions,
101    gate: &crate::stats::GateOptions,
102) -> Result<i32> {
103    crate::stats::compare_partitions_noise(
104        filter_a,
105        filter_b,
106        dir,
107        spread_threshold_pct,
108        phase_opts,
109        gate,
110    )
111}
112
113/// Collect the current host context via
114/// [`crate::host_context::collect_host_context`] and render it as
115/// a human-readable multi-line report via
116/// [`crate::host_context::HostContext::format_human`]. The output
117/// ends with a newline; callers print it verbatim.
118pub fn show_host() -> String {
119    crate::host_context::collect_host_context().format_human()
120}
121
122/// Return the run-directory leaf name under `root` whose Levenshtein
123/// edit distance from `query` is smallest AND within the closeness
124/// threshold, or `None` if no candidate is close enough (or if
125/// `root` cannot be enumerated).
126///
127/// Threshold is `max(3, query.len() / 3)` — same shape as
128/// [`suggest_closest_test_name`] so the "did you mean?" UX stays
129/// uniform across the test-name and run-key surfaces. The absolute-3 floor lets
130/// short keys (e.g. `6.14`) tolerate small typos while the
131/// proportional `len/3` lets longer keys (e.g.
132/// `6.14-abcdef1-dirty`) tolerate roughly one bit-flip per 3
133/// chars.
134///
135/// Ties resolve to the FIRST name encountered in `read_dir`
136/// iteration order — non-deterministic across filesystems but
137/// consistent within a single invocation. The returned `String`
138/// owns the leaf name (heap allocation per match) because
139/// `read_dir` yields `OsString` filenames that the suggestion
140/// outlives.
141///
142/// `read_dir` failure (root doesn't exist, permission denied)
143/// silently degrades to `None` — the caller's primary diagnostic
144/// is "run not found"; the "did you mean?" hint is best-effort
145/// gravy and must not gate the bail path.
146///
147/// Filters via [`crate::test_support::is_run_directory`] so the
148/// flock sentinel subdirectory (`.locks/`) and any other
149/// dotfile-prefixed entry under `runs_root` cannot surface as
150/// a "did you mean?" suggestion — the same predicate that
151/// `newest_run_dir` and `sorted_run_entries` use, so all three
152/// run-listing surfaces agree on what counts as a run dir.
153pub(super) fn suggest_closest_run_key(query: &str, root: &Path) -> Option<String> {
154    let threshold = std::cmp::max(3, query.len() / 3);
155    let entries = std::fs::read_dir(root).ok()?;
156    let mut best: Option<(usize, String)> = None;
157    for entry in entries.flatten() {
158        if !crate::test_support::is_run_directory(&entry) {
159            continue;
160        }
161        let name = match entry.file_name().to_str() {
162            Some(s) => s.to_string(),
163            None => continue,
164        };
165        let d = strsim::levenshtein(query, &name);
166        if d > threshold {
167            continue;
168        }
169        match best {
170            Some((best_d, _)) if best_d <= d => continue,
171            _ => best = Some((d, name)),
172        }
173    }
174    best.map(|(_, name)| name)
175}
176
177/// Render the archived host context for the named run, resolved
178/// against `dir` (or `test_support::runs_root()` when `dir` is
179/// `None`). Loads sidecars under the run directory and returns the
180/// `HostContext::format_human` of the first sidecar that has a
181/// populated `host` field — every sidecar in a single run captures
182/// the same host, so first-wins is adequate.
183///
184/// Returns `Err` when:
185/// - The run directory does not exist (actionable message names
186///   the expected root),
187/// - The run directory exists but has no sidecar data (matches
188///   the `compare_partitions` error shape),
189/// - Every sidecar carried `host: None` (older pre-enrichment
190///   runs won't have the field).
191pub fn show_run_host(run: &str, dir: Option<&Path>) -> Result<String> {
192    let root: std::path::PathBuf = match dir {
193        Some(d) => d.to_path_buf(),
194        None => crate::test_support::runs_root(),
195    };
196    let run_dir = root.join(run);
197    if !run_dir.exists() {
198        let suggestion = suggest_closest_run_key(run, &root)
199            .map(|name| format!(" Did you mean `{name}`?"))
200            .unwrap_or_default();
201        bail!(
202            "run '{run}' not found under {}.{suggestion} \
203             Run `cargo ktstr stats list` to enumerate available run keys.",
204            root.display(),
205        );
206    }
207    let sidecars = crate::test_support::collect_sidecars(&run_dir);
208    if sidecars.is_empty() {
209        bail!("run '{run}' has no sidecar data");
210    }
211    // First sidecar with a populated host wins. Every sidecar in a
212    // single run captures the same host; pre-enrichment sidecars
213    // may have `host: None`. Scan forward rather than take the
214    // first entry so older data doesn't force a "no host context"
215    // error when newer sidecars in the same run DO have it.
216    let host = sidecars
217        .iter()
218        .find_map(|sc| sc.host.as_ref())
219        .ok_or_else(|| {
220            anyhow!(
221                "run '{run}' has {} sidecar(s) but none carries a populated \
222                 host context; this usually means the run predates host-context \
223                 enrichment. Re-run the test to produce a sidecar with the \
224                 current schema.",
225                sidecars.len(),
226            )
227        })?;
228    Ok(host.format_human())
229}
230
231/// Return the registered test name whose Levenshtein edit distance
232/// from `query` is smallest AND within the closeness threshold, or
233/// `None` if no candidate is close enough.
234pub(super) fn suggest_closest_test_name(query: &str) -> Option<&'static str> {
235    let threshold = std::cmp::max(3, query.len() / 3);
236    let mut best: Option<(usize, &'static str)> = None;
237    for entry in crate::test_support::KTSTR_TESTS.iter() {
238        let d = strsim::levenshtein(query, entry.name);
239        if d > threshold {
240            continue;
241        }
242        match best {
243            Some((best_d, _)) if best_d <= d => continue,
244            _ => best = Some((d, entry.name)),
245        }
246    }
247    best.map(|(_, name)| name)
248}
249
250/// Render the resolved, merged `Assert` thresholds for the named
251/// test — the same merge chain evaluated at run time in
252/// `run_ktstr_test_inner`:
253/// `Assert::default_checks().merge(&entry.scheduler.assert).merge(&entry.assert)`.
254///
255/// Returns `Err` when no registered test matches `test_name`. The
256/// CLI wiring (`cargo ktstr show-thresholds <test>`) surfaces this
257/// to the operator without requiring them to read the source, the
258/// nextest `--list` output, or the Debug impl of `Assert`.
259pub fn show_thresholds(test_name: &str) -> Result<String> {
260    let entry = crate::test_support::find_test(test_name).ok_or_else(|| {
261        let suggestion = suggest_closest_test_name(test_name)
262            .map(|s| format!(" Did you mean `{s}`?"))
263            .unwrap_or_default();
264        anyhow!(
265            "no registered ktstr test named '{test_name}'.{suggestion} \
266             Run `cargo nextest list` to see the available test names \
267             — then pass just the function-name component to \
268             `show-thresholds`, not the `<binary>::` prefix that \
269             nextest prepends to each line."
270        )
271    })?;
272    let merged = crate::assert::Assert::default_checks()
273        .merge(&entry.scheduler.assert)
274        .merge(&entry.assert);
275    let mut out = format!("Test: {}\n", entry.name);
276    out.push_str(&format!("Scheduler: {}\n", entry.scheduler.name,));
277    out.push_str("Resolved assertion thresholds:\n");
278    out.push_str(&merged.format_human());
279    Ok(out)
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Create the directory `<root>/<leaf>` and return its path.
    fn make_run_dir(root: &std::path::Path, leaf: &str) -> std::path::PathBuf {
        let run_dir = root.join(leaf);
        std::fs::create_dir(&run_dir).unwrap();
        run_dir
    }

    /// Write already-serialized sidecar JSON to `<run_dir>/<file_name>`.
    fn write_sidecar_json(run_dir: &std::path::Path, file_name: &str, json: String) {
        std::fs::write(run_dir.join(file_name), json).unwrap();
    }

    /// Render an error together with its full context chain.
    fn render_error(err: anyhow::Error) -> String {
        format!("{err:#}")
    }

    /// `show_host` yields a non-empty, newline-terminated report that
    /// includes the `kernel_name` field.
    #[test]
    fn show_host_returns_populated_report() {
        let out = show_host();
        assert!(!out.is_empty(), "show_host must return non-empty output");
        assert!(
            out.ends_with('\n'),
            "show_host output must end with a newline for print! use: {out:?}",
        );
        assert!(
            out.contains("kernel_name"),
            "show_host must surface the kernel_name field: {out}",
        );
    }

    /// Error path: the named run directory does not exist.
    #[test]
    fn show_run_host_missing_run_returns_error() {
        let tmp = tempfile::tempdir().unwrap();
        let msg = render_error(show_run_host("nonexistent-run", Some(tmp.path())).unwrap_err());
        assert!(
            msg.contains("run 'nonexistent-run' not found"),
            "missing-run error must name the run: {msg}",
        );
        assert!(
            msg.contains("cargo ktstr stats list"),
            "missing-run error must name the `stats list` discovery \
             command so operators can enumerate available run keys \
             without extra lookups: {msg}",
        );
    }

    /// Error path: run dir exists but has no sidecars.
    #[test]
    fn show_run_host_empty_run_returns_error() {
        let tmp = tempfile::tempdir().unwrap();
        make_run_dir(tmp.path(), "run-empty");
        let msg = render_error(show_run_host("run-empty", Some(tmp.path())).unwrap_err());
        assert!(
            msg.contains("no sidecar data"),
            "empty-run error must name the condition: {msg}",
        );
    }

    /// Error path: every sidecar carries `host: None`.
    #[test]
    fn show_run_host_all_host_none_returns_error() {
        let tmp = tempfile::tempdir().unwrap();
        let run_dir = make_run_dir(tmp.path(), "run-no-host");
        let sc = crate::test_support::SidecarResult::test_fixture();
        write_sidecar_json(
            &run_dir,
            "t-0000000000000000.ktstr.json",
            serde_json::to_string(&sc).unwrap(),
        );
        let msg = render_error(show_run_host("run-no-host", Some(tmp.path())).unwrap_err());
        assert!(
            msg.contains("no sidecar with a populated host")
                || msg.contains("none carries a populated host context"),
            "all-host-None error must name the pre-enrichment likely cause: {msg}",
        );
    }

    /// Happy path: a run with a populated host returns format_human.
    #[test]
    fn show_run_host_populated_sidecar_returns_format_human() {
        let tmp = tempfile::tempdir().unwrap();
        let run_dir = make_run_dir(tmp.path(), "run-with-host");
        let mut sc = crate::test_support::SidecarResult::test_fixture();
        sc.host = Some(crate::host_context::HostContext::test_fixture());
        write_sidecar_json(
            &run_dir,
            "t-0000000000000000.ktstr.json",
            serde_json::to_string(&sc).unwrap(),
        );

        let out = show_run_host("run-with-host", Some(tmp.path())).unwrap();
        assert!(
            out.contains("kernel_name"),
            "populated host output must include the kernel_name row: {out}",
        );
        assert!(
            out.ends_with('\n'),
            "output must end with newline for print!: {out:?}",
        );
    }

    /// Happy path forward-scan: first sidecar has host:None, later
    /// has populated host. `iter().find_map` must skip the first.
    #[test]
    fn show_run_host_forward_scans_past_none_sidecars() {
        let tmp = tempfile::tempdir().unwrap();
        let run_dir = make_run_dir(tmp.path(), "run-mixed");

        let sc_none = crate::test_support::SidecarResult::test_fixture();
        write_sidecar_json(
            &run_dir,
            "a-0000000000000000.ktstr.json",
            serde_json::to_string(&sc_none).unwrap(),
        );

        let mut sc_host = crate::test_support::SidecarResult::test_fixture();
        sc_host.host = Some(crate::host_context::HostContext::test_fixture());
        write_sidecar_json(
            &run_dir,
            "b-0000000000000000.ktstr.json",
            serde_json::to_string(&sc_host).unwrap(),
        );

        let out = show_run_host("run-mixed", Some(tmp.path()))
            .expect("forward scan must find the populated sidecar");
        assert!(
            out.contains("kernel_name"),
            "output from populated sidecar must include kernel_name: {out}",
        );
    }

    /// Happy path: a registered test name resolves through the
    /// merge chain (default → scheduler → entry assert) and renders
    /// the three-section report (Test:, Scheduler:, Resolved
    /// assertion thresholds:) with the header preceding the
    /// threshold dump.
    #[test]
    fn show_thresholds_known_test_returns_populated_report() {
        let entry = match crate::test_support::KTSTR_TESTS.iter().next() {
            Some(entry) => entry,
            None => {
                eprintln!(
                    "ktstr: SKIP: show_thresholds_known_test_returns_populated_report — \
                     no entries in KTSTR_TESTS",
                );
                return;
            }
        };
        let out = show_thresholds(entry.name).expect("show_thresholds must resolve known test");
        assert!(
            out.contains("Test:"),
            "output missing `Test:` header: {out}"
        );
        assert!(
            out.contains("Scheduler:"),
            "output missing `Scheduler:` header: {out}"
        );
        assert!(
            out.contains("Resolved assertion thresholds:"),
            "output missing thresholds section: {out}",
        );
        let test_idx = out.find("Test:").unwrap();
        let thresholds_idx = out.find("Resolved assertion thresholds:").unwrap();
        assert!(
            test_idx < thresholds_idx,
            "`Test:` header must precede threshold dump",
        );
    }

    /// Unknown test name surfaces actionable error: missing-name
    /// diagnostic + nextest pointer + binary:: prefix caveat.
    #[test]
    fn show_thresholds_unknown_test_returns_actionable_error() {
        let msg =
            render_error(show_thresholds("definitely_not_a_registered_test_xyz123").unwrap_err());
        assert!(
            msg.contains("no registered ktstr test named"),
            "error must name the missing-test condition: {msg}",
        );
        assert!(
            msg.contains("cargo nextest list"),
            "error must point at the discovery command: {msg}",
        );
        assert!(
            msg.contains("function-name component"),
            "error must flag the nextest binary:: prefix caveat: {msg}",
        );
    }

    /// `suggest_closest_test_name` — positive: distance-1 typo of a
    /// long registered name returns the registered name.
    #[test]
    fn suggest_closest_test_name_finds_near_match() {
        let Some(entry) = crate::test_support::KTSTR_TESTS.iter().find(|e| {
            let is_sentinel = e.name.starts_with("__unit_test_") && e.name.ends_with("__");
            e.name.len() >= 10 && !is_sentinel
        }) else {
            skip!(
                "no registered non-sentinel test with name >= 10 chars \
                 — cannot construct a positive strsim probe"
            );
        };
        // Swap the first byte for a different ASCII letter to get a
        // query exactly one substitution away from the registered name.
        let mut mutated = entry.name.as_bytes().to_vec();
        mutated[0] = match mutated[0] {
            b'z' => b'a',
            _ => b'z',
        };
        let query = String::from_utf8(mutated).expect("ASCII mutation stays UTF-8");
        let suggestion = suggest_closest_test_name(&query)
            .expect("distance-1 typo on a registered name must yield a suggestion");
        assert_eq!(
            suggestion, entry.name,
            "a single-byte typo must suggest the exact name it was derived from",
        );
    }

    /// Negative: 40-char unrelated query returns None.
    #[test]
    fn suggest_closest_test_name_returns_none_for_unrelated_query() {
        let unrelated = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
        let suggestion = suggest_closest_test_name(unrelated);
        assert_eq!(
            suggestion, None,
            "a query with no lexical relationship to any registered \
             test name must yield no suggestion (not an over-reach)",
        );
    }

    /// `suggest_closest_run_key` — distance-1 planted dir is found.
    #[test]
    fn suggest_closest_run_key_finds_near_match() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::create_dir(root.join("6.14-abc1234")).expect("plant run dir");
        let suggestion = suggest_closest_run_key("6.14-abc1235", root)
            .expect("distance-1 typo on a planted run dir must yield a suggestion");
        assert_eq!(suggestion, "6.14-abc1234");
    }

    /// `suggest_closest_run_key` — distant query returns None.
    #[test]
    fn suggest_closest_run_key_returns_none_for_distant_query() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::create_dir(root.join("6.14-abc1234")).expect("plant run dir");
        assert_eq!(suggest_closest_run_key("xxxxxxxxxxxxx", root), None,);
    }

    /// `suggest_closest_run_key` — empty root returns None.
    #[test]
    fn suggest_closest_run_key_returns_none_for_empty_root() {
        let tmp = tempfile::tempdir().unwrap();
        let suggestion = suggest_closest_run_key("6.14-abc1234", tmp.path());
        assert_eq!(
            suggestion, None,
            "empty root must yield None — no candidates to match against",
        );
    }

    /// `suggest_closest_run_key` — file entries are skipped via the
    /// `is_run_directory` filter.
    #[test]
    fn suggest_closest_run_key_skips_files() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(root.join("6.14-abc1234"), b"not a dir").expect("plant file");
        std::fs::create_dir(root.join("6.14-abc1235")).expect("plant dir");
        let suggestion = suggest_closest_run_key("6.14-abc1234", root)
            .expect("the planted directory must yield a suggestion despite the same-name file");
        assert_eq!(
            suggestion, "6.14-abc1235",
            "a regression that drops the is_dir() filter would surface \
             here as `Some(\"6.14-abc1234\")` (the file at distance 0) \
             instead of `Some(\"6.14-abc1235\")` (the dir at distance 1)",
        );
    }
}