ktstr/test_support/
dispatch.rs

1//! Process-level dispatch and nextest protocol handling.
2//!
3//! This module owns every code path that runs before (or in lieu of)
4//! the user's `main()`:
5//!
6//! - [`ktstr_test_early_dispatch`]: the `#[ctor]` that fires in every
7//!   ktstr-linked binary. Routes the process to guest init, host-side
8//!   VM launch, guest-side test execution, or nextest protocol handling.
9//! - [`ktstr_main`]: the nextest protocol handler — `--list` returns
10//!   `ktstr/` and `gauntlet/` test names, `--exact` runs a single test.
11//! - [`run_ktstr_test`]: programmatic entry point used by library
12//!   consumers and the macro-generated `#[test]` wrappers.
13//! - [`analyze_sidecars`]: collects sidecar JSON from a run directory
14//!   and renders the full gauntlet analysis (rows + verifier + callback
15//!   profile + KVM stats) into a string.
16//!
17//! The heavy lifting lives in sibling submodules: `eval` (host-side
18//! result judgment — `run_ktstr_test_inner` and `evaluate_vm_result`),
19//! `sidecar` (per-run JSON), `probe` (auto-repro + BPF probe pipeline),
20//! `args` (CLI extraction), and the [`crate::vmm`] VM launcher.
21
22use std::path::PathBuf;
23
24use anyhow::{Context, Result};
25
26use crate::assert::AssertResult;
27
28#[cfg(feature = "export")]
29use super::extract_export_output_arg;
30use super::{
31    HostClass, KTSTR_TESTS, KtstrTestEntry, TopoOverride, classify_host_error, collect_sidecars,
32    extract_export_test_arg, extract_shell_test_arg, extract_test_fn_arg, extract_topo_arg,
33    find_test, format_callback_profile, format_kvm_stats, format_verifier_stats,
34    maybe_dispatch_vm_test, parse_topo_string, propagate_rust_env_from_cmdline,
35    record_skip_sidecar, resolve_test_kernel, run_ktstr_test_inner, sidecar_dir, try_flush_profraw,
36};
37
38/// Check if an error is a host topology mismatch (e.g. test requests
39/// 2 LLCs but host has 1, or more CPUs than the host carries).
40///
41/// Walks the FULL error chain via `e.chain().any(...)` so a
42/// [`TopologyInsufficient`] wrapped in `.context(...)` (the
43/// `crate::test_support::eval` `"build ktstr_test VM"` / `"run
44/// ktstr_test VM"` wrappers) is still recognised — mirrors
45/// [`is_resource_contention`]. Replaced a fragile message string-match
46/// (`"need"` + `"LLC"`/`"CPU"`) that would misclassify any unrelated
47/// error happening to contain those words as a topology skip.
48///
49/// [`TopologyInsufficient`]: crate::vmm::host_topology::TopologyInsufficient
50#[doc(hidden)]
51pub fn is_topology_insufficient(e: &anyhow::Error) -> bool {
52    e.chain().any(|cause| {
53        cause
54            .downcast_ref::<crate::vmm::host_topology::TopologyInsufficient>()
55            .is_some()
56    })
57}
58
59/// Check if an `anyhow::Error` carries a [`ResourceContention`].
60///
61/// Walks the FULL error chain via `e.chain().any(...)` so a
62/// `ResourceContention` wrapped in `.context(...)` (e.g. the
63/// `crate::test_support::eval` `"build ktstr_test VM"` and `"run ktstr_test VM"`
64/// wrappers) is still recognised — the macro's match arm depends on
65/// this.
66///
67/// Used by the `#[ktstr_test]` macro expansion to short-circuit on
68/// host-resource contention (LLC slots / CPUs unavailable, KVM fd
69/// budget exhausted, ENOMEM): the macro emits the canonical
70/// `ktstr: SKIP: resource contention: ...` banner and early-returns
71/// so libtest sees pass. The skip sidecar is recorded at every
72/// contention site inside `run_ktstr_test_inner`, so stats tooling
73/// still sees the skip without a panic-driven nextest retry. `pub`
74/// because the macro-generated `#[test]` body in `ktstr-macros`
75/// references it by absolute path; `#[doc(hidden)]` keeps it out
76/// of rustdoc's public surface — it is plumbing, not user API.
77///
78/// [`ResourceContention`]: crate::vmm::host_topology::ResourceContention
79#[doc(hidden)]
80pub fn is_resource_contention(e: &anyhow::Error) -> bool {
81    e.chain().any(|cause| {
82        cause
83            .downcast_ref::<crate::vmm::host_topology::ResourceContention>()
84            .is_some()
85    })
86}
87
88/// Check if an `anyhow::Error` carries a [`PerfModeUnavailable`].
89///
90/// Chain-aware (walks `e.chain()`), like [`is_topology_insufficient`].
91/// A `PerfModeUnavailable` is a HOST-INSUFFICIENCY skip, like RC/TI: the
92/// host fundamentally cannot honor an explicitly-requested perf-mode
93/// guarantee (too few CPUs for an exclusive host LLC + a service CPU).
94/// The VM is never run unisolated (it errors at build), so
95/// `result_to_exit_code` and the macro body route it to a VISIBLE skip
96/// by default, promoted to a FAIL banner under `KTSTR_NO_SKIP_MODE`.
97///
98/// [`PerfModeUnavailable`]: crate::vmm::host_topology::PerfModeUnavailable
99#[doc(hidden)]
100pub fn is_perf_mode_unavailable(e: &anyhow::Error) -> bool {
101    e.chain().any(|cause| {
102        cause
103            .downcast_ref::<crate::vmm::host_topology::PerfModeUnavailable>()
104            .is_some()
105    })
106}
107
108/// Check if an `anyhow::Error` carries a [`CpuBudgetUnsatisfiable`].
109///
110/// Chain-aware. A `CpuBudgetUnsatisfiable` is a HARD ERROR (an operator
111/// `--cpu-cap` number the host cannot satisfy), NOT a skip. (An author's
112/// per-test `cpu_budget` over the allowance skips via `TopologyInsufficient`
113/// instead — see `resolve_cpu_budget` — so it never carries this type.)
114///
115/// [`CpuBudgetUnsatisfiable`]: crate::vmm::host_topology::CpuBudgetUnsatisfiable
116#[doc(hidden)]
117pub fn is_cpu_budget_unsatisfiable(e: &anyhow::Error) -> bool {
118    e.chain().any(|cause| {
119        cause
120            .downcast_ref::<crate::vmm::host_topology::CpuBudgetUnsatisfiable>()
121            .is_some()
122    })
123}
124
125/// Check if an `anyhow::Error` carries a [`TopologyUnrepresentable`].
126///
127/// Chain-aware. A `TopologyUnrepresentable` is a HARD ERROR (a topology no
128/// host can represent under this VMM's static device layout — the aarch64
129/// over-`MAX_VCPUS` GICv3-redistributor case), NOT a skip.
130/// `classify_host_error` classifies it as `HostClass::Fail`, checked above
131/// the RC/TI skip types and handled above the `expect_err` inversion in
132/// both `err_to_exit_code` and the macro body, so a too-wide aarch64
133/// topology can neither masquerade as the expected failure nor be turned
134/// into a silent skip. Distinct from [`is_topology_insufficient`], which
135/// matches the host-DEPENDENT skip type.
136///
137/// [`TopologyUnrepresentable`]: crate::vmm::host_topology::TopologyUnrepresentable
138#[doc(hidden)]
139pub fn is_topology_unrepresentable(e: &anyhow::Error) -> bool {
140    e.chain().any(|cause| {
141        cause
142            .downcast_ref::<crate::vmm::host_topology::TopologyUnrepresentable>()
143            .is_some()
144    })
145}
146
147/// Predicate: walks the [`anyhow::Error`] chain looking for a
148/// [`KernelUnavailable`] cause. Used by `classify_host_error` to classify
149/// a no-kernel host as a skip-class host-insufficiency.
150///
151/// The harness signals "I have no kernel to boot, the binary was
152/// likely invoked outside `cargo ktstr test`" by surfacing
153/// [`KernelUnavailable`] rather than a generic `anyhow::bail!`.
154/// `classify_host_error` maps it to `HostClass::Skip` (the canonical
155/// `ktstr: SKIP: harness not configured: ...` banner), promoted to a FAIL
156/// under `KTSTR_NO_SKIP_MODE` — same shape as the resource-contention skip.
157/// `pub` + `#[doc(hidden)]`: plumbing re-exported from `test_support`
158/// alongside the sibling `is_*` predicates, not user API.
159///
160/// Both consumers route a `KernelUnavailable` through the shared
161/// [`classify_host_error`] (a no-kernel host is a skip-class
162/// host-insufficiency): `err_to_exit_code` and the `#[ktstr_test]` macro
163/// body both SKIP it by default, promoted to a FAIL under
164/// `KTSTR_NO_SKIP_MODE`. Under nextest the plain `#[test]` wrapper is
165/// suppressed, so an entry dispatches as `ktstr/{name}` via `run_named_test`
166/// → `err_to_exit_code` — meaning a developer running `cargo nextest run`,
167/// or `cargo ktstr test` without `--kernel`, on a kernel-less host gets a
168/// clean skip rather than a hard fail on every entry. This cannot mask a CI
169/// kernel-build failure: a requested `--kernel` that fails to build bails in
170/// cargo-ktstr (`resolve_kernel_set`) before nextest is spawned, so a
171/// `KernelUnavailable` here only ever means "no kernel was requested".
172/// Pinned by `result_to_exit_code_kernel_unavailable_skips_on_dispatch_path`.
173///
174/// [`classify_host_error`]: crate::test_support::classify_host_error
175///
176/// [`KernelUnavailable`]: crate::test_support::eval::KernelUnavailable
177#[doc(hidden)]
178pub fn is_kernel_unavailable(e: &anyhow::Error) -> bool {
179    e.chain().any(|cause| {
180        cause
181            .downcast_ref::<crate::test_support::eval::KernelUnavailable>()
182            .is_some()
183    })
184}
185
/// A nextest-safe kernel identifier whose construction is gated
/// through [`sanitize_kernel_label`] — once a value of this type
/// exists, the contained string is GUARANTEED to match the
/// `kernel_[a-z0-9_]+` shape that nextest's test-name parsing
/// accepts. The wrapped `String` is private so a future caller
/// cannot bypass [`Self::new`] and stuff a raw label into the
/// invariant.
///
/// Constructed by [`Self::new`] (which always calls
/// [`sanitize_kernel_label`]). Read access is via
/// [`Self::as_str`] / `Display` / `AsRef<str>` — all of which
/// expose the sanitized form unchanged.
///
/// `pub(crate)` because every consumer (this module, the
/// production parser at [`parse_kernel_list`], and the encoder
/// helpers in `cargo-ktstr` that emit the wire format
/// `parse_kernel_list` decodes) lives inside the workspace; no external
/// surface is needed today. If a future external consumer needs
/// to construct a `SanitizedKernelLabel` directly, expose
/// `Self::new` as `pub` then — but the private inner stays a
/// private invariant either way.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct SanitizedKernelLabel(String);
209
210impl SanitizedKernelLabel {
211    /// Sanitize `raw` via [`sanitize_kernel_label`] and wrap the
212    /// result in the invariant-preserving newtype. The only path
213    /// that produces a `SanitizedKernelLabel`; bypassing it is
214    /// impossible because the inner field is private to this
215    /// module.
216    pub(crate) fn new(raw: &str) -> Self {
217        Self(sanitize_kernel_label(raw))
218    }
219
220    /// Read access to the sanitized identifier. Returns `&str`
221    /// rather than `&String` so callers can compose with
222    /// `format!` / `starts_with` / `strip_suffix` without
223    /// chaining `.as_str().as_str()`.
224    pub(crate) fn as_str(&self) -> &str {
225        &self.0
226    }
227}
228
229impl std::fmt::Display for SanitizedKernelLabel {
230    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
231        f.write_str(&self.0)
232    }
233}
234
235impl AsRef<str> for SanitizedKernelLabel {
236    fn as_ref(&self) -> &str {
237        &self.0
238    }
239}
240
241// `PartialEq<&str>` and `PartialEq<str>` impls let `assert_eq!`
242// against a string literal stay readable in tests
243// (`assert_eq!(entries[0].sanitized, "kernel_6_14_2")`) without
244// forcing every consumer to chain `.as_str()`. The wrapped
245// `String` is private to this module, so impls comparing
246// against external `&str` values cannot break the
247// "constructor enforces sanitization" invariant — the
248// invariant attaches to value PRODUCTION, not to value
249// COMPARISON.
250impl PartialEq<&str> for SanitizedKernelLabel {
251    fn eq(&self, other: &&str) -> bool {
252        self.0 == *other
253    }
254}
255
256impl PartialEq<str> for SanitizedKernelLabel {
257    fn eq(&self, other: &str) -> bool {
258        self.0 == other
259    }
260}
261
#[cfg(test)]
impl SanitizedKernelLabel {
    /// Test-only escape hatch that wraps `s` as-is, skipping the
    /// sanitizer. `s` must ALREADY be in the sanitized shape
    /// (`kernel_[a-z0-9_]+`). Unit-test fixtures use it to hand-roll
    /// `KernelEntry` values whose `sanitized` field is meant to be a
    /// literal — passing `"kernel_6_14_2"` through [`Self::new`]
    /// would double-prefix it to `"kernel_kernel_6_14_2"`.
    ///
    /// Production code must NEVER call this: it lets callers place
    /// arbitrary strings in the field, which defeats the point of
    /// the newtype. The `#[cfg(test)]` gate enforces that at compile
    /// time.
    pub(crate) fn from_pre_sanitized_for_test(s: &str) -> Self {
        Self(String::from(s))
    }
}
279
/// One resolved kernel entry from `KTSTR_KERNEL_LIST` (the multi-
/// kernel fan-out wire format that `cargo ktstr test --kernel A
/// --kernel B` or `cargo ktstr verifier --kernel A --kernel B`
/// exports before exec'ing into `cargo nextest`).
///
/// `label` is the producer-side label string before
/// sanitization — e.g. `"6.14.2"` for Version, `"git_tj_sched_ext_for-next"`
/// for Git, `"6.14.2-tarball-x86_64-kc..."` for CacheKey,
/// `"path_linux_a3f2b1"` for Path. Preserved so the
/// [`crate::test_support::dispatch`] verifier sweep filter can
/// compare against `declare_scheduler!`'s `kernels = [...]`
/// declarations — specifically, range membership
/// (`"6.14..6.16"` vs `"6.14.2"`) needs the raw version string
/// to feed into [`crate::kernel_path::decompose_version_for_compare`],
/// which the sanitized form has lost (slashes / dots → underscores).
///
/// `sanitized` is the nextest-safe identifier appended to test names
/// so `cargo nextest run -E 'test(kernel_6_14_2)'` filters work
/// natively. The producer-side encoder in `cargo-ktstr` emits a
/// semantic, operator-readable label per kernel:
/// - Version / Range expansion: the version string verbatim
///   (`6.14.2`, `6.15-rc3`).
/// - CacheKey: the version prefix (everything before the
///   `-tarball-` / `-git-` source tag).
/// - Git: `git_{owner}_{repo}_{kind}_{ref}` extracted from the URL
///   (kind = tag/branch/sha).
/// - Path: `path_{basename}_{hash6}` — basename + 6-char crc32 of
///   the canonical path, disambiguating two `linux` directories
///   under different parents.
///
/// [`SanitizedKernelLabel::new`] (which calls [`sanitize_kernel_label`])
/// applies the `kernel_` prefix and `[a-z0-9_]+` normalization
/// downstream. The newtype on this field makes the invariant
/// compile-checked: a future caller cannot construct a
/// `KernelEntry` whose `sanitized` field skipped sanitization.
///
/// `kernel_dir` is the canonical absolute path to the kernel-build
/// directory the per-variant subprocess re-exports as
/// `KTSTR_KERNEL`.
#[derive(Clone, Debug)]
pub(crate) struct KernelEntry {
    /// Producer-side label exactly as it appeared in the wire format,
    /// before sanitization.
    pub(crate) label: String,
    /// Nextest-safe form of `label` (`kernel_[a-z0-9_]+`).
    pub(crate) sanitized: SanitizedKernelLabel,
    /// Kernel-build directory paired with `label` in the wire format.
    pub(crate) kernel_dir: PathBuf,
}
325
326/// Parse the multi-kernel wire format `KTSTR_KERNEL_LIST` into a
327/// `Vec<KernelEntry>`. Format: `label1=path1;label2=path2;...`,
328/// semicolon-separated entries, `=` separating label from path. Empty
329/// / unset env returns an empty vec — callers treat that as
330/// "single-kernel mode" and fall through to `KTSTR_KERNEL`.
331///
332/// Malformed entries (missing `=`, empty label, empty path) are
333/// dropped silently — the producer is `cargo ktstr` which encodes
334/// the format under our control, so a malformed entry indicates a
335/// regression in the producer rather than operator input that
336/// deserves a clear error. Silent drop preserves the `len() <= 1` →
337/// "treat as single-kernel" invariant in the readers downstream.
338pub(crate) fn parse_kernel_list(raw: &str) -> Vec<KernelEntry> {
339    raw.split(';')
340        .filter_map(|seg| {
341            let seg = seg.trim();
342            if seg.is_empty() {
343                return None;
344            }
345            let (label, path) = seg.split_once('=')?;
346            let label = label.trim();
347            let path = path.trim();
348            if label.is_empty() || path.is_empty() {
349                return None;
350            }
351            Some(KernelEntry {
352                label: label.to_string(),
353                sanitized: SanitizedKernelLabel::new(label),
354                kernel_dir: PathBuf::from(path),
355            })
356        })
357        .collect()
358}
359
360/// Read [`crate::KTSTR_KERNEL_LIST_ENV`] and parse it into a
361/// `Vec<KernelEntry>`. Empty / unset / malformed → empty vec
362/// (single-kernel mode at the call site).
363pub(crate) fn read_kernel_list() -> Vec<KernelEntry> {
364    std::env::var(crate::KTSTR_KERNEL_LIST_ENV)
365        .ok()
366        .map(|v| parse_kernel_list(&v))
367        .unwrap_or_default()
368}
369
/// Sanitise a kernel label (the producer-side identity emitted by
/// `cargo ktstr`'s resolver) into a nextest-safe identifier of the
/// shape `kernel_[a-z0-9_]+`.
///
/// The label is cut into maximal runs of ASCII letters and digits;
/// each run is lowercased, the runs are joined with a single `_`, and
/// the result is prefixed with `kernel_`. Every other character
/// (including any non-ASCII character) acts only as a separator, so
/// separator runs collapse and leading / trailing separators vanish.
///
/// Input with no ASCII letter or digit (including the empty string)
/// yields `kernel_` alone. The producer side guarantees non-empty
/// labels, so that case is defensive only — such a marker simply would
/// not disambiguate two kernels.
///
/// Example mappings:
/// - `6.14.2` → `kernel_6_14_2`
/// - `6.15-rc3` → `kernel_6_15_rc3`
/// - `git_tj_sched_ext_for-next` → `kernel_git_tj_sched_ext_for_next`
/// - `path_linux_a3f2b1` → `kernel_path_linux_a3f2b1`
pub fn sanitize_kernel_label(raw: &str) -> String {
    let mut ident = String::with_capacity(raw.len() + 7);
    ident.push_str("kernel_");
    let words = raw
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|word| !word.is_empty());
    for (idx, word) in words.enumerate() {
        // Separator goes between words only, never after the prefix
        // or at the end.
        if idx > 0 {
            ident.push('_');
        }
        ident.extend(word.chars().map(|c| c.to_ascii_lowercase()));
    }
    ident
}
408
ctor::declarative::ctor! {
/// Early dispatch for `#[ktstr_test]` test execution.
///
/// Runs before `main()` in any binary that links against ktstr.
///
/// When running as PID 1 (the binary is `/init` in the VM), calls
/// `ktstr_guest_init()` which handles the full init lifecycle and never
/// returns.
///
/// Otherwise the following checks run in order; the first helper that
/// yields an exit code terminates the process with that code:
///
/// 1. `maybe_dispatch_export` — export-self dispatch
///    (`--ktstr-export-test=NAME`).
/// 2. `maybe_dispatch_shell_test`.
/// 3. `maybe_dispatch_host_test`.
/// 4. `maybe_dispatch_vm_test`, after `propagate_rust_env_from_cmdline`
///    has run.
/// 5. nextest protocol handling, only when the `NEXTEST` env var is set
///    and the binary carries a non-sentinel `KTSTR_TESTS` entry or a
///    declared scheduler: `--list` prints the ktstr-owned listings and
///    exits 0; `--exact NAME` with a `verifier/`, `ktstr/`, or
///    `gauntlet/` prefix runs that name directly and exits with its
///    code.
/// 6. Otherwise: no-op (falls through to the standard test harness).
///    Without `NEXTEST`, coverage-gap warnings may be printed first.
///
/// Test-function arguments:
///
/// - `--ktstr-test-fn=NAME --ktstr-topo=NnNlNcNt`: host-side dispatch —
///   boots a VM with the specified topology and runs the test inside it.
/// - `--ktstr-test-fn=NAME` (without `--ktstr-topo`): guest-side dispatch —
///   runs the test function directly (inside a VM that was already booted).
///
/// ctor 1.0 ships both `#[ctor::ctor(...)]` (proc-macro attribute) and
/// `ctor::declarative::ctor! { ... }` (declarative block). This site
/// uses the declarative form because it sidesteps the TT-muncher
/// recursion-limit cost the proc-macro form would impose on the
/// ktstr_test expansion. The proc-macro form stays reachable via
/// `crate::__private::ctor::ctor` for downstream consumers that prefer
/// the attribute-on-fn shape; see `tests/private_module_paths.rs` for
/// the re-export contract.
#[doc(hidden)]
#[ctor(unsafe)]
pub fn ktstr_test_early_dispatch() {
    // PID 1: the binary is /init in the VM. Perform full init lifecycle
    // (mounts, scheduler, test dispatch, reboot). Never returns.
    //
    // SAFETY: `getpid` takes no arguments, has no preconditions, and
    // is specified to always succeed.
    if unsafe { libc::getpid() } == 1 {
        crate::vmm::rust_init::ktstr_guest_init();
    }

    // Export-self dispatch runs BEFORE host/guest test dispatch.
    // `cargo ktstr export` is a router that exec's the test binary
    // with `--ktstr-export-test=NAME`; the binary reads its own
    // `KTSTR_TESTS` registry, embeds itself via `current_exe`, and
    // writes the .run file. Running this check first means the
    // export path never accidentally triggers VM boot if the
    // operator simultaneously passes `--ktstr-test-fn` (the export
    // arg wins because export is a one-shot tool, not a test
    // execution).
    if let Some(code) = maybe_dispatch_export() {
        std::process::exit(code);
    }
    // Shell-test and host-test dispatch follow the same contract as
    // the export check: `Some(code)` means the helper handled the
    // invocation, so exit with that code; `None` means "not mine",
    // keep going.
    if let Some(code) = maybe_dispatch_shell_test() {
        std::process::exit(code);
    }
    if let Some(code) = maybe_dispatch_host_test() {
        std::process::exit(code);
    }
    // Propagate RUST_BACKTRACE / RUST_LOG from /proc/cmdline before
    // `maybe_dispatch_vm_test` runs: ctor context is single-threaded
    // (`.init_array` runs before any user thread exists), so this
    // `set_var` is sound and the later guest-side code that spawns
    // the probe thread observes the correct env.
    propagate_rust_env_from_cmdline();
    if let Some(code) = maybe_dispatch_vm_test() {
        // The LLVM profiling runtime registers its atexit handler via a
        // .init_array entry (C++ global initializer). Our ctor also lives
        // in .init_array, and the execution order between them is
        // non-deterministic. If our ctor runs first, the atexit handler
        // was never registered, so std::process::exit() won't write the
        // profraw. Serialize profraw to a buffer and write it to the SHM
        // ring for host-side extraction.
        try_flush_profraw();
        std::process::exit(code);
    }

    // nextest protocol: intercept --list and --exact when running under
    // nextest. Under cargo test, fall through to the standard harness
    // which runs the #[test] wrappers generated by #[ktstr_test].
    //
    // Binaries with real #[ktstr_test] entries need the ctor to handle
    // listing (gauntlet expansion) and dispatch (VM booting). The lib
    // test binary has only the dummy entry and no gauntlet variants —
    // skip interception so the standard harness discovers #[cfg(test)]
    // module #[test] functions (unit tests).
    //
    // For `--list`, the ctor calls three listers in order —
    // `ktstr_list_only`, `list_verifier_cells_all`, and
    // `list_plain_tests` (told whether `--ignored` was passed) — and
    // then exits 0, so libtest's own `--list` handling never runs in
    // this branch.
    // NOTE(review): presumably `list_plain_tests` emits the plain
    // `#[test]` items of a ktstr_test integration-test binary so they
    // stay visible to nextest (otherwise those names would be silently
    // dropped from the listing) — confirm against its definition.
    //
    // For `--exact`, the ctor takes over only when the test name starts
    // with `verifier/`, `ktstr/`, or `gauntlet/` — names ktstr owns.
    // Other names (libtest #[test] items, including the per-entry
    // wrappers emitted by `#[ktstr_test]` itself) fall through to
    // libtest's dispatch. Without this guard, run_named_test would fail
    // `find_test` for a plain `#[test]` name and exit 1, blocking
    // nextest from running it.
    if std::env::var_os("NEXTEST").is_some() {
        let has_real_tests = KTSTR_TESTS.iter().any(|e| !is_test_sentinel(e.name));
        // A binary may carry only `declare_scheduler!` declarations
        // (no `#[ktstr_test]` entries) — pure verifier-only test
        // binaries. Without the scheduler check below the listing
        // branch would never fire for such a binary and the
        // verifier cells would silently fail to emit under nextest.
        let has_schedulers = !super::KTSTR_SCHEDULERS.is_empty();
        if has_real_tests || has_schedulers {
            let args: Vec<String> = std::env::args().collect();
            if args.iter().any(|a| a == "--list") {
                ktstr_list_only();
                list_verifier_cells_all();
                list_plain_tests(args.iter().any(|a| a == "--ignored"));
                std::process::exit(0);
            } else if let Some(pos) = args.iter().position(|a| a == "--exact")
                && let Some(name) = args.get(pos + 1)
                && name.starts_with("verifier/")
            {
                // verifier/<sched>/<kernel>/<preset> cells bypass libtest
                // entirely — the cell handler resolves the scheduler
                // binary, kernel, and the cell's topology preset, runs
                // collect_verifier_output, prints the result, and
                // exits. No #[test] wrapper exists for declared
                // schedulers (declare_scheduler! only emits a static),
                // so it runs directly via run_verifier_cell — the same
                // libtest bypass the ktstr/ branch below uses.
                let code = run_verifier_cell(name);
                try_flush_profraw();
                std::process::exit(code);
            } else if let Some(pos) = args.iter().position(|a| a == "--exact")
                && let Some(name) = args.get(pos + 1)
                && (name.starts_with("ktstr/") || name.starts_with("gauntlet/"))
            {
                // `bare` is the first `/`-separated component after the
                // `ktstr/` or `gauntlet/` prefix. It is used only for
                // the emptiness check below; `run_named_test` receives
                // the full `name`.
                let bare = name
                    .strip_prefix("ktstr/")
                    .or_else(|| name.strip_prefix("gauntlet/"))
                    .unwrap_or(name)
                    .split('/')
                    .next()
                    .unwrap_or(name);

                // Reject malformed names like `gauntlet/` (trailing
                // slash, no test name) and `ktstr/` up front, so the
                // operator sees a clear error instead of an opaque
                // "unknown test" from the empty bare name.
                if bare.is_empty() {
                    eprintln!(
                        "ktstr: malformed --exact test name {name:?} \
                         (resolves to an empty bare name after prefix strip)",
                    );
                    std::process::exit(1);
                }

                // Run the entry directly, bypassing libtest — the same
                // pattern as the verifier/ branch above. The previous
                // dispatch rewrote argv to the bare name and relied on a
                // #[test] wrapper (emitted only by the #[ktstr_test]
                // macro) for libtest to match it; raw
                // `#[distributed_slice(KTSTR_TESTS)]` registrations have
                // no wrapper, so libtest matched nothing and printed
                // "running 0 tests" — a silent trivial-pass. run_named_test
                // resolves the entry from KTSTR_TESTS by name and boots it
                // for both registration styles, routing gauntlet/ to
                // run_gauntlet_test (identical topology) and applying the
                // host_only / performance_mode / bpf_map_write gates the
                // wrapper path skipped.
                let code = run_named_test(name);
                try_flush_profraw();
                std::process::exit(code);
            }
        }
    } else {
        // cargo-test-direct path: the standard rustc test harness
        // runs only the bare `#[test]` wrappers `#[ktstr_test]`
        // generates. Gauntlet expansion (topology-preset variants)
        // lives inside `ktstr_main`'s `--list` + `--exact` handlers
        // and is reachable ONLY under nextest. Every real ktstr
        // entry produces topology-preset variants under nextest
        // (`for_each_gauntlet_variant` iterates
        // `crate::gauntlet::gauntlet_presets()`). Without nextest those
        // variants would silently not run — coverage loss with no
        // error. Emit a one-shot stderr `warning:` diagnostic (see
        // the `eprintln!` below) when the binary carries any real
        // entry so the user sees the gap instead of trusting a
        // false green. Print once per process (cargo test invokes
        // one test binary per crate; the ctor runs exactly once per
        // test binary) so there is no need to gate with a
        // std::sync::Once.
        //
        // `KTSTR_CARGO_TEST_MODE=1` opts out of the warning: the
        // operator deliberately picked the cargo-test-direct path
        // (e.g. for a single-test debug iteration without the
        // nextest harness) and accepts that gauntlet variants
        // won't run. The warning is still emitted under bare
        // `cargo test` without the env var set so unaware users
        // see the coverage gap.
        if !crate::cargo_test_mode::cargo_test_mode_active() {
            // `total` counts every registry entry, sentinels included;
            // `real` counts only the non-sentinel ones.
            let total = KTSTR_TESTS.len();
            let real = KTSTR_TESTS
                .iter()
                .filter(|e| !is_test_sentinel(e.name))
                .count();
            if real > 0 {
                eprintln!(
                    "warning: {real} of {total} ktstr test entries registered in this binary \
                     will not generate their topology-preset gauntlet variants — NEXTEST env \
                     var is not set and the standard rustc harness does not expand them. Use \
                     `cargo nextest run` (or `cargo ktstr test`) to exercise the full gauntlet, \
                     or set KTSTR_CARGO_TEST_MODE=1 to opt into single-variant bare-`cargo test` \
                     mode without this warning.",
                );
            }
            // Verifier cells are emitted by `list_verifier_cells_all`
            // which runs ONLY from the NEXTEST listing branch above.
            // A bare `cargo test` invocation on a binary carrying
            // `declare_scheduler!` declarations gets zero verifier
            // coverage — surface the gap with the same opt-out shape
            // as the gauntlet warning so an unaware operator does not
            // trust a green run that never reached the verifier.
            // Eevdf + KernelBuiltin variants don't produce userspace
            // binaries to verify, so they are excluded from the count
            // (matching the emission-time filter in
            // `list_verifier_cells_all`).
            let verifier_schedulers = super::KTSTR_SCHEDULERS
                .iter()
                .filter(|s| {
                    !matches!(
                        s.binary,
                        super::SchedulerSpec::Eevdf | super::SchedulerSpec::KernelBuiltin { .. }
                    )
                })
                .count();
            if verifier_schedulers > 0 {
                eprintln!(
                    "warning: {verifier_schedulers} `declare_scheduler!` declaration(s) in this \
                     binary will not generate verifier cells — NEXTEST env var is not set and \
                     verifier cells are emitted only by ktstr's `--list` handler under nextest. \
                     Use `cargo ktstr verifier` to exercise the verifier sweep, or set \
                     KTSTR_CARGO_TEST_MODE=1 to acknowledge the verifier-cell-free path without \
                     this warning.",
                );
            }
        }
    }
}
}
650
/// Returns `true` when `name` has the shape of a unit-test sentinel
/// rather than a real `#[ktstr_test]` user entry.
///
/// The lib-test binary registers a single sentinel entry (currently
/// `"__unit_test_dummy__"`) so the dispatch + gauntlet plumbing has
/// something to exercise under `cargo test --lib`.
///
/// A name is treated as a sentinel when it starts with
/// `__unit_test_` AND ends with `__`. Matching on that shape instead
/// of on literal equality keeps the filter working when the sentinel
/// is renamed within the shape, or when future scaffolding registers
/// additional sentinel-shaped entries (e.g. `__unit_test_panics__`,
/// `__unit_test_timeout__`). A literal-equality check would silently
/// count those future sentinels as real entries and double-fire the
/// "NEXTEST env var not set" warning or spuriously enable `--list`
/// interception.
fn is_test_sentinel(name: &str) -> bool {
    const SENTINEL_PREFIX: &str = "__unit_test_";
    const SENTINEL_SUFFIX: &str = "__";

    // The `#[ktstr_test]` proc macro does not reject user entries
    // that happen to carry this shape, so a real entry named like
    // `__unit_test_*__` is classified as a sentinel too. That
    // collision only skews the diagnostics described above; it is
    // not a correctness failure.
    if !name.starts_with(SENTINEL_PREFIX) {
        return false;
    }
    // Checked against the whole `name`, not the remainder after the
    // prefix, so the prefix's trailing `_` may double as the first
    // `_` of the suffix (`"__unit_test__"` matches).
    name.ends_with(SENTINEL_SUFFIX)
}
680
681/// Export-self dispatch: if `--ktstr-export-test=NAME` is present in
682/// argv, look up `NAME` in the binary's own `KTSTR_TESTS` registry,
683/// build a self-extracting `.run` file embedding `current_exe()`
684/// (this binary), and exit. Returns `Some(exit_code)` when dispatched,
685/// `None` when the flag is absent.
686///
687/// `cargo ktstr export <NAME>` (the cargo-ktstr binary) is a router
688/// that compiles the workspace's tests, locates the test binary that
689/// owns `NAME`, and exec's it with this arg. The test binary embeds
690/// ITSELF — without that indirection, cargo-ktstr would package its
691/// own binary, which has no `#[ktstr_test]` registrations from the
692/// user's crate and can't reproduce the test on bare metal.
693///
694/// `--ktstr-export-output=PATH` overrides the default output path
695/// (`<NAME>.run` in the cwd). Both flags are leniently parsed by the
696/// helpers in `args.rs`; an empty NAME (`--ktstr-export-test=`)
697/// surfaces with diagnostic "requires a non-empty test name" and
698/// exit 1 so the router moves on to the next candidate.
699///
700/// # Exit-code contract
701///
702/// The router (`cargo-ktstr.rs::run_export`) discriminates between
703/// "this binary doesn't know the test" (exit 1) and "this binary
704/// has the test but rejects it" (exit 2). When ANY candidate exits
705/// 2, the router surfaces THAT candidate's stderr (the rejection
706/// reason: host_only, bpf_map_write, KernelBuiltin) rather than
707/// the generic "not found in any workspace test binary" message.
708/// Without the differentiation, an operator who exports a
709/// host_only test would see the misleading "not found" diagnostic
710/// even though the test exists.
711/// Stub for the `export`-feature-disabled build. The router
712/// (`cargo-ktstr.rs::run_export`) execs every candidate test binary
713/// with `--ktstr-export-test=NAME`; without this stub a binary
714/// compiled without `export` would fall through to the nextest
715/// harness, which would surface an opaque "unrecognised argument"
716/// error against an arg the operator never typed. The stub turns
717/// that into an actionable diagnostic by detecting the arg and
718/// emitting a build-config hint, then exiting 2 (matches the
719/// "registered but rejected" exit code so the router surfaces
720/// THIS binary's stderr rather than a sibling's "not registered"
721/// fallthrough). Recompile the test binary with the `export`
722/// feature (folded into `cli-bins` in the default feature set)
723/// to enable the real `cargo ktstr export` flow.
724#[cfg(not(feature = "export"))]
725fn maybe_dispatch_export() -> Option<i32> {
726    let args: Vec<String> = std::env::args().collect();
727    let _ = extract_export_test_arg(&args)?;
728    eprintln!(
729        "ktstr export: this test binary was built without the `export` cargo \
730         feature, so `cargo ktstr export <name>` cannot reach the export pipeline \
731         from here. Rebuild with the default feature set (or pass \
732         `--features cli-bins`) and retry."
733    );
734    Some(2)
735}
736
/// Export-self dispatch (`export` feature enabled): if
/// `--ktstr-export-test=NAME` is present in argv, export the named
/// test through `crate::export::export_test` and report the outcome
/// as an exit code. Returns `Some(exit_code)` when dispatched, `None`
/// when the flag is absent.
///
/// An optional `--ktstr-export-output=PATH` is forwarded to
/// `export_test` as the output path.
///
/// # Exit codes
///
/// - `0`: `export_test` succeeded.
/// - `1`: the name is empty, or no test with that name is registered
///   in this binary — the router falls through to the next candidate.
/// - `2`: the test is registered here but `export_test` returned an
///   error — the router surfaces this binary's stderr.
#[cfg(feature = "export")]
fn maybe_dispatch_export() -> Option<i32> {
    // "Not ours": the router tries the next candidate binary.
    const NOT_REGISTERED: i32 = 1;
    // "Ours, but refused": the router prefers this binary's stderr.
    const REJECTED: i32 = 2;

    let argv: Vec<String> = std::env::args().collect();
    let name = extract_export_test_arg(&argv)?;
    let destination = extract_export_output_arg(&argv).map(std::path::PathBuf::from);

    // An empty name is a hard error rather than a silent success:
    // the router's "first binary that exits 0 wins" protocol relies
    // on the absent-test path returning non-zero so the next
    // candidate is tried.
    if name.is_empty() {
        eprintln!("ktstr export: --ktstr-export-test= requires a non-empty test name");
        return Some(NOT_REGISTERED);
    }

    // The lookup happens here, ahead of `export_test`, because
    // `export_test` reports both "not registered" and "registered
    // but rejected" as a plain `anyhow::Error`; doing it ourselves
    // keeps the two apart at the exit-code level.
    if find_test(name).is_none() {
        eprintln!("ktstr export: no registered test named '{name}'");
        return Some(NOT_REGISTERED);
    }

    let Err(e) = crate::export::export_test(name, destination) else {
        return Some(0);
    };
    // The test exists in this binary but the export pipeline refused
    // it (host_only / bpf_map_write / KernelBuiltin / I/O error).
    eprintln!("ktstr export: {e:#}");
    Some(REJECTED)
}
774
775/// Shell-self dispatch: if `--ktstr-shell-test=NAME` is present in
776/// argv, look up `NAME` in the binary's own `KTSTR_TESTS` registry,
777/// serialize its shell-relevant fields to stdout as JSON, and exit.
778/// Returns `Some(exit_code)` when dispatched, `None` when absent.
779///
780/// `cargo ktstr shell --test <NAME>` (the cargo-ktstr binary) is a
781/// router that compiles the workspace's tests, exec's each test
782/// binary with this flag, and consumes the first stdout-JSON it
783/// gets (the router bails on ambiguous names — same `NAME`
784/// registered in two binaries). The router applies the
785/// descriptor's topology / memory / extra_include_files to the
786/// shell VM, then prints a one-line banner to stderr BEFORE VM
787/// boot naming the test + scheduler so the operator can repro the
788/// workload manually. (PS1-in-guest is a follow-up.)
789///
790/// # Stdout contract
791///
792/// The test binary MUST keep stdout silent on this dispatch path —
793/// `tracing` output MUST go to stderr. The router parses the entire
794/// stdout as a JSON descriptor; any prefix like an INFO log line
795/// will fail the parse.
796///
797/// # JSON shape
798///
799/// Serialized from [`crate::test_support::ShellTestDescriptor`] via
800/// `serde_json::to_string` — see that struct for the field-by-field
801/// contract. The struct lives in
802/// `crate::test_support::shell_descriptor` so producer and consumer
803/// share a single definition; adding a field there automatically
804/// propagates to both sides.
805///
806/// `scheduler_kind` discriminates `"eevdf" | "discover" | "path" |
807/// "kernel_builtin"` so the banner can hint at how to repro the
808/// scheduler (Discover/Path = userspace binary at `/bin/<n>`;
809/// KernelBuiltin = no binary, the shell-mode boot runs
810/// `scheduler_enable_cmds` before drop-to-busybox and
811/// `scheduler_disable_cmds` on shell exit; Eevdf = no setup needed).
812///
813/// # Exit-code contract
814///
815/// Matches `maybe_dispatch_export`:
816/// - `0`: test registered, JSON emitted to stdout.
817/// - `1`: test not registered in this binary (router falls
818///   through to the next candidate).
819/// - `2`: registered but rejected for shell mode (currently:
820///   `host_only` — no VM to drop into).
821fn maybe_dispatch_shell_test() -> Option<i32> {
822    let args: Vec<String> = std::env::args().collect();
823    let name = extract_shell_test_arg(&args)?;
824
825    if name.is_empty() {
826        eprintln!("ktstr shell: --ktstr-shell-test= requires a non-empty test name");
827        return Some(1);
828    }
829
830    let entry = match find_test(name) {
831        Some(e) => e,
832        None => {
833            eprintln!("ktstr shell: no registered test named '{name}'");
834            return Some(1);
835        }
836    };
837
838    if entry.host_only {
839        eprintln!(
840            "ktstr shell: test '{name}' has host_only = true; \
841             shell mode requires a guest VM to drop into. \
842             Either run the test directly with `cargo ktstr test {name}` \
843             (host_only tests don't boot a VM) or pick a non-host_only \
844             test for shell mode."
845        );
846        return Some(2);
847    }
848
849    let topo = &entry.topology;
850    let scheduler_kind = crate::test_support::SchedulerKind::from(&entry.scheduler.binary);
851    let (scheduler_enable_cmds, scheduler_disable_cmds) = match &entry.scheduler.binary {
852        crate::test_support::entry::SchedulerSpec::KernelBuiltin { enable, disable } => (
853            enable.iter().copied().map(String::from).collect(),
854            disable.iter().copied().map(String::from).collect(),
855        ),
856        _ => (Vec::new(), Vec::new()),
857    };
858
859    let descriptor = crate::test_support::ShellTestDescriptor {
860        numa_nodes: topo.numa_nodes,
861        llcs: topo.llcs,
862        cores: topo.cores_per_llc,
863        threads: topo.threads_per_core,
864        memory_mib: entry.memory_mib,
865        wprof: entry.wprof,
866        extra_include_files: entry
867            .extra_include_files
868            .iter()
869            .copied()
870            .map(String::from)
871            .collect(),
872        scheduler_name: entry.scheduler.name.to_string(),
873        scheduler_kind,
874        wprof_args: entry.wprof_args.map(String::from),
875        performance_mode: entry.performance_mode,
876        scheduler_enable_cmds,
877        scheduler_disable_cmds,
878    };
879
880    // serde_json::to_string produces RFC-8259-compliant escaping
881    // (`\uXXXX` with 4 hex digits, surrogate pairs for SMP code
882    // points) which Rust's Debug formatter does NOT — Debug uses
883    // `\u{1f4c2}` (braced form) for non-ASCII, breaking
884    // operator-supplied paths with non-ASCII chars (test built
885    // under `/home/<unicode-name>/proj`, `extra_include_files`
886    // listing emoji-named files, etc.). serde_json is already a
887    // workspace dep so adding this call doesn't widen the dep graph.
888    let payload = serde_json::to_string(&descriptor)
889        .expect("ShellTestDescriptor is a plain serde struct with no fallible field types");
890    println!("{payload}");
891    Some(0)
892}
893
894/// Host-side dispatch: if both `--ktstr-test-fn` and `--ktstr-topo` are
895/// present, boot a VM with the specified topology and run the test
896/// inside it. Returns `Some(exit_code)` if dispatched, `None` otherwise.
897fn maybe_dispatch_host_test() -> Option<i32> {
898    let args: Vec<String> = std::env::args().collect();
899    let name = extract_test_fn_arg(&args)?;
900    let topo_str = extract_topo_arg(&args)?;
901
902    let entry = match find_test(name) {
903        Some(e) => e,
904        None => {
905            eprintln!("ktstr_test: unknown test function '{name}'");
906            return Some(1);
907        }
908    };
909
910    let (numa_nodes, llcs, cores, threads) = match parse_topo_string(&topo_str) {
911        Some(t) => t,
912        None => {
913            eprintln!(
914                "ktstr_test: invalid --ktstr-topo format '{topo_str}' (expected NnNlNcNt, e.g. 1n2l4c2t)"
915            );
916            return Some(1);
917        }
918    };
919
920    let cpus = llcs * cores * threads;
921    let memory_mib = super::runtime::derive_test_memory_mib(cpus, entry);
922    let topo = TopoOverride {
923        numa_nodes,
924        llcs,
925        cores,
926        threads,
927        memory_mib,
928    };
929
930    match run_ktstr_test_with_topo(entry, &topo) {
931        Ok(_) => Some(0),
932        Err(e) => {
933            eprintln!("ktstr_test: {e:#}");
934            Some(1)
935        }
936    }
937}
938
939/// Host-side entry point: build a VM, boot it with `--ktstr-test-fn=NAME`,
940/// extract profraw from SHM, and return the test result.
941///
942/// Validates KVM access and auto-discovers a kernel image via
943/// `resolve_test_kernel()` when `KTSTR_TEST_KERNEL` is not set.
944pub fn run_ktstr_test(entry: &KtstrTestEntry) -> Result<AssertResult> {
945    // Directly-constructed entries bypass the proc-macro's
946    // compile-time checks. Call `validate` here so programmatic
947    // consumers (library callers pushing into `KTSTR_TESTS`
948    // dynamically) hit the same bail messages the macro produces at
949    // compile time.
950    entry.validate()?;
951
952    if entry.host_only {
953        return run_host_only_test_inner(entry);
954    }
955    if !entry.bpf_map_write.is_empty()
956        && let Ok(kernel) = resolve_test_kernel()
957        && crate::vmm::find_vmlinux(&kernel).is_none()
958    {
959        anyhow::bail!("vmlinux not found, bpf_map_write requires vmlinux");
960    }
961    run_ktstr_test_inner(entry, None)
962}
963
964/// Like `run_ktstr_test` but with an explicit topology override.
965/// Only consumed inside this module by `maybe_dispatch_host_test`;
966/// kept as a named helper so the `--ktstr-test-fn` + `--ktstr-topo`
967/// dispatch path reads symmetrically with the zero-override
968/// [`run_ktstr_test`] library entry point.
969fn run_ktstr_test_with_topo(entry: &KtstrTestEntry, topo: &TopoOverride) -> Result<AssertResult> {
970    run_ktstr_test_inner(entry, Some(topo))
971}
972
/// Process exit code for a Pass verdict (and for the Skip path,
/// which degenerates to Pass because the test never ran).
///
/// Defined as a `pub const` so external tooling (CI gates,
/// dashboard aggregators, nextest wrappers) can reference the
/// exit-code triad by name instead of duplicating the integer
/// literals. The trio [`EXIT_PASS`] / [`EXIT_FAIL`] /
/// [`EXIT_INCONCLUSIVE`] covers every verdict produced by the
/// `Fail > Inconclusive > Pass > Skip` lattice when projected
/// to a process exit code.
pub const EXIT_PASS: i32 = 0;

/// Process exit code for a Fail verdict (or any expect_err
/// satisfaction failure, i.e. an `expect_err` test that did not
/// produce the expected failure).
///
/// See [`EXIT_PASS`] for the full triad rationale.
pub const EXIT_FAIL: i32 = 1;

/// Process exit code for an Inconclusive verdict (a
/// zero-denominator ratio gate that could not evaluate).
///
/// Distinct from [`EXIT_PASS`] (which would silently green an
/// unevaluated gate) and [`EXIT_FAIL`] (which would conflate
/// "could not evaluate" with a real regression). External tooling
/// uses this code to triage Inconclusive runs separately — see
/// the README "Exit codes" section for the full operator contract.
pub const EXIT_INCONCLUSIVE: i32 = 2;
1000
1001/// Run a test result through expect_err logic and return an exit code.
1002///
1003/// Returns [`EXIT_PASS`] on pass, [`EXIT_FAIL`] on failure, and
1004/// [`EXIT_INCONCLUSIVE`] on Inconclusive — the 4-state lattice
1005/// `Fail > Inconclusive > Pass > Skip` projects to 3 distinct exit
1006/// codes (Skip degenerates to [`EXIT_PASS`] because the test never
1007/// ran, mirroring `ResourceContention`). A Skip routes through the
1008/// dedicated FIRST match arm (`Ok(r) if r.is_skip()`), ahead of the
1009/// expect_err arm, so an expect_err test that produced no verdict (e.g.
1010/// a `post_vm_skip` on a load-starved placeholder dump) is not inverted
1011/// into a FAIL — a skipped test cannot "produce the expected error."
1012/// [`EXIT_INCONCLUSIVE`] lets
1013/// downstream tooling (CI gates, nextest summary aggregation, the
1014/// operator dashboard) triage zero-denominator runs distinctly from
1015/// real regressions. `ResourceContention` returns [`EXIT_PASS`] —
1016/// the test never ran, not a real failure. The skip sidecar for
1017/// this case is written upstream in `run_ktstr_test_inner` at the
1018/// ResourceContention propagation site so every caller (including
1019/// the library entry point `run_ktstr_test`) records it, not just
1020/// the nextest dispatch path.
1021///
1022/// `ResourceContention` detection walks the FULL error chain via
1023/// [`is_resource_contention`] (chain-walk predicate) plus a
1024/// matching `e.chain().find_map(...)` extraction for the reason
1025/// string. The eval-side `crate::test_support::eval` `"build ktstr_test VM"` and
1026/// `"run ktstr_test VM"` wrappers nest the contention error under
1027/// `.context(...)`, so a top-level `downcast_ref` on the outer
1028/// error misses the inner cause. Without the chain walk a wrapped
1029/// contention would land in the `Err(e)` arm below as a regular
1030/// failure (exit 1) rather than the skip path (exit 0), turning
1031/// every host-resource-exhausted run into a hard test failure.
1032fn result_to_exit_code(
1033    result: Result<AssertResult>,
1034    expect_err: bool,
1035    allow_inconclusive: bool,
1036) -> i32 {
1037    let no_skip = std::env::var_os(crate::KTSTR_NO_SKIP_MODE_ENV).is_some();
1038    match result {
1039        Ok(r) => ok_to_exit_code(r, expect_err, allow_inconclusive),
1040        Err(e) => err_to_exit_code(e, expect_err, no_skip),
1041    }
1042}
1043
1044/// Map an `Ok(AssertResult)` verdict to an exit code.
1045///
1046/// The sequential guards preserve the original `match` arm precedence
1047/// (first matching guard wins): `is_skip()` → `expect_err` →
1048/// `is_inconclusive()` → the trailing `EXIT_PASS` (the former
1049/// `Ok(_) => EXIT_PASS` arm). Reordering these would change which
1050/// verdict fires for a result matching more than one guard.
1051fn ok_to_exit_code(r: AssertResult, expect_err: bool, allow_inconclusive: bool) -> i32 {
1052    // A Skip degenerates to EXIT_PASS regardless of expect_err — the
1053    // test never evaluated, so there is no guest failure to "expect"
1054    // (the `Fail > Inconclusive > Pass > Skip` projection; mirrors the
1055    // ResourceContention Err branch in `err_to_exit_code`, but on the
1056    // Ok side). Without this guard a post_vm_skip under expect_err
1057    // falls into the `expect_err` guard below and surfaces as "expected
1058    // error but test passed" (EXIT_FAIL) — a load-starvation
1059    // placeholder-dump skip becomes a flaky failure. End-to-end chain:
1060    // a post_vm callback returns Err(post_vm_skip(..)) → the eval gate
1061    // detects the HostSkipRequest marker, reports via report::test_skip,
1062    // and returns Ok(AssertResult::skip) → this guard maps it to
1063    // EXIT_PASS. is_skip() is true only when `outcomes` is non-empty and
1064    // every outcome is Outcome::Skip (assert/plan.rs); the empty-outcomes
1065    // Pass identity has is_skip()==false and falls through to the
1066    // trailing `EXIT_PASS`.
1067    if r.is_skip() {
1068        return EXIT_PASS;
1069    }
1070    if expect_err {
1071        // expect_err inverts on Pass and on Inconclusive: both
1072        // are "not a failure" in the operator's mental model,
1073        // and an expect_err scenario that produces an
1074        // Inconclusive verdict (denominator zero) failed to
1075        // produce the expected failure just like a Pass would.
1076        // Surface the inconclusive as exit code 2 to preserve
1077        // the distinct verdict, but treat it as expect_err
1078        // satisfaction failure (exit 1) — the test author
1079        // wanted a Fail, not "the gate could not run".
1080        //
1081        // `allow_inconclusive` does NOT relax the expect_err
1082        // contract: expect_err demands a real Fail, and an
1083        // Inconclusive verdict does not satisfy that
1084        // regardless of how the test author scopes
1085        // Inconclusive elsewhere. The dominant gate wins;
1086        // `allow_inconclusive` only relaxes the
1087        // EXIT_INCONCLUSIVE projection on the no-expect_err
1088        // path below.
1089        if r.is_inconclusive() {
1090            eprintln!(
1091                "expected error but test produced an Inconclusive verdict — \
1092                 zero-denominator gate could not evaluate; expect_err is \
1093                 unsatisfied"
1094            );
1095            return EXIT_FAIL;
1096        } else {
1097            eprintln!("expected error but test passed");
1098            return EXIT_FAIL;
1099        }
1100    }
1101    if r.is_inconclusive() {
1102        // `allow_inconclusive` opt-in: a test author may have
1103        // declared `#[ktstr_test(allow_inconclusive)]` to
1104        // signal "this test's Inconclusive arm is acceptable —
1105        // don't fail the CI gate." Route to EXIT_PASS in that
1106        // case (Inconclusive is still recorded in the sidecar
1107        // for stats tooling and the operator-facing failure
1108        // dump still renders the diagnostic). When the flag
1109        // is unset (the default) the verdict surfaces as
1110        // EXIT_INCONCLUSIVE so the operator triages it.
1111        if allow_inconclusive {
1112            eprintln!(
1113                "test produced an Inconclusive verdict but \
1114                 `allow_inconclusive` is set — routing to EXIT_PASS \
1115                 for CI gate, sidecar still records Inconclusive"
1116            );
1117            return EXIT_PASS;
1118        } else {
1119            return EXIT_INCONCLUSIVE;
1120        }
1121    }
1122    EXIT_PASS
1123}
1124
1125/// Map an `Err(anyhow::Error)` outcome to an exit code.
1126///
1127/// The sequential guards preserve the original `match` arm precedence
1128/// (first matching guard wins): the host-insufficiency classification
1129/// ([`classify_host_error`], covering kernel-unavailable → perf-mode →
1130/// cpu-budget → topology-unrepresentable → resource-contention →
1131/// topology-insufficient, shared with the `#[ktstr_test]` macro body) runs
1132/// FIRST, then the
1133/// marker-typed guards (`PostVmAssertionFailure` → `SchedulerBuildRefused`
1134/// → `SurvivesStormViolated` → `ExpectAutoReproSatisfied`), then the
1135/// `expect_err` inversion, then
1136/// the catch-all (the former `Err(e) => …` arm) operating on the
1137/// now-owned `e`. Reordering these would change which guard fires for an
1138/// error matching more than one guard. The host-insufficiency guard
1139/// order + per-class skip/fail policy live in `classify_host_error`, not
1140/// here, so this site and the macro cannot drift apart.
1141fn err_to_exit_code(e: anyhow::Error, expect_err: bool, no_skip: bool) -> i32 {
1142    // Host-insufficiency classification (kernel-unavailable, perf-mode,
1143    // cpu-budget, topology-unrepresentable, resource-contention,
1144    // topology-insufficient) is shared with the `#[ktstr_test]` macro body via
1145    // `classify_host_error` — the single source of truth for the guard
1146    // ORDER and the per-class skip/fail policy. This site renders the
1147    // verdict as an exit code; the macro renders the same `HostClass` as
1148    // libtest control flow. The bare `reason` carries no prefix: the skip
1149    // channel (`report::test_skip`) prepends `ktstr: SKIP:`, the fail
1150    // channel prepends `ktstr: FAIL:`. Placed first so a host-insufficiency
1151    // returns before the marker / expect_err / catch-all arms below — a
1152    // skip is a skip and an unconditional hard fail is a hard fail
1153    // regardless of `expect_err`.
1154    match classify_host_error(&e, no_skip) {
1155        HostClass::Skip { reason } => {
1156            crate::report::test_skip(format_args!("{reason}"));
1157            return EXIT_PASS;
1158        }
1159        HostClass::Fail { reason } => {
1160            eprintln!("ktstr: FAIL: {reason}");
1161            return EXIT_FAIL;
1162        }
1163        HostClass::NotHostClass => {}
1164    }
1165    if e.downcast_ref::<crate::test_support::eval::PostVmAssertionFailure>()
1166        .is_some()
1167    {
1168        // A host-side post_vm / post_vm_unconditional callback
1169        // failed. This is a real regression that must surface
1170        // regardless of expect_err / expect_auto_repro inversion —
1171        // those invert a GUEST-side expected failure, but a
1172        // HOST-side check is always honored. Positioned AFTER the
1173        // resource-contention / topology skip guards (a skip means
1174        // the test never ran, so there was no host-side state to
1175        // assert) but BEFORE the ExpectAutoReproSatisfied and
1176        // expect_err inversion guards so the host-side regression
1177        // wins. `downcast_ref` walks the anyhow context+source
1178        // chain (the marker rides as `.context(...)` from
1179        // run_ktstr_test_inner_impl); a raw `chain().any(is::<C>())`
1180        // would miss it (anyhow boxes context as ContextError<C,E>).
1181        eprintln!("{e:#}");
1182        return EXIT_FAIL;
1183    }
1184    if e.downcast_ref::<crate::test_support::eval::SchedulerBuildRefused>()
1185        .is_some()
1186    {
1187        // An orchestrated scheduler build expected to succeed FAILED and the
1188        // resolver refused to validate against a possibly-stale pre-built
1189        // binary (KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK unset). A host-side
1190        // build-infra fault — always EXIT_FAIL, never inverted by expect_err
1191        // (mirrors PostVmAssertionFailure above): an expect_err test must not
1192        // let a broken build masquerade as the guest-side expected failure.
1193        eprintln!("{e:#}");
1194        return EXIT_FAIL;
1195    }
1196    if e.downcast_ref::<crate::test_support::eval::SurvivesStormViolated>()
1197        .is_some()
1198    {
1199        // The marker rides ONLY when `entry.survives_storm` was set AND the
1200        // failure cause was a scheduler death (see
1201        // `render_failure_verdict_message`), so its presence alone proves the
1202        // survival assertion was violated — no `survives_storm` param needed
1203        // (mirrors the marker-presence arms for PostVmAssertionFailure /
1204        // SchedulerBuildRefused / ExpectAutoReproSatisfied below). Force
1205        // EXIT_FAIL with a survival-specific explainer. Positioned AFTER the
1206        // host-insufficiency / PostVmAssertionFailure / SchedulerBuildRefused
1207        // guards (a skip or host-side fault still dominates) but BEFORE the
1208        // ExpectAutoReproSatisfied and expect_err inversion arms so a survival
1209        // violation can never be inverted to PASS (defense-in-depth: the
1210        // validate-time survives_storm/expect_err mutex already forbids that
1211        // pairing). `downcast_ref` walks the anyhow context chain (the marker
1212        // rides as `.context(...)`).
1213        eprintln!(
1214            "ktstr: FAIL: survives_storm asserted but the scheduler did not \
1215             survive the run:\n{e:#}"
1216        );
1217        return EXIT_FAIL;
1218    }
1219    if e.downcast_ref::<crate::test_support::eval::ExpectAutoReproSatisfied>()
1220        .is_some()
1221    {
1222        // `expect_auto_repro = true` was satisfied: the primary
1223        // VM produced a Fail AND the auto-repro VM landed a
1224        // shape-valid `.repro.wprof.pb`. The eval layer attached
1225        // the marker as `anyhow::Context`. `downcast_ref` walks
1226        // the anyhow context+source chain (per anyhow's
1227        // documentation: "For errors with context, this method
1228        // returns true if E matches the type of the context C or
1229        // the type of the error on which the context has been
1230        // attached"). A `chain().any(|c| c.is::<E>())` walk on
1231        // the raw `&dyn StdError` chain would MISS the marker
1232        // because anyhow boxes context as `ContextError<C, E>`
1233        // whose underlying `is::<C>()` check returns false. The
1234        // diagnostic is printed so the operator sees both the
1235        // original failure trail and the inversion notice — the
1236        // verdict flips to PASS without erasing the failure
1237        // detail. Positioned AFTER the ResourceContention /
1238        // TopologyInsufficient guards so a skip-class outcome still
1239        // wins over inversion (a skip is a skip regardless of the
1240        // satisfaction signal). The macro-parse cross-attribute
1241        // check rejects `expect_auto_repro` combined with
1242        // `expect_err`, so the two inversion paths are mutually
1243        // exclusive at the entry layer.
1244        eprintln!("{e:#}");
1245        return EXIT_PASS;
1246    }
1247    if expect_err {
1248        // expect_err inverts a failure into a pass — UNLESS the
1249        // failure carries the
1250        // [`crate::test_support::eval::ScxBpfErrorMatcherMismatch`]
1251        // marker, which signals that the reproducer's scx_bpf_error
1252        // matcher rejected this particular failure. A matcher-
1253        // mismatch failure must surface even when expect_err = true:
1254        // the user authored the matcher to pin THIS specific bug,
1255        // and a different bug firing is itself a regression.
1256        //
1257        // `downcast_ref` walks the anyhow context+source chain
1258        // (anyhow's documented "For errors with context, this
1259        // method returns true if E matches the type of the context
1260        // C or the type of the error on which the context has been
1261        // attached" semantics). A `chain().any(|c| c.is::<E>())`
1262        // walk on the raw `&dyn StdError` chain would MISS the
1263        // marker because anyhow boxes context as
1264        // `ContextError<C, E>` whose underlying `is::<C>()` check
1265        // returns false.
1266        if e.downcast_ref::<crate::test_support::eval::ScxBpfErrorMatcherMismatch>()
1267            .is_some()
1268        {
1269            eprintln!("{e:#}");
1270            return EXIT_FAIL;
1271        } else {
1272            return EXIT_PASS;
1273        }
1274    }
1275    // Catch-all: a non-host-class, non-marker, non-expect_err error is a
1276    // real failure. (A KernelUnavailable does NOT reach here — it is a
1277    // skip-class host-insufficiency handled by the classify_host_error match
1278    // at the top.)
1279    eprintln!("{e:#}");
1280    EXIT_FAIL
1281}
1282
/// Final verdict of a test run: the four-state lattice `Fail >
/// Inconclusive > Pass > Skip` that [`result_to_exit_code`] flattens
/// into a process exit code.
///
/// It is kept separate from the exit code because the exit code folds
/// `Skip` into [`EXIT_PASS`], whereas the sidecar finalize step
/// ([`final_outcome`]) needs all four states to persist the
/// POST-inversion `passed`/`skipped`/`inconclusive` bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Verdict {
    /// Persisted as `passed = true`.
    Pass,
    /// Persisted with no bit set (the "none set" failure encoding).
    Fail,
    /// Persisted as `skipped = true`; shares [`EXIT_PASS`] with `Pass`
    /// when projected to an exit code.
    Skip,
    /// Persisted as `inconclusive = true`.
    Inconclusive,
}

impl Verdict {
    /// Exit-code projection of this verdict, using the same
    /// `EXIT_PASS`/`EXIT_FAIL`/`EXIT_INCONCLUSIVE` mapping that
    /// [`result_to_exit_code`] produces; `Skip` degenerates to
    /// [`EXIT_PASS`].
    ///
    /// Test-only: its sole caller is the anti-drift truth-table test
    /// (`final_outcome_projects_to_result_to_exit_code`). Production
    /// code consumes the [`Verdict`] through [`Verdict::sidecar_bits`].
    #[cfg(test)]
    pub(crate) fn to_exit_code(self) -> i32 {
        match self {
            Self::Fail => EXIT_FAIL,
            Self::Inconclusive => EXIT_INCONCLUSIVE,
            Self::Pass | Self::Skip => EXIT_PASS,
        }
    }

    /// Sidecar verdict bits for this outcome, in the order `(passed,
    /// skipped, inconclusive)`.
    ///
    /// At most one bit is set. `Fail` sets none, which is the
    /// [`crate::test_support::SidecarResult::is_fail`] "none set"
    /// encoding. Returning plain booleans lets the sidecar finalize
    /// record the verdict without [`crate::test_support::sidecar`]
    /// depending on this enum.
    pub(crate) fn sidecar_bits(self) -> (bool, bool, bool) {
        let (passed, skipped, inconclusive) = match self {
            Self::Fail => (false, false, false),
            Self::Pass => (true, false, false),
            Self::Skip => (false, true, false),
            Self::Inconclusive => (false, false, true),
        };
        (passed, skipped, inconclusive)
    }
}
1328
1329/// Classify a test result into the final [`Verdict`] — the same
1330/// classification [`result_to_exit_code`] performs, as a 4-state value
1331/// (it does not collapse `Skip` into `Pass` the way the exit code does)
1332/// and WITHOUT the operator-facing `eprintln` diagnostics.
1333///
1334/// Used to record the FINAL (post-`expect_err` / post-marker) outcome on
1335/// the sidecar so the footer, `stats` analysis, and `replay` reflect the
1336/// test's real pass/fail (matching nextest's exit code) rather than the
1337/// raw scenario verdict written mid-run.
1338///
1339/// MUST stay in lockstep with [`result_to_exit_code`]: the truth-table
1340/// test `final_outcome_projects_to_result_to_exit_code` asserts
1341/// `final_outcome(...).to_exit_code() == result_to_exit_code(...)` over a
1342/// matrix including the marker-carrying error arms, so the two cannot
1343/// drift. The arm order mirrors [`ok_to_exit_code`] / [`err_to_exit_code`]
1344/// first-match precedence exactly.
1345pub(crate) fn final_outcome(
1346    result: &Result<AssertResult>,
1347    expect_err: bool,
1348    allow_inconclusive: bool,
1349) -> Verdict {
1350    let no_skip = std::env::var_os(crate::KTSTR_NO_SKIP_MODE_ENV).is_some();
1351    match result {
1352        Ok(r) => {
1353            if r.is_skip() {
1354                return Verdict::Skip;
1355            }
1356            if expect_err {
1357                // expect_err on an Ok result is always a failure
1358                // (expected an error, got a non-error verdict) — both the
1359                // Pass and Inconclusive arms of ok_to_exit_code map here.
1360                return Verdict::Fail;
1361            }
1362            if r.is_inconclusive() {
1363                return if allow_inconclusive {
1364                    Verdict::Pass
1365                } else {
1366                    Verdict::Inconclusive
1367                };
1368            }
1369            Verdict::Pass
1370        }
1371        Err(e) => {
1372            match classify_host_error(e, no_skip) {
1373                HostClass::Skip { .. } => return Verdict::Skip,
1374                HostClass::Fail { .. } => return Verdict::Fail,
1375                HostClass::NotHostClass => {}
1376            }
1377            if e.downcast_ref::<crate::test_support::eval::PostVmAssertionFailure>()
1378                .is_some()
1379            {
1380                return Verdict::Fail;
1381            }
1382            if e.downcast_ref::<crate::test_support::eval::SchedulerBuildRefused>()
1383                .is_some()
1384            {
1385                return Verdict::Fail;
1386            }
1387            if e.downcast_ref::<crate::test_support::eval::SurvivesStormViolated>()
1388                .is_some()
1389            {
1390                // Lockstep with err_to_exit_code's SurvivesStormViolated arm
1391                // (same position: after SchedulerBuildRefused, before
1392                // ExpectAutoReproSatisfied / expect_err) so the persisted
1393                // sidecar verdict matches the exit code for a survival
1394                // violation — including the defense-in-depth bypass case
1395                // (marker + expect_err) the mutex normally forbids.
1396                return Verdict::Fail;
1397            }
1398            if e.downcast_ref::<crate::test_support::eval::ExpectAutoReproSatisfied>()
1399                .is_some()
1400            {
1401                return Verdict::Pass;
1402            }
1403            if expect_err {
1404                if e.downcast_ref::<crate::test_support::eval::ScxBpfErrorMatcherMismatch>()
1405                    .is_some()
1406                {
1407                    return Verdict::Fail;
1408                }
1409                return Verdict::Pass;
1410            }
1411            Verdict::Fail
1412        }
1413    }
1414}
1415
1416/// Whether a base test entry is "ignored" (skipped by default).
1417///
1418/// Tests whose names start with `demo_` are ignored -- they are
1419/// demonstration/benchmarking tests that require manual opt-in.
1420fn is_ignored(entry: &KtstrTestEntry) -> bool {
1421    entry.name.starts_with("demo_")
1422}
1423
1424/// Walk [`KTSTR_TESTS`] once per process and emit a stderr
1425/// `warning:` line for every duplicate `name` found.
1426///
1427/// Two entries with the same name would both match `find_test(name)`
1428/// (which returns the FIRST match), so the second registration is
1429/// silently shadowed — `cargo ktstr` would dispatch the first entry
1430/// and the second entry's body would never run, with no diagnostic
1431/// surfaced. The warning surfaces the collision so an operator can
1432/// rename one of the `#[ktstr_test]` functions; discovery itself
1433/// proceeds (find_test's first-wins behavior continues) so nextest's
1434/// `--list` output still lands in stdout. A panic here would abort
1435/// the whole listing — nextest would see no tests at all rather
1436/// than a partial set with a clear warning. The first-wins
1437/// shadowing remains a real bug, but the diagnostic is louder than
1438/// silence and the tradeoff (operator sees the warning AND a
1439/// usable test list) beats the alternative (operator sees a
1440/// panic backtrace and no test list).
1441///
1442/// `OnceLock<()>` gates the walk to fire EXACTLY ONCE per process:
1443/// every gauntlet variant resolves through `list_tests` (under
1444/// nextest's discovery and budget paths), so without the gate a
1445/// run with N variants would re-walk the slice N times and emit
1446/// the same warning N times. Each duplicate name surfaces exactly
1447/// once via the inner `seen`/`warned` HashSet pair so a
1448/// triple-collision (three entries sharing one name) does not
1449/// double-print the warning.
1450///
1451/// The pure detection logic lives in
1452/// [`warn_duplicate_test_names_inner`] so the duplicate-walker
1453/// is testable without process-wide global state. This wrapper
1454/// only owns the `OnceLock<()>` gate and the
1455/// `(KTSTR_TESTS, stderr)` plumbing.
1456fn warn_duplicate_test_names_once() {
1457    static CHECKED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
1458    CHECKED.get_or_init(|| {
1459        warn_duplicate_test_names_inner(KTSTR_TESTS.iter().map(|e| e.name), &mut std::io::stderr());
1460    });
1461}
1462
/// Pure walker behind [`warn_duplicate_test_names_once`]: consumes the
/// test-name iterator and writes one `warning:` line to `sink` for each
/// name that occurs more than once.
///
/// # Parameters
///
/// - `names`: the registered test names, in registration order. The
///   wrapper passes `KTSTR_TESTS.iter().map(|e| e.name)`.
/// - `sink`: where warning lines are written. The wrapper passes
///   `std::io::stderr()`; tests pass a `Vec<u8>`.
///
/// # Behavior
///
/// A warning is written when a name is seen for the second time. The
/// `warned` set guarantees each duplicated name is reported exactly
/// once, so a triple collision does NOT double-print. Warnings appear
/// in the order in which the second occurrences are encountered.
///
/// The names are streamed straight from the iterator; the `seen` set is
/// pre-sized from the iterator's `size_hint` lower bound rather than
/// from an intermediate `Vec`, so no throwaway copy of the name list is
/// allocated.
///
/// Write errors are deliberately ignored: this is a diagnostic channel
/// (the production sink is stderr, where an I/O error is unrecoverable,
/// and a `Vec<u8>` sink never errors), matching the `eprintln!`
/// semantics the wrapper had before this function was extracted.
///
/// Extracted from the `OnceLock`-gated wrapper so the detection logic
/// can be tested without process-wide global state.
fn warn_duplicate_test_names_inner<'a, W: std::io::Write>(
    names: impl IntoIterator<Item = &'a str>,
    sink: &mut W,
) {
    use std::collections::HashSet;

    let names = names.into_iter();
    let mut seen: HashSet<&'a str> = HashSet::with_capacity(names.size_hint().0);
    let mut warned: HashSet<&'a str> = HashSet::new();
    for name in names {
        // First sighting of this name: nothing to report yet.
        if seen.insert(name) {
            continue;
        }
        // Already reported on an earlier repeat: stay quiet.
        if !warned.insert(name) {
            continue;
        }
        let _ = writeln!(
            sink,
            "warning: ktstr_test: duplicate test name {name:?} registered in KTSTR_TESTS — \
             two `#[ktstr_test]` entries share this name; the SECOND entry is \
             silently shadowed (find_test returns the first registration). \
             rename one of the functions to disambiguate.",
        );
    }
}
1503
1504/// Collect test names for nextest discovery (--list --format terse).
1505///
1506/// Nextest calls the binary twice:
1507/// - Without `--ignored`: prints ALL tests (ignored and non-ignored).
1508/// - With `--ignored`: prints ONLY ignored tests.
1509///
1510/// Gauntlet variants are always ignored. Base tests are ignored when
1511/// their name starts with `demo_`.
1512///
1513/// When `KTSTR_BUDGET_SECS` is set, applies greedy coverage maximization
1514/// to select the subset of tests that maximizes feature coverage within
1515/// the time budget. Only selected tests are printed.
1516///
1517/// Calls [`warn_duplicate_test_names_once`] on the first invocation per
1518/// process so duplicate registrations surface a stderr `warning:`
1519/// line BEFORE any test name is printed (discovery itself proceeds
1520/// — find_test's first-wins behavior continues, but the operator
1521/// sees which name collided). Subsequent invocations are no-ops via
1522/// the inner `OnceLock` gate.
1523fn list_tests(ignored_only: bool) {
1524    warn_duplicate_test_names_once();
1525    let raw = std::env::var(crate::KTSTR_BUDGET_SECS_ENV).ok();
1526    let budget_secs: Option<f64> = raw.as_deref().and_then(|s| match s.parse::<f64>() {
1527        Ok(v) if v > 0.0 => Some(v),
1528        Ok(v) => {
1529            eprintln!("ktstr_test: KTSTR_BUDGET_SECS={v}: must be positive, ignoring");
1530            None
1531        }
1532        Err(e) => {
1533            eprintln!("ktstr_test: KTSTR_BUDGET_SECS={s:?}: {e}, ignoring");
1534            None
1535        }
1536    });
1537
1538    if let Some(budget) = budget_secs {
1539        list_tests_budget(ignored_only, budget);
1540    } else {
1541        list_tests_all(ignored_only);
1542    }
1543}
1544
1545/// Iterate topology presets that both fit the host capacity and
1546/// match the entry's `TopologyConstraints`. Shared between the
1547/// eager ("print every name") and budgeted ("push a candidate")
1548/// listers in `list_tests_*`.
1549fn for_each_gauntlet_variant<F>(
1550    entry: &KtstrTestEntry,
1551    presets: &[crate::gauntlet::TopoPreset],
1552    host_cpus: u32,
1553    host_llcs: u32,
1554    host_max_cpus_per_llc: u32,
1555    mut visit: F,
1556) where
1557    F: FnMut(&crate::gauntlet::TopoPreset),
1558{
1559    let no_perf_mode = super::runtime::no_perf_mode_for_entry(entry);
1560    for preset in presets {
1561        // No-perf-mode tests run KVM-emulated topology — guest sees the
1562        // declared NUMA / LLC / per-LLC layout regardless of host
1563        // hardware — so the host-side LLC count and per-LLC CPU width
1564        // do not constrain preset eligibility. Only the total-CPU
1565        // budget survives.
1566        let accepted = if no_perf_mode {
1567            entry
1568                .constraints
1569                .accepts_no_perf_mode(&preset.topology, host_cpus)
1570        } else {
1571            entry.constraints.accepts(
1572                &preset.topology,
1573                host_cpus,
1574                host_llcs,
1575                host_max_cpus_per_llc,
1576            )
1577        };
1578        if !accepted {
1579            continue;
1580        }
1581        visit(preset);
1582    }
1583}
1584
1585/// List all tests without budget filtering.
1586///
1587/// When `KTSTR_KERNEL_LIST` carries 2 or more entries, every test
1588/// name carries an extra `/{sanitized_kernel_label}` suffix so each
1589/// (test × kernel) pair becomes a distinct nextest test case;
1590/// nextest's parallelism, retries, and `-E` filtering all apply
1591/// natively. Single-kernel mode (0 or 1 entries) emits the
1592/// `gauntlet/{name}/{preset}` shape with no kernel suffix.
1593///
1594/// `KTSTR_CARGO_TEST_MODE=1` skips gauntlet variant emission and
1595/// the multi-kernel suffix path: each test gets exactly one
1596/// `ktstr/{name}: test` line. Bare `cargo test` doesn't have
1597/// access to the cargo-ktstr resolver that produces
1598/// `KTSTR_KERNEL_LIST`, so the multi-kernel branch can't apply
1599/// even if it were enabled — pin both behaviors explicitly so
1600/// the listing matches what the dispatch path will actually run.
1601fn list_tests_all(ignored_only: bool) {
1602    let cargo_test_mode = crate::cargo_test_mode::cargo_test_mode_active();
1603    let presets = crate::gauntlet::gauntlet_presets();
1604    let has_vmlinux = resolve_test_kernel()
1605        .ok()
1606        .and_then(|k| crate::vmm::find_vmlinux(&k))
1607        .is_some();
1608    let (host_cpus, host_llcs, host_max_cpus_per_llc) = super::host_capacity();
1609
1610    let kernel_list = read_kernel_list();
1611    let multi_kernel = kernel_list.len() > 1 && !cargo_test_mode;
1612    // Single-kernel mode (no list, or list has exactly one entry)
1613    // emits one variant per (test × preset) tuple with no kernel
1614    // suffix. Multi-kernel mode iterates every kernel as an outer
1615    // loop and appends `/{sanitized}` per variant. The empty-suffix
1616    // sentinel below is what the single-kernel branch passes to keep
1617    // the print path uniform.
1618    let kernel_suffixes: Vec<&str> = if multi_kernel {
1619        kernel_list.iter().map(|k| k.sanitized.as_str()).collect()
1620    } else {
1621        vec![""]
1622    };
1623
1624    for entry in KTSTR_TESTS.iter() {
1625        // bpf_map_write tests require vmlinux to resolve BPF map
1626        // addresses. Don't list them when vmlinux is unavailable —
1627        // they cannot run and would produce false PASS results.
1628        if !entry.bpf_map_write.is_empty() && !has_vmlinux {
1629            continue;
1630        }
1631
1632        if !ignored_only || is_ignored(entry) {
1633            if entry.host_only {
1634                println!("ktstr/{}: test", entry.name);
1635            } else {
1636                for suffix in &kernel_suffixes {
1637                    if suffix.is_empty() {
1638                        println!("ktstr/{}: test", entry.name);
1639                    } else {
1640                        println!("ktstr/{}/{suffix}: test", entry.name);
1641                    }
1642                }
1643            }
1644        }
1645
1646        // Host-only tests run on the host without a VM -- gauntlet
1647        // topology variants are meaningless.
1648        if entry.host_only {
1649            continue;
1650        }
1651
1652        // KTSTR_CARGO_TEST_MODE: skip gauntlet expansion. The
1653        // operator picked the bare-`cargo test` path; emit only
1654        // the base name so each `#[ktstr_test]` runs once with its
1655        // declared topology.
1656        if cargo_test_mode {
1657            continue;
1658        }
1659
1660        // Gauntlet variants are always ignored — users opt in with
1661        // --run-ignored. Presets that exceed the host's CPU count or
1662        // LLC count are filtered from the listing entirely.
1663        for_each_gauntlet_variant(
1664            entry,
1665            &presets,
1666            host_cpus,
1667            host_llcs,
1668            host_max_cpus_per_llc,
1669            |preset| {
1670                for suffix in &kernel_suffixes {
1671                    if suffix.is_empty() {
1672                        println!("gauntlet/{}/{}: test", entry.name, preset.name);
1673                    } else {
1674                        println!("gauntlet/{}/{}/{suffix}: test", entry.name, preset.name,);
1675                    }
1676                }
1677            },
1678        );
1679    }
1680}
1681
1682/// True iff the given operator-resolved kernel `entry` matches one
1683/// of the `declared` kernel specs from a scheduler's
1684/// `declare_scheduler!` `kernels = [...]` declaration. Empty
1685/// `declared` accepts every entry (no per-scheduler filter).
1686///
1687/// Match semantics per spec variant (via [`crate::kernel_path::KernelId::parse`]):
1688/// - [`crate::kernel_path::KernelId::Version`]: raw-label string equality OR sanitized-label match
1689///   ([`sanitize_kernel_label`] of the spec string equals the entry's
1690///   sanitized label). Direct match catches the common case where
1691///   the dispatcher resolved `--kernel 6.14.2` and the scheduler
1692///   declared `kernels = ["6.14.2"]`.
1693/// - [`crate::kernel_path::KernelId::Range`]: range-membership check on the entry's raw
1694///   label via [`crate::kernel_path::decompose_version_for_compare`].
1695///   Lets schedulers declaring `kernels = ["6.14..6.16"]` match
1696///   any operator-supplied kernel whose version falls in
1697///   `[6.14, 6.16]` inclusive.
1698/// - [`crate::kernel_path::KernelId::Path`] / [`crate::kernel_path::KernelId::CacheKey`] / [`crate::kernel_path::KernelId::Git`]:
1699///   sanitized-label equality — the producer-side encoder
1700///   (`src/bin/cargo_ktstr/kernel/wire_format.rs`) emits a deterministic
1701///   label per variant (`path_…`, `git_owner_repo_kind_ref`, version
1702///   prefix from cache key), so identical specs on both sides
1703///   produce identical sanitized labels.
1704///
1705/// [`KernelId`]: crate::kernel_path::KernelId
1706fn sched_kernel_filter_accepts(declared: &[&'static str], entry: &KernelEntry) -> bool {
1707    if declared.is_empty() {
1708        return true;
1709    }
1710    declared.iter().any(|spec| entry_matches_spec(entry, spec))
1711}
1712
1713/// Single-spec match helper for [`sched_kernel_filter_accepts`].
1714/// Parses `spec` via [`crate::kernel_path::KernelId::parse`] and
1715/// dispatches on the variant. Pure logic — no network, no FS.
1716fn entry_matches_spec(entry: &KernelEntry, spec: &str) -> bool {
1717    use crate::kernel_path::{KernelId, decompose_version_for_compare};
1718    match KernelId::parse(spec) {
1719        KernelId::Version(spec_ver) => {
1720            entry.label == spec_ver || entry.sanitized.as_str() == sanitize_kernel_label(&spec_ver)
1721        }
1722        KernelId::Range { start, end, .. } => {
1723            let Some(entry_t) = decompose_version_for_compare(&entry.label) else {
1724                return false;
1725            };
1726            let Some(start_t) = decompose_version_for_compare(&start) else {
1727                return false;
1728            };
1729            let Some(end_t) = decompose_version_for_compare(&end) else {
1730                return false;
1731            };
1732            entry_t >= start_t && entry_t <= end_t
1733        }
1734        KernelId::CacheKey(_) | KernelId::Path(_) | KernelId::Git { .. } => {
1735            entry.sanitized.as_str() == sanitize_kernel_label(spec)
1736        }
1737    }
1738}
1739
/// Build the `KTSTR_KERNEL_LIST is empty` diagnostic that
/// [`run_verifier_cell`] emits when a verifier cell reaches the cell
/// handler but there is no kernel list to look its label up in.
///
/// `full_name` is the cell name and is embedded verbatim. The message
/// is produced by a standalone function, not an inline `eprintln!`, so
/// unit tests can pin the exact wording without spawning a process.
fn format_empty_kernel_list_error(full_name: &str) -> String {
    [
        "ktstr verifier: cell ",
        full_name,
        ": KTSTR_KERNEL_LIST is empty. ",
        "Direct `--exact verifier/...` invocation outside `cargo ktstr verifier` ",
        "is not supported — the dispatcher owns kernel-set resolution. Run ",
        "`cargo ktstr verifier [--kernel SPEC]` instead.",
    ]
    .concat()
}
1753
/// Build the "kernel label not in KTSTR_KERNEL_LIST" diagnostic.
///
/// - `full_name`: the verifier cell name, embedded verbatim.
/// - `kernel_label`: the label that was not found; rendered with
///   `Debug` formatting, so it appears quoted.
/// - `sched_name`: the scheduler whose `declare_scheduler!` entry the
///   message points at.
/// - `present`: the sanitized labels actually in the list, in their
///   KTSTR_KERNEL_LIST order; rendered comma-separated inside `[...]`.
///
/// Kept as a standalone function for the same reason as
/// [`format_empty_kernel_list_error`]: the wording can be pinned in
/// unit tests without spawning a process.
fn format_unknown_kernel_label_error(
    full_name: &str,
    kernel_label: &str,
    sched_name: &str,
    present: &[&str],
) -> String {
    let present_labels = present.join(", ");
    format!(
        "ktstr verifier: cell {full_name}: kernel label {kernel_label:?} \
         not in KTSTR_KERNEL_LIST. Present labels: [{present_labels}]. \
         Either add --kernel <SPEC> to the dispatcher invocation so it \
         resolves into this label, or remove the matching entry from \
         declare_scheduler!(... kernels = [...]) for {sched_name}.",
    )
}
1773
1774/// The set of workspace PACKAGE names, parsed once from the workspace
1775/// `Cargo.toml` baked in at compile time. [`list_verifier_cells_all`]
1776/// uses it to skip `declare_scheduler!` decls whose `Discover(pkg)` is
1777/// not a real workspace package — the macro-expansion FIXTURES in
1778/// tests/declare_scheduler.rs register into `KTSTR_SCHEDULERS` but have
1779/// no buildable package, so their cells must not be emitted.
1780///
1781/// `CARGO_MANIFEST_DIR` is the ktstr crate dir, which IS the workspace
1782/// root in this repo, so its `Cargo.toml` carries `[workspace] members`.
1783fn workspace_packages() -> &'static std::collections::HashSet<String> {
1784    use std::sync::OnceLock;
1785    static PKGS: OnceLock<std::collections::HashSet<String>> = OnceLock::new();
1786    PKGS.get_or_init(|| {
1787        const ROOT_TOML: &str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.toml"));
1788        parse_workspace_member_packages(ROOT_TOML, env!("CARGO_PKG_NAME"))
1789    })
1790}
1791
/// Pure parse of `[workspace] members = [ ... ]` from a `Cargo.toml`
/// string into the set of package names. Pure so it is unit-testable.
///
/// # Parameters
///
/// - `cargo_toml`: full text of the manifest.
/// - `root_pkg`: package name to record for the `.` member (the
///   workspace root).
///
/// # Returns
///
/// One name per member. `.` maps to `root_pkg`; any other member
/// contributes its last path component (this workspace's convention:
/// member dir `scx-ktstr` = package `scx-ktstr`). The set is empty when
/// the manifest has no `[workspace]` table, no `members` key in it, or
/// an unterminated array.
///
/// # Parsing rules
///
/// This is a deliberately small line-oriented scan, not a TOML parser:
///
/// - Only a key spelled exactly `members` inside the `[workspace]`
///   table is accepted. `default-members`, or a `members` key in any
///   other table (including `[workspace.package]` and friends), is not
///   mistaken for the member list.
/// - `#` comments are stripped before matching, so comments inside the
///   array do not leak into member names.
/// - Members may be double- or single-quoted.
/// - Trailing `/` on a member path is ignored (`tools/helper/` yields
///   `helper`; `./` is the workspace root).
/// - Glob members are NOT expanded; `crates/*` yields the literal `*`.
fn parse_workspace_member_packages(
    cargo_toml: &str,
    root_pkg: &str,
) -> std::collections::HashSet<String> {
    /// Cut a trailing `# …` comment off one line, ignoring any `#` that
    /// sits inside a quoted string.
    fn strip_comment(line: &str) -> &str {
        let mut open_quote: Option<char> = None;
        for (idx, ch) in line.char_indices() {
            match open_quote {
                Some(quote) => {
                    if ch == quote {
                        open_quote = None;
                    }
                }
                None => match ch {
                    '"' | '\'' => open_quote = Some(ch),
                    '#' => return &line[..idx],
                    _ => {}
                },
            }
        }
        line
    }

    let mut out = std::collections::HashSet::new();

    // Step 1: find `members = ...` inside `[workspace]` and gather the
    // comment-free text of its value up to the line holding the closing
    // bracket.
    let mut in_workspace = false;
    let mut array_text: Option<String> = None;
    for raw_line in cargo_toml.lines() {
        let line = strip_comment(raw_line).trim();

        if let Some(buf) = array_text.as_mut() {
            // Continuation line of a multi-line members array.
            buf.push_str(line);
            buf.push('\n');
            if line.contains(']') {
                break;
            }
            continue;
        }

        // A line starting with '[' is a table header; it decides
        // whether the keys that follow belong to `[workspace]`.
        if line.starts_with('[') {
            in_workspace = line == "[workspace]";
            continue;
        }
        if !in_workspace {
            continue;
        }

        let Some((key, value)) = line.split_once('=') else {
            continue;
        };
        // Exact key match: rejects `default-members` and the like.
        if !matches!(key.trim(), "members" | "\"members\"" | "'members'") {
            continue;
        }
        let closed_on_same_line = value.contains(']');
        array_text = Some(format!("{value}\n"));
        if closed_on_same_line {
            break;
        }
    }

    // Step 2: split the bracketed list into member paths.
    let Some(text) = array_text else {
        return out;
    };
    let Some(open) = text.find('[') else {
        return out;
    };
    let Some(close_rel) = text[open..].find(']') else {
        return out;
    };
    for tok in text[open + 1..open + close_rel].split(',') {
        let unquoted = tok.trim().trim_matches(|c| c == '"' || c == '\'').trim();
        if unquoted.is_empty() {
            continue;
        }
        // `./` is still the root and `dir/` is still `dir`; an all-slash
        // member has no usable component and is skipped.
        let name = unquoted.trim_end_matches('/');
        if name.is_empty() {
            continue;
        }
        if name == "." {
            out.insert(root_pkg.to_string());
        } else {
            out.insert(name.rsplit('/').next().unwrap_or(name).to_string());
        }
    }
    out
}
1831
1832/// Emit `verifier/<sched>/<kernel>/<preset>: test` lines — one per
1833/// (declared scheduler × kernel-list entry × accepted topology preset)
1834/// cell. Mirrors the gauntlet emission pattern in [`list_tests_all`] but
1835/// walks [`super::KTSTR_SCHEDULERS`] instead of [`KTSTR_TESTS`]. The
1836/// verifier sweeps each declared scheduler ACROSS topologies, because
1837/// attach/liveness is topology-dependent (a scheduler can attach on one
1838/// topology and wedge on another). Cells are paired with the
1839/// [`run_verifier_cell`] handler registered in
1840/// [`ktstr_test_early_dispatch`]'s `--exact verifier/...` branch.
1841///
1842/// The matrix dimension is `KTSTR_KERNEL_LIST` (always populated by the
1843/// `cargo ktstr verifier` dispatcher — even with a single
1844/// auto-discovered kernel, the dispatcher synthesizes a one-entry list
1845/// with a derived label). Each scheduler's `declare_scheduler!`
1846/// `kernels = [...]` declaration acts as a per-scheduler filter on the
1847/// matrix — `Version` / `Range` declarations match entries by
1848/// raw-label equality / range membership; `Path` / `CacheKey` / `Git`
1849/// declarations match by sanitized-label equality. An empty
1850/// `kernels = []` declaration accepts every entry in the list.
1851///
1852/// The topology dimension is [`crate::gauntlet::gauntlet_presets`], gated
1853/// per scheduler: the verifier VM always runs no_perf_mode, so a preset
1854/// is emitted only when the scheduler's constraints accept it under
1855/// [`super::TopologyConstraints::accepts_no_perf_mode`] (declared scope +
1856/// host CPU budget). A scheduler that accepts no preset emits no cell.
1857///
1858/// Schedulers declared with [`super::SchedulerSpec::Eevdf`] or
1859/// [`super::SchedulerSpec::KernelBuiltin`] are skipped at emission time
1860/// because neither has a userspace binary to load BPF programs from.
1861///
1862/// When [`crate::KTSTR_VERIFIER_SCHEDULER_ENV`] is set (the
1863/// `cargo ktstr verifier --scheduler <NAME>` filter), every declared
1864/// scheduler whose `name` does not equal the value is skipped, so the
1865/// sweep covers one scheduler across topologies instead of the full
1866/// declared-scheduler matrix. A value matching no declared scheduler
1867/// (or a non-BPF one) emits no cell; the dispatcher reports the empty
1868/// result set rather than silently sweeping nothing.
1869///
1870/// Cell names with `/` in `sched.name` or `preset.name` would corrupt
1871/// the splitn-based parse in [`run_verifier_cell`]; the emission elides
1872/// such cells with a stderr warning. When `KTSTR_KERNEL_LIST` is absent
1873/// (direct binary invocation outside the dispatcher), no cells emit.
1874fn list_verifier_cells_all() {
1875    use super::SchedulerSpec;
1876    let kernel_list = read_kernel_list();
1877    if kernel_list.is_empty() {
1878        return;
1879    }
1880    let presets = crate::gauntlet::gauntlet_presets();
1881    let (host_cpus, _host_llcs, _host_max_cpus_per_llc) = super::host_capacity();
1882
1883    // `cargo ktstr verifier --scheduler <NAME>` filter (via
1884    // KTSTR_VERIFIER_SCHEDULER): when set, sweep only the named declared
1885    // scheduler across topologies instead of the full declared-scheduler
1886    // matrix. Read once; unset (or non-unicode) leaves the sweep full.
1887    let scheduler_filter = std::env::var(crate::KTSTR_VERIFIER_SCHEDULER_ENV).ok();
1888
1889    for sched in super::KTSTR_SCHEDULERS.iter() {
1890        if let Some(want) = &scheduler_filter
1891            && sched.name != want.as_str()
1892        {
1893            continue;
1894        }
1895        if matches!(
1896            sched.binary,
1897            SchedulerSpec::Eevdf | SchedulerSpec::KernelBuiltin { .. }
1898        ) {
1899            continue;
1900        }
1901        if sched.name.contains('/') {
1902            eprintln!(
1903                "ktstr verifier: scheduler name {:?} contains '/' — skipping cell emission (would corrupt verifier/<sched>/<kernel>/<preset> parse)",
1904                sched.name,
1905            );
1906            continue;
1907        }
1908        // Skip declarations whose binary is not a real, buildable
1909        // scheduler. The macro-expansion FIXTURES in
1910        // tests/declare_scheduler.rs (`binary = "scx-full"`, `"scx-ee"`,
1911        // …) register into KTSTR_SCHEDULERS but expand to `Discover` of a
1912        // package that is NOT a workspace member; emitting their cells
1913        // would make `cargo ktstr verifier --run-ignored` fail on
1914        // `cargo build -p <fixture>` for a nonexistent package. A
1915        // `Discover` of a real workspace member (scx-ktstr) and a `Path`
1916        // that exists still emit. This is the emission-time counterpart to
1917        // the resolve arms in `run_verifier_cell`.
1918        match sched.binary {
1919            SchedulerSpec::Discover(pkg) if !workspace_packages().contains(pkg) => {
1920                continue;
1921            }
1922            SchedulerSpec::Path(p) if !std::path::Path::new(p).exists() => {
1923                continue;
1924            }
1925            _ => {}
1926        }
1927        for kernel_entry in &kernel_list {
1928            if !sched_kernel_filter_accepts(sched.kernels, kernel_entry) {
1929                continue;
1930            }
1931            // One cell per (scheduler, kernel, topology preset). The
1932            // verifier sweep runs each scheduler's "does it verify,
1933            // attach, AND dispatch" check ACROSS topologies, because
1934            // both vary with topology: attach/liveness (a scheduler can
1935            // attach on one topology and wedge on another — odd LLC
1936            // counts, large CPU counts, SMT) and verified_insns (a
1937            // scheduler that bakes topology-derived config into .rodata
1938            // hands the verifier different known constants, so it
1939            // processes a different instruction count per topology). The
1940            // verifier VM always
1941            // runs no_perf_mode, so preset eligibility uses
1942            // accepts_no_perf_mode: the KVM-emulated topology is gated by
1943            // the scheduler's declared scope + the host CPU budget.
1944            for preset in presets.iter() {
1945                if preset.name.contains('/') {
1946                    eprintln!(
1947                        "ktstr verifier: preset name {:?} contains '/' — skipping cell (would corrupt parse)",
1948                        preset.name,
1949                    );
1950                    continue;
1951                }
1952                if !sched
1953                    .constraints
1954                    .accepts_no_perf_mode(&preset.topology, host_cpus)
1955                {
1956                    continue;
1957                }
1958                println!(
1959                    "verifier/{}/{}/{}: test",
1960                    sched.name, kernel_entry.sanitized, preset.name,
1961                );
1962            }
1963        }
1964    }
1965}
1966
1967/// Parse `verifier/<sched_name>/<kernel_label>/<preset_name>`, look up
1968/// the declared scheduler in [`super::KTSTR_SCHEDULERS`] + the gauntlet
1969/// preset in [`crate::gauntlet::gauntlet_presets`] + the kernel in
1970/// [`KTSTR_KERNEL_LIST_ENV`](crate::KTSTR_KERNEL_LIST_ENV), resolve the
1971/// scheduler binary path per [`super::SchedulerSpec`], boot the verifier
1972/// VM on that topology via [`crate::verifier::collect_verifier_output`],
1973/// and print the rendered output. Returns 0 only when the scheduler
1974/// verified (BPF loaded), turned on (the guest attach gate reached
1975/// sched_ext `enabled`, surfaced via [`crate::verifier::AttachOutcome`]),
1976/// AND dispatched the injected workload (a `WorkloadDispatched` frame) on
1977/// this topology — the three gates `VerifierVmResult::cell_verdict`
1978/// enforces; returns 1 on a verify / attach / dispatch failure, a
1979/// post-attach teardown hang (`timed_out`), or a malformed cell name.
1980///
1981/// The per-cell kernel directory is resolved by sanitized-label
1982/// lookup in `KTSTR_KERNEL_LIST` — the
1983/// `cargo ktstr verifier` dispatcher always populates the list,
1984/// even with no `--kernel` flag (it synthesizes a single auto-
1985/// discovered entry). There is no single-kernel-mode fallback.
1986/// An unrecognised label or an absent list both surface as an
1987/// exit-1 diagnostic naming the present labels and pointing at
1988/// the dispatcher.
1989///
1990/// Eevdf + KernelBuiltin scheduler variants are filtered out at
1991/// emission time in [`list_verifier_cells_all`], so nextest
1992/// dispatch never reaches the SKIP arms in this function. The
1993/// SKIP arms remain as defense-in-depth for direct
1994/// `--exact verifier/<eevdf>/...` invocation outside nextest
1995/// (the only path that bypasses the emission-time filter); in
1996/// that case they emit a `SKIP` banner + exit 0.
1997fn run_verifier_cell_inner(
1998    full_name: &str,
1999    out_stats: &mut Vec<crate::verifier::ProgStats>,
2000) -> i32 {
2001    use super::SchedulerSpec;
2002
2003    let rest = match full_name.strip_prefix("verifier/") {
2004        Some(r) => r,
2005        None => {
2006            eprintln!("ktstr verifier: missing 'verifier/' prefix in {full_name:?}");
2007            return 1;
2008        }
2009    };
2010    let parts: Vec<&str> = rest.splitn(3, '/').collect();
2011    if parts.len() != 3 {
2012        eprintln!(
2013            "ktstr verifier: malformed cell name {full_name:?}; expected verifier/<sched>/<kernel>/<preset>",
2014        );
2015        return 1;
2016    }
2017    let (sched_name, kernel_label, preset_name) = (parts[0], parts[1], parts[2]);
2018
2019    // Emit the cell banner BEFORE every SKIP / FAIL branch so the
2020    // operator always sees which (scheduler, kernel, topology) tuple
2021    // produced the result. Without it an early-exit SKIP / FAIL would
2022    // surface as a bare error line nextest tags with the full cell name
2023    // but no per-axis context.
2024    println!("\n=== {sched_name} | kernel {kernel_label} | topology {preset_name} ===");
2025
2026    // Fail-fast on missing KVM with the canonical actionable error
2027    // (kvm group / kvm-ok hint). Without this preflight the operator
2028    // gets a deep error inside VM bring-up.
2029    if let Err(e) = crate::cli::check_kvm() {
2030        eprintln!("ktstr verifier: cell {full_name}: {e:#}");
2031        return 1;
2032    }
2033
2034    let Some(sched) = super::KTSTR_SCHEDULERS
2035        .iter()
2036        .find(|s| s.name == sched_name)
2037    else {
2038        eprintln!("ktstr verifier: no declared scheduler {sched_name:?} (cell {full_name:?})",);
2039        return 1;
2040    };
2041
2042    // Resolve the cell's topology preset by its <preset> name segment.
2043    let preset_list = crate::gauntlet::gauntlet_presets();
2044    let Some(preset) = preset_list.iter().find(|p| p.name == preset_name) else {
2045        eprintln!("ktstr verifier: no gauntlet preset {preset_name:?} (cell {full_name:?})",);
2046        return 1;
2047    };
2048
2049    // Resolve the per-cell kernel directory by looking the cell's
2050    // sanitized label up in `KTSTR_KERNEL_LIST`. The
2051    // `cargo ktstr verifier` dispatcher always populates the list —
2052    // even with no `--kernel` flag it synthesizes a single auto-
2053    // discovered entry — so the lookup is the single source of
2054    // truth and there is no single-kernel-mode fallback that would
2055    // silently run a cell against an unrelated kernel.
2056    //
2057    // An empty list reaching this function means the test binary was
2058    // invoked outside the dispatcher (direct `--exact verifier/...`
2059    // under a hand-spawned nextest, for instance). Error with an
2060    // actionable message rather than fall through to auto-discovery.
2061    let kernel_list = read_kernel_list();
2062    let Some(kernel_entry) = kernel_list
2063        .iter()
2064        .find(|k| k.sanitized.as_str() == kernel_label)
2065    else {
2066        if kernel_list.is_empty() {
2067            eprintln!("{}", format_empty_kernel_list_error(full_name));
2068        } else {
2069            let present: Vec<&str> = kernel_list.iter().map(|k| k.sanitized.as_str()).collect();
2070            eprintln!(
2071                "{}",
2072                format_unknown_kernel_label_error(full_name, kernel_label, sched_name, &present,),
2073            );
2074        }
2075        return 1;
2076    };
2077
2078    let sched_bin: std::path::PathBuf = match sched.binary {
2079        SchedulerSpec::Discover(pkg) => match crate::build_and_find_binary(pkg) {
2080            Ok(p) => p,
2081            Err(e) => {
2082                eprintln!("ktstr verifier: build scheduler {pkg:?}: {e:#}");
2083                return 1;
2084            }
2085        },
2086        SchedulerSpec::Path(p) => {
2087            let path = std::path::PathBuf::from(p);
2088            if !path.exists() {
2089                eprintln!("ktstr verifier: scheduler binary not found: {p}");
2090                return 1;
2091            }
2092            path
2093        }
2094        // Eevdf + KernelBuiltin are filtered at list time in
2095        // list_verifier_cells_all, so nextest dispatch never reaches
2096        // these arms. The SKIP arms remain as defense-in-depth for
2097        // direct `--exact verifier/<eevdf>/...` invocation outside
2098        // nextest.
2099        SchedulerSpec::Eevdf => {
2100            println!(
2101                "ktstr verifier: SKIP cell {full_name} (Eevdf has no userspace binary to verify)",
2102            );
2103            return 0;
2104        }
2105        SchedulerSpec::KernelBuiltin { .. } => {
2106            println!(
2107                "ktstr verifier: SKIP cell {full_name} (KernelBuiltin has no userspace binary to verify)",
2108            );
2109            return 0;
2110        }
2111    };
2112
2113    let ktstr_bin = match std::env::current_exe() {
2114        Ok(p) => p,
2115        Err(e) => {
2116            eprintln!(
2117                "ktstr verifier: locate ktstr binary via current_exe() (required so the \
2118                 verifier VM can boot the same test binary as /init for guest-side dispatch): {e}",
2119            );
2120            return 1;
2121        }
2122    };
2123
2124    // Resolve the kernel-build DIR to the actual bootable image file.
2125    // `collect_verifier_output` -> `KtstrVm::builder().kernel()` loads the
2126    // path verbatim (build() does NOT extract a source tree), so passing
2127    // the raw dir makes the VMM loader read a directory as a bzImage and
2128    // fail with "Unable to read bzImage header". `find_image_in_dir`
2129    // handles both the build-tree (`arch/*/boot/bzImage`) and cache
2130    // (`<dir>/bzImage`) layouts — the same resolution the eval path uses.
2131    let kernel_path = if kernel_entry.kernel_dir.is_file() {
2132        kernel_entry.kernel_dir.clone()
2133    } else {
2134        match crate::kernel_path::find_image_in_dir(&kernel_entry.kernel_dir) {
2135            Some(img) => img,
2136            None => {
2137                eprintln!(
2138                    "ktstr verifier: cell {full_name}: no kernel image \
2139                     (arch/*/boot/bzImage or a cached bzImage) under {} — \
2140                     build the kernel first",
2141                    kernel_entry.kernel_dir.display(),
2142                );
2143                return 1;
2144            }
2145        }
2146    };
2147    let topology = super::TopologyJson::from(preset.topology);
2148    let sched_args: Vec<String> = sched.sched_args.iter().map(|s| s.to_string()).collect();
2149
2150    // Raw mode is opt-in via the dispatcher's --raw flag, plumbed
2151    // through KTSTR_VERIFIER_RAW_ENV. Presence (any value, including
2152    // empty) enables raw rendering — matches the "set to any value"
2153    // semantics documented on the const and the dispatcher's
2154    // `cmd.env(KTSTR_VERIFIER_RAW_ENV, "1")` setter.
2155    let raw = std::env::var_os(crate::KTSTR_VERIFIER_RAW_ENV).is_some();
2156
2157    match crate::verifier::collect_verifier_output(
2158        &sched_bin,
2159        &ktstr_bin,
2160        &kernel_path,
2161        &sched_args,
2162        topology,
2163    ) {
2164        Ok(result) => {
2165            let output = crate::verifier::format_verifier_output("verifier", &result, raw);
2166            print!("{output}");
2167            // PASS requires verify + attach (sched_ext `enabled`) +
2168            // dispatch (the injected workload made progress).
2169            // `cell_verdict` names the first failing gate (timed_out →
2170            // attach → dispatch, root cause first) and never keys on the
2171            // guest exit code, which is 1 even on the verifier success
2172            // path (no #[ktstr_test] body to dispatch).
2173            let code = match result.cell_verdict() {
2174                Ok(()) => 0,
2175                Err(reason) => {
2176                    eprintln!("ktstr verifier: cell {full_name} FAILED: {reason}");
2177                    1
2178                }
2179            };
2180            // Hand the per-program verified_insns out to the record writer
2181            // so the dispatcher can render the instruction-count tables.
2182            // Only this arm has stats; every earlier return (skip, kernel
2183            // resolution error, build failure) leaves out_stats empty.
2184            *out_stats = result.stats;
2185            code
2186        }
2187        Err(e) => {
2188            eprintln!("ktstr verifier: cell {full_name} FAILED: {e:#}");
2189            1
2190        }
2191    }
2192}
2193
2194/// Run a verifier cell and, when the `cargo ktstr verifier` dispatcher
2195/// set [`crate::KTSTR_VERIFIER_RESULT_DIR_ENV`], record its PASS/FAIL
2196/// outcome there so the dispatcher can render the run-summary table after
2197/// nextest returns. Best-effort + env-gated: a direct
2198/// `--exact verifier/...` invocation (env unset) behaves exactly as
2199/// [`run_verifier_cell_inner`], and a record-write failure never changes
2200/// the cell's exit code. A nextest RETRY re-runs this wrapper and
2201/// overwrites the cell's own record (deterministic filename), so the
2202/// final attempt's outcome is the one that lands in the table.
2203fn run_verifier_cell(full_name: &str) -> i32 {
2204    let mut stats = Vec::new();
2205    let code = run_verifier_cell_inner(full_name, &mut stats);
2206    if let Some(dir) = std::env::var_os(crate::KTSTR_VERIFIER_RESULT_DIR_ENV) {
2207        crate::verifier::write_cell_record(
2208            std::path::Path::new(&dir),
2209            full_name,
2210            code == 0,
2211            &stats,
2212        );
2213    }
2214    code
2215}
2216
2217/// List tests with budget-based coverage maximization.
2218///
2219/// Collects all eligible tests as candidates, runs greedy selection,
2220/// and prints only the selected subset. Multi-kernel mode adds the
2221/// kernel suffix as a feature dimension so the budget selector
2222/// picks per-kernel coverage; single-kernel mode is unchanged.
2223///
2224/// `KTSTR_CARGO_TEST_MODE=1` is treated identically to
2225/// `list_tests_all`: the budget pipeline runs only over base test
2226/// candidates (no gauntlet-variant candidates, no multi-kernel
2227/// fan-out). The greedy selector still applies — a low budget
2228/// can still trim the base list — but the candidate set is the
2229/// same set that the dispatch path would actually run.
2230fn list_tests_budget(ignored_only: bool, budget_secs: f64) {
2231    use crate::budget::{TestCandidate, estimate_duration, extract_features, select};
2232
2233    let cargo_test_mode = crate::cargo_test_mode::cargo_test_mode_active();
2234    let presets = crate::gauntlet::gauntlet_presets();
2235    let has_vmlinux = resolve_test_kernel()
2236        .ok()
2237        .and_then(|k| crate::vmm::find_vmlinux(&k))
2238        .is_some();
2239    let (host_cpus, host_llcs, host_max_cpus_per_llc) = super::host_capacity();
2240    let mut candidates: Vec<TestCandidate> = Vec::new();
2241
2242    let kernel_list = read_kernel_list();
2243    let multi_kernel = kernel_list.len() > 1 && !cargo_test_mode;
2244    let kernel_suffixes: Vec<&str> = if multi_kernel {
2245        kernel_list.iter().map(|k| k.sanitized.as_str()).collect()
2246    } else {
2247        vec![""]
2248    };
2249
2250    for entry in KTSTR_TESTS.iter() {
2251        if !entry.bpf_map_write.is_empty() && !has_vmlinux {
2252            continue;
2253        }
2254
2255        let base_ignored = is_ignored(entry);
2256        let base_topo = entry.topology;
2257
2258        // Base test
2259        if !ignored_only || base_ignored {
2260            // host_only tests never boot a VM, so the kernel never
2261            // affects what runs — push one candidate without a
2262            // kernel suffix even in multi-kernel mode. Otherwise the
2263            // budget selector would consider N identical copies of
2264            // the same host-side function.
2265            if entry.host_only {
2266                candidates.push(TestCandidate {
2267                    name: format!("ktstr/{}: test", entry.name),
2268                    features: extract_features(entry, &base_topo, false, entry.name),
2269                    estimated_secs: estimate_duration(entry, &base_topo),
2270                });
2271            } else {
2272                for suffix in &kernel_suffixes {
2273                    let name = if suffix.is_empty() {
2274                        format!("ktstr/{}: test", entry.name)
2275                    } else {
2276                        format!("ktstr/{}/{suffix}: test", entry.name)
2277                    };
2278                    candidates.push(TestCandidate {
2279                        name,
2280                        features: extract_features(entry, &base_topo, false, entry.name),
2281                        estimated_secs: estimate_duration(entry, &base_topo),
2282                    });
2283                }
2284            }
2285        }
2286
2287        if entry.host_only {
2288            continue;
2289        }
2290
2291        if cargo_test_mode {
2292            // No gauntlet candidates in cargo-test mode — the
2293            // dispatch path will never execute them and including
2294            // them in the budget candidate set would shift greedy
2295            // selection toward variants that resolve to "no test"
2296            // at run time.
2297            continue;
2298        }
2299
2300        for_each_gauntlet_variant(
2301            entry,
2302            &presets,
2303            host_cpus,
2304            host_llcs,
2305            host_max_cpus_per_llc,
2306            |preset| {
2307                for suffix in &kernel_suffixes {
2308                    let test_name = if suffix.is_empty() {
2309                        format!("gauntlet/{}/{}", entry.name, preset.name)
2310                    } else {
2311                        format!("gauntlet/{}/{}/{suffix}", entry.name, preset.name)
2312                    };
2313                    candidates.push(TestCandidate {
2314                        name: format!("{test_name}: test"),
2315                        features: extract_features(entry, &preset.topology, true, &test_name),
2316                        estimated_secs: estimate_duration(entry, &preset.topology),
2317                    });
2318                }
2319            },
2320        );
2321    }
2322
2323    let selected = select(&candidates, budget_secs);
2324    for &i in &selected {
2325        println!("{}", candidates[i].name);
2326    }
2327
2328    let stats = crate::budget::selection_stats(&candidates, &selected, budget_secs);
2329    eprintln!(
2330        "ktstr budget: {}/{} tests, {:.0}/{:.0}s used, {}/{} configurations covered",
2331        stats.selected,
2332        stats.total,
2333        stats.budget_used,
2334        stats.budget_total,
2335        stats.bits_covered,
2336        stats.bits_possible,
2337    );
2338}
2339
2340/// Strip an optional `/{sanitized_kernel_label}` suffix from `name`,
2341/// look up the matching [`KernelEntry`] in the multi-kernel list,
2342/// and re-export `KTSTR_KERNEL` to that entry's directory. Returns
2343/// the prefix-only name for the dispatch caller.
2344///
2345/// When `KTSTR_KERNEL_LIST` is unset / single-entry, the function
2346/// is a no-op pass-through: returns `(name, None)` and does not
2347/// touch the env. When the list has 2+ entries, the suffix is
2348/// REQUIRED and missing it surfaces as `Err` (the early-dispatch
2349/// caller turns that into exit code 1 with an actionable message)
2350/// — the suffix is part of every test name `--list` emitted, so a
2351/// `--exact` invocation that omits it can only come from operator
2352/// hand-construction or tooling that hasn't been taught the
2353/// multi-kernel naming.
2354fn strip_kernel_suffix<'a>(
2355    name: &'a str,
2356    kernel_list: &'a [KernelEntry],
2357) -> Result<(&'a str, Option<&'a KernelEntry>), String> {
2358    if kernel_list.len() <= 1 {
2359        return Ok((name, None));
2360    }
2361    // Multi-kernel: every test name carries `/kernel_…` as its
2362    // final segment. Iterate the labels rather than splitting on
2363    // `/` — the suffix always has exactly one extra `/` separator
2364    // before `kernel_…`, but the body of the test name CAN contain
2365    // `/` (gauntlet variants already do — `gauntlet/{name}/{preset}`),
2366    // so a naive `rsplit_once('/')` would accidentally peel the
2367    // preset segment instead.
2368    //
2369    // Distinct kernels in the same `KTSTR_KERNEL_LIST` produce
2370    // distinct sanitized labels in practice — the producer emits
2371    // semantic identifiers (version strings, git owner/repo/ref,
2372    // path basename + 6-char hash) that don't share suffixes
2373    // among the resolved set. If a future regression DID produce
2374    // labels where one is a strict suffix of another (e.g.
2375    // `kernel_6_14` vs `kernel_x_kernel_6_14`), the iterate-and-
2376    // first-match below would pick whichever appears first in
2377    // the kernel_list — deterministic but potentially wrong.
2378    // Producer-side regression detection would catch that
2379    // class of collision before it reaches this peeler.
2380    for entry in kernel_list {
2381        let needle = format!("/{}", entry.sanitized);
2382        if let Some(stripped) = name.strip_suffix(&needle) {
2383            return Ok((stripped, Some(entry)));
2384        }
2385    }
2386    Err(format!(
2387        "test name {name:?} has no recognised kernel suffix (KTSTR_KERNEL_LIST \
2388         carries {n} kernels — every test name must end with `/kernel_…`)",
2389        n = kernel_list.len(),
2390    ))
2391}
2392
2393/// Re-export `KTSTR_KERNEL` to the kernel directory carried by a
2394/// resolved [`KernelEntry`]. Called when a multi-kernel `--exact`
2395/// dispatch peels off the per-test kernel suffix.
2396///
2397/// SAFETY: nextest invokes the test binary's `--exact` handler in a
2398/// single-threaded context — there are no other readers of the env
2399/// at this point. The eventual VM-launch site reads `KTSTR_KERNEL`
2400/// via `find_kernel` after this returns; that read is sequenced
2401/// after the write per the program order.
2402fn export_kernel_for_variant(entry: &KernelEntry) {
2403    // SAFETY: see fn-level doc — single-threaded ctor / nextest
2404    // dispatch context.
2405    unsafe { std::env::set_var(crate::KTSTR_KERNEL_ENV, &entry.kernel_dir) };
2406}
2407
2408/// Parse a nextest-style test name and run it.
2409///
2410/// Handles base tests (`ktstr/{name}`), gauntlet variants
2411/// (`gauntlet/{name}/{preset}`), and bare names (backward compat).
2412/// When `KTSTR_KERNEL_LIST` carries 2+ kernels,
2413/// VM-bound test names additionally end with
2414/// `/{sanitized_kernel_label}` — that suffix is peeled here and
2415/// the matching kernel directory is re-exported via
2416/// [`crate::KTSTR_KERNEL_ENV`] before the dispatch continues. `host_only`
2417/// tests are short-circuited BEFORE the suffix peel: they never
2418/// boot a VM, so the kernel-suffix listing path emits one
2419/// `ktstr/{name}: test` entry without a kernel suffix regardless
2420/// of the kernel-list cardinality (see `list_tests_all` /
2421/// `list_tests_budget`), and routing them through
2422/// `strip_kernel_suffix` would surface as a "no recognised kernel
2423/// suffix" exit-1 error. Returns an exit code.
2424pub(crate) fn run_named_test(test_name: &str) -> i32 {
2425    let kernel_list = read_kernel_list();
2426
2427    // host_only short-circuit: in multi-kernel mode, host_only tests
2428    // are listed without a `/{sanitized_kernel_label}` suffix (see
2429    // `list_tests_all` / `list_tests_budget`, which emit a single
2430    // `ktstr/{name}: test` line for host_only entries regardless of
2431    // the kernel-list cardinality — a host_only test never boots a
2432    // VM, so the kernel never affects what runs). Calling
2433    // `strip_kernel_suffix` on such a name in multi-kernel mode
2434    // would fail with the "no recognised kernel suffix" error and
2435    // misroute every host_only dispatch to exit 1.
2436    //
2437    // Resolve the host_only check from `find_test` BEFORE the
2438    // suffix peel so the multi-kernel branch only applies to
2439    // VM-bound tests. Single-kernel mode is unaffected — the
2440    // pass-through arm in `strip_kernel_suffix` returns the input
2441    // verbatim either way.
2442    let bare_for_lookup = test_name.strip_prefix("ktstr/").unwrap_or(test_name);
2443
2444    if let Some(entry) = find_test(bare_for_lookup)
2445        && entry.host_only
2446    {
2447        return run_host_only_test(entry);
2448    }
2449
2450    let (test_name, kernel_entry) = match strip_kernel_suffix(test_name, &kernel_list) {
2451        Ok(pair) => pair,
2452        Err(e) => {
2453            eprintln!("{e}");
2454            return 1;
2455        }
2456    };
2457    if let Some(entry) = kernel_entry {
2458        export_kernel_for_variant(entry);
2459    }
2460
2461    if let Some(rest) = test_name.strip_prefix("gauntlet/") {
2462        return run_gauntlet_test(rest);
2463    }
2464
2465    let bare_name = test_name.strip_prefix("ktstr/").unwrap_or(test_name);
2466    let entry = match find_test(bare_name) {
2467        Some(e) => e,
2468        None => {
2469            eprintln!("unknown test: {test_name}");
2470            return 1;
2471        }
2472    };
2473
2474    // Defense-in-depth: host_only re-check after suffix peel for the
2475    // edge case where the bare_for_lookup pre-strip lookup missed
2476    // (e.g. a future test name shape that doesn't match the
2477    // pre-strip form but does after the suffix peel).
2478    if entry.host_only {
2479        return run_host_only_test(entry);
2480    }
2481
2482    if entry.performance_mode && super::runtime::no_perf_mode_active() {
2483        crate::report::test_skip(format_args!(
2484            "{}: test requires performance_mode but --no-perf-mode or KTSTR_NO_PERF_MODE is active",
2485            bare_name,
2486        ));
2487        // See run_ktstr_test_inner for the sidecar-emission rationale.
2488        // Plain (non-gauntlet) dispatch: no TopoOverride, so the skip
2489        // records entry.topology (declared == booted for a plain test).
2490        record_skip_sidecar(entry, None);
2491        return 0;
2492    }
2493
2494    if super::runtime::perf_only_skips_entry(entry) {
2495        crate::report::test_skip(format_args!(
2496            "{bare_name}: KTSTR_PERF_ONLY is active and this test is not a performance_mode test",
2497        ));
2498        // Skip sidecar so the perf-delta pool records the skip (excluded
2499        // from the A/B compare) rather than a phantom missing result.
2500        record_skip_sidecar(entry, None);
2501        return 0;
2502    }
2503
2504    if !entry.bpf_map_write.is_empty()
2505        && let Ok(kernel) = resolve_test_kernel()
2506        && crate::vmm::find_vmlinux(&kernel).is_none()
2507    {
2508        eprintln!("FAIL: vmlinux not found, bpf_map_write requires vmlinux");
2509        return 1;
2510    }
2511
2512    let result = run_ktstr_test_inner(entry, None);
2513    result_to_exit_code(result, entry.expect_err, entry.allow_inconclusive)
2514}
2515
2516/// Run a host-only test directly without booting a VM.
2517/// Returns an exit code for nextest dispatch.
2518fn run_host_only_test(entry: &KtstrTestEntry) -> i32 {
2519    let result = run_host_only_test_inner(entry);
2520    result_to_exit_code(result, entry.expect_err, entry.allow_inconclusive)
2521}
2522
2523/// Inner host-only dispatch returning `Result<AssertResult>`.
2524///
2525/// Builds a minimal Ctx and calls the test function on the host.
2526/// Used for tests that need host tools (cargo, nested VMs).
2527///
2528/// Topology comes from real-host sysfs (`/sys/devices/system/cpu/`)
2529/// via [`crate::topology::TestTopology::from_system`]; the test's
2530/// declared VM topology is intentionally ignored for host_only
2531/// runs because the test author wrote it for a synthetic VM and
2532/// the host's actual CPU layout is what `WorkSpec::workers_pct` /
2533/// `AffinityIntent::LlcAligned` resolve against. Bails with an
2534/// actionable diagnostic when sysfs CPU enumeration fails — the
2535/// underlying causes are missing `/sys/devices/system/cpu/online`
2536/// (no /sys mount or container masking), unreadable contents (rare
2537/// permissions edge), corrupt sysfs string (kernel/hardware bug),
2538/// or an empty online-CPU set (degenerate cpuset namespace).
2539///
2540/// Cgroup parent defaults to `/sys/fs/cgroup/ktstr`; the operator
2541/// can override via `KTSTR_HOST_CGROUP_PARENT`. The override path
2542/// is validated upfront: it must be non-empty and rooted under
2543/// `/sys/fs/cgroup` so an accidental empty/relative/foreign value
2544/// produces a clear error instead of an opaque cgroupfs failure
2545/// later. Empty-string env value is treated as "unset" and falls
2546/// back to the default.
2547///
2548/// For cgroup-v2 user delegation (Mode B/C: systemd `Delegate=yes`,
2549/// container `nsdelegate`), the operator sets
2550/// `KTSTR_CGROUP_WALK_ROOT` to the delegation boundary so
2551/// [`crate::cgroup::CgroupManager::setup`]'s ancestor
2552/// `subtree_control` walk stops there instead of EACCES-ing at
2553/// `user.slice` / the container root. Defaults to `/sys/fs/cgroup`
2554/// (Mode A: root-owned tree).
2555fn run_host_only_test_inner(entry: &KtstrTestEntry) -> Result<AssertResult> {
2556    let topo = crate::topology::TestTopology::from_system().context(
2557        "host_only requires real-host topology from sysfs; \
2558         the sysfs CPU enumeration at /sys/devices/system/cpu/online \
2559         failed — likely causes: running outside a /sys-mounted \
2560         environment, sysfs contents unreadable (permissions / \
2561         container mask), corrupt online-CPU string, or a degenerate \
2562         cpuset namespace with no online CPUs",
2563    )?;
2564    let cgroup_parent = resolve_host_cgroup_parent()?;
2565    let cgroups = build_host_cgroup_manager(&cgroup_parent)?;
2566    let merged_assert = crate::assert::Assert::default_checks()
2567        .merge(&entry.scheduler.assert)
2568        .merge(&entry.assert);
2569    let ctx = crate::scenario::Ctx::builder(&cgroups, &topo)
2570        .duration(entry.duration)
2571        .settle(std::time::Duration::ZERO)
2572        .assert(merged_assert)
2573        .entry_name(entry.name)
2574        // host_only is host-side with no VM: the resolved topology is
2575        // the declared entry.topology (resolve_vm_topology(entry, None)),
2576        // so compute the variant hash directly rather than threading.
2577        .variant_hash(super::sidecar::variant_hash_from_parts(
2578            entry,
2579            &entry.topology,
2580            &super::args::current_work_type(),
2581        ))
2582        .build();
2583    (entry.func)(&ctx)
2584}
2585
/// Cgroup parent path used for `host_only` tests when
/// `KTSTR_HOST_CGROUP_PARENT` is unset or empty.
///
/// [`resolve_host_cgroup_parent`] returns this value verbatim in that
/// case. Intended to work for both root (the path is writable
/// directly) and non-root callers (the operator either pre-creates
/// `/sys/fs/cgroup/ktstr` with suitable ownership or points
/// `KTSTR_HOST_CGROUP_PARENT` at a path inside a delegated subtree).
/// See `build_host_cgroup_manager` for the cgroup-v2 Mode B/C
/// delegation wire-up.
///
/// `pub` so tests can reference the constant instead of repeating the
/// literal in their own assertions (the `resolve_host_cgroup_parent_*`
/// unit tests in `dispatch_tests.rs` pin the unset/empty fallback to
/// it). Treat it as the canonical default; operators override it via
/// `KTSTR_HOST_CGROUP_PARENT`.
pub const DEFAULT_HOST_CGROUP_PARENT: &str = "/sys/fs/cgroup/ktstr";
2602
2603/// Resolve the cgroup parent path for `host_only` tests.
2604///
2605/// Reads `KTSTR_HOST_CGROUP_PARENT`. Empty / unset falls back to
2606/// `DEFAULT_HOST_CGROUP_PARENT`. A set value must be rooted under
2607/// `/sys/fs/cgroup` (no relative paths, no random /tmp dirs) so an
2608/// accidental misconfiguration surfaces here rather than as an
2609/// opaque cgroupfs failure inside `CgroupManager::setup`.
2610///
2611/// Non-root callers are admitted: cgroup-v2 user delegation (Mode
2612/// B/C: systemd `Delegate=yes`, container `nsdelegate`) is handled
2613/// by `build_host_cgroup_manager` threading
2614/// [`crate::KTSTR_CGROUP_WALK_ROOT_ENV`] into
2615/// [`crate::cgroup::CgroupManager::with_walk_root`] so the
2616/// `subtree_control` walk bails at the delegation root instead of
2617/// EACCES-ing on `user.slice`.
2618pub fn resolve_host_cgroup_parent() -> Result<String> {
2619    let parent = match std::env::var(crate::KTSTR_HOST_CGROUP_PARENT_ENV) {
2620        Ok(s) if !s.is_empty() => s,
2621        _ => return Ok(DEFAULT_HOST_CGROUP_PARENT.to_string()),
2622    };
2623    if !parent.starts_with("/sys/fs/cgroup") || parent == "/sys/fs/cgroup" {
2624        anyhow::bail!(
2625            "KTSTR_HOST_CGROUP_PARENT={parent:?}: must be rooted under \
2626             /sys/fs/cgroup and name a non-root subdirectory \
2627             (e.g. /sys/fs/cgroup/ktstr or /sys/fs/cgroup/ktstr-foo); \
2628             unset or empty falls back to {DEFAULT_HOST_CGROUP_PARENT}",
2629        );
2630    }
2631    Ok(parent)
2632}
2633
2634/// Build a [`crate::cgroup::CgroupManager`] for a `host_only` test
2635/// run, threading [`crate::KTSTR_CGROUP_WALK_ROOT_ENV`] into
2636/// [`crate::cgroup::CgroupManager::with_walk_root`] when set.
2637///
2638/// The walk root override bounds [`crate::cgroup::CgroupManager::setup`]'s
2639/// ancestor `subtree_control` walk for cgroup-v2 Mode B/C
2640/// delegation: under systemd `Delegate=yes` or a container's
2641/// `nsdelegate`, the operator owns subtree_control writes only
2642/// inside the delegated subtree. Without the override the walk
2643/// starts at `/sys/fs/cgroup` and EACCES-es at `user.slice` or the
2644/// container root.
2645///
2646/// Empty / unset falls through to the default `/sys/fs/cgroup`
2647/// (Mode A: root-owned tree). [`crate::cgroup::CgroupManager::with_walk_root`]
2648/// validates that the chosen walk root is a prefix of `parent` —
2649/// misconfigurations surface as a focused error before the first
2650/// cgroupfs write rather than as an opaque downstream EACCES.
2651///
2652/// Non-root callers with no walk-root override are admitted here — the
2653/// precondition (root, or a cgroup-v2 delegated walk root) is enforced
2654/// lazily at [`crate::cgroup::CgroupManager::setup`], the first real
2655/// cgroup operation. `host_only` tests that never create a cgroup
2656/// (macro-attribute fixtures, host-topology reads, nested-VM verifier
2657/// orchestration) therefore run without root; only a test that actually
2658/// touches a cgroup hits the deferred non-root error.
2659fn build_host_cgroup_manager(cgroup_parent: &str) -> Result<crate::cgroup::CgroupManager> {
2660    let cg = crate::cgroup::CgroupManager::new(cgroup_parent);
2661    match std::env::var(crate::KTSTR_CGROUP_WALK_ROOT_ENV) {
2662        Ok(walk_root) if !walk_root.is_empty() => {
2663            // Defense-in-depth: walk_root must be rooted under
2664            // /sys/fs/cgroup. Mirrors the sibling
2665            // KTSTR_HOST_CGROUP_PARENT_ENV guard above so an operator
2666            // typo surfaces here instead of as a downstream cgroupfs
2667            // fs::write EACCES.
2668            if !walk_root.starts_with("/sys/fs/cgroup") {
2669                anyhow::bail!(
2670                    "{env}={walk_root:?}: walk root must be rooted under /sys/fs/cgroup \
2671                     (e.g. /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service \
2672                     for a systemd user session); the value supplied is outside the cgroup-v2 \
2673                     mount and would EACCES on the first cgroupfs write",
2674                    env = crate::KTSTR_CGROUP_WALK_ROOT_ENV,
2675                );
2676            }
2677            cg.with_walk_root(&walk_root).with_context(|| {
2678                format!(
2679                    "{env}={walk_root:?}: walk-root override rejected (must be a prefix of \
2680                     KTSTR_HOST_CGROUP_PARENT={cgroup_parent:?})",
2681                    env = crate::KTSTR_CGROUP_WALK_ROOT_ENV,
2682                )
2683            })
2684        }
2685        // No KTSTR_CGROUP_WALK_ROOT override. Return the manager as-is;
2686        // the non-root precondition for managing cgroups under the
2687        // kernel-owned default walk root is checked lazily in
2688        // CgroupManager::setup (first real cgroup use). host_only tests
2689        // that never create a cgroup — macro-attribute fixtures,
2690        // host-topology reads, nested-VM verifier orchestration — must
2691        // not be failed here for a resource they never touch. A
2692        // non-root test that does create a cgroup gets the deferred
2693        // setup error pointing at with_walk_root; the operator on-ramp
2694        // is EITHER to run as root OR to set KTSTR_CGROUP_WALK_ROOT to a
2695        // delegated cgroup-v2 subtree (handled by the arm above).
2696        _ => Ok(cg),
2697    }
2698}
2699
2700/// Run a gauntlet variant test. `rest` is `{name}/{preset}`.
2701pub(crate) fn run_gauntlet_test(rest: &str) -> i32 {
2702    let parts: Vec<&str> = rest.splitn(2, '/').collect();
2703    if parts.len() != 2 {
2704        eprintln!("invalid gauntlet test name: gauntlet/{rest}");
2705        return 1;
2706    }
2707    let (test_name, preset_name) = (parts[0], parts[1]);
2708
2709    let entry = match find_test(test_name) {
2710        Some(e) => e,
2711        None => {
2712            eprintln!("unknown test: {test_name}");
2713            return 1;
2714        }
2715    };
2716
2717    let presets = crate::gauntlet::gauntlet_presets();
2718    let preset = match presets.iter().find(|p| p.name == preset_name) {
2719        Some(p) => p,
2720        None => {
2721            eprintln!("unknown gauntlet preset: {preset_name}");
2722            return 1;
2723        }
2724    };
2725
2726    let t = &preset.topology;
2727    let cpus = t.total_cpus();
2728
2729    let memory_mib = super::runtime::derive_test_memory_mib(cpus, entry);
2730    let topo = TopoOverride {
2731        numa_nodes: t.numa_nodes,
2732        llcs: t.llcs,
2733        cores: t.cores_per_llc,
2734        threads: t.threads_per_core,
2735        memory_mib,
2736    };
2737
2738    if entry.performance_mode && super::runtime::no_perf_mode_active() {
2739        crate::report::test_skip(format_args!(
2740            "{}: test requires performance_mode but --no-perf-mode or KTSTR_NO_PERF_MODE is active",
2741            test_name,
2742        ));
2743        // Gauntlet preset: record the preset's RESOLVED topology
2744        // (Topology::from(&topo)) so this skip shares a variant_hash
2745        // with a run of the same preset and distinguishes other presets.
2746        record_skip_sidecar(entry, Some(&topo));
2747        return 0;
2748    }
2749
2750    if super::runtime::perf_only_skips_entry(entry) {
2751        crate::report::test_skip(format_args!(
2752            "{test_name}: KTSTR_PERF_ONLY is active and this test is not a performance_mode test",
2753        ));
2754        // Gauntlet preset: record the preset's RESOLVED topology so the
2755        // skip shares a variant_hash with a run of the same preset.
2756        record_skip_sidecar(entry, Some(&topo));
2757        return 0;
2758    }
2759
2760    if !entry.bpf_map_write.is_empty()
2761        && let Ok(kernel) = resolve_test_kernel()
2762        && crate::vmm::find_vmlinux(&kernel).is_none()
2763    {
2764        eprintln!("FAIL: vmlinux not found, bpf_map_write requires vmlinux");
2765        return 1;
2766    }
2767
2768    let result = run_ktstr_test_inner(entry, Some(&topo));
2769    result_to_exit_code(result, entry.expect_err, entry.allow_inconclusive)
2770}
2771
2772/// Collect sidecar JSON files and return the full gauntlet analysis.
2773///
2774/// When `dir` is `Some`, reads sidecars from that directory. Otherwise
2775/// uses the default sidecar directory (`KTSTR_SIDECAR_DIR` override, or
2776/// `{CARGO_TARGET_DIR or "target"}/ktstr/{kernel}-{project_commit}/`,
2777/// where `{project_commit}` is the project HEAD short hex with
2778/// `-dirty` when the worktree differs).
2779///
2780/// Returns the concatenated output of `analyze_rows`, verifier stats,
2781/// callback profile, and KVM stats. Returns an empty string when no
2782/// sidecars are found.
2783pub fn analyze_sidecars(dir: Option<&std::path::Path>) -> String {
2784    let default_dir;
2785    let dir = match dir {
2786        Some(d) => d,
2787        None => {
2788            default_dir = sidecar_dir();
2789            &default_dir
2790        }
2791    };
2792    let sidecars = collect_sidecars(dir);
2793    if sidecars.is_empty() {
2794        return String::new();
2795    }
2796    let mut out = String::new();
2797    let rows: Vec<_> = sidecars.iter().map(crate::stats::sidecar_to_row).collect();
2798    if !rows.is_empty() {
2799        out.push_str(&crate::stats::analyze_rows(&rows));
2800    }
2801    let vstats = format_verifier_stats(&sidecars);
2802    if !vstats.is_empty() {
2803        out.push_str(&vstats);
2804    }
2805    let cprofile = format_callback_profile(&sidecars);
2806    if !cprofile.is_empty() {
2807        out.push_str(&cprofile);
2808    }
2809    let kstats = format_kvm_stats(&sidecars);
2810    if !kstats.is_empty() {
2811        out.push_str(&kstats);
2812    }
2813    out
2814}
2815
2816/// Discover plain `#[test]` items by re-invoking the binary without
2817/// NEXTEST, reading libtest's `--list` output, and printing only
2818/// names that don't match any KTSTR_TESTS entry. This lets plain
2819/// tests coexist with `#[ktstr_test]` in the same binary without
2820/// duplicating the ktstr entries.
2821///
2822/// `ignored_only` forwards `--ignored` onto the child `--list` call
2823/// so the echoed plain-test set matches the bucket nextest is
2824/// enumerating (all tests vs the `#[ignore]`-only subset). Omitting
2825/// the flag here lands every plain test in nextest's ignored set and
2826/// silently skips them by default — see the body comment.
2827fn list_plain_tests(ignored_only: bool) {
2828    use std::collections::HashSet;
2829    let ktstr_names: HashSet<&str> = KTSTR_TESTS.iter().map(|e| e.name).collect();
2830
2831    let exe = match std::env::current_exe() {
2832        Ok(p) => p,
2833        Err(_) => return,
2834    };
2835    let mut cmd = std::process::Command::new(exe);
2836    cmd.env_remove("NEXTEST");
2837    // Forward `--ignored` so the plain-test set echoed here matches the
2838    // bucket nextest is asking for. nextest computes its "ignored" set by
2839    // re-running the binary with `--list --ignored`; if this child always
2840    // lists ALL plain `#[test]` (no `--ignored`), every plain test lands
2841    // in nextest's ignored set and is silently skipped by default
2842    // (footgun #2). With the flag forwarded, only real `#[ignore]` plain
2843    // tests are reported under `--ignored`, so non-ignored plain tests run
2844    // by default like any other test.
2845    let mut list_args: Vec<&str> = vec!["--list", "--format", "terse"];
2846    if ignored_only {
2847        list_args.push("--ignored");
2848    }
2849    cmd.args(&list_args);
2850    cmd.stdout(std::process::Stdio::piped());
2851    cmd.stderr(std::process::Stdio::null());
2852    let output = match cmd.output() {
2853        Ok(o) => o,
2854        Err(_) => return,
2855    };
2856    let stdout = String::from_utf8_lossy(&output.stdout);
2857    for line in stdout.lines() {
2858        let name = line.strip_suffix(": test").unwrap_or(line);
2859        if !ktstr_names.contains(name) && !name.is_empty() {
2860            println!("{line}");
2861        }
2862    }
2863}
2864
2865/// `--list` subprotocol: emit ktstr/gauntlet test names without
2866/// exiting so the standard libtest harness can also print its own
2867/// test list afterward. This is what makes plain `#[test]` items
2868/// inside a ktstr_test integration-test binary visible to nextest.
2869///
2870/// Honours `--ignored` the same way [`ktstr_main`] does — when set,
2871/// only the ignored subset (gauntlet variants and `demo_` base
2872/// tests) is printed. Unlike `ktstr_main`, this function returns to
2873/// the caller after listing so the ctor's caller can fall through
2874/// to libtest's `main`.
2875fn ktstr_list_only() {
2876    let args: Vec<String> = std::env::args().collect();
2877    let ignored_only = args.iter().any(|a| a == "--ignored");
2878    list_tests(ignored_only);
2879}
2880
2881/// Nextest protocol handler.
2882///
2883/// Called automatically by [`ktstr_test_early_dispatch`] when running
2884/// under nextest with `--exact <ktstr_or_gauntlet_name>`.
2885/// Not intended for direct use.
2886///
2887/// - `--list --format terse`: output `ktstr/{name}: test\n` for base
2888///   tests and `gauntlet/{name}/{preset}: test\n` for gauntlet
2889///   variants. (Discovery uses `ktstr_list_only` instead to allow
2890///   libtest to print its own list afterward; this branch is
2891///   preserved for direct callers of `ktstr_main`.)
2892/// - `--exact NAME --nocapture`: run the named test, exit 0/1.
2893pub fn ktstr_main() -> ! {
2894    let args: Vec<String> = std::env::args().collect();
2895
2896    // Discovery mode: --list --format terse [--ignored]
2897    if args.iter().any(|a| a == "--list") {
2898        let ignored_only = args.iter().any(|a| a == "--ignored");
2899        list_tests(ignored_only);
2900        std::process::exit(0);
2901    }
2902
2903    // Execution mode: --exact NAME [--nocapture] [--ignored] [--bench]
2904    if let Some(pos) = args.iter().position(|a| a == "--exact") {
2905        if let Some(name) = args.get(pos + 1) {
2906            let code = run_named_test(name);
2907            std::process::exit(code);
2908        }
2909        eprintln!("--exact requires a test name");
2910        std::process::exit(1);
2911    }
2912
2913    // Fallback: no recognized arguments.
2914    eprintln!("usage: <binary> --list --format terse [--ignored]");
2915    eprintln!("       <binary> --exact <test_name> --nocapture");
2916    std::process::exit(1)
2917}
2918
2919#[cfg(test)]
2920#[path = "dispatch_tests.rs"]
2921mod tests;
ktstr/test_support/dispatch.rs

ktstr/test_support/
dispatch.rs