ktstr/test_support/dispatch.rs
1//! Process-level dispatch and nextest protocol handling.
2//!
3//! This module owns every code path that runs before (or in lieu of)
4//! the user's `main()`:
5//!
6//! - [`ktstr_test_early_dispatch`]: the `#[ctor]` that fires in every
7//! ktstr-linked binary. Routes the process to guest init, host-side
8//! VM launch, guest-side test execution, or nextest protocol handling.
9//! - [`ktstr_main`]: the nextest protocol handler — `--list` returns
10//! `ktstr/` and `gauntlet/` test names, `--exact` runs a single test.
11//! - [`run_ktstr_test`]: programmatic entry point used by library
12//! consumers and the macro-generated `#[test]` wrappers.
13//! - [`analyze_sidecars`]: collects sidecar JSON from a run directory
14//! and renders the full gauntlet analysis (rows + verifier + callback
15//! profile + KVM stats) into a string.
16//!
17//! The heavy lifting lives in sibling submodules: `eval` (host-side
18//! result judgment — `run_ktstr_test_inner` and `evaluate_vm_result`),
19//! `sidecar` (per-run JSON), `probe` (auto-repro + BPF probe pipeline),
20//! `args` (CLI extraction), and the [`crate::vmm`] VM launcher.
21
22use std::path::PathBuf;
23
24use anyhow::{Context, Result};
25
26use crate::assert::AssertResult;
27
28#[cfg(feature = "export")]
29use super::extract_export_output_arg;
30use super::{
31 HostClass, KTSTR_TESTS, KtstrTestEntry, TopoOverride, classify_host_error, collect_sidecars,
32 extract_export_test_arg, extract_shell_test_arg, extract_test_fn_arg, extract_topo_arg,
33 find_test, format_callback_profile, format_kvm_stats, format_verifier_stats,
34 maybe_dispatch_vm_test, parse_topo_string, propagate_rust_env_from_cmdline,
35 record_skip_sidecar, resolve_test_kernel, run_ktstr_test_inner, sidecar_dir, try_flush_profraw,
36};
37
38/// Check if an error is a host topology mismatch (e.g. test requests
39/// 2 LLCs but host has 1, or more CPUs than the host carries).
40///
41/// Walks the FULL error chain via `e.chain().any(...)` so a
42/// [`TopologyInsufficient`] wrapped in `.context(...)` (the
43/// `crate::test_support::eval` `"build ktstr_test VM"` / `"run
44/// ktstr_test VM"` wrappers) is still recognised — mirrors
45/// [`is_resource_contention`]. Replaced a fragile message string-match
46/// (`"need"` + `"LLC"`/`"CPU"`) that would misclassify any unrelated
47/// error happening to contain those words as a topology skip.
48///
49/// [`TopologyInsufficient`]: crate::vmm::host_topology::TopologyInsufficient
50#[doc(hidden)]
51pub fn is_topology_insufficient(e: &anyhow::Error) -> bool {
52 e.chain().any(|cause| {
53 cause
54 .downcast_ref::<crate::vmm::host_topology::TopologyInsufficient>()
55 .is_some()
56 })
57}
58
59/// Check if an `anyhow::Error` carries a [`ResourceContention`].
60///
61/// Walks the FULL error chain via `e.chain().any(...)` so a
62/// `ResourceContention` wrapped in `.context(...)` (e.g. the
63/// `crate::test_support::eval` `"build ktstr_test VM"` and `"run ktstr_test VM"`
64/// wrappers) is still recognised — the macro's match arm depends on
65/// this.
66///
67/// Used by the `#[ktstr_test]` macro expansion to short-circuit on
68/// host-resource contention (LLC slots / CPUs unavailable, KVM fd
69/// budget exhausted, ENOMEM): the macro emits the canonical
70/// `ktstr: SKIP: resource contention: ...` banner and early-returns
71/// so libtest sees pass. The skip sidecar is recorded at every
72/// contention site inside `run_ktstr_test_inner`, so stats tooling
73/// still sees the skip without a panic-driven nextest retry. `pub`
74/// because the macro-generated `#[test]` body in `ktstr-macros`
75/// references it by absolute path; `#[doc(hidden)]` keeps it out
76/// of rustdoc's public surface — it is plumbing, not user API.
77///
78/// [`ResourceContention`]: crate::vmm::host_topology::ResourceContention
79#[doc(hidden)]
80pub fn is_resource_contention(e: &anyhow::Error) -> bool {
81 e.chain().any(|cause| {
82 cause
83 .downcast_ref::<crate::vmm::host_topology::ResourceContention>()
84 .is_some()
85 })
86}
87
88/// Check if an `anyhow::Error` carries a [`PerfModeUnavailable`].
89///
90/// Chain-aware (walks `e.chain()`), like [`is_topology_insufficient`].
91/// A `PerfModeUnavailable` is a HOST-INSUFFICIENCY skip, like RC/TI: the
92/// host fundamentally cannot honor an explicitly-requested perf-mode
93/// guarantee (too few CPUs for an exclusive host LLC + a service CPU).
94/// The VM is never run unisolated (it errors at build), so
95/// `result_to_exit_code` and the macro body route it to a VISIBLE skip
96/// by default, promoted to a FAIL banner under `KTSTR_NO_SKIP_MODE`.
97///
98/// [`PerfModeUnavailable`]: crate::vmm::host_topology::PerfModeUnavailable
99#[doc(hidden)]
100pub fn is_perf_mode_unavailable(e: &anyhow::Error) -> bool {
101 e.chain().any(|cause| {
102 cause
103 .downcast_ref::<crate::vmm::host_topology::PerfModeUnavailable>()
104 .is_some()
105 })
106}
107
108/// Check if an `anyhow::Error` carries a [`CpuBudgetUnsatisfiable`].
109///
110/// Chain-aware. A `CpuBudgetUnsatisfiable` is a HARD ERROR (an operator
111/// `--cpu-cap` number the host cannot satisfy), NOT a skip. (An author's
112/// per-test `cpu_budget` over the allowance skips via `TopologyInsufficient`
113/// instead — see `resolve_cpu_budget` — so it never carries this type.)
114///
115/// [`CpuBudgetUnsatisfiable`]: crate::vmm::host_topology::CpuBudgetUnsatisfiable
116#[doc(hidden)]
117pub fn is_cpu_budget_unsatisfiable(e: &anyhow::Error) -> bool {
118 e.chain().any(|cause| {
119 cause
120 .downcast_ref::<crate::vmm::host_topology::CpuBudgetUnsatisfiable>()
121 .is_some()
122 })
123}
124
125/// Check if an `anyhow::Error` carries a [`TopologyUnrepresentable`].
126///
127/// Chain-aware. A `TopologyUnrepresentable` is a HARD ERROR (a topology no
128/// host can represent under this VMM's static device layout — the aarch64
129/// over-`MAX_VCPUS` GICv3-redistributor case), NOT a skip.
130/// `classify_host_error` classifies it as `HostClass::Fail`, checked above
131/// the RC/TI skip types and handled above the `expect_err` inversion in
132/// both `err_to_exit_code` and the macro body, so a too-wide aarch64
133/// topology can neither masquerade as the expected failure nor be turned
134/// into a silent skip. Distinct from [`is_topology_insufficient`], which
135/// matches the host-DEPENDENT skip type.
136///
137/// [`TopologyUnrepresentable`]: crate::vmm::host_topology::TopologyUnrepresentable
138#[doc(hidden)]
139pub fn is_topology_unrepresentable(e: &anyhow::Error) -> bool {
140 e.chain().any(|cause| {
141 cause
142 .downcast_ref::<crate::vmm::host_topology::TopologyUnrepresentable>()
143 .is_some()
144 })
145}
146
147/// Predicate: walks the [`anyhow::Error`] chain looking for a
148/// [`KernelUnavailable`] cause. Used by `classify_host_error` to classify
149/// a no-kernel host as a skip-class host-insufficiency.
150///
151/// The harness signals "I have no kernel to boot, the binary was
152/// likely invoked outside `cargo ktstr test`" by surfacing
153/// [`KernelUnavailable`] rather than a generic `anyhow::bail!`.
154/// `classify_host_error` maps it to `HostClass::Skip` (the canonical
155/// `ktstr: SKIP: harness not configured: ...` banner), promoted to a FAIL
156/// under `KTSTR_NO_SKIP_MODE` — same shape as the resource-contention skip.
157/// `pub` + `#[doc(hidden)]`: plumbing re-exported from `test_support`
158/// alongside the sibling `is_*` predicates, not user API.
159///
160/// Both consumers route a `KernelUnavailable` through the shared
161/// [`classify_host_error`] (a no-kernel host is a skip-class
162/// host-insufficiency): `err_to_exit_code` and the `#[ktstr_test]` macro
163/// body both SKIP it by default, promoted to a FAIL under
164/// `KTSTR_NO_SKIP_MODE`. Under nextest the plain `#[test]` wrapper is
165/// suppressed, so an entry dispatches as `ktstr/{name}` via `run_named_test`
166/// → `err_to_exit_code` — meaning a developer running `cargo nextest run`,
167/// or `cargo ktstr test` without `--kernel`, on a kernel-less host gets a
168/// clean skip rather than a hard fail on every entry. This cannot mask a CI
169/// kernel-build failure: a requested `--kernel` that fails to build bails in
170/// cargo-ktstr (`resolve_kernel_set`) before nextest is spawned, so a
171/// `KernelUnavailable` here only ever means "no kernel was requested".
172/// Pinned by `result_to_exit_code_kernel_unavailable_skips_on_dispatch_path`.
173///
174/// [`classify_host_error`]: crate::test_support::classify_host_error
175///
176/// [`KernelUnavailable`]: crate::test_support::eval::KernelUnavailable
177#[doc(hidden)]
178pub fn is_kernel_unavailable(e: &anyhow::Error) -> bool {
179 e.chain().any(|cause| {
180 cause
181 .downcast_ref::<crate::test_support::eval::KernelUnavailable>()
182 .is_some()
183 })
184}
185
186/// A nextest-safe kernel identifier whose construction is gated
187/// through [`sanitize_kernel_label`] — once a value of this type
188/// exists, the contained string is GUARANTEED to match the
189/// `kernel_[a-z0-9_]+` shape that nextest's test-name parsing
190/// accepts. The wrapped `String` is private so a future caller
191/// cannot bypass [`Self::new`] and stuff a raw label into the
192/// invariant.
193///
194/// Constructed by [`Self::new`] (which always calls
195/// [`sanitize_kernel_label`]). Read access is via
196/// [`Self::as_str`] / `Display` / `AsRef<str>` — both of which
197/// expose the sanitized form unchanged.
198///
199/// `pub(crate)` because every consumer (this module, the
200/// production parser at [`parse_kernel_list`], and the encoder
201/// helpers in `cargo-ktstr` that emit the wire format
202/// `parse_kernel_list` decodes) lives inside the workspace; no external
203/// surface is needed today. If a future external consumer needs
204/// to construct a `SanitizedKernelLabel` directly, expose
205/// `Self::new` as `pub` then — but the private inner stays a
206/// private invariant either way.
207#[derive(Clone, Debug, PartialEq, Eq, Hash)]
208pub(crate) struct SanitizedKernelLabel(String);
209
210impl SanitizedKernelLabel {
211 /// Sanitize `raw` via [`sanitize_kernel_label`] and wrap the
212 /// result in the invariant-preserving newtype. The only path
213 /// that produces a `SanitizedKernelLabel`; bypassing it is
214 /// impossible because the inner field is private to this
215 /// module.
216 pub(crate) fn new(raw: &str) -> Self {
217 Self(sanitize_kernel_label(raw))
218 }
219
220 /// Read access to the sanitized identifier. Returns `&str`
221 /// rather than `&String` so callers can compose with
222 /// `format!` / `starts_with` / `strip_suffix` without
223 /// chaining `.as_str().as_str()`.
224 pub(crate) fn as_str(&self) -> &str {
225 &self.0
226 }
227}
228
229impl std::fmt::Display for SanitizedKernelLabel {
230 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
231 f.write_str(&self.0)
232 }
233}
234
235impl AsRef<str> for SanitizedKernelLabel {
236 fn as_ref(&self) -> &str {
237 &self.0
238 }
239}
240
241// `PartialEq<&str>` and `PartialEq<str>` impls let `assert_eq!`
242// against a string literal stay readable in tests
243// (`assert_eq!(entries[0].sanitized, "kernel_6_14_2")`) without
244// forcing every consumer to chain `.as_str()`. The wrapped
245// `String` is private to this module, so impls comparing
246// against external `&str` values cannot break the
247// "constructor enforces sanitization" invariant — the
248// invariant attaches to value PRODUCTION, not to value
249// COMPARISON.
250impl PartialEq<&str> for SanitizedKernelLabel {
251 fn eq(&self, other: &&str) -> bool {
252 self.0 == *other
253 }
254}
255
256impl PartialEq<str> for SanitizedKernelLabel {
257 fn eq(&self, other: &str) -> bool {
258 self.0 == other
259 }
260}
261
#[cfg(test)]
impl SanitizedKernelLabel {
    /// Test-only escape hatch: wrap a string that is ALREADY in the
    /// sanitized shape (`kernel_[a-z0-9_]+`) without running the sanitizer.
    /// Meant for unit-test fixtures that hand-roll `KernelEntry` values
    /// whose `sanitized` field is a literal — feeding `"kernel_6_14_2"`
    /// through [`Self::new`] would double-prefix it to
    /// `"kernel_kernel_6_14_2"`.
    ///
    /// Production code must NEVER call this: it performs no validation, so
    /// callers could stuff arbitrary strings into the field and defeat the
    /// point of the newtype. The `#[cfg(test)]` gate enforces that at
    /// compile time.
    pub(crate) fn from_pre_sanitized_for_test(s: &str) -> Self {
        SanitizedKernelLabel(String::from(s))
    }
}
279
280/// One resolved kernel entry from `KTSTR_KERNEL_LIST` (the multi-
281/// kernel fan-out wire format that `cargo ktstr test --kernel A
282/// --kernel B` or `cargo ktstr verifier --kernel A --kernel B`
283/// exports before exec'ing into `cargo nextest`).
284///
285/// `label` is the producer-side label string before
286/// sanitization — e.g. `"6.14.2"` for Version, `"git_tj_sched_ext_for-next"`
287/// for Git, `"6.14.2-tarball-x86_64-kc..."` for CacheKey,
288/// `"path_linux_a3f2b1"` for Path. Preserved so the
289/// [`crate::test_support::dispatch`] verifier sweep filter can
290/// compare against `declare_scheduler!`'s `kernels = [...]`
291/// declarations — specifically, range membership
292/// (`"6.14..6.16"` vs `"6.14.2"`) needs the raw version string
293/// to feed into [`crate::kernel_path::decompose_version_for_compare`],
294/// which the sanitized form has lost (slashes / dots → underscores).
295///
296/// `sanitized` is the nextest-safe identifier appended to test names
297/// so `cargo nextest run -E 'test(kernel_6_14_2)'` filters work
298/// natively. The producer-side encoder in `cargo-ktstr` emits a
299/// semantic, operator-readable label per kernel:
300/// - Version / Range expansion: the version string verbatim
301/// (`6.14.2`, `6.15-rc3`).
302/// - CacheKey: the version prefix (everything before the
303/// `-tarball-` / `-git-` source tag).
304/// - Git: `git_{owner}_{repo}_{kind}_{ref}` extracted from the URL
305/// (kind = tag/branch/sha).
306/// - Path: `path_{basename}_{hash6}` — basename + 6-char crc32 of
307/// the canonical path, disambiguating two `linux` directories
308/// under different parents.
309///
310/// [`SanitizedKernelLabel::new`] (which calls [`sanitize_kernel_label`])
311/// applies the `kernel_` prefix and `[a-z0-9_]+` normalization
312/// downstream. The newtype on this field makes the invariant
313/// compile-checked: a future caller cannot construct a
314/// `KernelEntry` whose `sanitized` field skipped sanitization.
315///
316/// `kernel_dir` is the canonical absolute path to the kernel-build
317/// directory the per-variant subprocess re-exports as
318/// `KTSTR_KERNEL`.
#[derive(Clone, Debug)]
pub(crate) struct KernelEntry {
    /// Producer-side label before sanitization (e.g. `"6.14.2"`). When the
    /// entry comes from `parse_kernel_list`, this is the trimmed text to the
    /// left of the first `=` in the wire-format segment.
    pub(crate) label: String,
    /// Nextest-safe form of the label. `parse_kernel_list` derives it from
    /// `label` via `SanitizedKernelLabel::new`.
    pub(crate) sanitized: SanitizedKernelLabel,
    /// Kernel-build directory. When the entry comes from
    /// `parse_kernel_list`, this is the trimmed text to the right of the
    /// first `=`, stored as given (no canonicalization happens in this
    /// module — presumably the producer already emitted an absolute path).
    pub(crate) kernel_dir: PathBuf,
}
325
326/// Parse the multi-kernel wire format `KTSTR_KERNEL_LIST` into a
327/// `Vec<KernelEntry>`. Format: `label1=path1;label2=path2;...`,
328/// semicolon-separated entries, `=` separating label from path. Empty
329/// / unset env returns an empty vec — callers treat that as
330/// "single-kernel mode" and fall through to `KTSTR_KERNEL`.
331///
332/// Malformed entries (missing `=`, empty label, empty path) are
333/// dropped silently — the producer is `cargo ktstr` which encodes
334/// the format under our control, so a malformed entry indicates a
335/// regression in the producer rather than operator input that
336/// deserves a clear error. Silent drop preserves the `len() <= 1` →
337/// "treat as single-kernel" invariant in the readers downstream.
338pub(crate) fn parse_kernel_list(raw: &str) -> Vec<KernelEntry> {
339 raw.split(';')
340 .filter_map(|seg| {
341 let seg = seg.trim();
342 if seg.is_empty() {
343 return None;
344 }
345 let (label, path) = seg.split_once('=')?;
346 let label = label.trim();
347 let path = path.trim();
348 if label.is_empty() || path.is_empty() {
349 return None;
350 }
351 Some(KernelEntry {
352 label: label.to_string(),
353 sanitized: SanitizedKernelLabel::new(label),
354 kernel_dir: PathBuf::from(path),
355 })
356 })
357 .collect()
358}
359
360/// Read [`crate::KTSTR_KERNEL_LIST_ENV`] and parse it into a
361/// `Vec<KernelEntry>`. Empty / unset / malformed → empty vec
362/// (single-kernel mode at the call site).
363pub(crate) fn read_kernel_list() -> Vec<KernelEntry> {
364 std::env::var(crate::KTSTR_KERNEL_LIST_ENV)
365 .ok()
366 .map(|v| parse_kernel_list(&v))
367 .unwrap_or_default()
368}
369
/// Sanitise a kernel label (the producer-side identity emitted by
/// `cargo ktstr`'s resolver) into a nextest-safe identifier of the shape
/// `kernel_[a-z0-9_]+`.
///
/// The label is cut into runs of ASCII alphanumerics; everything else
/// (punctuation, whitespace, non-ASCII characters) acts as a separator.
/// The runs are lowercased, joined with single `_`, and prefixed with
/// `kernel_`. Leading, trailing and repeated separators therefore never
/// produce a dangling or doubled `_`. Input with no alphanumerics at all
/// collapses to `kernel_` alone, which the parser downstream still
/// recognises as a valid suffix (the empty marker just won't disambiguate
/// two kernels — but the producer side guarantees non-empty labels, so the
/// empty case is defensive only).
///
/// Example mappings:
/// - `6.14.2` → `kernel_6_14_2`
/// - `6.15-rc3` → `kernel_6_15_rc3`
/// - `git_tj_sched_ext_for-next` → `kernel_git_tj_sched_ext_for_next`
/// - `path_linux_a3f2b1` → `kernel_path_linux_a3f2b1`
pub fn sanitize_kernel_label(raw: &str) -> String {
    const PREFIX: &str = "kernel_";

    let mut label = String::with_capacity(PREFIX.len() + raw.len());
    label.push_str(PREFIX);

    // Splitting on every non-alphanumeric character and discarding the
    // empty pieces collapses separator runs and trims both ends in one go.
    let runs = raw
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|run| !run.is_empty());
    for (idx, run) in runs.enumerate() {
        if idx > 0 {
            label.push('_');
        }
        label.push_str(run);
    }

    // Only ASCII alphanumerics and `_` remain, so ASCII lowercasing suffices.
    label.make_ascii_lowercase();
    label
}
408
ctor::declarative::ctor! {
/// Early dispatch for `#[ktstr_test]` test execution.
///
/// Runs before `main()` in any binary that links against ktstr.
///
/// When running as PID 1 (the binary is `/init` in the VM), calls
/// `ktstr_guest_init()` which handles the full init lifecycle and never
/// returns.
///
/// Otherwise the checks run in this order, and the first one that
/// dispatches terminates the process with its exit code:
///
/// - export-self (`--ktstr-export-test=NAME`), then shell-test dispatch.
/// - `--ktstr-test-fn=NAME --ktstr-topo=NnNlNcNt`: host-side dispatch —
///   boots a VM with the specified topology and runs the test inside it.
/// - `--ktstr-test-fn=NAME` (without `--ktstr-topo`): guest-side dispatch —
///   runs the test function directly (inside a VM that was already booted).
/// - nextest protocol (`--list`/`--exact`): intercepted when running
///   under nextest (`NEXTEST` env var set), delegates to [`ktstr_main`].
/// - Otherwise: no-op (falls through to the standard test harness), after
///   warning on stderr about coverage that only runs under nextest.
///
/// ctor 1.0 ships both `#[ctor::ctor(...)]` (proc-macro attribute) and
/// `ctor::declarative::ctor! { ... }` (declarative block). This site
/// uses the declarative form because it sidesteps the TT-muncher
/// recursion-limit cost the proc-macro form would impose on the
/// ktstr_test expansion. The proc-macro form stays reachable via
/// `crate::__private::ctor::ctor` for downstream consumers that prefer
/// the attribute-on-fn shape; see `tests/private_module_paths.rs` for
/// the re-export contract.
#[doc(hidden)]
#[ctor(unsafe)]
pub fn ktstr_test_early_dispatch() {
    // PID 1: the binary is /init in the VM. Perform full init lifecycle
    // (mounts, scheduler, test dispatch, reboot). Never returns.
    //
    // SAFETY: `getpid` takes no arguments, touches no caller-owned memory,
    // and is specified as always successful.
    if unsafe { libc::getpid() } == 1 {
        crate::vmm::rust_init::ktstr_guest_init();
    }

    // Export-self dispatch runs BEFORE host/guest test dispatch.
    // `cargo ktstr export` is a router that exec's the test binary
    // with `--ktstr-export-test=NAME`; the binary reads its own
    // `KTSTR_TESTS` registry, embeds itself via `current_exe`, and
    // writes the .run file. Running this check first means the
    // export path never accidentally triggers VM boot if the
    // operator simultaneously passes `--ktstr-test-fn` (the export
    // arg wins because export is a one-shot tool, not a test
    // execution).
    if let Some(code) = maybe_dispatch_export() {
        std::process::exit(code);
    }
    // Shell-test dispatch comes next, then host-side test dispatch; each
    // returns `Some(exit_code)` only when it handled the invocation.
    if let Some(code) = maybe_dispatch_shell_test() {
        std::process::exit(code);
    }
    if let Some(code) = maybe_dispatch_host_test() {
        std::process::exit(code);
    }
    // Propagate RUST_BACKTRACE / RUST_LOG from /proc/cmdline before
    // `maybe_dispatch_vm_test` runs: ctor context is single-threaded
    // (`.init_array` runs before any user thread exists), so this
    // `set_var` is sound and the later guest-side code that spawns
    // the probe thread observes the correct env.
    propagate_rust_env_from_cmdline();
    if let Some(code) = maybe_dispatch_vm_test() {
        // The LLVM profiling runtime registers its atexit handler via a
        // .init_array entry (C++ global initializer). Our ctor also lives
        // in .init_array, and the execution order between them is
        // non-deterministic. If our ctor runs first, the atexit handler
        // was never registered, so std::process::exit() won't write the
        // profraw. Serialize profraw to a buffer and write it to the SHM
        // ring for host-side extraction.
        try_flush_profraw();
        std::process::exit(code);
    }

    // nextest protocol: intercept --list and --exact when running under
    // nextest. Under cargo test, fall through to the standard harness
    // which runs the #[test] wrappers generated by #[ktstr_test].
    //
    // Binaries with real #[ktstr_test] entries need the ctor to handle
    // listing (gauntlet expansion) and dispatch (VM booting). The lib
    // test binary has only the dummy entry and no gauntlet variants —
    // skip interception so the standard harness discovers #[cfg(test)]
    // module #[test] functions (unit tests).
    //
    // For `--list`, this ctor emits the whole listing itself — the
    // ktstr/gauntlet names (`ktstr_list_only`), the verifier cells
    // (`list_verifier_cells_all`) and the plain `#[test]` items
    // (`list_plain_tests`, told whether `--ignored` was passed) — and then
    // exits 0, so libtest's own `--list` never runs in this process.
    // NOTE(review): an earlier version of this comment said `ktstr_main`
    // prints the names and RETURNS so libtest can append its own list;
    // the code below exits instead — confirm `list_plain_tests` covers
    // every plain `#[test]` name nextest needs to see.
    //
    // For `--exact`, interception happens only when the test name starts
    // with `verifier/`, `ktstr/` or `gauntlet/` — names ktstr owns. Other
    // names (libtest #[test] items, including the per-entry wrappers
    // emitted by `#[ktstr_test]` itself) fall through to libtest's
    // dispatch. Without this guard, run_named_test would fail
    // `find_test` for a plain `#[test]` name and exit 1, blocking
    // nextest from running it.
    if std::env::var_os("NEXTEST").is_some() {
        let has_real_tests = KTSTR_TESTS.iter().any(|e| !is_test_sentinel(e.name));
        // A binary may carry only `declare_scheduler!` declarations
        // (no `#[ktstr_test]` entries) — pure verifier-only test
        // binaries. Without the scheduler check below the listing
        // branch would never fire for such a binary and the
        // verifier cells would silently fail to emit under nextest.
        let has_schedulers = !super::KTSTR_SCHEDULERS.is_empty();
        if has_real_tests || has_schedulers {
            let args: Vec<String> = std::env::args().collect();
            if args.iter().any(|a| a == "--list") {
                ktstr_list_only();
                list_verifier_cells_all();
                list_plain_tests(args.iter().any(|a| a == "--ignored"));
                std::process::exit(0);
            } else if let Some(pos) = args.iter().position(|a| a == "--exact")
                && let Some(name) = args.get(pos + 1)
                && name.starts_with("verifier/")
            {
                // verifier/<sched>/<kernel>/<preset> cells bypass libtest
                // entirely — the cell handler resolves the scheduler
                // binary, kernel, and the cell's topology preset, runs
                // collect_verifier_output, prints the result, and
                // exits. No #[test] wrapper exists for declared
                // schedulers (declare_scheduler! only emits a static),
                // so it runs directly via run_verifier_cell — the same
                // libtest bypass the ktstr/ branch below uses.
                let code = run_verifier_cell(name);
                try_flush_profraw();
                std::process::exit(code);
            } else if let Some(pos) = args.iter().position(|a| a == "--exact")
                && let Some(name) = args.get(pos + 1)
                && (name.starts_with("ktstr/") || name.starts_with("gauntlet/"))
            {
                // First path component after the `ktstr/` / `gauntlet/`
                // prefix. Only used for the emptiness check below; the
                // full `name` is what gets dispatched.
                let bare = name
                    .strip_prefix("ktstr/")
                    .or_else(|| name.strip_prefix("gauntlet/"))
                    .unwrap_or(name)
                    .split('/')
                    .next()
                    .unwrap_or(name);

                // Reject malformed names like `gauntlet/` (trailing
                // slash, no test name) and `ktstr/` up front, so the
                // operator sees a clear error instead of an opaque
                // "unknown test" from the empty bare name.
                if bare.is_empty() {
                    eprintln!(
                        "ktstr: malformed --exact test name {name:?} \
                         (resolves to an empty bare name after prefix strip)",
                    );
                    std::process::exit(1);
                }

                // Run the entry directly, bypassing libtest — the same
                // pattern as the verifier/ branch above. The previous
                // dispatch rewrote argv to the bare name and relied on a
                // #[test] wrapper (emitted only by the #[ktstr_test]
                // macro) for libtest to match it; raw
                // `#[distributed_slice(KTSTR_TESTS)]` registrations have
                // no wrapper, so libtest matched nothing and printed
                // "running 0 tests" — a silent trivial-pass. run_named_test
                // resolves the entry from KTSTR_TESTS by name and boots it
                // for both registration styles, routing gauntlet/ to
                // run_gauntlet_test (identical topology) and applying the
                // host_only / performance_mode / bpf_map_write gates the
                // wrapper path skipped.
                let code = run_named_test(name);
                try_flush_profraw();
                std::process::exit(code);
            }
        }
    } else {
        // cargo-test-direct path: the standard rustc test harness
        // runs only the bare `#[test]` wrappers `#[ktstr_test]`
        // generates. Gauntlet expansion (topology-preset variants)
        // lives inside the `--list` + `--exact` handlers in the NEXTEST
        // branch above and is reachable ONLY under nextest. Every real
        // ktstr entry produces topology-preset variants under nextest
        // (`for_each_gauntlet_variant` iterates
        // `crate::gauntlet::gauntlet_presets()`). Without nextest those
        // variants would silently not run — coverage loss with no
        // error. Emit a one-shot stderr `warning:` diagnostic (see
        // the `eprintln!` below) when the binary carries any real
        // entry so the user sees the gap instead of trusting a
        // false green. Print once per process (cargo test invokes
        // one test binary per crate; the ctor runs exactly once per
        // test binary) so there is no need to gate with a
        // std::sync::Once.
        //
        // `KTSTR_CARGO_TEST_MODE=1` opts out of the warning: the
        // operator deliberately picked the cargo-test-direct path
        // (e.g. for a single-test debug iteration without the
        // nextest harness) and accepts that gauntlet variants
        // won't run. The warning is still emitted under bare
        // `cargo test` without the env var set so unaware users
        // see the coverage gap.
        if !crate::cargo_test_mode::cargo_test_mode_active() {
            // `total` counts every registered entry, sentinels included;
            // `real` counts only the non-sentinel ones.
            let total = KTSTR_TESTS.len();
            let real = KTSTR_TESTS
                .iter()
                .filter(|e| !is_test_sentinel(e.name))
                .count();
            if real > 0 {
                eprintln!(
                    "warning: {real} of {total} ktstr test entries registered in this binary \
                     will not generate their topology-preset gauntlet variants — NEXTEST env \
                     var is not set and the standard rustc harness does not expand them. Use \
                     `cargo nextest run` (or `cargo ktstr test`) to exercise the full gauntlet, \
                     or set KTSTR_CARGO_TEST_MODE=1 to opt into single-variant bare-`cargo test` \
                     mode without this warning.",
                );
            }
            // Verifier cells are emitted by `list_verifier_cells_all`
            // which runs ONLY from the NEXTEST listing branch above.
            // A bare `cargo test` invocation on a binary carrying
            // `declare_scheduler!` declarations gets zero verifier
            // coverage — surface the gap with the same opt-out shape
            // as the gauntlet warning so an unaware operator does not
            // trust a green run that never reached the verifier.
            // Eevdf + KernelBuiltin variants don't produce userspace
            // binaries to verify, so they are excluded from the count
            // (matching the emission-time filter in
            // `list_verifier_cells_all`).
            let verifier_schedulers = super::KTSTR_SCHEDULERS
                .iter()
                .filter(|s| {
                    !matches!(
                        s.binary,
                        super::SchedulerSpec::Eevdf | super::SchedulerSpec::KernelBuiltin { .. }
                    )
                })
                .count();
            if verifier_schedulers > 0 {
                eprintln!(
                    "warning: {verifier_schedulers} `declare_scheduler!` declaration(s) in this \
                     binary will not generate verifier cells — NEXTEST env var is not set and \
                     verifier cells are emitted only by ktstr's `--list` handler under nextest. \
                     Use `cargo ktstr verifier` to exercise the verifier sweep, or set \
                     KTSTR_CARGO_TEST_MODE=1 to acknowledge the verifier-cell-free path without \
                     this warning.",
                );
            }
        }
    }
}
}
650
/// Predicate for "this entry is a unit-test sentinel, not a real
/// `#[ktstr_test]` user entry." The lib-test binary registers a single
/// sentinel entry (currently `"__unit_test_dummy__"`) so the dispatch +
/// gauntlet plumbing has something to exercise under `cargo test --lib`.
///
/// A name is a sentinel when it starts with `__unit_test_` AND ends with
/// `__`. Matching by shape rather than by literal equality keeps the filter
/// robust when the sentinel is renamed within that shape, or when future
/// scaffolding adds additional sentinel-shaped entries (e.g.
/// `__unit_test_panics__`, `__unit_test_timeout__`). A literal-equality
/// check would silently admit those future sentinels into the real-entry
/// population and double-fire the "NEXTEST env var not set" warning or
/// spuriously enable --list interception.
///
/// The `#[ktstr_test]` proc macro does not validate entry names, so a real
/// user entry that happens to use the `__unit_test_*__` shape is treated as
/// a sentinel too: it is left out of the real-entry count behind that
/// warning and the --list interception check. That is a diagnostic glitch,
/// not a correctness failure.
fn is_test_sentinel(name: &str) -> bool {
    const SENTINEL_PREFIX: &str = "__unit_test_";
    const SENTINEL_SUFFIX: &str = "__";

    if !name.starts_with(SENTINEL_PREFIX) {
        return false;
    }
    // Checked against the whole name, so the suffix may overlap the
    // prefix's trailing `_` (`"__unit_test__"` counts as a sentinel).
    name.ends_with(SENTINEL_SUFFIX)
}
680
681/// Export-self dispatch: if `--ktstr-export-test=NAME` is present in
682/// argv, look up `NAME` in the binary's own `KTSTR_TESTS` registry,
683/// build a self-extracting `.run` file embedding `current_exe()`
684/// (this binary), and exit. Returns `Some(exit_code)` when dispatched,
685/// `None` when the flag is absent.
686///
687/// `cargo ktstr export <NAME>` (the cargo-ktstr binary) is a router
688/// that compiles the workspace's tests, locates the test binary that
689/// owns `NAME`, and exec's it with this arg. The test binary embeds
690/// ITSELF — without that indirection, cargo-ktstr would package its
691/// own binary, which has no `#[ktstr_test]` registrations from the
692/// user's crate and can't reproduce the test on bare metal.
693///
694/// `--ktstr-export-output=PATH` overrides the default output path
695/// (`<NAME>.run` in the cwd). Both flags are leniently parsed by the
696/// helpers in `args.rs`; an empty NAME (`--ktstr-export-test=`)
697/// surfaces with diagnostic "requires a non-empty test name" and
698/// exit 1 so the router moves on to the next candidate.
699///
700/// # Exit-code contract
701///
702/// The router (`cargo-ktstr.rs::run_export`) discriminates between
703/// "this binary doesn't know the test" (exit 1) and "this binary
704/// has the test but rejects it" (exit 2). When ANY candidate exits
705/// 2, the router surfaces THAT candidate's stderr (the rejection
706/// reason: host_only, bpf_map_write, KernelBuiltin) rather than
707/// the generic "not found in any workspace test binary" message.
708/// Without the differentiation, an operator who exports a
709/// host_only test would see the misleading "not found" diagnostic
710/// even though the test exists.
711/// Stub for the `export`-feature-disabled build. The router
712/// (`cargo-ktstr.rs::run_export`) execs every candidate test binary
713/// with `--ktstr-export-test=NAME`; without this stub a binary
714/// compiled without `export` would fall through to the nextest
715/// harness, which would surface an opaque "unrecognised argument"
716/// error against an arg the operator never typed. The stub turns
717/// that into an actionable diagnostic by detecting the arg and
718/// emitting a build-config hint, then exiting 2 (matches the
719/// "registered but rejected" exit code so the router surfaces
720/// THIS binary's stderr rather than a sibling's "not registered"
721/// fallthrough). Recompile the test binary with the `export`
722/// feature (folded into `cli-bins` in the default feature set)
723/// to enable the real `cargo ktstr export` flow.
724#[cfg(not(feature = "export"))]
725fn maybe_dispatch_export() -> Option<i32> {
726 let args: Vec<String> = std::env::args().collect();
727 let _ = extract_export_test_arg(&args)?;
728 eprintln!(
729 "ktstr export: this test binary was built without the `export` cargo \
730 feature, so `cargo ktstr export <name>` cannot reach the export pipeline \
731 from here. Rebuild with the default feature set (or pass \
732 `--features cli-bins`) and retry."
733 );
734 Some(2)
735}
736
737#[cfg(feature = "export")]
738fn maybe_dispatch_export() -> Option<i32> {
739 let args: Vec<String> = std::env::args().collect();
740 let name = extract_export_test_arg(&args)?;
741 let output = extract_export_output_arg(&args).map(std::path::PathBuf::from);
742
743 // Empty name: surface as a hard error rather than silently
744 // succeeding. The router's "first binary that exits 0 wins"
745 // protocol relies on the absent-test path returning a non-zero
746 // exit so the next candidate is tried.
747 if name.is_empty() {
748 eprintln!("ktstr export: --ktstr-export-test= requires a non-empty test name");
749 return Some(1);
750 }
751
752 // Look up the test ourselves so we can discriminate "not
753 // registered here" (exit 1, router falls through) from
754 // "registered but rejected" (exit 2, router surfaces this
755 // stderr). `export_test` itself returns anyhow::Error for both
756 // cases, which would conflate them at the exit-code level.
757 if find_test(name).is_none() {
758 eprintln!("ktstr export: no registered test named '{name}'");
759 return Some(1);
760 }
761
762 match crate::export::export_test(name, output) {
763 Ok(()) => Some(0),
764 Err(e) => {
765 eprintln!("ktstr export: {e:#}");
766 // The test exists in this binary but the export pipeline
767 // refused it (host_only / bpf_map_write / KernelBuiltin /
768 // I/O error). Exit 2 so the router prefers this stderr
769 // over a sibling binary's exit-1 "not registered" miss.
770 Some(2)
771 }
772 }
773}
774
775/// Shell-self dispatch: if `--ktstr-shell-test=NAME` is present in
776/// argv, look up `NAME` in the binary's own `KTSTR_TESTS` registry,
777/// serialize its shell-relevant fields to stdout as JSON, and exit.
778/// Returns `Some(exit_code)` when dispatched, `None` when absent.
779///
780/// `cargo ktstr shell --test <NAME>` (the cargo-ktstr binary) is a
781/// router that compiles the workspace's tests, exec's each test
782/// binary with this flag, and consumes the first stdout-JSON it
783/// gets (the router bails on ambiguous names — same `NAME`
784/// registered in two binaries). The router applies the
785/// descriptor's topology / memory / extra_include_files to the
786/// shell VM, then prints a one-line banner to stderr BEFORE VM
787/// boot naming the test + scheduler so the operator can repro the
788/// workload manually. (PS1-in-guest is a follow-up.)
789///
790/// # Stdout contract
791///
792/// The test binary MUST keep stdout silent on this dispatch path —
793/// `tracing` output MUST go to stderr. The router parses the entire
794/// stdout as a JSON descriptor; any prefix like an INFO log line
795/// will fail the parse.
796///
797/// # JSON shape
798///
799/// Serialized from [`crate::test_support::ShellTestDescriptor`] via
800/// `serde_json::to_string` — see that struct for the field-by-field
801/// contract. The struct lives in
802/// `crate::test_support::shell_descriptor` so producer and consumer
803/// share a single definition; adding a field there automatically
804/// propagates to both sides.
805///
806/// `scheduler_kind` discriminates `"eevdf" | "discover" | "path" |
807/// "kernel_builtin"` so the banner can hint at how to repro the
808/// scheduler (Discover/Path = userspace binary at `/bin/<n>`;
809/// KernelBuiltin = no binary, the shell-mode boot runs
810/// `scheduler_enable_cmds` before drop-to-busybox and
811/// `scheduler_disable_cmds` on shell exit; Eevdf = no setup needed).
812///
813/// # Exit-code contract
814///
815/// Matches `maybe_dispatch_export`:
816/// - `0`: test registered, JSON emitted to stdout.
817/// - `1`: test not registered in this binary (router falls
818/// through to the next candidate).
819/// - `2`: registered but rejected for shell mode (currently:
820/// `host_only` — no VM to drop into).
821fn maybe_dispatch_shell_test() -> Option<i32> {
822 let args: Vec<String> = std::env::args().collect();
823 let name = extract_shell_test_arg(&args)?;
824
825 if name.is_empty() {
826 eprintln!("ktstr shell: --ktstr-shell-test= requires a non-empty test name");
827 return Some(1);
828 }
829
830 let entry = match find_test(name) {
831 Some(e) => e,
832 None => {
833 eprintln!("ktstr shell: no registered test named '{name}'");
834 return Some(1);
835 }
836 };
837
838 if entry.host_only {
839 eprintln!(
840 "ktstr shell: test '{name}' has host_only = true; \
841 shell mode requires a guest VM to drop into. \
842 Either run the test directly with `cargo ktstr test {name}` \
843 (host_only tests don't boot a VM) or pick a non-host_only \
844 test for shell mode."
845 );
846 return Some(2);
847 }
848
849 let topo = &entry.topology;
850 let scheduler_kind = crate::test_support::SchedulerKind::from(&entry.scheduler.binary);
851 let (scheduler_enable_cmds, scheduler_disable_cmds) = match &entry.scheduler.binary {
852 crate::test_support::entry::SchedulerSpec::KernelBuiltin { enable, disable } => (
853 enable.iter().copied().map(String::from).collect(),
854 disable.iter().copied().map(String::from).collect(),
855 ),
856 _ => (Vec::new(), Vec::new()),
857 };
858
859 let descriptor = crate::test_support::ShellTestDescriptor {
860 numa_nodes: topo.numa_nodes,
861 llcs: topo.llcs,
862 cores: topo.cores_per_llc,
863 threads: topo.threads_per_core,
864 memory_mib: entry.memory_mib,
865 wprof: entry.wprof,
866 extra_include_files: entry
867 .extra_include_files
868 .iter()
869 .copied()
870 .map(String::from)
871 .collect(),
872 scheduler_name: entry.scheduler.name.to_string(),
873 scheduler_kind,
874 wprof_args: entry.wprof_args.map(String::from),
875 performance_mode: entry.performance_mode,
876 scheduler_enable_cmds,
877 scheduler_disable_cmds,
878 };
879
880 // serde_json::to_string produces RFC-8259-compliant escaping
881 // (`\uXXXX` with 4 hex digits, surrogate pairs for SMP code
882 // points) which Rust's Debug formatter does NOT — Debug uses
883 // `\u{1f4c2}` (braced form) for non-ASCII, breaking
884 // operator-supplied paths with non-ASCII chars (test built
885 // under `/home/<unicode-name>/proj`, `extra_include_files`
886 // listing emoji-named files, etc.). serde_json is already a
887 // workspace dep so adding this call doesn't widen the dep graph.
888 let payload = serde_json::to_string(&descriptor)
889 .expect("ShellTestDescriptor is a plain serde struct with no fallible field types");
890 println!("{payload}");
891 Some(0)
892}
893
894/// Host-side dispatch: if both `--ktstr-test-fn` and `--ktstr-topo` are
895/// present, boot a VM with the specified topology and run the test
896/// inside it. Returns `Some(exit_code)` if dispatched, `None` otherwise.
897fn maybe_dispatch_host_test() -> Option<i32> {
898 let args: Vec<String> = std::env::args().collect();
899 let name = extract_test_fn_arg(&args)?;
900 let topo_str = extract_topo_arg(&args)?;
901
902 let entry = match find_test(name) {
903 Some(e) => e,
904 None => {
905 eprintln!("ktstr_test: unknown test function '{name}'");
906 return Some(1);
907 }
908 };
909
910 let (numa_nodes, llcs, cores, threads) = match parse_topo_string(&topo_str) {
911 Some(t) => t,
912 None => {
913 eprintln!(
914 "ktstr_test: invalid --ktstr-topo format '{topo_str}' (expected NnNlNcNt, e.g. 1n2l4c2t)"
915 );
916 return Some(1);
917 }
918 };
919
920 let cpus = llcs * cores * threads;
921 let memory_mib = super::runtime::derive_test_memory_mib(cpus, entry);
922 let topo = TopoOverride {
923 numa_nodes,
924 llcs,
925 cores,
926 threads,
927 memory_mib,
928 };
929
930 match run_ktstr_test_with_topo(entry, &topo) {
931 Ok(_) => Some(0),
932 Err(e) => {
933 eprintln!("ktstr_test: {e:#}");
934 Some(1)
935 }
936 }
937}
938
939/// Host-side entry point: build a VM, boot it with `--ktstr-test-fn=NAME`,
940/// extract profraw from SHM, and return the test result.
941///
942/// Validates KVM access and auto-discovers a kernel image via
943/// `resolve_test_kernel()` when `KTSTR_TEST_KERNEL` is not set.
944pub fn run_ktstr_test(entry: &KtstrTestEntry) -> Result<AssertResult> {
945 // Directly-constructed entries bypass the proc-macro's
946 // compile-time checks. Call `validate` here so programmatic
947 // consumers (library callers pushing into `KTSTR_TESTS`
948 // dynamically) hit the same bail messages the macro produces at
949 // compile time.
950 entry.validate()?;
951
952 if entry.host_only {
953 return run_host_only_test_inner(entry);
954 }
955 if !entry.bpf_map_write.is_empty()
956 && let Ok(kernel) = resolve_test_kernel()
957 && crate::vmm::find_vmlinux(&kernel).is_none()
958 {
959 anyhow::bail!("vmlinux not found, bpf_map_write requires vmlinux");
960 }
961 run_ktstr_test_inner(entry, None)
962}
963
964/// Like `run_ktstr_test` but with an explicit topology override.
965/// Only consumed inside this module by `maybe_dispatch_host_test`;
966/// kept as a named helper so the `--ktstr-test-fn` + `--ktstr-topo`
967/// dispatch path reads symmetrically with the zero-override
968/// [`run_ktstr_test`] library entry point.
969fn run_ktstr_test_with_topo(entry: &KtstrTestEntry, topo: &TopoOverride) -> Result<AssertResult> {
970 run_ktstr_test_inner(entry, Some(topo))
971}
972
/// Process exit code for a Pass verdict (and for the Skip path,
/// which degenerates to Pass because the test never ran).
///
/// Defined as a `pub const` so external tooling (CI gates,
/// dashboard aggregators, nextest wrappers) can reference the
/// exit-code triad by name instead of duplicating the integer
/// literals. The trio [`EXIT_PASS`] / [`EXIT_FAIL`] /
/// [`EXIT_INCONCLUSIVE`] covers every verdict produced by the
/// `Fail > Inconclusive > Pass > Skip` lattice when projected
/// to a process exit code.
pub const EXIT_PASS: i32 = 0;

/// Process exit code for a Fail verdict (or any expect_err
/// satisfaction failure, i.e. an `expect_err` test that did not
/// produce the expected error).
///
/// See [`EXIT_PASS`] for the full triad rationale.
pub const EXIT_FAIL: i32 = 1;

/// Process exit code for an Inconclusive verdict (a
/// zero-denominator ratio gate that could not evaluate).
///
/// Distinct from [`EXIT_PASS`] (which would silently green an
/// unevaluated gate) and [`EXIT_FAIL`] (which would conflate
/// "could not evaluate" with a real regression). External tooling
/// uses this code to triage Inconclusive runs separately — see
/// the README "Exit codes" section for the full operator contract.
pub const EXIT_INCONCLUSIVE: i32 = 2;
1000
1001/// Run a test result through expect_err logic and return an exit code.
1002///
1003/// Returns [`EXIT_PASS`] on pass, [`EXIT_FAIL`] on failure, and
1004/// [`EXIT_INCONCLUSIVE`] on Inconclusive — the 4-state lattice
1005/// `Fail > Inconclusive > Pass > Skip` projects to 3 distinct exit
1006/// codes (Skip degenerates to [`EXIT_PASS`] because the test never
1007/// ran, mirroring `ResourceContention`). A Skip routes through the
1008/// dedicated FIRST match arm (`Ok(r) if r.is_skip()`), ahead of the
1009/// expect_err arm, so an expect_err test that produced no verdict (e.g.
1010/// a `post_vm_skip` on a load-starved placeholder dump) is not inverted
1011/// into a FAIL — a skipped test cannot "produce the expected error."
1012/// [`EXIT_INCONCLUSIVE`] lets
1013/// downstream tooling (CI gates, nextest summary aggregation, the
1014/// operator dashboard) triage zero-denominator runs distinctly from
1015/// real regressions. `ResourceContention` returns [`EXIT_PASS`] —
1016/// the test never ran, not a real failure. The skip sidecar for
1017/// this case is written upstream in `run_ktstr_test_inner` at the
1018/// ResourceContention propagation site so every caller (including
1019/// the library entry point `run_ktstr_test`) records it, not just
1020/// the nextest dispatch path.
1021///
1022/// `ResourceContention` detection walks the FULL error chain via
1023/// [`is_resource_contention`] (chain-walk predicate) plus a
1024/// matching `e.chain().find_map(...)` extraction for the reason
1025/// string. The eval-side `crate::test_support::eval` `"build ktstr_test VM"` and
1026/// `"run ktstr_test VM"` wrappers nest the contention error under
1027/// `.context(...)`, so a top-level `downcast_ref` on the outer
1028/// error misses the inner cause. Without the chain walk a wrapped
1029/// contention would land in the `Err(e)` arm below as a regular
1030/// failure (exit 1) rather than the skip path (exit 0), turning
1031/// every host-resource-exhausted run into a hard test failure.
1032fn result_to_exit_code(
1033 result: Result<AssertResult>,
1034 expect_err: bool,
1035 allow_inconclusive: bool,
1036) -> i32 {
1037 let no_skip = std::env::var_os(crate::KTSTR_NO_SKIP_MODE_ENV).is_some();
1038 match result {
1039 Ok(r) => ok_to_exit_code(r, expect_err, allow_inconclusive),
1040 Err(e) => err_to_exit_code(e, expect_err, no_skip),
1041 }
1042}
1043
1044/// Map an `Ok(AssertResult)` verdict to an exit code.
1045///
1046/// The sequential guards preserve the original `match` arm precedence
1047/// (first matching guard wins): `is_skip()` → `expect_err` →
1048/// `is_inconclusive()` → the trailing `EXIT_PASS` (the former
1049/// `Ok(_) => EXIT_PASS` arm). Reordering these would change which
1050/// verdict fires for a result matching more than one guard.
1051fn ok_to_exit_code(r: AssertResult, expect_err: bool, allow_inconclusive: bool) -> i32 {
1052 // A Skip degenerates to EXIT_PASS regardless of expect_err — the
1053 // test never evaluated, so there is no guest failure to "expect"
1054 // (the `Fail > Inconclusive > Pass > Skip` projection; mirrors the
1055 // ResourceContention Err branch in `err_to_exit_code`, but on the
1056 // Ok side). Without this guard a post_vm_skip under expect_err
1057 // falls into the `expect_err` guard below and surfaces as "expected
1058 // error but test passed" (EXIT_FAIL) — a load-starvation
1059 // placeholder-dump skip becomes a flaky failure. End-to-end chain:
1060 // a post_vm callback returns Err(post_vm_skip(..)) → the eval gate
1061 // detects the HostSkipRequest marker, reports via report::test_skip,
1062 // and returns Ok(AssertResult::skip) → this guard maps it to
1063 // EXIT_PASS. is_skip() is true only when `outcomes` is non-empty and
1064 // every outcome is Outcome::Skip (assert/plan.rs); the empty-outcomes
1065 // Pass identity has is_skip()==false and falls through to the
1066 // trailing `EXIT_PASS`.
1067 if r.is_skip() {
1068 return EXIT_PASS;
1069 }
1070 if expect_err {
1071 // expect_err inverts on Pass and on Inconclusive: both
1072 // are "not a failure" in the operator's mental model,
1073 // and an expect_err scenario that produces an
1074 // Inconclusive verdict (denominator zero) failed to
1075 // produce the expected failure just like a Pass would.
1076 // Surface the inconclusive as exit code 2 to preserve
1077 // the distinct verdict, but treat it as expect_err
1078 // satisfaction failure (exit 1) — the test author
1079 // wanted a Fail, not "the gate could not run".
1080 //
1081 // `allow_inconclusive` does NOT relax the expect_err
1082 // contract: expect_err demands a real Fail, and an
1083 // Inconclusive verdict does not satisfy that
1084 // regardless of how the test author scopes
1085 // Inconclusive elsewhere. The dominant gate wins;
1086 // `allow_inconclusive` only relaxes the
1087 // EXIT_INCONCLUSIVE projection on the no-expect_err
1088 // path below.
1089 if r.is_inconclusive() {
1090 eprintln!(
1091 "expected error but test produced an Inconclusive verdict — \
1092 zero-denominator gate could not evaluate; expect_err is \
1093 unsatisfied"
1094 );
1095 return EXIT_FAIL;
1096 } else {
1097 eprintln!("expected error but test passed");
1098 return EXIT_FAIL;
1099 }
1100 }
1101 if r.is_inconclusive() {
1102 // `allow_inconclusive` opt-in: a test author may have
1103 // declared `#[ktstr_test(allow_inconclusive)]` to
1104 // signal "this test's Inconclusive arm is acceptable —
1105 // don't fail the CI gate." Route to EXIT_PASS in that
1106 // case (Inconclusive is still recorded in the sidecar
1107 // for stats tooling and the operator-facing failure
1108 // dump still renders the diagnostic). When the flag
1109 // is unset (the default) the verdict surfaces as
1110 // EXIT_INCONCLUSIVE so the operator triages it.
1111 if allow_inconclusive {
1112 eprintln!(
1113 "test produced an Inconclusive verdict but \
1114 `allow_inconclusive` is set — routing to EXIT_PASS \
1115 for CI gate, sidecar still records Inconclusive"
1116 );
1117 return EXIT_PASS;
1118 } else {
1119 return EXIT_INCONCLUSIVE;
1120 }
1121 }
1122 EXIT_PASS
1123}
1124
1125/// Map an `Err(anyhow::Error)` outcome to an exit code.
1126///
1127/// The sequential guards preserve the original `match` arm precedence
1128/// (first matching guard wins): the host-insufficiency classification
1129/// ([`classify_host_error`], covering kernel-unavailable → perf-mode →
1130/// cpu-budget → topology-unrepresentable → resource-contention →
1131/// topology-insufficient, shared with the `#[ktstr_test]` macro body) runs
1132/// FIRST, then the
1133/// marker-typed guards (`PostVmAssertionFailure` → `SchedulerBuildRefused`
1134/// → `SurvivesStormViolated` → `ExpectAutoReproSatisfied`), then the
1135/// `expect_err` inversion, then
1136/// the catch-all (the former `Err(e) => …` arm) operating on the
1137/// now-owned `e`. Reordering these would change which guard fires for an
1138/// error matching more than one guard. The host-insufficiency guard
1139/// order + per-class skip/fail policy live in `classify_host_error`, not
1140/// here, so this site and the macro cannot drift apart.
1141fn err_to_exit_code(e: anyhow::Error, expect_err: bool, no_skip: bool) -> i32 {
1142 // Host-insufficiency classification (kernel-unavailable, perf-mode,
1143 // cpu-budget, topology-unrepresentable, resource-contention,
1144 // topology-insufficient) is shared with the `#[ktstr_test]` macro body via
1145 // `classify_host_error` — the single source of truth for the guard
1146 // ORDER and the per-class skip/fail policy. This site renders the
1147 // verdict as an exit code; the macro renders the same `HostClass` as
1148 // libtest control flow. The bare `reason` carries no prefix: the skip
1149 // channel (`report::test_skip`) prepends `ktstr: SKIP:`, the fail
1150 // channel prepends `ktstr: FAIL:`. Placed first so a host-insufficiency
1151 // returns before the marker / expect_err / catch-all arms below — a
1152 // skip is a skip and an unconditional hard fail is a hard fail
1153 // regardless of `expect_err`.
1154 match classify_host_error(&e, no_skip) {
1155 HostClass::Skip { reason } => {
1156 crate::report::test_skip(format_args!("{reason}"));
1157 return EXIT_PASS;
1158 }
1159 HostClass::Fail { reason } => {
1160 eprintln!("ktstr: FAIL: {reason}");
1161 return EXIT_FAIL;
1162 }
1163 HostClass::NotHostClass => {}
1164 }
1165 if e.downcast_ref::<crate::test_support::eval::PostVmAssertionFailure>()
1166 .is_some()
1167 {
1168 // A host-side post_vm / post_vm_unconditional callback
1169 // failed. This is a real regression that must surface
1170 // regardless of expect_err / expect_auto_repro inversion —
1171 // those invert a GUEST-side expected failure, but a
1172 // HOST-side check is always honored. Positioned AFTER the
1173 // resource-contention / topology skip guards (a skip means
1174 // the test never ran, so there was no host-side state to
1175 // assert) but BEFORE the ExpectAutoReproSatisfied and
1176 // expect_err inversion guards so the host-side regression
1177 // wins. `downcast_ref` walks the anyhow context+source
1178 // chain (the marker rides as `.context(...)` from
1179 // run_ktstr_test_inner_impl); a raw `chain().any(is::<C>())`
1180 // would miss it (anyhow boxes context as ContextError<C,E>).
1181 eprintln!("{e:#}");
1182 return EXIT_FAIL;
1183 }
1184 if e.downcast_ref::<crate::test_support::eval::SchedulerBuildRefused>()
1185 .is_some()
1186 {
1187 // An orchestrated scheduler build expected to succeed FAILED and the
1188 // resolver refused to validate against a possibly-stale pre-built
1189 // binary (KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK unset). A host-side
1190 // build-infra fault — always EXIT_FAIL, never inverted by expect_err
1191 // (mirrors PostVmAssertionFailure above): an expect_err test must not
1192 // let a broken build masquerade as the guest-side expected failure.
1193 eprintln!("{e:#}");
1194 return EXIT_FAIL;
1195 }
1196 if e.downcast_ref::<crate::test_support::eval::SurvivesStormViolated>()
1197 .is_some()
1198 {
1199 // The marker rides ONLY when `entry.survives_storm` was set AND the
1200 // failure cause was a scheduler death (see
1201 // `render_failure_verdict_message`), so its presence alone proves the
1202 // survival assertion was violated — no `survives_storm` param needed
1203 // (mirrors the marker-presence arms for PostVmAssertionFailure /
1204 // SchedulerBuildRefused / ExpectAutoReproSatisfied below). Force
1205 // EXIT_FAIL with a survival-specific explainer. Positioned AFTER the
1206 // host-insufficiency / PostVmAssertionFailure / SchedulerBuildRefused
1207 // guards (a skip or host-side fault still dominates) but BEFORE the
1208 // ExpectAutoReproSatisfied and expect_err inversion arms so a survival
1209 // violation can never be inverted to PASS (defense-in-depth: the
1210 // validate-time survives_storm/expect_err mutex already forbids that
1211 // pairing). `downcast_ref` walks the anyhow context chain (the marker
1212 // rides as `.context(...)`).
1213 eprintln!(
1214 "ktstr: FAIL: survives_storm asserted but the scheduler did not \
1215 survive the run:\n{e:#}"
1216 );
1217 return EXIT_FAIL;
1218 }
1219 if e.downcast_ref::<crate::test_support::eval::ExpectAutoReproSatisfied>()
1220 .is_some()
1221 {
1222 // `expect_auto_repro = true` was satisfied: the primary
1223 // VM produced a Fail AND the auto-repro VM landed a
1224 // shape-valid `.repro.wprof.pb`. The eval layer attached
1225 // the marker as `anyhow::Context`. `downcast_ref` walks
1226 // the anyhow context+source chain (per anyhow's
1227 // documentation: "For errors with context, this method
1228 // returns true if E matches the type of the context C or
1229 // the type of the error on which the context has been
1230 // attached"). A `chain().any(|c| c.is::<E>())` walk on
1231 // the raw `&dyn StdError` chain would MISS the marker
1232 // because anyhow boxes context as `ContextError<C, E>`
1233 // whose underlying `is::<C>()` check returns false. The
1234 // diagnostic is printed so the operator sees both the
1235 // original failure trail and the inversion notice — the
1236 // verdict flips to PASS without erasing the failure
1237 // detail. Positioned AFTER the ResourceContention /
1238 // TopologyInsufficient guards so a skip-class outcome still
1239 // wins over inversion (a skip is a skip regardless of the
1240 // satisfaction signal). The macro-parse cross-attribute
1241 // check rejects `expect_auto_repro` combined with
1242 // `expect_err`, so the two inversion paths are mutually
1243 // exclusive at the entry layer.
1244 eprintln!("{e:#}");
1245 return EXIT_PASS;
1246 }
1247 if expect_err {
1248 // expect_err inverts a failure into a pass — UNLESS the
1249 // failure carries the
1250 // [`crate::test_support::eval::ScxBpfErrorMatcherMismatch`]
1251 // marker, which signals that the reproducer's scx_bpf_error
1252 // matcher rejected this particular failure. A matcher-
1253 // mismatch failure must surface even when expect_err = true:
1254 // the user authored the matcher to pin THIS specific bug,
1255 // and a different bug firing is itself a regression.
1256 //
1257 // `downcast_ref` walks the anyhow context+source chain
1258 // (anyhow's documented "For errors with context, this
1259 // method returns true if E matches the type of the context
1260 // C or the type of the error on which the context has been
1261 // attached" semantics). A `chain().any(|c| c.is::<E>())`
1262 // walk on the raw `&dyn StdError` chain would MISS the
1263 // marker because anyhow boxes context as
1264 // `ContextError<C, E>` whose underlying `is::<C>()` check
1265 // returns false.
1266 if e.downcast_ref::<crate::test_support::eval::ScxBpfErrorMatcherMismatch>()
1267 .is_some()
1268 {
1269 eprintln!("{e:#}");
1270 return EXIT_FAIL;
1271 } else {
1272 return EXIT_PASS;
1273 }
1274 }
1275 // Catch-all: a non-host-class, non-marker, non-expect_err error is a
1276 // real failure. (A KernelUnavailable does NOT reach here — it is a
1277 // skip-class host-insufficiency handled by the classify_host_error match
1278 // at the top.)
1279 eprintln!("{e:#}");
1280 EXIT_FAIL
1281}
1282
/// Final outcome of a test: one point on the 4-state lattice
/// `Fail > Inconclusive > Pass > Skip`.
///
/// [`result_to_exit_code`] projects the same lattice onto a process
/// exit code, but that projection folds `Skip` into [`EXIT_PASS`].
/// The sidecar finalize step ([`final_outcome`]) needs all four
/// states so the persisted `passed`/`skipped`/`inconclusive` bits
/// reflect the POST-inversion outcome — hence a separate type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum Verdict {
    Pass,
    Fail,
    Skip,
    Inconclusive,
}

impl Verdict {
    /// Exit-code projection of this verdict, using the same
    /// `EXIT_PASS`/`EXIT_FAIL`/`EXIT_INCONCLUSIVE` mapping that
    /// [`result_to_exit_code`] produces (`Skip` collapses into
    /// [`EXIT_PASS`]).
    ///
    /// Compiled for tests only: its sole caller is the anti-drift
    /// truth-table test
    /// (`final_outcome_projects_to_result_to_exit_code`). Production
    /// code consumes the [`Verdict`] through
    /// [`Verdict::sidecar_bits`] instead.
    #[cfg(test)]
    pub(crate) fn to_exit_code(self) -> i32 {
        match self {
            Verdict::Fail => EXIT_FAIL,
            Verdict::Inconclusive => EXIT_INCONCLUSIVE,
            Verdict::Pass | Verdict::Skip => EXIT_PASS,
        }
    }

    /// Sidecar encoding of this verdict as `(passed, skipped,
    /// inconclusive)`.
    ///
    /// At most one bit is set; `Fail` sets none (the
    /// [`crate::test_support::SidecarResult::is_fail`] "none set"
    /// encoding). Returning plain bools lets the sidecar finalize
    /// step record the final verdict without
    /// [`crate::test_support::sidecar`] depending on this enum.
    pub(crate) fn sidecar_bits(self) -> (bool, bool, bool) {
        let [passed, skipped, inconclusive] = match self {
            Verdict::Fail => [false, false, false],
            Verdict::Inconclusive => [false, false, true],
            Verdict::Skip => [false, true, false],
            Verdict::Pass => [true, false, false],
        };
        (passed, skipped, inconclusive)
    }
}
1328
1329/// Classify a test result into the final [`Verdict`] — the same
1330/// classification [`result_to_exit_code`] performs, as a 4-state value
1331/// (it does not collapse `Skip` into `Pass` the way the exit code does)
1332/// and WITHOUT the operator-facing `eprintln` diagnostics.
1333///
1334/// Used to record the FINAL (post-`expect_err` / post-marker) outcome on
1335/// the sidecar so the footer, `stats` analysis, and `replay` reflect the
1336/// test's real pass/fail (matching nextest's exit code) rather than the
1337/// raw scenario verdict written mid-run.
1338///
1339/// MUST stay in lockstep with [`result_to_exit_code`]: the truth-table
1340/// test `final_outcome_projects_to_result_to_exit_code` asserts
1341/// `final_outcome(...).to_exit_code() == result_to_exit_code(...)` over a
1342/// matrix including the marker-carrying error arms, so the two cannot
1343/// drift. The arm order mirrors [`ok_to_exit_code`] / [`err_to_exit_code`]
1344/// first-match precedence exactly.
1345pub(crate) fn final_outcome(
1346 result: &Result<AssertResult>,
1347 expect_err: bool,
1348 allow_inconclusive: bool,
1349) -> Verdict {
1350 let no_skip = std::env::var_os(crate::KTSTR_NO_SKIP_MODE_ENV).is_some();
1351 match result {
1352 Ok(r) => {
1353 if r.is_skip() {
1354 return Verdict::Skip;
1355 }
1356 if expect_err {
1357 // expect_err on an Ok result is always a failure
1358 // (expected an error, got a non-error verdict) — both the
1359 // Pass and Inconclusive arms of ok_to_exit_code map here.
1360 return Verdict::Fail;
1361 }
1362 if r.is_inconclusive() {
1363 return if allow_inconclusive {
1364 Verdict::Pass
1365 } else {
1366 Verdict::Inconclusive
1367 };
1368 }
1369 Verdict::Pass
1370 }
1371 Err(e) => {
1372 match classify_host_error(e, no_skip) {
1373 HostClass::Skip { .. } => return Verdict::Skip,
1374 HostClass::Fail { .. } => return Verdict::Fail,
1375 HostClass::NotHostClass => {}
1376 }
1377 if e.downcast_ref::<crate::test_support::eval::PostVmAssertionFailure>()
1378 .is_some()
1379 {
1380 return Verdict::Fail;
1381 }
1382 if e.downcast_ref::<crate::test_support::eval::SchedulerBuildRefused>()
1383 .is_some()
1384 {
1385 return Verdict::Fail;
1386 }
1387 if e.downcast_ref::<crate::test_support::eval::SurvivesStormViolated>()
1388 .is_some()
1389 {
1390 // Lockstep with err_to_exit_code's SurvivesStormViolated arm
1391 // (same position: after SchedulerBuildRefused, before
1392 // ExpectAutoReproSatisfied / expect_err) so the persisted
1393 // sidecar verdict matches the exit code for a survival
1394 // violation — including the defense-in-depth bypass case
1395 // (marker + expect_err) the mutex normally forbids.
1396 return Verdict::Fail;
1397 }
1398 if e.downcast_ref::<crate::test_support::eval::ExpectAutoReproSatisfied>()
1399 .is_some()
1400 {
1401 return Verdict::Pass;
1402 }
1403 if expect_err {
1404 if e.downcast_ref::<crate::test_support::eval::ScxBpfErrorMatcherMismatch>()
1405 .is_some()
1406 {
1407 return Verdict::Fail;
1408 }
1409 return Verdict::Pass;
1410 }
1411 Verdict::Fail
1412 }
1413 }
1414}
1415
1416/// Whether a base test entry is "ignored" (skipped by default).
1417///
1418/// Tests whose names start with `demo_` are ignored -- they are
1419/// demonstration/benchmarking tests that require manual opt-in.
1420fn is_ignored(entry: &KtstrTestEntry) -> bool {
1421 entry.name.starts_with("demo_")
1422}
1423
1424/// Walk [`KTSTR_TESTS`] once per process and emit a stderr
1425/// `warning:` line for every duplicate `name` found.
1426///
1427/// Two entries with the same name would both match `find_test(name)`
1428/// (which returns the FIRST match), so the second registration is
1429/// silently shadowed — `cargo ktstr` would dispatch the first entry
1430/// and the second entry's body would never run, with no diagnostic
1431/// surfaced. The warning surfaces the collision so an operator can
1432/// rename one of the `#[ktstr_test]` functions; discovery itself
1433/// proceeds (find_test's first-wins behavior continues) so nextest's
1434/// `--list` output still lands in stdout. A panic here would abort
1435/// the whole listing — nextest would see no tests at all rather
1436/// than a partial set with a clear warning. The first-wins
1437/// shadowing remains a real bug, but the diagnostic is louder than
1438/// silence and the tradeoff (operator sees the warning AND a
1439/// usable test list) beats the alternative (operator sees a
1440/// panic backtrace and no test list).
1441///
1442/// `OnceLock<()>` gates the walk to fire EXACTLY ONCE per process:
1443/// every gauntlet variant resolves through `list_tests` (under
1444/// nextest's discovery and budget paths), so without the gate a
1445/// run with N variants would re-walk the slice N times and emit
1446/// the same warning N times. Each duplicate name surfaces exactly
1447/// once via the inner `seen`/`warned` HashSet pair so a
1448/// triple-collision (three entries sharing one name) does not
1449/// double-print the warning.
1450///
1451/// The pure detection logic lives in
1452/// [`warn_duplicate_test_names_inner`] so the duplicate-walker
1453/// is testable without process-wide global state. This wrapper
1454/// only owns the `OnceLock<()>` gate and the
1455/// `(KTSTR_TESTS, stderr)` plumbing.
1456fn warn_duplicate_test_names_once() {
1457 static CHECKED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
1458 CHECKED.get_or_init(|| {
1459 warn_duplicate_test_names_inner(KTSTR_TESTS.iter().map(|e| e.name), &mut std::io::stderr());
1460 });
1461}
1462
/// Duplicate-name walker behind [`warn_duplicate_test_names_once`].
///
/// Consumes `names` and writes one `warning:` line to `sink` for every
/// name that occurs more than once. A name is reported the first time it
/// is seen again and never after that, so three entries sharing one name
/// still produce a single line. Lines appear in the order the repeats are
/// encountered.
///
/// Keeping this separate from the `OnceLock`-gated wrapper makes the
/// detection testable without global state: the wrapper passes
/// `KTSTR_TESTS.iter().map(|e| e.name)` and `std::io::stderr()`, while a
/// test can pass any iterator of names and a `Vec<u8>` sink.
///
/// Write errors are deliberately discarded: this is a diagnostic
/// channel, and there is nothing useful to do when writing the warning
/// itself fails.
fn warn_duplicate_test_names_inner<'a, W: std::io::Write>(
    names: impl IntoIterator<Item = &'a str>,
    sink: &mut W,
) {
    use std::collections::HashSet;

    let all_names: Vec<&'a str> = names.into_iter().collect();
    // `first_seen` remembers every name met so far; `reported` remembers
    // the names a warning has already been written for.
    let mut first_seen: HashSet<&'a str> = HashSet::with_capacity(all_names.len());
    let mut reported: HashSet<&'a str> = HashSet::new();

    for name in all_names {
        if first_seen.insert(name) {
            // First occurrence: nothing to report yet.
            continue;
        }
        if !reported.insert(name) {
            // Third or later occurrence: already warned about this name.
            continue;
        }
        let _ = writeln!(
            sink,
            "warning: ktstr_test: duplicate test name {name:?} registered in KTSTR_TESTS — \
             two `#[ktstr_test]` entries share this name; the SECOND entry is \
             silently shadowed (find_test returns the first registration). \
             rename one of the functions to disambiguate.",
        );
    }
}
1503
1504/// Collect test names for nextest discovery (--list --format terse).
1505///
1506/// Nextest calls the binary twice:
1507/// - Without `--ignored`: prints ALL tests (ignored and non-ignored).
1508/// - With `--ignored`: prints ONLY ignored tests.
1509///
1510/// Gauntlet variants are always ignored. Base tests are ignored when
1511/// their name starts with `demo_`.
1512///
1513/// When `KTSTR_BUDGET_SECS` is set, applies greedy coverage maximization
1514/// to select the subset of tests that maximizes feature coverage within
1515/// the time budget. Only selected tests are printed.
1516///
1517/// Calls [`warn_duplicate_test_names_once`] on the first invocation per
1518/// process so duplicate registrations surface a stderr `warning:`
1519/// line BEFORE any test name is printed (discovery itself proceeds
1520/// — find_test's first-wins behavior continues, but the operator
1521/// sees which name collided). Subsequent invocations are no-ops via
1522/// the inner `OnceLock` gate.
1523fn list_tests(ignored_only: bool) {
1524 warn_duplicate_test_names_once();
1525 let raw = std::env::var(crate::KTSTR_BUDGET_SECS_ENV).ok();
1526 let budget_secs: Option<f64> = raw.as_deref().and_then(|s| match s.parse::<f64>() {
1527 Ok(v) if v > 0.0 => Some(v),
1528 Ok(v) => {
1529 eprintln!("ktstr_test: KTSTR_BUDGET_SECS={v}: must be positive, ignoring");
1530 None
1531 }
1532 Err(e) => {
1533 eprintln!("ktstr_test: KTSTR_BUDGET_SECS={s:?}: {e}, ignoring");
1534 None
1535 }
1536 });
1537
1538 if let Some(budget) = budget_secs {
1539 list_tests_budget(ignored_only, budget);
1540 } else {
1541 list_tests_all(ignored_only);
1542 }
1543}
1544
1545/// Iterate topology presets that both fit the host capacity and
1546/// match the entry's `TopologyConstraints`. Shared between the
1547/// eager ("print every name") and budgeted ("push a candidate")
1548/// listers in `list_tests_*`.
1549fn for_each_gauntlet_variant<F>(
1550 entry: &KtstrTestEntry,
1551 presets: &[crate::gauntlet::TopoPreset],
1552 host_cpus: u32,
1553 host_llcs: u32,
1554 host_max_cpus_per_llc: u32,
1555 mut visit: F,
1556) where
1557 F: FnMut(&crate::gauntlet::TopoPreset),
1558{
1559 let no_perf_mode = super::runtime::no_perf_mode_for_entry(entry);
1560 for preset in presets {
1561 // No-perf-mode tests run KVM-emulated topology — guest sees the
1562 // declared NUMA / LLC / per-LLC layout regardless of host
1563 // hardware — so the host-side LLC count and per-LLC CPU width
1564 // do not constrain preset eligibility. Only the total-CPU
1565 // budget survives.
1566 let accepted = if no_perf_mode {
1567 entry
1568 .constraints
1569 .accepts_no_perf_mode(&preset.topology, host_cpus)
1570 } else {
1571 entry.constraints.accepts(
1572 &preset.topology,
1573 host_cpus,
1574 host_llcs,
1575 host_max_cpus_per_llc,
1576 )
1577 };
1578 if !accepted {
1579 continue;
1580 }
1581 visit(preset);
1582 }
1583}
1584
1585/// List all tests without budget filtering.
1586///
1587/// When `KTSTR_KERNEL_LIST` carries 2 or more entries, every test
1588/// name carries an extra `/{sanitized_kernel_label}` suffix so each
1589/// (test × kernel) pair becomes a distinct nextest test case;
1590/// nextest's parallelism, retries, and `-E` filtering all apply
1591/// natively. Single-kernel mode (0 or 1 entries) emits the
1592/// `gauntlet/{name}/{preset}` shape with no kernel suffix.
1593///
1594/// `KTSTR_CARGO_TEST_MODE=1` skips gauntlet variant emission and
1595/// the multi-kernel suffix path: each test gets exactly one
1596/// `ktstr/{name}: test` line. Bare `cargo test` doesn't have
1597/// access to the cargo-ktstr resolver that produces
1598/// `KTSTR_KERNEL_LIST`, so the multi-kernel branch can't apply
1599/// even if it were enabled — pin both behaviors explicitly so
1600/// the listing matches what the dispatch path will actually run.
1601fn list_tests_all(ignored_only: bool) {
1602 let cargo_test_mode = crate::cargo_test_mode::cargo_test_mode_active();
1603 let presets = crate::gauntlet::gauntlet_presets();
1604 let has_vmlinux = resolve_test_kernel()
1605 .ok()
1606 .and_then(|k| crate::vmm::find_vmlinux(&k))
1607 .is_some();
1608 let (host_cpus, host_llcs, host_max_cpus_per_llc) = super::host_capacity();
1609
1610 let kernel_list = read_kernel_list();
1611 let multi_kernel = kernel_list.len() > 1 && !cargo_test_mode;
1612 // Single-kernel mode (no list, or list has exactly one entry)
1613 // emits one variant per (test × preset) tuple with no kernel
1614 // suffix. Multi-kernel mode iterates every kernel as an outer
1615 // loop and appends `/{sanitized}` per variant. The empty-suffix
1616 // sentinel below is what the single-kernel branch passes to keep
1617 // the print path uniform.
1618 let kernel_suffixes: Vec<&str> = if multi_kernel {
1619 kernel_list.iter().map(|k| k.sanitized.as_str()).collect()
1620 } else {
1621 vec![""]
1622 };
1623
1624 for entry in KTSTR_TESTS.iter() {
1625 // bpf_map_write tests require vmlinux to resolve BPF map
1626 // addresses. Don't list them when vmlinux is unavailable —
1627 // they cannot run and would produce false PASS results.
1628 if !entry.bpf_map_write.is_empty() && !has_vmlinux {
1629 continue;
1630 }
1631
1632 if !ignored_only || is_ignored(entry) {
1633 if entry.host_only {
1634 println!("ktstr/{}: test", entry.name);
1635 } else {
1636 for suffix in &kernel_suffixes {
1637 if suffix.is_empty() {
1638 println!("ktstr/{}: test", entry.name);
1639 } else {
1640 println!("ktstr/{}/{suffix}: test", entry.name);
1641 }
1642 }
1643 }
1644 }
1645
1646 // Host-only tests run on the host without a VM -- gauntlet
1647 // topology variants are meaningless.
1648 if entry.host_only {
1649 continue;
1650 }
1651
1652 // KTSTR_CARGO_TEST_MODE: skip gauntlet expansion. The
1653 // operator picked the bare-`cargo test` path; emit only
1654 // the base name so each `#[ktstr_test]` runs once with its
1655 // declared topology.
1656 if cargo_test_mode {
1657 continue;
1658 }
1659
1660 // Gauntlet variants are always ignored — users opt in with
1661 // --run-ignored. Presets that exceed the host's CPU count or
1662 // LLC count are filtered from the listing entirely.
1663 for_each_gauntlet_variant(
1664 entry,
1665 &presets,
1666 host_cpus,
1667 host_llcs,
1668 host_max_cpus_per_llc,
1669 |preset| {
1670 for suffix in &kernel_suffixes {
1671 if suffix.is_empty() {
1672 println!("gauntlet/{}/{}: test", entry.name, preset.name);
1673 } else {
1674 println!("gauntlet/{}/{}/{suffix}: test", entry.name, preset.name,);
1675 }
1676 }
1677 },
1678 );
1679 }
1680}
1681
1682/// True iff the given operator-resolved kernel `entry` matches one
1683/// of the `declared` kernel specs from a scheduler's
1684/// `declare_scheduler!` `kernels = [...]` declaration. Empty
1685/// `declared` accepts every entry (no per-scheduler filter).
1686///
1687/// Match semantics per spec variant (via [`crate::kernel_path::KernelId::parse`]):
1688/// - [`crate::kernel_path::KernelId::Version`]: raw-label string equality OR sanitized-label match
1689/// ([`sanitize_kernel_label`] of the spec string equals the entry's
1690/// sanitized label). Direct match catches the common case where
1691/// the dispatcher resolved `--kernel 6.14.2` and the scheduler
1692/// declared `kernels = ["6.14.2"]`.
1693/// - [`crate::kernel_path::KernelId::Range`]: range-membership check on the entry's raw
1694/// label via [`crate::kernel_path::decompose_version_for_compare`].
1695/// Lets schedulers declaring `kernels = ["6.14..6.16"]` match
1696/// any operator-supplied kernel whose version falls in
1697/// `[6.14, 6.16]` inclusive.
1698/// - [`crate::kernel_path::KernelId::Path`] / [`crate::kernel_path::KernelId::CacheKey`] / [`crate::kernel_path::KernelId::Git`]:
1699/// sanitized-label equality — the producer-side encoder
1700/// (`src/bin/cargo_ktstr/kernel/wire_format.rs`) emits a deterministic
1701/// label per variant (`path_…`, `git_owner_repo_kind_ref`, version
1702/// prefix from cache key), so identical specs on both sides
1703/// produce identical sanitized labels.
1704///
1705/// [`KernelId`]: crate::kernel_path::KernelId
1706fn sched_kernel_filter_accepts(declared: &[&'static str], entry: &KernelEntry) -> bool {
1707 if declared.is_empty() {
1708 return true;
1709 }
1710 declared.iter().any(|spec| entry_matches_spec(entry, spec))
1711}
1712
1713/// Single-spec match helper for [`sched_kernel_filter_accepts`].
1714/// Parses `spec` via [`crate::kernel_path::KernelId::parse`] and
1715/// dispatches on the variant. Pure logic — no network, no FS.
1716fn entry_matches_spec(entry: &KernelEntry, spec: &str) -> bool {
1717 use crate::kernel_path::{KernelId, decompose_version_for_compare};
1718 match KernelId::parse(spec) {
1719 KernelId::Version(spec_ver) => {
1720 entry.label == spec_ver || entry.sanitized.as_str() == sanitize_kernel_label(&spec_ver)
1721 }
1722 KernelId::Range { start, end, .. } => {
1723 let Some(entry_t) = decompose_version_for_compare(&entry.label) else {
1724 return false;
1725 };
1726 let Some(start_t) = decompose_version_for_compare(&start) else {
1727 return false;
1728 };
1729 let Some(end_t) = decompose_version_for_compare(&end) else {
1730 return false;
1731 };
1732 entry_t >= start_t && entry_t <= end_t
1733 }
1734 KernelId::CacheKey(_) | KernelId::Path(_) | KernelId::Git { .. } => {
1735 entry.sanitized.as_str() == sanitize_kernel_label(spec)
1736 }
1737 }
1738}
1739
/// Build the `KTSTR_KERNEL_LIST is empty` diagnostic that
/// [`run_verifier_cell`] emits when a verifier cell name reaches the
/// cell handler but there is no kernel list to look its label up in.
///
/// `full_name` is the complete cell name and is embedded verbatim. The
/// message lives in its own function, rather than inline in an
/// `eprintln!`, so unit tests can pin the exact wording without spawning
/// a process.
fn format_empty_kernel_list_error(full_name: &str) -> String {
    [
        "ktstr verifier: cell ",
        full_name,
        ": KTSTR_KERNEL_LIST is empty. ",
        "Direct `--exact verifier/...` invocation outside `cargo ktstr verifier` ",
        "is not supported — the dispatcher owns kernel-set resolution. Run ",
        "`cargo ktstr verifier [--kernel SPEC]` instead.",
    ]
    .concat()
}
1753
/// Build the "kernel label not in KTSTR_KERNEL_LIST" diagnostic.
///
/// - `full_name`: the complete cell name, embedded verbatim.
/// - `kernel_label`: the label that was not found; rendered with `{:?}`,
///   so it appears quoted and escaped.
/// - `sched_name`: the scheduler whose `declare_scheduler!` declaration
///   the hint points at.
/// - `present`: the sanitized labels actually in the list, in their
///   `KTSTR_KERNEL_LIST` order; rendered comma-separated inside `[...]`.
///
/// Extracted for the same reason as [`format_empty_kernel_list_error`]:
/// the wording can be pinned in unit tests.
fn format_unknown_kernel_label_error(
    full_name: &str,
    kernel_label: &str,
    sched_name: &str,
    present: &[&str],
) -> String {
    let quoted_label = format!("{kernel_label:?}");
    let present_labels = present.join(", ");
    [
        "ktstr verifier: cell ",
        full_name,
        ": kernel label ",
        quoted_label.as_str(),
        " not in KTSTR_KERNEL_LIST. Present labels: [",
        present_labels.as_str(),
        "]. ",
        "Either add --kernel <SPEC> to the dispatcher invocation so it ",
        "resolves into this label, or remove the matching entry from ",
        "declare_scheduler!(... kernels = [...]) for ",
        sched_name,
        ".",
    ]
    .concat()
}
1773
1774/// The set of workspace PACKAGE names, parsed once from the workspace
1775/// `Cargo.toml` baked in at compile time. [`list_verifier_cells_all`]
1776/// uses it to skip `declare_scheduler!` decls whose `Discover(pkg)` is
1777/// not a real workspace package — the macro-expansion FIXTURES in
1778/// tests/declare_scheduler.rs register into `KTSTR_SCHEDULERS` but have
1779/// no buildable package, so their cells must not be emitted.
1780///
1781/// `CARGO_MANIFEST_DIR` is the ktstr crate dir, which IS the workspace
1782/// root in this repo, so its `Cargo.toml` carries `[workspace] members`.
1783fn workspace_packages() -> &'static std::collections::HashSet<String> {
1784 use std::sync::OnceLock;
1785 static PKGS: OnceLock<std::collections::HashSet<String>> = OnceLock::new();
1786 PKGS.get_or_init(|| {
1787 const ROOT_TOML: &str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/Cargo.toml"));
1788 parse_workspace_member_packages(ROOT_TOML, env!("CARGO_PKG_NAME"))
1789 })
1790}
1791
/// Pure parse of `[workspace] members = [ ... ]` from a `Cargo.toml`
/// string into the set of package names.
///
/// Mapping from member entry to package name:
/// - the `.` member (the workspace root) maps to `root_pkg`;
/// - every other member maps to its last path component (this
///   workspace's convention: member dir `scx-ktstr` = package
///   `scx-ktstr`). Trailing `/` separators are ignored, so
///   `crates/foo/` yields `foo` rather than an empty name.
///
/// This is a deliberately small scanner, not a TOML parser, but it
/// honors the parts of the syntax that decide WHICH strings are members:
/// - only the `[workspace]` table itself is searched; it ends at the
///   next line that opens another table (`[...]`), so a `members` key in
///   a later table is not picked up;
/// - the key must be exactly `members` followed by `= [` — in
///   particular `default-members` is not mistaken for it;
/// - `#` comments inside the array are skipped, including any `,`, `]`
///   or quote characters they contain;
/// - both basic (`"..."`) and literal (`'...'`) strings are accepted.
///
/// Returns an empty set when there is no `[workspace]` table, no
/// `members` array in it, or the array is never closed with `]`.
/// Glob members (`crates/*`) are not expanded. Pure, so unit-testable.
fn parse_workspace_member_packages(
    cargo_toml: &str,
    root_pkg: &str,
) -> std::collections::HashSet<String> {
    const HEADER: &str = "[workspace]";
    const KEY: &str = "members";

    /// Characters that may appear in a bare TOML key.
    fn is_key_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '-' || c == '_'
    }

    let mut out = std::collections::HashSet::new();

    // Anchor on the [workspace] table so a stray `members` key elsewhere
    // can't be mistaken for the workspace member list.
    let Some(header_at) = cargo_toml.find(HEADER) else {
        return out;
    };
    let after_header = &cargo_toml[header_at + HEADER.len()..];

    // The table ends where the next table header starts. The first
    // "line" is the remainder of the header line itself, so it is never
    // treated as a new header.
    let mut section_len = after_header.len();
    let mut offset = 0;
    for line in after_header.split_inclusive('\n') {
        if offset > 0 && line.trim_start().starts_with('[') {
            section_len = offset;
            break;
        }
        offset += line.len();
    }
    let section = &after_header[..section_len];

    // First standalone `members` key that is followed by `= [`. The
    // character before the match must not be a key character, otherwise
    // the match is the tail of a longer key such as `default-members`.
    let array_body = section.match_indices(KEY).find_map(|(at, _)| {
        if section[..at].chars().next_back().is_some_and(is_key_char) {
            return None;
        }
        section[at + KEY.len()..]
            .trim_start()
            .strip_prefix('=')?
            .trim_start()
            .strip_prefix('[')
    });
    let Some(array_body) = array_body else {
        return out;
    };

    // Collect the quoted strings up to the closing `]`.
    let mut members: Vec<String> = Vec::new();
    let mut closed = false;
    let mut chars = array_body.chars();
    while let Some(c) = chars.next() {
        match c {
            ']' => {
                closed = true;
                break;
            }
            '#' => {
                // Comment: discard everything up to the end of the line.
                for skipped in chars.by_ref() {
                    if skipped == '\n' {
                        break;
                    }
                }
            }
            '"' | '\'' => {
                // `take_while` also consumes the matching closing quote.
                members.push(chars.by_ref().take_while(|&m| m != c).collect());
            }
            _ => {}
        }
    }
    if !closed {
        // Unterminated list: treat the manifest as having no members
        // rather than trusting a partial scan.
        return out;
    }

    for member in &members {
        let path = member.trim().trim_end_matches('/');
        if path.is_empty() {
            continue;
        }
        if path == "." {
            out.insert(root_pkg.to_string());
        } else {
            out.insert(path.rsplit('/').next().unwrap_or(path).to_string());
        }
    }
    out
}
1831
1832/// Emit `verifier/<sched>/<kernel>/<preset>: test` lines — one per
1833/// (declared scheduler × kernel-list entry × accepted topology preset)
1834/// cell. Mirrors the gauntlet emission pattern in [`list_tests_all`] but
1835/// walks [`super::KTSTR_SCHEDULERS`] instead of [`KTSTR_TESTS`]. The
1836/// verifier sweeps each declared scheduler ACROSS topologies, because
1837/// attach/liveness is topology-dependent (a scheduler can attach on one
1838/// topology and wedge on another). Cells are paired with the
1839/// [`run_verifier_cell`] handler registered in
1840/// [`ktstr_test_early_dispatch`]'s `--exact verifier/...` branch.
1841///
1842/// The matrix dimension is `KTSTR_KERNEL_LIST` (always populated by the
1843/// `cargo ktstr verifier` dispatcher — even with a single
1844/// auto-discovered kernel, the dispatcher synthesizes a one-entry list
1845/// with a derived label). Each scheduler's `declare_scheduler!`
1846/// `kernels = [...]` declaration acts as a per-scheduler filter on the
1847/// matrix — `Version` / `Range` declarations match entries by
1848/// raw-label equality / range membership; `Path` / `CacheKey` / `Git`
1849/// declarations match by sanitized-label equality. An empty
1850/// `kernels = []` declaration accepts every entry in the list.
1851///
1852/// The topology dimension is [`crate::gauntlet::gauntlet_presets`], gated
1853/// per scheduler: the verifier VM always runs no_perf_mode, so a preset
1854/// is emitted only when the scheduler's constraints accept it under
1855/// [`super::TopologyConstraints::accepts_no_perf_mode`] (declared scope +
1856/// host CPU budget). A scheduler that accepts no preset emits no cell.
1857///
1858/// Schedulers declared with [`super::SchedulerSpec::Eevdf`] or
1859/// [`super::SchedulerSpec::KernelBuiltin`] are skipped at emission time
1860/// because neither has a userspace binary to load BPF programs from.
1861///
1862/// When [`crate::KTSTR_VERIFIER_SCHEDULER_ENV`] is set (the
1863/// `cargo ktstr verifier --scheduler <NAME>` filter), every declared
1864/// scheduler whose `name` does not equal the value is skipped, so the
1865/// sweep covers one scheduler across topologies instead of the full
1866/// declared-scheduler matrix. A value matching no declared scheduler
1867/// (or a non-BPF one) emits no cell; the dispatcher reports the empty
1868/// result set rather than silently sweeping nothing.
1869///
1870/// Cell names with `/` in `sched.name` or `preset.name` would corrupt
1871/// the splitn-based parse in [`run_verifier_cell`]; the emission elides
1872/// such cells with a stderr warning. When `KTSTR_KERNEL_LIST` is absent
1873/// (direct binary invocation outside the dispatcher), no cells emit.
1874fn list_verifier_cells_all() {
1875 use super::SchedulerSpec;
1876 let kernel_list = read_kernel_list();
1877 if kernel_list.is_empty() {
1878 return;
1879 }
1880 let presets = crate::gauntlet::gauntlet_presets();
1881 let (host_cpus, _host_llcs, _host_max_cpus_per_llc) = super::host_capacity();
1882
1883 // `cargo ktstr verifier --scheduler <NAME>` filter (via
1884 // KTSTR_VERIFIER_SCHEDULER): when set, sweep only the named declared
1885 // scheduler across topologies instead of the full declared-scheduler
1886 // matrix. Read once; unset (or non-unicode) leaves the sweep full.
1887 let scheduler_filter = std::env::var(crate::KTSTR_VERIFIER_SCHEDULER_ENV).ok();
1888
1889 for sched in super::KTSTR_SCHEDULERS.iter() {
1890 if let Some(want) = &scheduler_filter
1891 && sched.name != want.as_str()
1892 {
1893 continue;
1894 }
1895 if matches!(
1896 sched.binary,
1897 SchedulerSpec::Eevdf | SchedulerSpec::KernelBuiltin { .. }
1898 ) {
1899 continue;
1900 }
1901 if sched.name.contains('/') {
1902 eprintln!(
1903 "ktstr verifier: scheduler name {:?} contains '/' — skipping cell emission (would corrupt verifier/<sched>/<kernel>/<preset> parse)",
1904 sched.name,
1905 );
1906 continue;
1907 }
1908 // Skip declarations whose binary is not a real, buildable
1909 // scheduler. The macro-expansion FIXTURES in
1910 // tests/declare_scheduler.rs (`binary = "scx-full"`, `"scx-ee"`,
1911 // …) register into KTSTR_SCHEDULERS but expand to `Discover` of a
1912 // package that is NOT a workspace member; emitting their cells
1913 // would make `cargo ktstr verifier --run-ignored` fail on
1914 // `cargo build -p <fixture>` for a nonexistent package. A
1915 // `Discover` of a real workspace member (scx-ktstr) and a `Path`
1916 // that exists still emit. This is the emission-time counterpart to
1917 // the resolve arms in `run_verifier_cell`.
1918 match sched.binary {
1919 SchedulerSpec::Discover(pkg) if !workspace_packages().contains(pkg) => {
1920 continue;
1921 }
1922 SchedulerSpec::Path(p) if !std::path::Path::new(p).exists() => {
1923 continue;
1924 }
1925 _ => {}
1926 }
1927 for kernel_entry in &kernel_list {
1928 if !sched_kernel_filter_accepts(sched.kernels, kernel_entry) {
1929 continue;
1930 }
1931 // One cell per (scheduler, kernel, topology preset). The
1932 // verifier sweep runs each scheduler's "does it verify,
1933 // attach, AND dispatch" check ACROSS topologies, because
1934 // both vary with topology: attach/liveness (a scheduler can
1935 // attach on one topology and wedge on another — odd LLC
1936 // counts, large CPU counts, SMT) and verified_insns (a
1937 // scheduler that bakes topology-derived config into .rodata
1938 // hands the verifier different known constants, so it
1939 // processes a different instruction count per topology). The
1940 // verifier VM always
1941 // runs no_perf_mode, so preset eligibility uses
1942 // accepts_no_perf_mode: the KVM-emulated topology is gated by
1943 // the scheduler's declared scope + the host CPU budget.
1944 for preset in presets.iter() {
1945 if preset.name.contains('/') {
1946 eprintln!(
1947 "ktstr verifier: preset name {:?} contains '/' — skipping cell (would corrupt parse)",
1948 preset.name,
1949 );
1950 continue;
1951 }
1952 if !sched
1953 .constraints
1954 .accepts_no_perf_mode(&preset.topology, host_cpus)
1955 {
1956 continue;
1957 }
1958 println!(
1959 "verifier/{}/{}/{}: test",
1960 sched.name, kernel_entry.sanitized, preset.name,
1961 );
1962 }
1963 }
1964 }
1965}
1966
/// Run one verifier cell named
/// `verifier/<sched_name>/<kernel_label>/<preset_name>` and return its
/// process exit code.
///
/// Steps, in order: parse the cell name, look up the declared scheduler
/// in [`super::KTSTR_SCHEDULERS`] + the gauntlet preset in
/// [`crate::gauntlet::gauntlet_presets`] + the kernel in
/// [`KTSTR_KERNEL_LIST_ENV`](crate::KTSTR_KERNEL_LIST_ENV), resolve the
/// scheduler binary path per [`super::SchedulerSpec`], boot the verifier
/// VM on that topology via [`crate::verifier::collect_verifier_output`],
/// and print the rendered output.
///
/// # Parameters
///
/// - `full_name`: the complete cell name, including the `verifier/`
///   prefix.
/// - `out_stats`: overwritten with the per-program stats of the VM
///   result when the VM run itself returned `Ok` (whatever the verdict).
///   Every other return path leaves it untouched.
///
/// # Returns
///
/// Returns 0 only when the scheduler verified (BPF loaded), turned on
/// (the guest attach gate reached sched_ext `enabled`, surfaced via
/// [`crate::verifier::AttachOutcome`]), AND dispatched the injected
/// workload (a `WorkloadDispatched` frame) on this topology — the three
/// gates `VerifierVmResult::cell_verdict` enforces; returns 1 on a
/// verify / attach / dispatch failure, a post-attach teardown hang
/// (`timed_out`), or a malformed cell name. Every lookup or resolution
/// failure before the VM boots (KVM preflight, unknown scheduler /
/// preset / kernel label, scheduler build or path failure, missing
/// kernel image) also returns 1 after a stderr diagnostic.
///
/// The per-cell kernel directory is resolved by sanitized-label
/// lookup in `KTSTR_KERNEL_LIST` — the
/// `cargo ktstr verifier` dispatcher always populates the list,
/// even with no `--kernel` flag (it synthesizes a single auto-
/// discovered entry). There is no single-kernel-mode fallback.
/// An unrecognised label or an absent list both surface as an
/// exit-1 diagnostic naming the present labels and pointing at
/// the dispatcher.
///
/// Eevdf + KernelBuiltin scheduler variants are filtered out at
/// emission time in [`list_verifier_cells_all`], so nextest
/// dispatch never reaches the SKIP arms in this function. The
/// SKIP arms remain as defense-in-depth for direct
/// `--exact verifier/<eevdf>/...` invocation outside nextest
/// (the only path that bypasses the emission-time filter); in
/// that case they emit a `SKIP` banner + exit 0.
fn run_verifier_cell_inner(
    full_name: &str,
    out_stats: &mut Vec<crate::verifier::ProgStats>,
) -> i32 {
    use super::SchedulerSpec;

    let rest = match full_name.strip_prefix("verifier/") {
        Some(r) => r,
        None => {
            eprintln!("ktstr verifier: missing 'verifier/' prefix in {full_name:?}");
            return 1;
        }
    };
    // Split into at most three segments: <sched>/<kernel>/<preset>. Any
    // further '/' stays inside the last (preset) segment, which then
    // fails the preset lookup below instead of being silently truncated.
    let parts: Vec<&str> = rest.splitn(3, '/').collect();
    if parts.len() != 3 {
        eprintln!(
            "ktstr verifier: malformed cell name {full_name:?}; expected verifier/<sched>/<kernel>/<preset>",
        );
        return 1;
    }
    let (sched_name, kernel_label, preset_name) = (parts[0], parts[1], parts[2]);

    // Emit the cell banner BEFORE every SKIP / FAIL branch so the
    // operator always sees which (scheduler, kernel, topology) tuple
    // produced the result. Without it an early-exit SKIP / FAIL would
    // surface as a bare error line nextest tags with the full cell name
    // but no per-axis context.
    println!("\n=== {sched_name} | kernel {kernel_label} | topology {preset_name} ===");

    // Fail-fast on missing KVM with the canonical actionable error
    // (kvm group / kvm-ok hint). Without this preflight the operator
    // gets a deep error inside VM bring-up.
    if let Err(e) = crate::cli::check_kvm() {
        eprintln!("ktstr verifier: cell {full_name}: {e:#}");
        return 1;
    }

    // Resolve the declared scheduler by its <sched> name segment.
    let Some(sched) = super::KTSTR_SCHEDULERS
        .iter()
        .find(|s| s.name == sched_name)
    else {
        eprintln!("ktstr verifier: no declared scheduler {sched_name:?} (cell {full_name:?})",);
        return 1;
    };

    // Resolve the cell's topology preset by its <preset> name segment.
    let preset_list = crate::gauntlet::gauntlet_presets();
    let Some(preset) = preset_list.iter().find(|p| p.name == preset_name) else {
        eprintln!("ktstr verifier: no gauntlet preset {preset_name:?} (cell {full_name:?})",);
        return 1;
    };

    // Resolve the per-cell kernel directory by looking the cell's
    // sanitized label up in `KTSTR_KERNEL_LIST`. The
    // `cargo ktstr verifier` dispatcher always populates the list —
    // even with no `--kernel` flag it synthesizes a single auto-
    // discovered entry — so the lookup is the single source of
    // truth and there is no single-kernel-mode fallback that would
    // silently run a cell against an unrelated kernel.
    //
    // An empty list reaching this function means the test binary was
    // invoked outside the dispatcher (direct `--exact verifier/...`
    // under a hand-spawned nextest, for instance). Error with an
    // actionable message rather than fall through to auto-discovery.
    let kernel_list = read_kernel_list();
    let Some(kernel_entry) = kernel_list
        .iter()
        .find(|k| k.sanitized.as_str() == kernel_label)
    else {
        if kernel_list.is_empty() {
            eprintln!("{}", format_empty_kernel_list_error(full_name));
        } else {
            let present: Vec<&str> = kernel_list.iter().map(|k| k.sanitized.as_str()).collect();
            eprintln!(
                "{}",
                format_unknown_kernel_label_error(full_name, kernel_label, sched_name, &present,),
            );
        }
        return 1;
    };

    // Turn the declared scheduler spec into a concrete binary path:
    // `Discover` builds the named package and locates its binary, `Path`
    // must already exist on disk.
    let sched_bin: std::path::PathBuf = match sched.binary {
        SchedulerSpec::Discover(pkg) => match crate::build_and_find_binary(pkg) {
            Ok(p) => p,
            Err(e) => {
                eprintln!("ktstr verifier: build scheduler {pkg:?}: {e:#}");
                return 1;
            }
        },
        SchedulerSpec::Path(p) => {
            let path = std::path::PathBuf::from(p);
            if !path.exists() {
                eprintln!("ktstr verifier: scheduler binary not found: {p}");
                return 1;
            }
            path
        }
        // Eevdf + KernelBuiltin are filtered at list time in
        // list_verifier_cells_all, so nextest dispatch never reaches
        // these arms. The SKIP arms remain as defense-in-depth for
        // direct `--exact verifier/<eevdf>/...` invocation outside
        // nextest.
        SchedulerSpec::Eevdf => {
            println!(
                "ktstr verifier: SKIP cell {full_name} (Eevdf has no userspace binary to verify)",
            );
            return 0;
        }
        SchedulerSpec::KernelBuiltin { .. } => {
            println!(
                "ktstr verifier: SKIP cell {full_name} (KernelBuiltin has no userspace binary to verify)",
            );
            return 0;
        }
    };

    // The currently running test binary is handed to the verifier VM.
    let ktstr_bin = match std::env::current_exe() {
        Ok(p) => p,
        Err(e) => {
            eprintln!(
                "ktstr verifier: locate ktstr binary via current_exe() (required so the \
                 verifier VM can boot the same test binary as /init for guest-side dispatch): {e}",
            );
            return 1;
        }
    };

    // Resolve the kernel-build DIR to the actual bootable image file.
    // `collect_verifier_output` -> `KtstrVm::builder().kernel()` loads the
    // path verbatim (build() does NOT extract a source tree), so passing
    // the raw dir makes the VMM loader read a directory as a bzImage and
    // fail with "Unable to read bzImage header". `find_image_in_dir`
    // handles both the build-tree (`arch/*/boot/bzImage`) and cache
    // (`<dir>/bzImage`) layouts — the same resolution the eval path uses.
    // An entry that already points at a file is used as-is.
    let kernel_path = if kernel_entry.kernel_dir.is_file() {
        kernel_entry.kernel_dir.clone()
    } else {
        match crate::kernel_path::find_image_in_dir(&kernel_entry.kernel_dir) {
            Some(img) => img,
            None => {
                eprintln!(
                    "ktstr verifier: cell {full_name}: no kernel image \
                     (arch/*/boot/bzImage or a cached bzImage) under {} — \
                     build the kernel first",
                    kernel_entry.kernel_dir.display(),
                );
                return 1;
            }
        }
    };
    let topology = super::TopologyJson::from(preset.topology);
    // The declared scheduler arguments, converted to owned strings for
    // the VM launcher.
    let sched_args: Vec<String> = sched.sched_args.iter().map(|s| s.to_string()).collect();

    // Raw mode is opt-in via the dispatcher's --raw flag, plumbed
    // through KTSTR_VERIFIER_RAW_ENV. Presence (any value, including
    // empty) enables raw rendering — matches the "set to any value"
    // semantics documented on the const and the dispatcher's
    // `cmd.env(KTSTR_VERIFIER_RAW_ENV, "1")` setter.
    let raw = std::env::var_os(crate::KTSTR_VERIFIER_RAW_ENV).is_some();

    match crate::verifier::collect_verifier_output(
        &sched_bin,
        &ktstr_bin,
        &kernel_path,
        &sched_args,
        topology,
    ) {
        Ok(result) => {
            // The rendered report is printed regardless of the verdict.
            let output = crate::verifier::format_verifier_output("verifier", &result, raw);
            print!("{output}");
            // PASS requires verify + attach (sched_ext `enabled`) +
            // dispatch (the injected workload made progress).
            // `cell_verdict` names the first failing gate (timed_out →
            // attach → dispatch, root cause first) and never keys on the
            // guest exit code, which is 1 even on the verifier success
            // path (no #[ktstr_test] body to dispatch).
            let code = match result.cell_verdict() {
                Ok(()) => 0,
                Err(reason) => {
                    eprintln!("ktstr verifier: cell {full_name} FAILED: {reason}");
                    1
                }
            };
            // Hand the per-program verified_insns out to the record writer
            // so the dispatcher can render the instruction-count tables.
            // Only this arm has stats; every earlier return (skip, kernel
            // resolution error, build failure) leaves out_stats empty.
            *out_stats = result.stats;
            code
        }
        Err(e) => {
            // The VM run itself failed; there is no result to render and
            // no stats to hand out.
            eprintln!("ktstr verifier: cell {full_name} FAILED: {e:#}");
            1
        }
    }
}
2193
2194/// Run a verifier cell and, when the `cargo ktstr verifier` dispatcher
2195/// set [`crate::KTSTR_VERIFIER_RESULT_DIR_ENV`], record its PASS/FAIL
2196/// outcome there so the dispatcher can render the run-summary table after
2197/// nextest returns. Best-effort + env-gated: a direct
2198/// `--exact verifier/...` invocation (env unset) behaves exactly as
2199/// [`run_verifier_cell_inner`], and a record-write failure never changes
2200/// the cell's exit code. A nextest RETRY re-runs this wrapper and
2201/// overwrites the cell's own record (deterministic filename), so the
2202/// final attempt's outcome is the one that lands in the table.
2203fn run_verifier_cell(full_name: &str) -> i32 {
2204 let mut stats = Vec::new();
2205 let code = run_verifier_cell_inner(full_name, &mut stats);
2206 if let Some(dir) = std::env::var_os(crate::KTSTR_VERIFIER_RESULT_DIR_ENV) {
2207 crate::verifier::write_cell_record(
2208 std::path::Path::new(&dir),
2209 full_name,
2210 code == 0,
2211 &stats,
2212 );
2213 }
2214 code
2215}
2216
2217/// List tests with budget-based coverage maximization.
2218///
2219/// Collects all eligible tests as candidates, runs greedy selection,
2220/// and prints only the selected subset. Multi-kernel mode adds the
2221/// kernel suffix as a feature dimension so the budget selector
2222/// picks per-kernel coverage; single-kernel mode is unchanged.
2223///
2224/// `KTSTR_CARGO_TEST_MODE=1` is treated identically to
2225/// `list_tests_all`: the budget pipeline runs only over base test
2226/// candidates (no gauntlet-variant candidates, no multi-kernel
2227/// fan-out). The greedy selector still applies — a low budget
2228/// can still trim the base list — but the candidate set is the
2229/// same set that the dispatch path would actually run.
2230fn list_tests_budget(ignored_only: bool, budget_secs: f64) {
2231 use crate::budget::{TestCandidate, estimate_duration, extract_features, select};
2232
2233 let cargo_test_mode = crate::cargo_test_mode::cargo_test_mode_active();
2234 let presets = crate::gauntlet::gauntlet_presets();
2235 let has_vmlinux = resolve_test_kernel()
2236 .ok()
2237 .and_then(|k| crate::vmm::find_vmlinux(&k))
2238 .is_some();
2239 let (host_cpus, host_llcs, host_max_cpus_per_llc) = super::host_capacity();
2240 let mut candidates: Vec<TestCandidate> = Vec::new();
2241
2242 let kernel_list = read_kernel_list();
2243 let multi_kernel = kernel_list.len() > 1 && !cargo_test_mode;
2244 let kernel_suffixes: Vec<&str> = if multi_kernel {
2245 kernel_list.iter().map(|k| k.sanitized.as_str()).collect()
2246 } else {
2247 vec![""]
2248 };
2249
2250 for entry in KTSTR_TESTS.iter() {
2251 if !entry.bpf_map_write.is_empty() && !has_vmlinux {
2252 continue;
2253 }
2254
2255 let base_ignored = is_ignored(entry);
2256 let base_topo = entry.topology;
2257
2258 // Base test
2259 if !ignored_only || base_ignored {
2260 // host_only tests never boot a VM, so the kernel never
2261 // affects what runs — push one candidate without a
2262 // kernel suffix even in multi-kernel mode. Otherwise the
2263 // budget selector would consider N identical copies of
2264 // the same host-side function.
2265 if entry.host_only {
2266 candidates.push(TestCandidate {
2267 name: format!("ktstr/{}: test", entry.name),
2268 features: extract_features(entry, &base_topo, false, entry.name),
2269 estimated_secs: estimate_duration(entry, &base_topo),
2270 });
2271 } else {
2272 for suffix in &kernel_suffixes {
2273 let name = if suffix.is_empty() {
2274 format!("ktstr/{}: test", entry.name)
2275 } else {
2276 format!("ktstr/{}/{suffix}: test", entry.name)
2277 };
2278 candidates.push(TestCandidate {
2279 name,
2280 features: extract_features(entry, &base_topo, false, entry.name),
2281 estimated_secs: estimate_duration(entry, &base_topo),
2282 });
2283 }
2284 }
2285 }
2286
2287 if entry.host_only {
2288 continue;
2289 }
2290
2291 if cargo_test_mode {
2292 // No gauntlet candidates in cargo-test mode — the
2293 // dispatch path will never execute them and including
2294 // them in the budget candidate set would shift greedy
2295 // selection toward variants that resolve to "no test"
2296 // at run time.
2297 continue;
2298 }
2299
2300 for_each_gauntlet_variant(
2301 entry,
2302 &presets,
2303 host_cpus,
2304 host_llcs,
2305 host_max_cpus_per_llc,
2306 |preset| {
2307 for suffix in &kernel_suffixes {
2308 let test_name = if suffix.is_empty() {
2309 format!("gauntlet/{}/{}", entry.name, preset.name)
2310 } else {
2311 format!("gauntlet/{}/{}/{suffix}", entry.name, preset.name)
2312 };
2313 candidates.push(TestCandidate {
2314 name: format!("{test_name}: test"),
2315 features: extract_features(entry, &preset.topology, true, &test_name),
2316 estimated_secs: estimate_duration(entry, &preset.topology),
2317 });
2318 }
2319 },
2320 );
2321 }
2322
2323 let selected = select(&candidates, budget_secs);
2324 for &i in &selected {
2325 println!("{}", candidates[i].name);
2326 }
2327
2328 let stats = crate::budget::selection_stats(&candidates, &selected, budget_secs);
2329 eprintln!(
2330 "ktstr budget: {}/{} tests, {:.0}/{:.0}s used, {}/{} configurations covered",
2331 stats.selected,
2332 stats.total,
2333 stats.budget_used,
2334 stats.budget_total,
2335 stats.bits_covered,
2336 stats.bits_possible,
2337 );
2338}
2339
2340/// Strip an optional `/{sanitized_kernel_label}` suffix from `name`,
2341/// look up the matching [`KernelEntry`] in the multi-kernel list,
2342/// and re-export `KTSTR_KERNEL` to that entry's directory. Returns
2343/// the prefix-only name for the dispatch caller.
2344///
2345/// When `KTSTR_KERNEL_LIST` is unset / single-entry, the function
2346/// is a no-op pass-through: returns `(name, None)` and does not
2347/// touch the env. When the list has 2+ entries, the suffix is
2348/// REQUIRED and missing it surfaces as `Err` (the early-dispatch
2349/// caller turns that into exit code 1 with an actionable message)
2350/// — the suffix is part of every test name `--list` emitted, so a
2351/// `--exact` invocation that omits it can only come from operator
2352/// hand-construction or tooling that hasn't been taught the
2353/// multi-kernel naming.
2354fn strip_kernel_suffix<'a>(
2355 name: &'a str,
2356 kernel_list: &'a [KernelEntry],
2357) -> Result<(&'a str, Option<&'a KernelEntry>), String> {
2358 if kernel_list.len() <= 1 {
2359 return Ok((name, None));
2360 }
2361 // Multi-kernel: every test name carries `/kernel_…` as its
2362 // final segment. Iterate the labels rather than splitting on
2363 // `/` — the suffix always has exactly one extra `/` separator
2364 // before `kernel_…`, but the body of the test name CAN contain
2365 // `/` (gauntlet variants already do — `gauntlet/{name}/{preset}`),
2366 // so a naive `rsplit_once('/')` would accidentally peel the
2367 // preset segment instead.
2368 //
2369 // Distinct kernels in the same `KTSTR_KERNEL_LIST` produce
2370 // distinct sanitized labels in practice — the producer emits
2371 // semantic identifiers (version strings, git owner/repo/ref,
2372 // path basename + 6-char hash) that don't share suffixes
2373 // among the resolved set. If a future regression DID produce
2374 // labels where one is a strict suffix of another (e.g.
2375 // `kernel_6_14` vs `kernel_x_kernel_6_14`), the iterate-and-
2376 // first-match below would pick whichever appears first in
2377 // the kernel_list — deterministic but potentially wrong.
2378 // Producer-side regression detection would catch that
2379 // class of collision before it reaches this peeler.
2380 for entry in kernel_list {
2381 let needle = format!("/{}", entry.sanitized);
2382 if let Some(stripped) = name.strip_suffix(&needle) {
2383 return Ok((stripped, Some(entry)));
2384 }
2385 }
2386 Err(format!(
2387 "test name {name:?} has no recognised kernel suffix (KTSTR_KERNEL_LIST \
2388 carries {n} kernels — every test name must end with `/kernel_…`)",
2389 n = kernel_list.len(),
2390 ))
2391}
2392
2393/// Re-export `KTSTR_KERNEL` to the kernel directory carried by a
2394/// resolved [`KernelEntry`]. Called when a multi-kernel `--exact`
2395/// dispatch peels off the per-test kernel suffix.
2396///
2397/// SAFETY: nextest invokes the test binary's `--exact` handler in a
2398/// single-threaded context — there are no other readers of the env
2399/// at this point. The eventual VM-launch site reads `KTSTR_KERNEL`
2400/// via `find_kernel` after this returns; that read is sequenced
2401/// after the write per the program order.
2402fn export_kernel_for_variant(entry: &KernelEntry) {
2403 // SAFETY: see fn-level doc — single-threaded ctor / nextest
2404 // dispatch context.
2405 unsafe { std::env::set_var(crate::KTSTR_KERNEL_ENV, &entry.kernel_dir) };
2406}
2407
2408/// Parse a nextest-style test name and run it.
2409///
2410/// Handles base tests (`ktstr/{name}`), gauntlet variants
2411/// (`gauntlet/{name}/{preset}`), and bare names (backward compat).
2412/// When `KTSTR_KERNEL_LIST` carries 2+ kernels,
2413/// VM-bound test names additionally end with
2414/// `/{sanitized_kernel_label}` — that suffix is peeled here and
2415/// the matching kernel directory is re-exported via
2416/// [`crate::KTSTR_KERNEL_ENV`] before the dispatch continues. `host_only`
2417/// tests are short-circuited BEFORE the suffix peel: they never
2418/// boot a VM, so the kernel-suffix listing path emits one
2419/// `ktstr/{name}: test` entry without a kernel suffix regardless
2420/// of the kernel-list cardinality (see `list_tests_all` /
2421/// `list_tests_budget`), and routing them through
2422/// `strip_kernel_suffix` would surface as a "no recognised kernel
2423/// suffix" exit-1 error. Returns an exit code.
2424pub(crate) fn run_named_test(test_name: &str) -> i32 {
2425 let kernel_list = read_kernel_list();
2426
2427 // host_only short-circuit: in multi-kernel mode, host_only tests
2428 // are listed without a `/{sanitized_kernel_label}` suffix (see
2429 // `list_tests_all` / `list_tests_budget`, which emit a single
2430 // `ktstr/{name}: test` line for host_only entries regardless of
2431 // the kernel-list cardinality — a host_only test never boots a
2432 // VM, so the kernel never affects what runs). Calling
2433 // `strip_kernel_suffix` on such a name in multi-kernel mode
2434 // would fail with the "no recognised kernel suffix" error and
2435 // misroute every host_only dispatch to exit 1.
2436 //
2437 // Resolve the host_only check from `find_test` BEFORE the
2438 // suffix peel so the multi-kernel branch only applies to
2439 // VM-bound tests. Single-kernel mode is unaffected — the
2440 // pass-through arm in `strip_kernel_suffix` returns the input
2441 // verbatim either way.
2442 let bare_for_lookup = test_name.strip_prefix("ktstr/").unwrap_or(test_name);
2443
2444 if let Some(entry) = find_test(bare_for_lookup)
2445 && entry.host_only
2446 {
2447 return run_host_only_test(entry);
2448 }
2449
2450 let (test_name, kernel_entry) = match strip_kernel_suffix(test_name, &kernel_list) {
2451 Ok(pair) => pair,
2452 Err(e) => {
2453 eprintln!("{e}");
2454 return 1;
2455 }
2456 };
2457 if let Some(entry) = kernel_entry {
2458 export_kernel_for_variant(entry);
2459 }
2460
2461 if let Some(rest) = test_name.strip_prefix("gauntlet/") {
2462 return run_gauntlet_test(rest);
2463 }
2464
2465 let bare_name = test_name.strip_prefix("ktstr/").unwrap_or(test_name);
2466 let entry = match find_test(bare_name) {
2467 Some(e) => e,
2468 None => {
2469 eprintln!("unknown test: {test_name}");
2470 return 1;
2471 }
2472 };
2473
2474 // Defense-in-depth: host_only re-check after suffix peel for the
2475 // edge case where the bare_for_lookup pre-strip lookup missed
2476 // (e.g. a future test name shape that doesn't match the
2477 // pre-strip form but does after the suffix peel).
2478 if entry.host_only {
2479 return run_host_only_test(entry);
2480 }
2481
2482 if entry.performance_mode && super::runtime::no_perf_mode_active() {
2483 crate::report::test_skip(format_args!(
2484 "{}: test requires performance_mode but --no-perf-mode or KTSTR_NO_PERF_MODE is active",
2485 bare_name,
2486 ));
2487 // See run_ktstr_test_inner for the sidecar-emission rationale.
2488 // Plain (non-gauntlet) dispatch: no TopoOverride, so the skip
2489 // records entry.topology (declared == booted for a plain test).
2490 record_skip_sidecar(entry, None);
2491 return 0;
2492 }
2493
2494 if super::runtime::perf_only_skips_entry(entry) {
2495 crate::report::test_skip(format_args!(
2496 "{bare_name}: KTSTR_PERF_ONLY is active and this test is not a performance_mode test",
2497 ));
2498 // Skip sidecar so the perf-delta pool records the skip (excluded
2499 // from the A/B compare) rather than a phantom missing result.
2500 record_skip_sidecar(entry, None);
2501 return 0;
2502 }
2503
2504 if !entry.bpf_map_write.is_empty()
2505 && let Ok(kernel) = resolve_test_kernel()
2506 && crate::vmm::find_vmlinux(&kernel).is_none()
2507 {
2508 eprintln!("FAIL: vmlinux not found, bpf_map_write requires vmlinux");
2509 return 1;
2510 }
2511
2512 let result = run_ktstr_test_inner(entry, None);
2513 result_to_exit_code(result, entry.expect_err, entry.allow_inconclusive)
2514}
2515
2516/// Run a host-only test directly without booting a VM.
2517/// Returns an exit code for nextest dispatch.
2518fn run_host_only_test(entry: &KtstrTestEntry) -> i32 {
2519 let result = run_host_only_test_inner(entry);
2520 result_to_exit_code(result, entry.expect_err, entry.allow_inconclusive)
2521}
2522
2523/// Inner host-only dispatch returning `Result<AssertResult>`.
2524///
2525/// Builds a minimal Ctx and calls the test function on the host.
2526/// Used for tests that need host tools (cargo, nested VMs).
2527///
2528/// Topology comes from real-host sysfs (`/sys/devices/system/cpu/`)
2529/// via [`crate::topology::TestTopology::from_system`]; the test's
2530/// declared VM topology is intentionally ignored for host_only
2531/// runs because the test author wrote it for a synthetic VM and
2532/// the host's actual CPU layout is what `WorkSpec::workers_pct` /
2533/// `AffinityIntent::LlcAligned` resolve against. Bails with an
2534/// actionable diagnostic when sysfs CPU enumeration fails — the
2535/// underlying causes are missing `/sys/devices/system/cpu/online`
2536/// (no /sys mount or container masking), unreadable contents (rare
2537/// permissions edge), corrupt sysfs string (kernel/hardware bug),
2538/// or an empty online-CPU set (degenerate cpuset namespace).
2539///
2540/// Cgroup parent defaults to `/sys/fs/cgroup/ktstr`; the operator
2541/// can override via `KTSTR_HOST_CGROUP_PARENT`. The override path
2542/// is validated upfront: it must be non-empty and rooted under
2543/// `/sys/fs/cgroup` so an accidental empty/relative/foreign value
2544/// produces a clear error instead of an opaque cgroupfs failure
2545/// later. Empty-string env value is treated as "unset" and falls
2546/// back to the default.
2547///
2548/// For cgroup-v2 user delegation (Mode B/C: systemd `Delegate=yes`,
2549/// container `nsdelegate`), the operator sets
2550/// `KTSTR_CGROUP_WALK_ROOT` to the delegation boundary so
2551/// [`crate::cgroup::CgroupManager::setup`]'s ancestor
2552/// `subtree_control` walk stops there instead of EACCES-ing at
2553/// `user.slice` / the container root. Defaults to `/sys/fs/cgroup`
2554/// (Mode A: root-owned tree).
2555fn run_host_only_test_inner(entry: &KtstrTestEntry) -> Result<AssertResult> {
2556 let topo = crate::topology::TestTopology::from_system().context(
2557 "host_only requires real-host topology from sysfs; \
2558 the sysfs CPU enumeration at /sys/devices/system/cpu/online \
2559 failed — likely causes: running outside a /sys-mounted \
2560 environment, sysfs contents unreadable (permissions / \
2561 container mask), corrupt online-CPU string, or a degenerate \
2562 cpuset namespace with no online CPUs",
2563 )?;
2564 let cgroup_parent = resolve_host_cgroup_parent()?;
2565 let cgroups = build_host_cgroup_manager(&cgroup_parent)?;
2566 let merged_assert = crate::assert::Assert::default_checks()
2567 .merge(&entry.scheduler.assert)
2568 .merge(&entry.assert);
2569 let ctx = crate::scenario::Ctx::builder(&cgroups, &topo)
2570 .duration(entry.duration)
2571 .settle(std::time::Duration::ZERO)
2572 .assert(merged_assert)
2573 .entry_name(entry.name)
2574 // host_only is host-side with no VM: the resolved topology is
2575 // the declared entry.topology (resolve_vm_topology(entry, None)),
2576 // so compute the variant hash directly rather than threading.
2577 .variant_hash(super::sidecar::variant_hash_from_parts(
2578 entry,
2579 &entry.topology,
2580 &super::args::current_work_type(),
2581 ))
2582 .build();
2583 (entry.func)(&ctx)
2584}
2585
2586/// Default cgroup parent path for `host_only` tests when
2587/// `KTSTR_HOST_CGROUP_PARENT` is unset. Suitable for both root
2588/// (writable directly) and non-root (operator pre-creates
2589/// `/sys/fs/cgroup/ktstr` with appropriate ownership, OR overrides via
2590/// `KTSTR_HOST_CGROUP_PARENT` to point at a path inside a delegated
2591/// subtree) invocations. See [`resolve_host_cgroup_parent`] for the
2592/// env-override path and `build_host_cgroup_manager` for the
2593/// cgroup-v2 Mode B/C delegation wire-up.
2594///
2595/// `pub` so tests can pin against it instead of mirroring
2596/// the literal in their own assertion strings (the
2597/// `resolve_host_cgroup_parent_*` unit tests in `dispatch_tests.rs`
2598/// assert unset/empty env falls back to this const). Treat as the
2599/// canonical default — operators set `KTSTR_HOST_CGROUP_PARENT` to
2600/// override.
2601pub const DEFAULT_HOST_CGROUP_PARENT: &str = "/sys/fs/cgroup/ktstr";
2602
2603/// Resolve the cgroup parent path for `host_only` tests.
2604///
2605/// Reads `KTSTR_HOST_CGROUP_PARENT`. Empty / unset falls back to
2606/// `DEFAULT_HOST_CGROUP_PARENT`. A set value must be rooted under
2607/// `/sys/fs/cgroup` (no relative paths, no random /tmp dirs) so an
2608/// accidental misconfiguration surfaces here rather than as an
2609/// opaque cgroupfs failure inside `CgroupManager::setup`.
2610///
2611/// Non-root callers are admitted: cgroup-v2 user delegation (Mode
2612/// B/C: systemd `Delegate=yes`, container `nsdelegate`) is handled
2613/// by `build_host_cgroup_manager` threading
2614/// [`crate::KTSTR_CGROUP_WALK_ROOT_ENV`] into
2615/// [`crate::cgroup::CgroupManager::with_walk_root`] so the
2616/// `subtree_control` walk bails at the delegation root instead of
2617/// EACCES-ing on `user.slice`.
2618pub fn resolve_host_cgroup_parent() -> Result<String> {
2619 let parent = match std::env::var(crate::KTSTR_HOST_CGROUP_PARENT_ENV) {
2620 Ok(s) if !s.is_empty() => s,
2621 _ => return Ok(DEFAULT_HOST_CGROUP_PARENT.to_string()),
2622 };
2623 if !parent.starts_with("/sys/fs/cgroup") || parent == "/sys/fs/cgroup" {
2624 anyhow::bail!(
2625 "KTSTR_HOST_CGROUP_PARENT={parent:?}: must be rooted under \
2626 /sys/fs/cgroup and name a non-root subdirectory \
2627 (e.g. /sys/fs/cgroup/ktstr or /sys/fs/cgroup/ktstr-foo); \
2628 unset or empty falls back to {DEFAULT_HOST_CGROUP_PARENT}",
2629 );
2630 }
2631 Ok(parent)
2632}
2633
2634/// Build a [`crate::cgroup::CgroupManager`] for a `host_only` test
2635/// run, threading [`crate::KTSTR_CGROUP_WALK_ROOT_ENV`] into
2636/// [`crate::cgroup::CgroupManager::with_walk_root`] when set.
2637///
2638/// The walk root override bounds [`crate::cgroup::CgroupManager::setup`]'s
2639/// ancestor `subtree_control` walk for cgroup-v2 Mode B/C
2640/// delegation: under systemd `Delegate=yes` or a container's
2641/// `nsdelegate`, the operator owns subtree_control writes only
2642/// inside the delegated subtree. Without the override the walk
2643/// starts at `/sys/fs/cgroup` and EACCES-es at `user.slice` or the
2644/// container root.
2645///
2646/// Empty / unset falls through to the default `/sys/fs/cgroup`
2647/// (Mode A: root-owned tree). [`crate::cgroup::CgroupManager::with_walk_root`]
2648/// validates that the chosen walk root is a prefix of `parent` —
2649/// misconfigurations surface as a focused error before the first
2650/// cgroupfs write rather than as an opaque downstream EACCES.
2651///
2652/// Non-root callers with no walk-root override are admitted here — the
2653/// precondition (root, or a cgroup-v2 delegated walk root) is enforced
2654/// lazily at [`crate::cgroup::CgroupManager::setup`], the first real
2655/// cgroup operation. `host_only` tests that never create a cgroup
2656/// (macro-attribute fixtures, host-topology reads, nested-VM verifier
2657/// orchestration) therefore run without root; only a test that actually
2658/// touches a cgroup hits the deferred non-root error.
2659fn build_host_cgroup_manager(cgroup_parent: &str) -> Result<crate::cgroup::CgroupManager> {
2660 let cg = crate::cgroup::CgroupManager::new(cgroup_parent);
2661 match std::env::var(crate::KTSTR_CGROUP_WALK_ROOT_ENV) {
2662 Ok(walk_root) if !walk_root.is_empty() => {
2663 // Defense-in-depth: walk_root must be rooted under
2664 // /sys/fs/cgroup. Mirrors the sibling
2665 // KTSTR_HOST_CGROUP_PARENT_ENV guard above so an operator
2666 // typo surfaces here instead of as a downstream cgroupfs
2667 // fs::write EACCES.
2668 if !walk_root.starts_with("/sys/fs/cgroup") {
2669 anyhow::bail!(
2670 "{env}={walk_root:?}: walk root must be rooted under /sys/fs/cgroup \
2671 (e.g. /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service \
2672 for a systemd user session); the value supplied is outside the cgroup-v2 \
2673 mount and would EACCES on the first cgroupfs write",
2674 env = crate::KTSTR_CGROUP_WALK_ROOT_ENV,
2675 );
2676 }
2677 cg.with_walk_root(&walk_root).with_context(|| {
2678 format!(
2679 "{env}={walk_root:?}: walk-root override rejected (must be a prefix of \
2680 KTSTR_HOST_CGROUP_PARENT={cgroup_parent:?})",
2681 env = crate::KTSTR_CGROUP_WALK_ROOT_ENV,
2682 )
2683 })
2684 }
2685 // No KTSTR_CGROUP_WALK_ROOT override. Return the manager as-is;
2686 // the non-root precondition for managing cgroups under the
2687 // kernel-owned default walk root is checked lazily in
2688 // CgroupManager::setup (first real cgroup use). host_only tests
2689 // that never create a cgroup — macro-attribute fixtures,
2690 // host-topology reads, nested-VM verifier orchestration — must
2691 // not be failed here for a resource they never touch. A
2692 // non-root test that does create a cgroup gets the deferred
2693 // setup error pointing at with_walk_root; the operator on-ramp
2694 // is EITHER to run as root OR to set KTSTR_CGROUP_WALK_ROOT to a
2695 // delegated cgroup-v2 subtree (handled by the arm above).
2696 _ => Ok(cg),
2697 }
2698}
2699
2700/// Run a gauntlet variant test. `rest` is `{name}/{preset}`.
2701pub(crate) fn run_gauntlet_test(rest: &str) -> i32 {
2702 let parts: Vec<&str> = rest.splitn(2, '/').collect();
2703 if parts.len() != 2 {
2704 eprintln!("invalid gauntlet test name: gauntlet/{rest}");
2705 return 1;
2706 }
2707 let (test_name, preset_name) = (parts[0], parts[1]);
2708
2709 let entry = match find_test(test_name) {
2710 Some(e) => e,
2711 None => {
2712 eprintln!("unknown test: {test_name}");
2713 return 1;
2714 }
2715 };
2716
2717 let presets = crate::gauntlet::gauntlet_presets();
2718 let preset = match presets.iter().find(|p| p.name == preset_name) {
2719 Some(p) => p,
2720 None => {
2721 eprintln!("unknown gauntlet preset: {preset_name}");
2722 return 1;
2723 }
2724 };
2725
2726 let t = &preset.topology;
2727 let cpus = t.total_cpus();
2728
2729 let memory_mib = super::runtime::derive_test_memory_mib(cpus, entry);
2730 let topo = TopoOverride {
2731 numa_nodes: t.numa_nodes,
2732 llcs: t.llcs,
2733 cores: t.cores_per_llc,
2734 threads: t.threads_per_core,
2735 memory_mib,
2736 };
2737
2738 if entry.performance_mode && super::runtime::no_perf_mode_active() {
2739 crate::report::test_skip(format_args!(
2740 "{}: test requires performance_mode but --no-perf-mode or KTSTR_NO_PERF_MODE is active",
2741 test_name,
2742 ));
2743 // Gauntlet preset: record the preset's RESOLVED topology
2744 // (Topology::from(&topo)) so this skip shares a variant_hash
2745 // with a run of the same preset and distinguishes other presets.
2746 record_skip_sidecar(entry, Some(&topo));
2747 return 0;
2748 }
2749
2750 if super::runtime::perf_only_skips_entry(entry) {
2751 crate::report::test_skip(format_args!(
2752 "{test_name}: KTSTR_PERF_ONLY is active and this test is not a performance_mode test",
2753 ));
2754 // Gauntlet preset: record the preset's RESOLVED topology so the
2755 // skip shares a variant_hash with a run of the same preset.
2756 record_skip_sidecar(entry, Some(&topo));
2757 return 0;
2758 }
2759
2760 if !entry.bpf_map_write.is_empty()
2761 && let Ok(kernel) = resolve_test_kernel()
2762 && crate::vmm::find_vmlinux(&kernel).is_none()
2763 {
2764 eprintln!("FAIL: vmlinux not found, bpf_map_write requires vmlinux");
2765 return 1;
2766 }
2767
2768 let result = run_ktstr_test_inner(entry, Some(&topo));
2769 result_to_exit_code(result, entry.expect_err, entry.allow_inconclusive)
2770}
2771
2772/// Collect sidecar JSON files and return the full gauntlet analysis.
2773///
2774/// When `dir` is `Some`, reads sidecars from that directory. Otherwise
2775/// uses the default sidecar directory (`KTSTR_SIDECAR_DIR` override, or
2776/// `{CARGO_TARGET_DIR or "target"}/ktstr/{kernel}-{project_commit}/`,
2777/// where `{project_commit}` is the project HEAD short hex with
2778/// `-dirty` when the worktree differs).
2779///
2780/// Returns the concatenated output of `analyze_rows`, verifier stats,
2781/// callback profile, and KVM stats. Returns an empty string when no
2782/// sidecars are found.
2783pub fn analyze_sidecars(dir: Option<&std::path::Path>) -> String {
2784 let default_dir;
2785 let dir = match dir {
2786 Some(d) => d,
2787 None => {
2788 default_dir = sidecar_dir();
2789 &default_dir
2790 }
2791 };
2792 let sidecars = collect_sidecars(dir);
2793 if sidecars.is_empty() {
2794 return String::new();
2795 }
2796 let mut out = String::new();
2797 let rows: Vec<_> = sidecars.iter().map(crate::stats::sidecar_to_row).collect();
2798 if !rows.is_empty() {
2799 out.push_str(&crate::stats::analyze_rows(&rows));
2800 }
2801 let vstats = format_verifier_stats(&sidecars);
2802 if !vstats.is_empty() {
2803 out.push_str(&vstats);
2804 }
2805 let cprofile = format_callback_profile(&sidecars);
2806 if !cprofile.is_empty() {
2807 out.push_str(&cprofile);
2808 }
2809 let kstats = format_kvm_stats(&sidecars);
2810 if !kstats.is_empty() {
2811 out.push_str(&kstats);
2812 }
2813 out
2814}
2815
2816/// Discover plain `#[test]` items by re-invoking the binary without
2817/// NEXTEST, reading libtest's `--list` output, and printing only
2818/// names that don't match any KTSTR_TESTS entry. This lets plain
2819/// tests coexist with `#[ktstr_test]` in the same binary without
2820/// duplicating the ktstr entries.
2821///
2822/// `ignored_only` forwards `--ignored` onto the child `--list` call
2823/// so the echoed plain-test set matches the bucket nextest is
2824/// enumerating (all tests vs the `#[ignore]`-only subset). Omitting
2825/// the flag here lands every plain test in nextest's ignored set and
2826/// silently skips them by default — see the body comment.
2827fn list_plain_tests(ignored_only: bool) {
2828 use std::collections::HashSet;
2829 let ktstr_names: HashSet<&str> = KTSTR_TESTS.iter().map(|e| e.name).collect();
2830
2831 let exe = match std::env::current_exe() {
2832 Ok(p) => p,
2833 Err(_) => return,
2834 };
2835 let mut cmd = std::process::Command::new(exe);
2836 cmd.env_remove("NEXTEST");
2837 // Forward `--ignored` so the plain-test set echoed here matches the
2838 // bucket nextest is asking for. nextest computes its "ignored" set by
2839 // re-running the binary with `--list --ignored`; if this child always
2840 // lists ALL plain `#[test]` (no `--ignored`), every plain test lands
2841 // in nextest's ignored set and is silently skipped by default
2842 // (footgun #2). With the flag forwarded, only real `#[ignore]` plain
2843 // tests are reported under `--ignored`, so non-ignored plain tests run
2844 // by default like any other test.
2845 let mut list_args: Vec<&str> = vec!["--list", "--format", "terse"];
2846 if ignored_only {
2847 list_args.push("--ignored");
2848 }
2849 cmd.args(&list_args);
2850 cmd.stdout(std::process::Stdio::piped());
2851 cmd.stderr(std::process::Stdio::null());
2852 let output = match cmd.output() {
2853 Ok(o) => o,
2854 Err(_) => return,
2855 };
2856 let stdout = String::from_utf8_lossy(&output.stdout);
2857 for line in stdout.lines() {
2858 let name = line.strip_suffix(": test").unwrap_or(line);
2859 if !ktstr_names.contains(name) && !name.is_empty() {
2860 println!("{line}");
2861 }
2862 }
2863}
2864
2865/// `--list` subprotocol: emit ktstr/gauntlet test names without
2866/// exiting so the standard libtest harness can also print its own
2867/// test list afterward. This is what makes plain `#[test]` items
2868/// inside a ktstr_test integration-test binary visible to nextest.
2869///
2870/// Honours `--ignored` the same way [`ktstr_main`] does — when set,
2871/// only the ignored subset (gauntlet variants and `demo_` base
2872/// tests) is printed. Unlike `ktstr_main`, this function returns to
2873/// the caller after listing so the ctor's caller can fall through
2874/// to libtest's `main`.
2875fn ktstr_list_only() {
2876 let args: Vec<String> = std::env::args().collect();
2877 let ignored_only = args.iter().any(|a| a == "--ignored");
2878 list_tests(ignored_only);
2879}
2880
2881/// Nextest protocol handler.
2882///
2883/// Called automatically by [`ktstr_test_early_dispatch`] when running
2884/// under nextest with `--exact <ktstr_or_gauntlet_name>`.
2885/// Not intended for direct use.
2886///
2887/// - `--list --format terse`: output `ktstr/{name}: test\n` for base
2888/// tests and `gauntlet/{name}/{preset}: test\n` for gauntlet
2889/// variants. (Discovery uses `ktstr_list_only` instead to allow
2890/// libtest to print its own list afterward; this branch is
2891/// preserved for direct callers of `ktstr_main`.)
2892/// - `--exact NAME --nocapture`: run the named test, exit 0/1.
2893pub fn ktstr_main() -> ! {
2894 let args: Vec<String> = std::env::args().collect();
2895
2896 // Discovery mode: --list --format terse [--ignored]
2897 if args.iter().any(|a| a == "--list") {
2898 let ignored_only = args.iter().any(|a| a == "--ignored");
2899 list_tests(ignored_only);
2900 std::process::exit(0);
2901 }
2902
2903 // Execution mode: --exact NAME [--nocapture] [--ignored] [--bench]
2904 if let Some(pos) = args.iter().position(|a| a == "--exact") {
2905 if let Some(name) = args.get(pos + 1) {
2906 let code = run_named_test(name);
2907 std::process::exit(code);
2908 }
2909 eprintln!("--exact requires a test name");
2910 std::process::exit(1);
2911 }
2912
2913 // Fallback: no recognized arguments.
2914 eprintln!("usage: <binary> --list --format terse [--ignored]");
2915 eprintln!(" <binary> --exact <test_name> --nocapture");
2916 std::process::exit(1)
2917}
2918
2919#[cfg(test)]
2920#[path = "dispatch_tests.rs"]
2921mod tests;