ktstr/test_support/runtime.rs
1//! Runtime configuration primitives shared by `eval` and `probe`.
2//!
3//! `eval` calls `probe::attempt_auto_repro` from its failure path,
4//! so items shared between the two siblings live here to avoid a
5//! circular import chain. All items are `pub(crate)` and remain
6//! internal to `test_support`.
7
8use std::os::unix::fs::PermissionsExt;
9use std::path::{Path, PathBuf};
10use std::sync::OnceLock;
11use std::time::Duration;
12
13use super::entry::KtstrTestEntry;
14
/// Cached location of the process-wide config scratch directory.
///
/// [`scratch_dir`] fills this slot exactly once, on its first call.
/// Only the plain `PathBuf` is stored here — not the
/// `tempfile::TempDir` that created the directory — so the
/// `extern "C"` `atexit` cleanup handler can look the path up without
/// touching a `TempDir` value whose `Drop` would never run from a
/// `static` slot anyway (see the "leak bound" discussion on
/// [`scratch_dir`]).
static SCRATCH_PATH: OnceLock<PathBuf> = OnceLock::new();
24
25/// Process-owned scratch directory for all inline-config tempfile
26/// writes — both [`config_content_parts`] (in-VM eval path) and
27/// [`crate::export::export_test`]'s `config_content_addition`
28/// (host-side .run packaging path).
29///
30/// Created lazily on first access via `tempfile::Builder` with
31/// explicit `0o700` mode (overrides the crate default of umask-
32/// restricted `0o777`-via-`mkdir(2)`, which on a standard
33/// `umask=0o022` host yields `0o755` and would expose directory
34/// listings + filename predictability to other uids). The
35/// directory is a random-suffixed subdirectory of
36/// `std::env::temp_dir()`, owned by the current uid.
37///
38/// Both call sites share this single directory because the
39/// security and leak-bound properties are identical for both
40/// purposes, and a single `OnceLock` + single `atexit` handler is
41/// simpler than maintaining parallel scratch dirs that diverge
42/// silently. Filenames are independently prefixed at each call
43/// site (`ktstr-config-{hash:016x}.json` for the eval path,
44/// `ktstr-export-config-{hash:016x}-{basename}` for the export
45/// path) so the two purposes can be visually distinguished inside
46/// the same directory.
47///
48/// Two properties matter:
49///
50/// 1. **Symlink defense.** /tmp is sticky-bit world-writable, so an
51/// attacker can pre-plant a symlink at the predictable content-
52/// addressed path and have us write to wherever it points. A
53/// per-process 0o700 subdirectory blocks every cross-uid access
54/// mode (read, list, write, traverse); only our process can
55/// create or replace files inside it, which eliminates the
56/// symlink-attack surface for the tempfile-write path.
57///
58/// 2. **Leak bound.** Rust does NOT run `Drop` impls on values
59/// stored in `static` slots at process exit — so the
60/// `tempfile::TempDir`'s built-in cleanup would never fire here.
61/// Instead, the path is registered with `libc::atexit`
62/// (POSIX-spec process-exit handler) so a clean exit
63/// (`exit(3)`, fall-off-`main`) triggers
64/// [`std::fs::remove_dir_all`] on the directory. Crash, abort,
65/// SIGKILL, or panic-`abort` skip the atexit handler and leak
66/// the directory; the residual is bounded by the number of
67/// such ungraceful exits and the directory contents are
68/// text-sized config files. The tempdir's random suffix
69/// prevents collisions across runs, so accumulated leak dirs
70/// don't interfere with future runs.
71pub(crate) fn scratch_dir() -> &'static Path {
72 SCRATCH_PATH
73 .get_or_init(|| {
74 let td = tempfile::Builder::new()
75 .prefix("ktstr-config-")
76 .permissions(std::fs::Permissions::from_mode(0o700))
77 .tempdir()
78 .expect("create ktstr config scratch directory");
79 // `keep()` consumes the TempDir without running its
80 // Drop's cleanup (it flips the cleanup flag and returns
81 // the bare PathBuf we own). The atexit registration
82 // below takes over cleanup responsibility.
83 let path = td.keep();
84 // SAFETY: `cleanup_scratch_dir` has the required
85 // `extern "C" fn()` signature that `libc::atexit`
86 // accepts. The `unsafe` block here is required because
87 // `libc::atexit` itself is an `unsafe extern "C"` FFI
88 // call (the callback signature itself is plain
89 // `extern "C" fn()`, not `unsafe`). Registering more
90 // than once is the caller's responsibility;
91 // `OnceLock::get_or_init` guarantees this runs exactly
92 // once per process.
93 let rc = unsafe { libc::atexit(cleanup_scratch_dir) };
94 assert_eq!(
95 rc, 0,
96 "libc::atexit registration for ktstr config scratch dir failed"
97 );
98 path
99 })
100 .as_path()
101}
102
103/// Process-exit handler registered via `libc::atexit` by
104/// [`scratch_dir`] on first init. Removes the scratch directory and
105/// every config file inside it. Errors are ignored — by the time
106/// this runs the process is exiting and there is nowhere to surface
107/// a failure (no `eprintln!` ordering guarantees from inside an
108/// atexit handler, and panicking would be unsound across the C ABI
109/// boundary).
110extern "C" fn cleanup_scratch_dir() {
111 if let Some(path) = SCRATCH_PATH.get() {
112 let _ = std::fs::remove_dir_all(path);
113 }
114}
115
/// Reports whether `RUST_BACKTRACE` is exactly `"1"` or `"full"`.
///
/// Any other value — including unset, empty, or non-UTF-8 — yields
/// `false`.
///
/// The flag decides two things: whether the complete guest kernel
/// console is appended to the `--- diagnostics ---` section of a
/// failed test, and whether auto-repro streams the repro VM's
/// COM1/COM2 output to the host terminal live. The scheduler-log and
/// sched_ext-dump sections of a failure do not depend on it and are
/// always emitted.
pub(crate) fn verbose() -> bool {
    matches!(
        std::env::var("RUST_BACKTRACE").as_deref(),
        Ok("1" | "full")
    )
}
128
129/// True when `KTSTR_NO_PERF_MODE` is set to a NON-EMPTY value.
130///
131/// Centralises the perf-mode-disabled check used by the dispatch
132/// gauntlet routes (`run_named_test`, `run_gauntlet_test`, and the
133/// verifier-cell listing route `list_verifier_cells_all`, all in
134/// `super::dispatch`) and the eval entry path
135/// (`super::eval::run_ktstr_test_inner_impl`). All four sites
136/// previously called `std::env::var("KTSTR_NO_PERF_MODE").is_ok()`
137/// directly, which returned true for `KTSTR_NO_PERF_MODE=` (empty
138/// string set, e.g. via `unset`/`set` interplay in CI shells, or a
139/// `--env KTSTR_NO_PERF_MODE` Docker pass-through with no value) —
140/// silently skipping every `performance_mode` test. Requiring a
141/// non-empty value matches operator intent ("set it to something to
142/// disable perf mode") and rejects the empty-string accident.
143///
144/// `cargo ktstr test --no-perf-mode` exports `KTSTR_NO_PERF_MODE=1`
145/// (a non-empty value), so the existing CLI surface is unaffected.
146pub(crate) fn no_perf_mode_active() -> bool {
147 std::env::var(crate::KTSTR_NO_PERF_MODE_ENV)
148 .map(|v| !v.is_empty())
149 .unwrap_or(false)
150}
151
152/// True when `KTSTR_BYPASS_LLC_LOCKS` is set to a NON-EMPTY value.
153///
154/// Centralises the bypass check used at 7 reader sites:
155/// `vmm/builder.rs:1199`, `cli/kernel_build/build.rs:102` +
156/// `:488` (the latter the inverse `!bypass_llc_locks_active()`
157/// form), `bin/cargo_ktstr/kernel/mod.rs:720`,
158/// `bin/cargo_ktstr/misc/shell.rs:181`, and `bin/ktstr.rs:652` +
159/// `:1267`. All sites previously spelled the same
160/// `.ok().is_some_and(|v| !v.is_empty())` inline; centralising
161/// eliminates the drift hazard and matches the
162/// `no_perf_mode_active` shape so the empty-string contract is
163/// uniformly enforced.
164///
165/// Set via `--bypass-llc-locks` CLI flag or
166/// `KTSTR_BYPASS_LLC_LOCKS=1` direct export. Empty
167/// (`KTSTR_BYPASS_LLC_LOCKS=` from a Docker `--env` pass-through
168/// without value) does NOT activate per the empty-as-unset
169/// contract — preventing a stray export from silently disabling
170/// LLC flock contention enforcement in CI.
171pub fn bypass_llc_locks_active() -> bool {
172 std::env::var(crate::KTSTR_BYPASS_LLC_LOCKS_ENV)
173 .ok()
174 .is_some_and(|v| !v.is_empty())
175}
176
177/// Effective no-perf-mode for a given test entry. The env override
178/// `KTSTR_NO_PERF_MODE` and the per-entry [`KtstrTestEntry::no_perf_mode`]
179/// attribute are OR'd: either source forces the no-perf path
180/// (cpuset/LLC locking still applies, but vCPU pinning, hugepages,
181/// NUMA mbind, RT scheduling, and KVM exit suppression are all
182/// skipped). The env override is the operator-level switch; the
183/// per-entry attribute lets a test author opt the test out
184/// permanently — e.g. tests that exercise wild virtual topologies
185/// the host hardware can't possibly satisfy under perf-mode pinning.
186pub(crate) fn no_perf_mode_for_entry(entry: &KtstrTestEntry) -> bool {
187 no_perf_mode_active() || entry.no_perf_mode
188}
189
190/// True when `KTSTR_PERF_ONLY` is set to a NON-EMPTY value.
191///
192/// Mirrors [`no_perf_mode_active`]'s empty-as-unset contract (see
193/// [`crate::KTSTR_PERF_ONLY_ENV`]): any non-empty value restricts the
194/// run to `performance_mode` tests, an empty value does not.
195/// Consulted at the dispatch named/gauntlet routes and the eval entry
196/// to skip non-perf entries before VM boot. Set by the mergebase
197/// perf-delta subcommand.
198pub(crate) fn perf_only_active() -> bool {
199 std::env::var(crate::KTSTR_PERF_ONLY_ENV)
200 .map(|v| !v.is_empty())
201 .unwrap_or(false)
202}
203
204/// Whether `perf_only_active()` requires SKIPPING this entry: perf-only
205/// mode is on and the entry is not a `performance_mode` test. A
206/// `performance_mode` entry is always kept (it is the selection
207/// target); every other entry is skipped so a perf-delta run measures
208/// only the perf-configured tests.
209pub(crate) fn perf_only_skips_entry(entry: &KtstrTestEntry) -> bool {
210 perf_only_active() && !entry.performance_mode
211}
212
213/// Derive initramfs archive path, host path, and guest path from a
214/// scheduler's `config_file`. Returns `None` when no config file is set.
215pub(crate) fn config_file_parts(entry: &KtstrTestEntry) -> Option<(String, PathBuf, String)> {
216 let config_path = entry.scheduler.config_file?;
217 let file_name = Path::new(config_path)
218 .file_name()
219 .and_then(|n| n.to_str())
220 .expect("config_file must have a valid filename");
221 let archive_path = format!("include-files/{file_name}");
222 let guest_path = format!("/include-files/{file_name}");
223 Some((archive_path, PathBuf::from(config_path), guest_path))
224}
225
226/// Stable u64 hash of arbitrary string content.
227///
228/// Used by the config-content tempfile path code, but suitable for
229/// any content-addressed naming site that needs determinism across
230/// rustc bumps.
231///
232/// Uses `siphasher::sip::SipHasher13::new_with_keys(0, 0)` rather
233/// than `std::collections::hash_map::DefaultHasher` because the std
234/// algorithm is explicitly unspecified across rustc versions (see
235/// workspace `Cargo.toml` for the dep-line rationale). The explicit
236/// `new_with_keys(0, 0)` form matches the project's other
237/// stable-hash sites (`src/test_support/sidecar/mod.rs`, `build.rs`)
238/// so a future audit of zero-keyed SipHasher13 callers finds every
239/// instance via one grep. Same content always produces the same u64
240/// across toolchain upgrades, so cached artifacts stay reproducible
241/// across machines and rustc bumps.
242pub(crate) fn content_hash(content: &str) -> u64 {
243 use std::hash::{Hash, Hasher};
244 let mut hasher = siphasher::sip::SipHasher13::new_with_keys(0, 0);
245 content.hash(&mut hasher);
246 hasher.finish()
247}
248
249/// Resolve inline config content into a temp file on disk, returning
250/// `(archive_path, host_path, guest_path, sched_args)` where
251/// `sched_args` are the CLI args derived from the scheduler's
252/// `config_file_def` arg template. Returns `None` when the entry has
253/// no `config_content`.
254pub(crate) fn config_content_parts(
255 entry: &KtstrTestEntry,
256) -> Option<(String, PathBuf, String, Vec<String>)> {
257 use std::io::Write as _;
258 let content = entry.config_content?;
259 let (arg_template, guest_path) = entry.scheduler.config_file_def?;
260 let archive_path = guest_path.trim_start_matches('/').to_string();
261 let hash = content_hash(content);
262 let dir = scratch_dir();
263 // Write to a uniquely-named scratch file, then atomic-rename to the
264 // canonical content-addressed path:
265 // - Scratch acquisition via `NamedTempFile::new_in` uses
266 // `mkstemp(3)` semantics: random suffix, opened O_EXCL so no
267 // pre-existing file can be subverted as the write target.
268 // - The atomic `persist` rename is the cross-thread / cross-process
269 // race fix. Two writers of the same content race their renames
270 // to the canonical path; the last writer wins, but since `hash`
271 // is content-addressed both wrote byte-identical content, so the
272 // winner's bytes match the loser's. No torn writes are possible
273 // because `rename(2)` is atomic at the inode level — readers
274 // either see the old inode or the new one, never a partial blend.
275 // - On panic between `new_in` and `persist`, NamedTempFile's `Drop`
276 // unlinks the scratch file. No `/tmp` leak from in-process aborts.
277 let canonical = dir.join(format!("ktstr-config-{hash:016x}.json"));
278 let mut scratch =
279 tempfile::NamedTempFile::new_in(dir).expect("create ktstr config scratch file");
280 scratch
281 .as_file_mut()
282 .write_all(content.as_bytes())
283 .expect("write ktstr config content to scratch");
284 scratch
285 .persist(&canonical)
286 .expect("atomic-rename ktstr config scratch to canonical path");
287 let expanded = arg_template.replace("{file}", guest_path);
288 let sched_args: Vec<String> = expanded.split_whitespace().map(|s| s.to_string()).collect();
289 Some((archive_path, canonical, guest_path.to_string(), sched_args))
290}
291
292/// Build the shared `cmdline=` string appended to every ktstr_test
293/// guest boot. Per-scheduler sysctls, per-scheduler kargs,
294/// `RUST_BACKTRACE` / `RUST_LOG` propagation, and the host-resolved
295/// `KTSTR_SIDECAR_DIR` so the guest's `sidecar_dir()` returns the
296/// SAME path the host's freeze coordinator writes to. Without that
297/// propagation, host and guest each compute the run directory
298/// independently — the host walks `gix::discover` from a real
299/// workspace cwd and produces `{kernel}-{commit}` whereas the
300/// guest's cwd is `/` (no git repo, no kernel env), yielding the
301/// `unknown-unknown` fallback. Anything the two VM-launch sites
302/// (`run_ktstr_test_inner` and `attempt_auto_repro`) previously
303/// re-implemented side-by-side lives here.
304pub(crate) fn build_cmdline_extra(entry: &KtstrTestEntry) -> String {
305 let mut parts: Vec<String> = Vec::new();
306 for s in entry.scheduler.sysctls {
307 parts.push(format!("sysctl.{}={}", s.key(), s.value()));
308 }
309 for &karg in entry.scheduler.kargs {
310 parts.push(karg.to_string());
311 }
312 // Per-test KASLR opt-out (see `KtstrTestEntry.kaslr` doc). The base
313 // cmdline `base_guest_cmdline` at `src/vmm/setup/mod.rs` does NOT
314 // inject `nokaslr` by default — KASLR is on. A test that needs determinism sets `kaslr = false` in
315 // its `#[ktstr_test]` attribute; that lands the token here, where it
316 // composes with any operator-supplied `Scheduler::kargs(&["nokaslr"])`
317 // above (kernel parses the flag as a bool; duplicates are harmless).
318 // Mirrored guest-side by `vmm::rust_init::create_cgroup_parent_from_sched_args`
319 // and `monitor::symbols::resolve_page_offset`, both of which handle the
320 // `nokaslr` case via the live-publisher fall back to `DEFAULT_PAGE_OFFSET`.
321 if !entry.kaslr {
322 parts.push("nokaslr".to_string());
323 }
324 if let Ok(bt) = std::env::var("RUST_BACKTRACE") {
325 parts.push(format!("RUST_BACKTRACE={bt}"));
326 }
327 if let Ok(log) = std::env::var("RUST_LOG") {
328 parts.push(format!("RUST_LOG={log}"));
329 }
330 // Propagate the host-resolved sidecar dir so the guest scenario
331 // computes the same path the host's freeze coordinator wrote to
332 // (e.g. when a test reads `sidecar_dir().join("foo.json")` from
333 // inside the guest, the path matches the host's writer site).
334 // The host resolves via the OnceLock-cached project commit walk
335 // from the workspace cwd; the guest's cwd is `/` and would
336 // otherwise fall back to `unknown-unknown`. Sidecar dir paths
337 // are filesystem-safe ASCII (kernel version + 7-char hex
338 // commit, optional `-dirty` suffix), so the cmdline-as-token
339 // shape is sound — no escaping needed for whitespace.
340 //
341 // Absolutize via `current_dir().join()` when the resolved path
342 // is relative (the default-branch shape:
343 // `target/ktstr/{kernel}-{commit}` against the host cwd). The
344 // guest's cwd is `/`, so a relative token would resolve there
345 // instead of at the host's workspace root — the propagation
346 // must carry the FULL absolute path so the guest's
347 // `sidecar_dir()` reports the same string the host's writer
348 // site used. Falls back to the raw resolved path when the cwd
349 // probe fails (extremely rare; happens only when the process's
350 // cwd was rmdir'd while alive — a metadata probe has no
351 // recourse, leave the path as-is).
352 let resolved = super::sidecar::sidecar_dir();
353 let absolute = if resolved.is_absolute() {
354 resolved
355 } else {
356 std::env::current_dir()
357 .map(|cwd| cwd.join(&resolved))
358 .unwrap_or(resolved)
359 };
360 if let Some(s) = absolute.to_str() {
361 parts.push(format!("KTSTR_SIDECAR_DIR={s}"));
362 }
363 parts.join(" ")
364}
365
/// Attaches a wprof configuration to `builder` when the entry asked
/// for it; otherwise hands the builder back untouched.
///
/// When `entry.wprof` is set, the configuration is loaded with
/// `WprofConfig::from_env`. If the entry also supplies `wprof_args`,
/// they replace the loaded `args` after being split on whitespace.
/// `label` only appears in the error message, to identify the calling
/// launch path.
///
/// # Errors
///
/// Returns an error when `WprofConfig::from_env` fails.
#[cfg(feature = "wprof")]
pub(crate) fn attach_wprof_if_requested(
    builder: crate::vmm::KtstrVmBuilder,
    entry: &KtstrTestEntry,
    label: &'static str,
) -> anyhow::Result<crate::vmm::KtstrVmBuilder> {
    if entry.wprof {
        let loaded = crate::vmm::wprof::WprofConfig::from_env();
        let mut wprof_cfg = loaded.map_err(|e| {
            anyhow::anyhow!(
                "ktstr_test: {label}: wprof requested by \
                 #[ktstr_test(wprof)] but WprofConfig::from_env failed: \
                 {e:#}. Ensure cargo-ktstr's install_env exported \
                 KTSTR_WPROF_PATH and the path is readable."
            )
        })?;
        if let Some(override_args) = entry.wprof_args {
            wprof_cfg.args = override_args
                .split_whitespace()
                .map(str::to_owned)
                .collect();
        }
        Ok(builder.wprof(Some(wprof_cfg)))
    } else {
        Ok(builder)
    }
}
388
389/// Derive the test VM's memory floor from a CPU count + entry.
390///
391/// Returns `max(cpus * 64, 256, entry.memory_mib)`. When the
392/// `wprof` feature is enabled and `entry.wprof` is true, bumps
393/// to `WPROF_MIN_MEMORY_MIB` if below that floor.
394///
395/// The returned value is the LOWER BOUND on guest memory; the
396/// VM builder ultimately uses `.memory_deferred_min(mib)` which
397/// also accounts for the initramfs size, so the final boot memory
398/// may exceed this value.
399pub(crate) fn derive_test_memory_mib(cpus: u32, entry: &KtstrTestEntry) -> u32 {
400 let raw = (cpus * 64).max(256).max(entry.memory_mib);
401 #[cfg(feature = "wprof")]
402 {
403 use crate::vmm::wprof::{WPROF_MIN_MEMORY_MIB, apply_wprof_memory_floor};
404 let mem = apply_wprof_memory_floor(raw, entry.wprof);
405 if mem != raw {
406 tracing::info!(
407 test = %entry.name,
408 requested_mib = raw,
409 floored_mib = WPROF_MIN_MEMORY_MIB,
410 "wprof enabled; memory_mib floored to \
411 WPROF_MIN_MEMORY_MIB"
412 );
413 }
414 mem
415 }
416 #[cfg(not(feature = "wprof"))]
417 raw
418}
419
420/// Resolve the VM topology and memory size from an optional
421/// TopoOverride.
422///
423/// Returns `(topology, memory_mib)` where `topology` is the
424/// `vmm::topology::Topology` passed to the VM builder and `memory_mib`
425/// is the LOWER BOUND on guest memory (the builder's
426/// `.memory_deferred_min(mib)` may raise the actual allocation
427/// to fit the initramfs). When `topo` is `Some`, both come from
428/// the override and the memory is honored verbatim (per the
429/// override-is-verbatim contract — see `topo.rs:42-44`). When
430/// `topo` is `None`, the topology comes from `entry.topology` and
431/// memory is derived by [`derive_test_memory_mib`]. Shared with
432/// `attempt_auto_repro` so the repro VM always sizes memory the
433/// same way as the first VM — reproducibility requires identical
434/// topology, including the wprof floor when applicable.
435///
436/// When the `wprof` feature is enabled and `entry.wprof` is true,
437/// a TopoOverride with memory_mib below the wprof floor triggers
438/// a warn-level log but is still honored verbatim.
439pub(crate) fn resolve_vm_topology(
440 entry: &KtstrTestEntry,
441 topo: Option<&super::topo::TopoOverride>,
442) -> (crate::vmm::topology::Topology, u32) {
443 match topo {
444 Some(t) => {
445 #[cfg(feature = "wprof")]
446 if entry.wprof && t.memory_mib < crate::vmm::wprof::WPROF_MIN_MEMORY_MIB {
447 tracing::warn!(
448 test = %entry.name,
449 override_mib = t.memory_mib,
450 wprof_min_mib = crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
451 "wprof enabled with TopoOverride.memory_mib below \
452 WPROF_MIN_MEMORY_MIB; honoring the override per the \
453 override-is-verbatim contract, but wprof may OOM-kill \
454 mid-run"
455 );
456 }
457 (crate::vmm::topology::Topology::from(t), t.memory_mib)
458 }
459 None => {
460 let cpus = entry.topology.total_cpus();
461 let mem = derive_test_memory_mib(cpus, entry);
462 (entry.topology, mem)
463 }
464 }
465}
466
467/// Append per-scheduler `sched_args` entries shared by both VM-launch
468/// paths: `--config <guest_path>` if the scheduler declared one, the
469/// cgroup-parent switch, the scheduler's own fixed args, and
470/// per-entry extra args. Active-flag dispatch and probe-specific args
471/// remain at the call site because they differ between the paths.
472///
473/// The caller owns the `include_files` binding on the builder;
474/// `config_file_parts` and the guest-path push are returned separately
475/// so the caller decides whether to attach include files (production
476/// does, probe-only repro pipelines that already pass `include_files`
477/// can skip it).
478/// Concrete absolute-path example used by the panic messages that
479/// reject malformed `--cell-parent-cgroup` values — names the
480/// scheduler's declared default when one exists, falls back to a
481/// canonical `/ktstr` literal otherwise. The operator gets a
482/// copy-pasteable shape regardless of whether the scheduler is
483/// cell-aware. Centralised so both rejection arms (Value-invalid and
484/// MissingValue) display the same example.
485fn cgroup_parent_example(entry: &KtstrTestEntry) -> String {
486 entry
487 .scheduler
488 .cgroup_parent
489 .map(|p| p.as_str().to_string())
490 .unwrap_or_else(|| "/ktstr".to_string())
491}
492
493pub(crate) fn append_base_sched_args(entry: &KtstrTestEntry, args: &mut Vec<String>) {
494 // Fail-fast on a malformed user-supplied `--cell-parent-cgroup`
495 // value before the auto-inject branch. The host-side consumer
496 // `resolve_cgroup_root` (defined in `test_support::args`, used by
497 // the probe/setup path at `probe.rs::build_dispatch_ctx_parts`)
498 // interpolates the value into a
499 // `/sys/fs/cgroup{path}` literal and hands the result to
500 // `CgroupManager::new`, which has NO host-root guard — any path
501 // that doesn't start with `/` lands inside the host cgroup root
502 // (e.g. `""` → `/sys/fs/cgroup`, `"my_test"` →
503 // `/sys/fs/cgroupmy_test`) and corrupts unrelated cgroup state
504 // when subsequent `cgroups.setup(...)` calls run. The guest-side
505 // sibling `vmm::rust_init::create_cgroup_parent_from_sched_args`
506 // happens to be safe-by-coincidence for the empty case because
507 // `enable_subtree_controllers_to` early-returns when leaf equals
508 // the cgroup root — but probe.rs has no such gate, so the host
509 // fail-fast is what actually protects against corruption.
510 //
511 // The check is universal — independent of whether the scheduler
512 // declares a default `cgroup_parent` — because both routes
513 // (`extra_sched_args` from the test author, `sched_args` from
514 // the scheduler def) flow through the same parse + chain below,
515 // and the corruption risk is identical regardless of who
516 // supplied the bad value. Operator sees the message at test
517 // setup time, before any cgroup ops run.
518 match super::args::parse_cell_parent_cgroup(
519 entry
520 .scheduler
521 .sched_args
522 .iter()
523 .chain(entry.extra_sched_args.iter())
524 .copied(),
525 ) {
526 super::args::CellParentCgroupArg::Value(path)
527 if !super::args::cell_parent_path_is_valid(path) =>
528 {
529 let example = cgroup_parent_example(entry);
530 let mut fixes = format!(
531 "supply an absolute path under `/` with at least one non-`.`/`..` \
532 segment (e.g. `{example}`) for the per-test cgroup root"
533 );
534 if let Some(default) = entry.scheduler.cgroup_parent {
535 fixes.push_str(&format!(
536 " or omit the flag entirely (the framework will auto-inject \
537 the scheduler's default `cgroup_parent = {default}`)"
538 ));
539 }
540 panic!(
541 "test `{}` supplies `--cell-parent-cgroup` with a value `{:?}` \
542 (via `extra_sched_args` on the test or `sched_args` in the \
543 scheduler def) that does not start with `/`, is `/` alone, or \
544 contains `.`/`..` segments that normalize back to the host \
545 cgroup root; {fixes}. Empty, bare `/`, relative, or paths \
546 like `/.`, `/foo/..`, `/./bar/..` all resolve to a path \
547 equal to or inside `/sys/fs/cgroup` (e.g. empty → \
548 `/sys/fs/cgroup`, `/` → `/sys/fs/cgroup/`, `/.` → \
549 `/sys/fs/cgroup` after canonicalization) and corrupt \
550 unrelated cgroup state when the probe-side `CgroupManager` \
551 operates on the resolved path. This gate mirrors the \
552 const-eval check in `CgroupPath::new` so runtime values \
553 share the validation contract that compile-time \
554 declarations already pass.",
555 entry.name, path,
556 );
557 }
558 super::args::CellParentCgroupArg::MissingValue => {
559 let example = cgroup_parent_example(entry);
560 let mut fixes = format!(
561 "either remove the bare `--cell-parent-cgroup` and let the \
562 framework auto-inject the scheduler's default (when one is \
563 declared), or supply a value (e.g. `--cell-parent-cgroup={example}` \
564 in combined form, or `--cell-parent-cgroup` followed by an \
565 absolute path in two-token form)"
566 );
567 if entry.scheduler.cgroup_parent.is_none() {
568 fixes.push_str(
569 "; the scheduler in this test declares no default \
570 `cgroup_parent`, so an absolute-path value is required",
571 );
572 }
573 panic!(
574 "test `{}` supplies a bare `--cell-parent-cgroup` (via \
575 `extra_sched_args` on the test or `sched_args` in the \
576 scheduler def) with no following value; {fixes}. The \
577 framework intercepts this here because letting it through \
578 would silently combine with the framework's auto-inject \
579 (when a default exists) and trip clap's `cannot be used \
580 multiple times` diagnostic — a confusing error that buries \
581 the actual missing-value mistake.",
582 entry.name,
583 );
584 }
585 super::args::CellParentCgroupArg::Value(_) => {
586 // User-supplied valid path — flows through the
587 // `args.extend(...)` calls below. Skip the auto-inject so
588 // clap doesn't reject the duplicate flag with `cannot be
589 // used multiple times`.
590 }
591 super::args::CellParentCgroupArg::Absent => {
592 // `cgroup_parent` controls the cgroup root where the
593 // framework places test cgroups (`resolve_cgroup_root`
594 // returns `/sys/fs/cgroup{cgroup_parent}` for guest
595 // CgroupManager). It does NOT auto-inject
596 // `--cell-parent-cgroup` into the scheduler's argv —
597 // cell-aware schedulers (scx_mitosis et al.) interpret
598 // that flag by enabling userspace_managed_cell_mode and
599 // starting an inotify-driven CellManager that can
600 // interfere with the host-side periodic-capture
601 // pipeline. If a scheduler genuinely needs
602 // `--cell-parent-cgroup`, the scheduler declaration's
603 // own `sched_args` array (or the per-test
604 // `extra_sched_args`) must include it explicitly. The
605 // guest-side `create_cgroup_parent_from_sched_args`
606 // mkdir + subtree-controller setup still fires when
607 // `--cell-parent-cgroup` is present in `/sched_args` —
608 // it's gated on the flag's presence, not on whether the
609 // framework injected it vs. the user added it manually.
610 }
611 }
612 args.extend(entry.scheduler.sched_args.iter().map(|s| s.to_string()));
613 args.extend(entry.extra_sched_args.iter().map(|s| s.to_string()));
614}
615
/// Millisecond retry budget for the guest's
/// `vmm::rust_init::send_sys_rdy_with_retry` loop.
///
/// Boot-to-readiness time is a fixed base PLUS per-vCPU work: the
/// virtio-console multiport handshake (DEVICE_READY → PORT_ADD →
/// PORT_READY → PORT_OPEN per `drivers/char/virtio_console.c`) issues
/// per-CPU work that grows roughly linearly with topology size, on top
/// of a fixed device-enumeration / first-CPU cost. The budget is
/// therefore ADDITIVE — `10_000 + vcpus * 150`, capped at `90_000` —
/// not `max(base, scaled)`.
///
/// Why additive: under the earlier `max` form the per-vCPU term was
/// dead below ~67 vCPUs (`vcpus * 150` first exceeds the 10 s floor at
/// 67), so a 64-vCPU VM got the same 10 s as a 1-vCPU VM. Under host
/// contention a 64-vCPU handshake was observed at ~10 s and missed the
/// deadline by ~8 ms — the disk-template gauntlet flake. With the
/// additive base every topology keeps ~10 s of fixed headroom on top
/// of its linear term (64 vCPUs → 19.6 s).
///
/// Why a 90 s cap: it is a sanity bound on the guest-side retry loop
/// of a genuinely stuck boot — NOT protection for the host watchdog.
/// The watchdog deadline IS derived from this budget
/// ([`vm_boot_headroom`] feeds [`vm_timeout_from_entry`]), so the
/// budget cannot "blow" a deadline it defines; the old 30 s cap's
/// rationale was inverted. That cap also truncated the additive term
/// above ~133 vCPUs — a 256-vCPU guest wants
/// `10_000 + 256×150 = 48_400` ms but was clamped to 30_000 ms,
/// starving the widest topologies exactly where overcommit makes boot
/// slowest (the wide-SMP boot-timeout class). 90 s admits the full
/// budget up to the 512-vCPU `MAX_VCPUS`
/// (`10_000 + 512×150 = 86_800` ms); only counts above 533 vCPUs clip.
///
/// Being a `const fn`, the same budget is computable on the host
/// (`vm_boot_headroom`, `vm_timeout_from_entry`) and in the guest
/// (`vmm::rust_init`) with no trans-VM coordination — the guest reads
/// its own vCPU count from `/sys/devices/system/cpu/online`. The guest
/// uses this UN-scaled budget (it cannot see host overcommit); on an
/// oversubscribed boot its retry loop may exhaust and WARN, which is
/// non-fatal (the host monitor's `data_valid` gate keeps reads safe).
/// The host's [`vm_timeout_from_entry`] is the authoritative deadline
/// and DOES scale by the overcommit ratio.
pub(crate) const fn sys_rdy_budget_ms(vcpus: u32) -> u64 {
    const FIXED_MS: u64 = 10_000;
    const EACH_VCPU_MS: u64 = 150;
    const CEILING_MS: u64 = 90_000;
    let linear = (vcpus as u64).saturating_mul(EACH_VCPU_MS);
    let budget = FIXED_MS.saturating_add(linear);
    if budget <= CEILING_MS {
        budget
    } else {
        CEILING_MS
    }
}
664
/// Fixed allowance for the post-sys_rdy phase of guest startup:
/// kernel init, scheduler attach, and BPF verifier time.
///
/// This is separate from the pre-sys_rdy virtio-console handshake
/// budget computed by [`sys_rdy_budget_ms`] (base + per-vCPU term);
/// [`vm_boot_headroom`] is the sum of the two.
const KERNEL_INIT_HEADROOM: Duration = Duration::from_secs(10);
671
672/// Total boot headroom: covers kernel init + scheduler attach + BPF
673/// verifier time ([`KERNEL_INIT_HEADROOM`]) plus the guest's scaled
674/// `send_sys_rdy` retry loop ([`sys_rdy_budget_ms`]) before the
675/// workload phase begins. Scales with vCPU count so the host timeout
676/// doesn't fire while the guest is still inside its sys_rdy budget.
677pub(crate) fn vm_boot_headroom(vcpus: u32) -> Duration {
678 KERNEL_INIT_HEADROOM + Duration::from_millis(sys_rdy_budget_ms(vcpus))
679}
680
/// Fixed 30 s host-side allowance added to the workload budget of every
/// entry that declares a `bpf_map_write`.
///
/// It covers the worst-case latency the guest's `wait_for_map_write`
/// latch blocks on before such a test's workload runs: the host builds
/// the BPF-map accessor (ELF + BTF parse + symbol HashMap, ~4 s on a
/// debug vmlinux per the freeze-coord accessor-init comment) in a retry
/// loop bounded by a 30 s `phase1_deadline`. Under heavy `-j16`
/// host-compile contention the parse scales and a cold vmlinux read adds
/// seconds, so the latch can block up to that deadline.
///
/// This is a framework cost every such test pays, not a per-test concern
/// the author must remember to budget for.
const COLD_BTF_PHASE1_BUDGET: Duration = Duration::from_secs(30);
692
/// Oversubscription ratio at or beyond which a default/no-perf (auto)
/// overcommit is SKIPPED rather than booted.
///
/// Above it the host time-slices the vCPU threads so heavily the boot
/// would race even the oversub-scaled [`vm_timeout_from_entry`] deadline,
/// so the dispatch (`run_ktstr_test_inner_impl`) skips with a "host
/// topology insufficient" signal instead of hard-failing.
///
/// The value sits above the 4× oversubscription that the `cpu_budget`
/// overcommit tests deliberately exercise (those carry an explicit
/// `cpu_budget`, so they are NOT auto-collapse and never skip on this
/// ratio) and far above the ~1.3× a 256-vCPU guest hits on a 192-CPU CI
/// runner — which therefore RUNS and validates wide-SMP boot rather than
/// skipping, so the boot invariant is never masked.
pub(crate) const OVERCOMMIT_SKIP_RATIO: f64 = 6.0;
705
/// Ceiling on the boot-headroom oversubscription multiplier used by
/// [`vm_timeout_from_entry`].
///
/// The boot-headroom term is multiplied by the host overcommit ratio (an
/// oversubscribed boot's wall-clock grows ~linearly with the ratio as the
/// host time-slices the vCPU threads); the multiplier is clamped to this
/// value so the deadline stays bounded.
///
/// Defined as [`OVERCOMMIT_SKIP_RATIO`] so the scaled deadline always
/// covers every ratio that is allowed to run: an auto-collapse at or
/// beyond that ratio skips, and an explicit `cpu_budget` deeper than this
/// clamp is a deliberate extreme the author budgets for via
/// `duration`/`watchdog_timeout`.
const OVERCOMMIT_HEADROOM_CAP: f64 = OVERCOMMIT_SKIP_RATIO;
716
/// Stricter auto-skip ratio applied to the `expect_auto_repro` chain.
///
/// That inversion boots a SECOND wide-SMP VM which must replay the forced
/// failure and land a shape-valid `.repro.wprof.pb` — a far more fragile
/// path under host time-slicing than a single boot: the repro VM's
/// system-wide wprof capture over hundreds of vCPUs stops reliably
/// producing a transportable trace once the host oversubscribes (the
/// boots themselves still finish inside the oversub-scaled deadline; the
/// trace transport is what breaks).
///
/// So this chain auto-skips well below the generic
/// [`OVERCOMMIT_SKIP_RATIO`], while single-VM wide-SMP BOOT tests keep
/// running (and validating boot) up to the generic cap. The value is
/// tuned to sit between the ~1.3x a 256-vCPU guest hits on the 192-CPU
/// wide-SMP design-target runner (which still RUNS the auto-repro hop, so
/// it is validated there) and the ~2.7x of a 96-CPU host (which skips
/// cleanly instead of hard-failing — the "overcommit OR auto-skip, never
/// hard-fail" contract).
pub(crate) const EXPECT_AUTO_REPRO_SKIP_RATIO: f64 = 2.0;
732
/// Host overcommit ratio for a `vcpus`-wide guest: vCPUs divided by the
/// number of host CPUs the vCPU threads actually land on.
///
/// The divisor is chosen from the host's allowed-CPU count and the test's
/// optional explicit `cpu_budget`:
///
/// - With `Some(budget)` the threads collapse onto
///   `min(budget, allowed_cpus)` (the per-test cap).
/// - With `None` the default/no-perf path collapses onto the whole
///   allowed cpuset (`no_perf_cpu_budget`'s `vcpus.min(allowed)` floor
///   when `allowed < vcpus`, else a fitting 1:1 pin).
///
/// Under `KTSTR_CARGO_TEST_MODE` the planner ignores the explicit budget
/// and masks to the full allowed cpuset, so with a `cpu_budget` the
/// returned ratio is an UPPER bound there — deadline-safe, since
/// over-estimating only lengthens the timeout (CI runs `cargo ktstr test`
/// with that mode OFF, where the budget IS enforced and the ratio is
/// exact).
///
/// The result is floored at 1.0 (a fitting host is never under-subscribed
/// for timeout purposes). Division by zero is guarded by clamping the
/// divisor to at least 1, so an unenumerable cpuset (`allowed_cpus == 0`)
/// or a zero `cpu_budget` yields `vcpus` itself (still floored at 1.0) —
/// again an over-estimate, never an under-estimate.
///
/// Pure over `(vcpus, allowed_cpus, cpu_budget)` so the scaling is
/// unit-testable without reading the host cpuset.
pub(crate) fn overcommit_ratio(vcpus: u32, allowed_cpus: usize, cpu_budget: Option<u32>) -> f64 {
    let host_cpus = allowed_cpus.max(1);
    // CPUs the vCPU threads land on; the trailing `max(1)` covers a
    // zero `cpu_budget`.
    let landing_cpus = cpu_budget
        .map_or(host_cpus, |budget| host_cpus.min(budget as usize))
        .max(1);
    let ratio = f64::from(vcpus) / landing_cpus as f64;
    ratio.max(1.0)
}
760
761/// Reason to auto-skip an over-oversubscribed default/no-perf run, or
762/// `None` to run it. The default/no-perf path collapses the vCPU threads
763/// onto the allowed cpuset (`build_overcommit_run_locks` /
764/// `no_perf_cpu_budget`); at or beyond [`OVERCOMMIT_SKIP_RATIO`] the host
765/// time-slices so hard the boot would race even the oversub-scaled
766/// [`vm_timeout_from_entry`] deadline, so the dispatch
767/// (`run_ktstr_test_inner_impl`) skips with this reason — the "overcommit
768/// OR auto-skip, never hard-fail" contract. Returns `None` (runs) for:
769/// the fitting / mildly-oversubscribed case (< the ratio, e.g. a 256-vCPU
770/// guest at ~1.3x on a 192-CPU CI runner, so wide-SMP boot is VALIDATED
771/// there, never masked); an explicit `cpu_budget` (a deliberate
772/// oversubscription opt-in for contention testing always runs, its deeper
773/// ratio being the author's choice); and an empty (unenumerable) cpuset
774/// (no ratio is computable, so the overcommit warning is the sole
775/// signal). Pure over `(vcpus, allowed_cpus, cpu_budget)` so the skip
776/// boundary is unit-testable without booting a VM.
777pub(crate) fn overcommit_skip_reason(
778 vcpus: u32,
779 allowed_cpus: usize,
780 cpu_budget: Option<u32>,
781 expect_auto_repro: bool,
782) -> Option<String> {
783 if cpu_budget.is_some() || allowed_cpus == 0 {
784 return None;
785 }
786 let oversub = overcommit_ratio(vcpus, allowed_cpus, None);
787 // The two-VM expect_auto_repro inversion uses a much stricter cap
788 // ([`EXPECT_AUTO_REPRO_SKIP_RATIO`]) than a single-VM boot test: its
789 // repro-VM wprof-trace transport is fragile under time-slicing, so it
790 // skips at an oversubscription a boot-only wide-SMP test still runs at.
791 let skip_ratio = if expect_auto_repro {
792 EXPECT_AUTO_REPRO_SKIP_RATIO
793 } else {
794 OVERCOMMIT_SKIP_RATIO
795 };
796 if oversub < skip_ratio {
797 return None;
798 }
799 let chain = if expect_auto_repro {
800 " for the expect_auto_repro inversion chain"
801 } else {
802 ""
803 };
804 Some(format!(
805 "host topology insufficient: {vcpus} vCPUs auto-collapse onto \
806 {allowed_cpus} allowed host CPUs = {oversub:.1}x oversubscription \
807 (>= {skip_ratio:.0}x skip cap{chain}); widen the process cpuset \
808 or shrink the guest topology"
809 ))
810}
811
812/// Derive the host-side VM timeout from the test entry's watchdog and
813/// duration. Adds vCPU-scaled boot headroom so the workload gets its
814/// full duration even after a slow boot on a large topology, then
815/// multiplies THAT headroom by the host [`overcommit_ratio`] (clamped
816/// to [`OVERCOMMIT_HEADROOM_CAP`]) so an oversubscribed boot — whose
817/// wall-clock grows ~linearly with the ratio as the host time-slices
818/// the vCPU threads — still finishes inside the deadline. Only the boot
819/// headroom scales: `base` is the guest's own workload/watchdog budget
820/// plus the absolute host-side [`COLD_BTF_PHASE1_BUDGET`], neither of
821/// which is the AP-bring-up wall time the ratio models; the headroom's
822/// slack absorbs the small extra host wall-clock a short workload
823/// accrues under time-slicing. [`COLD_BTF_PHASE1_BUDGET`] is added when
824/// the entry declares a `bpf_map_write` (the guest blocks on the host's
825/// cold-BTF accessor build before the workload starts), and
826/// [`crate::vmm::freeze_coord::WPROF_SHIP_GRACE`] when it declares
827/// `wprof` (a crashing scheduler's late Phase-5 trace ship is held for
828/// that window before teardown).
829///
830/// `booted_vcpus` is the vCPU count of the topology the VM actually
831/// boots (`resolve_vm_topology(entry, topo).0.total_cpus()`), NOT the
832/// declared `entry.topology`: under a `TopoOverride` (gauntlet preset /
833/// `--ktstr-topo`) they diverge, and both the vCPU-scaled boot headroom
834/// and the oversubscription multiplier must scale to the topology that
835/// boots — otherwise the watchdog fires mid-boot on a larger-than-declared
836/// preset.
837pub(crate) fn vm_timeout_from_entry(
838 entry: &super::entry::KtstrTestEntry,
839 booted_vcpus: u32,
840) -> Duration {
841 let mut base = entry
842 .watchdog_timeout
843 .max(entry.duration)
844 .max(Duration::from_secs(1));
845 if !entry.bpf_map_write.is_empty() {
846 base += COLD_BTF_PHASE1_BUDGET;
847 }
848 // A wprof entry's scheduler may crash; on an error-class exit the
849 // freeze coordinator holds the VM open up to `WPROF_SHIP_GRACE` for
850 // the guest's late Phase-5 wprof trace ship before killing. Add that
851 // window to the host budget so a late crash's full ship grace fits
852 // inside the watchdog deadline (mirrors COLD_BTF_PHASE1_BUDGET).
853 if entry.wprof {
854 base += crate::vmm::freeze_coord::WPROF_SHIP_GRACE;
855 }
856 let vcpus = booted_vcpus;
857 let oversub = overcommit_ratio(
858 vcpus,
859 crate::vmm::host_topology::host_allowed_cpus().len(),
860 entry.cpu_budget,
861 )
862 .min(OVERCOMMIT_HEADROOM_CAP);
863 base + vm_boot_headroom(vcpus).mul_f64(oversub)
864}
865
866/// Configure the ktstr_test VM builder prefix shared by the main
867/// test path ([`super::eval::run_ktstr_test_inner`]) and the
868/// auto-repro path ([`super::probe::attempt_auto_repro`]).
869///
870/// Applies, in order: kernel, init binary, topology, memory floor,
871/// guest cmdline, SHM size, guest argv, host-side timeout, perf-mode
872/// disable flag, optional scheduler binary, every queued BPF map
873/// write, and the scheduler watchdog timeout.
874///
875/// The caller owns the divergent tail. `run_ktstr_test_inner`
876/// additionally wires `performance_mode`,
877/// `sched_enable_cmds`/`sched_disable_cmds` for kernel-built
878/// schedulers, and `monitor_thresholds`. `attempt_auto_repro`
879/// additionally wires `include_files` plus base `sched_args`.
880#[allow(clippy::too_many_arguments)]
881pub(crate) fn build_vm_builder_base(
882 entry: &KtstrTestEntry,
883 kernel: &Path,
884 ktstr_bin: &Path,
885 scheduler: Option<&Path>,
886 staged_schedulers: &[(String, std::path::PathBuf, Vec<String>)],
887 vm_topology: crate::vmm::topology::Topology,
888 memory_mib: u32,
889 cmdline_extra: &str,
890 guest_args: &[String],
891 no_perf_mode: bool,
892) -> crate::vmm::KtstrVmBuilder {
893 // The base builder deliberately does NOT set
894 // `failure_dump_path` — the per-VM target is caller-specific
895 // (primary vs auto-repro). Stale-file pre-clear lives at the
896 // dispatch sites (`test_support::eval` for primary;
897 // `test_support::probe::attempt_auto_repro` for repro), not
898 // inside the setter or this base call. The setter is pure
899 // (no FS side effects); placing the pre-clear in the dispatch
900 // layer prevents the auto-repro path's reuse of this base
901 // builder from accidentally erasing the primary dump that
902 // just landed.
903 let mut builder = crate::vmm::KtstrVm::builder()
904 .kernel(kernel)
905 .init_binary(ktstr_bin)
906 .topology(vm_topology)
907 .memory_deferred_min(memory_mib)
908 .cmdline(cmdline_extra)
909 .run_args(guest_args)
910 .timeout(vm_timeout_from_entry(entry, vm_topology.total_cpus()))
911 .workload_duration(entry.duration)
912 .no_perf_mode(no_perf_mode);
913
914 // Per-test no-perf CPU budget override (#[ktstr_test(cpu_budget = N)]).
915 // None leaves the builder's auto-size (vCPU count) in place; only the
916 // no-perf path consumes it.
917 if let Some(budget) = entry.cpu_budget {
918 builder = builder.cpu_budget(budget);
919 }
920
921 if let Some(sched_path) = scheduler {
922 builder = builder.scheduler_binary(sched_path);
923 }
924
925 // Push each pre-resolved staged scheduler into the builder's
926 // staging set. Caller is responsible for running each entry
927 // through the resolve_scheduler cascade so this fn stays
928 // infallible (sibling to the boot-time `scheduler: Option<&Path>`
929 // shape which is also caller-resolved). KernelBuiltin / Eevdf
930 // staged entries (no binary to resolve) are skipped at the
931 // caller side; only resolved (name, host_binary, sched_args)
932 // tuples reach this loop.
933 for (name, host_path, sched_args) in staged_schedulers {
934 builder = builder.staged_scheduler(name.clone(), host_path.clone(), sched_args.clone());
935 }
936
937 // Opt-in jemalloc-probe wiring. An integration test that needs
938 // the probe (see `tests/jemalloc_probe_tests.rs`) sets
939 // `KTSTR_JEMALLOC_PROBE_BINARY` to the absolute host path of
940 // `ktstr-jemalloc-probe` via `#[ctor]` before the test harness
941 // dispatches. When set, the probe is packed into every VM's
942 // base initramfs; the init binary stays stripped because the
943 // paired alloc-worker carries DWARF. Absent env var = existing
944 // behavior (no probe).
945 //
946 // Required ctor shape in a new test file that needs the probe
947 // in the guest — paste verbatim, adjust the two binary names.
948 // Either ctor form works (ktstr re-exports both): the proc-macro
949 // attribute shown below, or the declarative
950 // `::ktstr::__private::ctor::declarative::ctor! { ... }` block
951 // form (ktstr's own in-tree sites use the declarative form per
952 // src/test_support/dispatch.rs).
953 //
954 // ```ignore
955 // #[::ktstr::__private::ctor::ctor(unsafe, crate_path = ::ktstr::__private::ctor)]
956 // fn set_probe_binary_env_var() {
957 // // SAFETY: ctor runs before any `#[ktstr_test]` thread or
958 // // probe thread spawns; glibc's `__environ` mutation is
959 // // single-threaded here.
960 // unsafe {
961 // std::env::set_var(
962 // ::ktstr::KTSTR_JEMALLOC_PROBE_BINARY_ENV,
963 // env!("CARGO_BIN_EXE_ktstr-jemalloc-probe"),
964 // );
965 // std::env::set_var(
966 // ::ktstr::KTSTR_JEMALLOC_ALLOC_WORKER_BINARY_ENV,
967 // env!("CARGO_BIN_EXE_ktstr-jemalloc-alloc-worker"),
968 // );
969 // }
970 // }
971 // ```
972 //
973 // Declarative-form equivalent (no `crate_path = ` plumbing required
974 // because the macro_rules! expansion resolves paths via `$crate`):
975 //
976 // ```ignore
977 // ::ktstr::__private::ctor::declarative::ctor! {
978 // #[ctor(unsafe)]
979 // fn set_probe_binary_env_var() {
980 // // SAFETY: same as proc-macro form above.
981 // unsafe {
982 // std::env::set_var(
983 // ::ktstr::KTSTR_JEMALLOC_PROBE_BINARY_ENV,
984 // env!("CARGO_BIN_EXE_ktstr-jemalloc-probe"),
985 // );
986 // std::env::set_var(
987 // ::ktstr::KTSTR_JEMALLOC_ALLOC_WORKER_BINARY_ENV,
988 // env!("CARGO_BIN_EXE_ktstr-jemalloc-alloc-worker"),
989 // );
990 // }
991 // }
992 // }
993 // ```
994 //
995 // The `crate_path = ::ktstr::__private::ctor` argument is
996 // non-negotiable: `#[ctor::ctor(unsafe)]` without the
997 // re-export path panics at compile time because the `ctor`
998 // crate is not listed in the test crate's direct deps. ktstr
999 // re-exports `ctor` under `__private::ctor` exactly so test
1000 // authors do not need to add it themselves. ctor 1.0 also
1001 // mandates the `unsafe` marker as the first attribute
1002 // argument; bare `#[ctor::ctor]` no longer compiles.
1003 if let Ok(probe_path) = std::env::var(crate::KTSTR_JEMALLOC_PROBE_BINARY_ENV)
1004 && !probe_path.is_empty()
1005 {
1006 // Pack the probe binary into the guest initramfs at
1007 // `/bin/ktstr-jemalloc-probe`. Closed-loop probe tests run
1008 // the probe via `--pid <alloc_worker_pid>` against the
1009 // paired `ktstr-jemalloc-alloc-worker` target; DWARF comes
1010 // from the worker's own ELF, not the init's.
1011 builder = builder.jemalloc_probe_binary(std::path::PathBuf::from(probe_path));
1012 }
1013 if let Ok(worker_path) = std::env::var(crate::KTSTR_JEMALLOC_ALLOC_WORKER_BINARY_ENV)
1014 && !worker_path.is_empty()
1015 {
1016 // Pack the jemalloc-alloc-worker binary alongside the
1017 // probe. Only the cross-process closed-loop test sets
1018 // this; scheduler-only tests leave the env var unset and
1019 // skip the wiring.
1020 builder = builder.jemalloc_alloc_worker_binary(std::path::PathBuf::from(worker_path));
1021 }
1022
1023 for bpf_write in entry.bpf_map_write {
1024 builder = builder.bpf_map_write(
1025 bpf_write.map_name_suffix(),
1026 bpf_write.field(),
1027 bpf_write.value(),
1028 );
1029 }
1030
1031 for watch in entry.watch_bpf_maps {
1032 builder = builder.watch_bpf_map(
1033 watch.map_name_suffix(),
1034 watch.field(),
1035 watch.agg(),
1036 watch.label(),
1037 );
1038 }
1039
1040 if let Some(disk_cfg) = entry.disk.clone() {
1041 builder = builder.disk(disk_cfg);
1042 }
1043
1044 for net_cfg in entry.networks {
1045 builder = builder.network(*net_cfg);
1046 }
1047
1048 builder = builder.num_snapshots(entry.num_snapshots);
1049
1050 if let Some(root) = entry.workload_root_cgroup {
1051 builder = builder.workload_root_cgroup(root.as_str().to_string());
1052 }
1053 if let Some(parent) = entry.scheduler.cgroup_parent {
1054 builder = builder.scheduler_cgroup_parent(parent.as_str().to_string());
1055 }
1056
1057 builder.watchdog_timeout(entry.watchdog_timeout)
1058}
1059
1060#[cfg(test)]
1061mod tests {
1062 use super::super::entry::Scheduler;
1063 use super::super::test_helpers::{EnvVarGuard, lock_env};
1064 use super::*;
1065
1066 /// RAII pin of the host allowed-CPU set so [`vm_timeout_from_entry`]'s
1067 /// overcommit scaling is deterministic regardless of the runner's
1068 /// real cpuset. Mirrors `host_topology::tests::AllowedCpusGuard`
1069 /// (which is private to that test module). Sets the thread-local
1070 /// `ALLOWED_CPUS_OVERRIDE` and clears it on drop so a panicking test
1071 /// never leaks the override to a sibling sharing the thread.
1072 struct AllowedCpusPin;
1073 impl AllowedCpusPin {
1074 fn new(cpus: Vec<usize>) -> Self {
1075 crate::vmm::host_topology::ALLOWED_CPUS_OVERRIDE.with(|p| *p.borrow_mut() = Some(cpus));
1076 AllowedCpusPin
1077 }
1078 }
1079 impl Drop for AllowedCpusPin {
1080 fn drop(&mut self) {
1081 crate::vmm::host_topology::ALLOWED_CPUS_OVERRIDE.with(|p| *p.borrow_mut() = None);
1082 }
1083 }
1084
1085 #[test]
1086 fn vm_timeout_from_entry_adds_cold_btf_budget_for_bpf_map_write() {
1087 use super::super::entry::{BpfMapWrite, KtstrTestEntry};
1088 static W: BpfMapWrite = BpfMapWrite::new(".bss", "crash", 0);
1089 static WS: &[&BpfMapWrite] = &[&W];
1090 let no_write = KtstrTestEntry {
1091 name: "no_write",
1092 ..KtstrTestEntry::DEFAULT
1093 };
1094 let with_write = KtstrTestEntry {
1095 name: "with_write",
1096 bpf_map_write: WS,
1097 ..KtstrTestEntry::DEFAULT
1098 };
1099 // The cold-BTF phase-1 budget is added to the workload base only
1100 // when the entry declares a host-side bpf_map_write; the delta
1101 // between an otherwise-identical pair is exactly that budget.
1102 assert_eq!(
1103 vm_timeout_from_entry(&with_write, with_write.topology.total_cpus()),
1104 vm_timeout_from_entry(&no_write, no_write.topology.total_cpus())
1105 + COLD_BTF_PHASE1_BUDGET,
1106 "bpf_map_write entries get the cold-BTF phase-1 budget added",
1107 );
1108 }
1109
1110 #[test]
1111 fn no_perf_mode_active_true_when_env_set_to_value() {
1112 let _l = lock_env();
1113 let _g = EnvVarGuard::set(crate::KTSTR_NO_PERF_MODE_ENV, "1");
1114 assert!(no_perf_mode_active());
1115 }
1116
1117 #[test]
1118 fn no_perf_mode_active_false_when_env_unset() {
1119 let _l = lock_env();
1120 let _g = EnvVarGuard::remove(crate::KTSTR_NO_PERF_MODE_ENV);
1121 assert!(!no_perf_mode_active());
1122 }
1123
1124 /// Regression pin: empty-string-as-unset contract. Before the
1125 /// env-var-sweep cleanup, the bare `is_ok()` reader returned
1126 /// true on
1127 /// `KTSTR_NO_PERF_MODE=` (set but empty — e.g. Docker
1128 /// `--env KTSTR_NO_PERF_MODE` pass-through fired without a
1129 /// value), silently flipping perf-mode OFF for every
1130 /// `performance_mode` test. The fix at L146 treats
1131 /// empty-as-unset; this test pins that contract for ALL
1132 /// consumer sites (shell-mode VM at lib.rs, verifier at
1133 /// verifier.rs, dispatch + eval) since they all route
1134 /// through this helper.
1135 #[test]
1136 fn no_perf_mode_active_false_when_env_set_to_empty_string() {
1137 let _l = lock_env();
1138 let _g = EnvVarGuard::set(crate::KTSTR_NO_PERF_MODE_ENV, "");
1139 assert!(
1140 !no_perf_mode_active(),
1141 "empty-string env must be treated as UNSET — a regression \
1142 here flips perf-mode for every consumer that routes \
1143 through no_perf_mode_active",
1144 );
1145 }
1146
1147 #[test]
1148 fn perf_only_active_true_when_env_set_to_value() {
1149 let _l = lock_env();
1150 let _g = EnvVarGuard::set(crate::KTSTR_PERF_ONLY_ENV, "1");
1151 assert!(perf_only_active());
1152 }
1153
1154 #[test]
1155 fn perf_only_active_false_when_env_unset() {
1156 let _l = lock_env();
1157 let _g = EnvVarGuard::remove(crate::KTSTR_PERF_ONLY_ENV);
1158 assert!(!perf_only_active());
1159 }
1160
1161 /// Empty-as-unset contract (mirrors `no_perf_mode_active`): a
1162 /// `KTSTR_PERF_ONLY=` pass-through must NOT silently skip every
1163 /// non-perf test.
1164 #[test]
1165 fn perf_only_active_false_when_env_set_to_empty_string() {
1166 let _l = lock_env();
1167 let _g = EnvVarGuard::set(crate::KTSTR_PERF_ONLY_ENV, "");
1168 assert!(
1169 !perf_only_active(),
1170 "empty-string env must be treated as UNSET",
1171 );
1172 }
1173
1174 /// Selection logic: with perf-only active, a non-performance_mode
1175 /// entry is skipped while a performance_mode entry is kept. When
1176 /// perf-only is inactive, neither is skipped.
1177 #[test]
1178 fn perf_only_skips_entry_keeps_perf_skips_others() {
1179 use super::super::entry::KtstrTestEntry;
1180 let perf = KtstrTestEntry {
1181 name: "perf",
1182 performance_mode: true,
1183 ..KtstrTestEntry::DEFAULT
1184 };
1185 let plain = KtstrTestEntry {
1186 name: "plain",
1187 performance_mode: false,
1188 ..KtstrTestEntry::DEFAULT
1189 };
1190
1191 let _l = lock_env();
1192 {
1193 let _g = EnvVarGuard::set(crate::KTSTR_PERF_ONLY_ENV, "1");
1194 assert!(
1195 !perf_only_skips_entry(&perf),
1196 "a performance_mode test is the selection target, never skipped",
1197 );
1198 assert!(
1199 perf_only_skips_entry(&plain),
1200 "a non-performance_mode test must be skipped under perf-only",
1201 );
1202 }
1203 let _g = EnvVarGuard::remove(crate::KTSTR_PERF_ONLY_ENV);
1204 assert!(!perf_only_skips_entry(&perf));
1205 assert!(
1206 !perf_only_skips_entry(&plain),
1207 "perf-only inactive => nothing is skipped on this axis",
1208 );
1209 }
1210
1211 #[test]
1212 fn bypass_llc_locks_active_true_when_env_set_to_value() {
1213 let _l = lock_env();
1214 let _g = EnvVarGuard::set(crate::KTSTR_BYPASS_LLC_LOCKS_ENV, "1");
1215 assert!(bypass_llc_locks_active());
1216 }
1217
1218 #[test]
1219 fn bypass_llc_locks_active_false_when_env_unset() {
1220 let _l = lock_env();
1221 let _g = EnvVarGuard::remove(crate::KTSTR_BYPASS_LLC_LOCKS_ENV);
1222 assert!(!bypass_llc_locks_active());
1223 }
1224
1225 /// Regression pin: empty-string-as-unset contract for
1226 /// KTSTR_BYPASS_LLC_LOCKS. A bare `KTSTR_BYPASS_LLC_LOCKS=`
1227 /// (CI shell / Docker `--env` pass-through without value)
1228 /// must NOT activate the bypass. The helper enforces this
1229 /// uniformly for all 7 reader sites (vmm/builder.rs,
1230 /// cli/kernel_build/build.rs ×2, bin/ktstr.rs ×2,
1231 /// bin/cargo_ktstr/{kernel/mod, misc/shell}) — a regression
1232 /// here flips the contention contract for every caller.
1233 #[test]
1234 fn bypass_llc_locks_active_false_when_env_set_to_empty_string() {
1235 let _l = lock_env();
1236 let _g = EnvVarGuard::set(crate::KTSTR_BYPASS_LLC_LOCKS_ENV, "");
1237 assert!(
1238 !bypass_llc_locks_active(),
1239 "empty-string env must be treated as UNSET per the contract \
1240 shared with no_perf_mode_active — a regression here flips \
1241 LLC flock contention enforcement for every reader",
1242 );
1243 }
1244
1245 #[test]
1246 fn config_file_parts_nested_path() {
1247 static SCHED: Scheduler = Scheduler::named("cfg").config_file("configs/my_sched.toml");
1248 let entry = KtstrTestEntry {
1249 name: "cfg_test",
1250 scheduler: &SCHED,
1251 ..KtstrTestEntry::DEFAULT
1252 };
1253 let (archive, host, guest) = config_file_parts(&entry).unwrap();
1254 assert_eq!(archive, "include-files/my_sched.toml");
1255 assert_eq!(host, PathBuf::from("configs/my_sched.toml"));
1256 assert_eq!(guest, "/include-files/my_sched.toml");
1257 }
1258
1259 #[test]
1260 fn config_file_parts_bare_filename() {
1261 static SCHED: Scheduler = Scheduler::named("cfg").config_file("config.toml");
1262 let entry = KtstrTestEntry {
1263 name: "cfg_bare",
1264 scheduler: &SCHED,
1265 ..KtstrTestEntry::DEFAULT
1266 };
1267 let (archive, host, guest) = config_file_parts(&entry).unwrap();
1268 assert_eq!(archive, "include-files/config.toml");
1269 assert_eq!(host, PathBuf::from("config.toml"));
1270 assert_eq!(guest, "/include-files/config.toml");
1271 }
1272
1273 #[test]
1274 fn config_file_parts_none_when_unset() {
1275 let entry = KtstrTestEntry {
1276 name: "no_cfg",
1277 ..KtstrTestEntry::DEFAULT
1278 };
1279 assert!(config_file_parts(&entry).is_none());
1280 }
1281
1282 // -- build_cmdline_extra --
1283
1284 use super::super::entry::{KtstrTestEntry, Sysctl};
1285
1286 #[test]
1287 fn build_cmdline_extra_default_is_sidecar_only() {
1288 let _lock = lock_env();
1289 // Make sure the env does not inject spurious RUST_BACKTRACE /
1290 // RUST_LOG entries that would break the default assertion.
1291 let _env_bt = EnvVarGuard::remove("RUST_BACKTRACE");
1292 let _env_log = EnvVarGuard::remove("RUST_LOG");
1293 // Pin KTSTR_SIDECAR_DIR so the propagation token shape is
1294 // stable across tests; without the override, the call falls
1295 // through to the `{kernel}-{commit}` resolver whose output
1296 // depends on the test process's git state.
1297 let _env_sd = EnvVarGuard::set(crate::KTSTR_SIDECAR_DIR_ENV, "/tmp/ktstr-test");
1298
1299 let entry = KtstrTestEntry {
1300 name: "cmdline_test",
1301 ..KtstrTestEntry::DEFAULT
1302 };
1303 let out = build_cmdline_extra(&entry);
1304 assert_eq!(out, "KTSTR_SIDECAR_DIR=/tmp/ktstr-test");
1305 }
1306
1307 #[test]
1308 fn build_cmdline_extra_appends_sysctls_kargs() {
1309 let _lock = lock_env();
1310 let _env_bt = EnvVarGuard::remove("RUST_BACKTRACE");
1311 let _env_log = EnvVarGuard::remove("RUST_LOG");
1312 let _env_sd = EnvVarGuard::set(crate::KTSTR_SIDECAR_DIR_ENV, "/tmp/ktstr-test");
1313
1314 static SYSCTLS: &[Sysctl] = &[Sysctl::new("kernel.foo", "1")];
1315 static SCHED: Scheduler = Scheduler::named("s").sysctls(SYSCTLS).kargs(&["quiet"]);
1316 let entry = KtstrTestEntry {
1317 name: "cmd",
1318 scheduler: &SCHED,
1319 ..KtstrTestEntry::DEFAULT
1320 };
1321 let out = build_cmdline_extra(&entry);
1322 assert_eq!(
1323 out,
1324 "sysctl.kernel.foo=1 quiet KTSTR_SIDECAR_DIR=/tmp/ktstr-test"
1325 );
1326 }
1327
1328 #[test]
1329 fn build_cmdline_extra_propagates_rust_env() {
1330 let _lock = lock_env();
1331 let _env_bt = EnvVarGuard::set("RUST_BACKTRACE", "1");
1332 let _env_log = EnvVarGuard::set("RUST_LOG", "debug");
1333 let _env_sd = EnvVarGuard::set(crate::KTSTR_SIDECAR_DIR_ENV, "/tmp/ktstr-test");
1334
1335 let entry = KtstrTestEntry {
1336 name: "cmd",
1337 ..KtstrTestEntry::DEFAULT
1338 };
1339 let out = build_cmdline_extra(&entry);
1340 assert!(
1341 out.contains("RUST_BACKTRACE=1"),
1342 "expected RUST_BACKTRACE propagation: {out}"
1343 );
1344 assert!(
1345 out.contains("RUST_LOG=debug"),
1346 "expected RUST_LOG propagation: {out}"
1347 );
1348 assert!(
1349 out.contains("KTSTR_SIDECAR_DIR=/tmp/ktstr-test"),
1350 "expected KTSTR_SIDECAR_DIR propagation: {out}"
1351 );
1352 }
1353
1354 #[test]
1355 fn build_cmdline_extra_propagates_sidecar_dir() {
1356 let _lock = lock_env();
1357 let _env_bt = EnvVarGuard::remove("RUST_BACKTRACE");
1358 let _env_log = EnvVarGuard::remove("RUST_LOG");
1359 // Explicit override path proves the token shape is exactly
1360 // `KTSTR_SIDECAR_DIR=<path>` and uses the override verbatim
1361 // (host's `sidecar_dir()` honours the env var as the
1362 // operator-chosen override slot).
1363 let _env_sd = EnvVarGuard::set(crate::KTSTR_SIDECAR_DIR_ENV, "/explicit/sidecar/dir");
1364
1365 let entry = KtstrTestEntry {
1366 name: "cmd",
1367 ..KtstrTestEntry::DEFAULT
1368 };
1369 let out = build_cmdline_extra(&entry);
1370 assert_eq!(out, "KTSTR_SIDECAR_DIR=/explicit/sidecar/dir");
1371 }
1372
1373 // -- resolve_vm_topology --
1374
1375 #[test]
1376 fn resolve_vm_topology_override_is_verbatim() {
1377 let entry = KtstrTestEntry {
1378 name: "topo_test",
1379 ..KtstrTestEntry::DEFAULT
1380 };
1381 let over = super::super::topo::TopoOverride {
1382 numa_nodes: 2,
1383 llcs: 4,
1384 cores: 8,
1385 threads: 2,
1386 memory_mib: 4096,
1387 };
1388 let (topo, mem) = resolve_vm_topology(&entry, Some(&over));
1389 assert_eq!(mem, 4096);
1390 assert_eq!(topo.llcs, 4);
1391 assert_eq!(topo.cores_per_llc, 8);
1392 assert_eq!(topo.threads_per_core, 2);
1393 assert_eq!(topo.numa_nodes, 2);
1394 }
1395
1396 #[test]
1397 fn resolve_vm_topology_none_floors_memory_at_256() {
1398 // Tiny topology: 1*1*1=1 cpu -> 64 MiB raw, entry.memory_mib=0,
1399 // floor = max(64, 256, 0) = 256.
1400 //
1401 // Override memory_mib explicitly to 0 — KtstrTestEntry::DEFAULT
1402 // sets memory_mib=2048, which would bypass the floor entirely
1403 // and leave this test vacuously passing regardless of the
1404 // max(…, 256, …) branch. Setting memory_mib=0 makes the 256
1405 // floor the exact lower bound the assertion verifies.
1406 let entry = KtstrTestEntry {
1407 name: "tiny",
1408 memory_mib: 0,
1409 ..KtstrTestEntry::DEFAULT
1410 };
1411 let (_topo, mem) = resolve_vm_topology(&entry, None);
1412 assert_eq!(mem, 256, "memory floor = 256 MiB, got {mem}");
1413 }
1414
1415 #[test]
1416 fn resolve_vm_topology_none_honors_entry_memory_mib() {
1417 // Entry with explicit memory_mib above the cpu*64 and 256 floors.
1418 let entry = KtstrTestEntry {
1419 name: "mem",
1420 memory_mib: 8192,
1421 ..KtstrTestEntry::DEFAULT
1422 };
1423 let (_topo, mem) = resolve_vm_topology(&entry, None);
1424 assert_eq!(mem, 8192);
1425 }
1426
1427 #[cfg(feature = "wprof")]
1428 #[test]
1429 fn resolve_vm_topology_wprof_floors_memory_on_entry_path() {
1430 // Entry with wprof=true and memory below the wprof floor.
1431 // The entry-derived path must raise memory to WPROF_MIN_MEMORY_MIB.
1432 let entry = KtstrTestEntry {
1433 name: "wprof_floor",
1434 memory_mib: 512,
1435 wprof: true,
1436 ..KtstrTestEntry::DEFAULT
1437 };
1438 let (_topo, mem) = resolve_vm_topology(&entry, None);
1439 assert_eq!(
1440 mem,
1441 crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
1442 "wprof=true must bump memory to >= WPROF_MIN_MEMORY_MIB \
1443 ({}), got {mem}",
1444 crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
1445 );
1446 }
1447
1448 #[cfg(feature = "wprof")]
1449 #[test]
1450 fn resolve_vm_topology_wprof_no_bump_when_already_above_floor() {
1451 // Entry already above the wprof floor — must be honored unchanged.
1452 let entry = KtstrTestEntry {
1453 name: "wprof_high",
1454 memory_mib: 8192,
1455 wprof: true,
1456 ..KtstrTestEntry::DEFAULT
1457 };
1458 let (_topo, mem) = resolve_vm_topology(&entry, None);
1459 assert_eq!(
1460 mem, 8192,
1461 "memory_mib above WPROF_MIN_MEMORY_MIB must be honored \
1462 unchanged, got {mem}"
1463 );
1464 }
1465
1466 #[test]
1467 fn resolve_vm_topology_wprof_disabled_does_not_floor() {
1468 // wprof=false: the wprof floor must NOT apply, even when
1469 // entry.memory_mib falls below WPROF_MIN_MEMORY_MIB. Only
1470 // the universal 256 floor + cpu*64 derivation apply.
1471 let entry = KtstrTestEntry {
1472 name: "no_wprof",
1473 memory_mib: 512,
1474 wprof: false,
1475 ..KtstrTestEntry::DEFAULT
1476 };
1477 let (_topo, mem) = resolve_vm_topology(&entry, None);
1478 assert_eq!(
1479 mem, 512,
1480 "wprof=false must not invoke the WPROF_MIN_MEMORY_MIB \
1481 floor, got {mem}"
1482 );
1483 }
1484
1485 #[test]
1486 fn derive_test_memory_mib_baseline_without_wprof() {
1487 let entry = KtstrTestEntry {
1488 name: "baseline",
1489 memory_mib: 0,
1490 ..KtstrTestEntry::DEFAULT
1491 };
1492 let mem = derive_test_memory_mib(2, &entry);
1493 assert_eq!(mem, 256, "2 cpus * 64 = 128, floor 256 wins");
1494 }
1495
1496 #[cfg(feature = "wprof")]
1497 #[test]
1498 fn resolve_vm_topology_wprof_no_bump_at_exact_floor() {
1499 // Boundary case: derived memory equals WPROF_MIN_MEMORY_MIB
1500 // exactly. The handler uses strict `<` so 2048 passes through
1501 // unchanged. A regression that flipped to `<=` would be a
1502 // 2048→2048 no-op (still unobservable), but a regression
1503 // that flipped to `>` (or `>= ... { raw } else { FLOOR }`)
1504 // would catastrophically floor every test. This test pins
1505 // the strict-less-than direction.
1506 let entry = KtstrTestEntry {
1507 name: "wprof_exact",
1508 memory_mib: crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
1509 wprof: true,
1510 ..KtstrTestEntry::DEFAULT
1511 };
1512 let (_topo, mem) = resolve_vm_topology(&entry, None);
1513 assert_eq!(
1514 mem,
1515 crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
1516 "memory_mib equal to WPROF_MIN_MEMORY_MIB must pass \
1517 through unchanged (strict-less-than floor condition); \
1518 got {mem}"
1519 );
1520 }
1521
1522 #[cfg(feature = "wprof")]
1523 #[test]
1524 fn resolve_vm_topology_wprof_floors_zero_entry_memory_mib() {
1525 // Edge case: entry.memory_mib=0 with wprof=true. The raw
1526 // derivation `max(cpus*64, 256, 0)` resolves to 256 on the
1527 // default 1-CPU topology, which is well below the floor.
1528 // wprof must bump to WPROF_MIN_MEMORY_MIB.
1529 let entry = KtstrTestEntry {
1530 name: "wprof_zero_mib",
1531 memory_mib: 0,
1532 wprof: true,
1533 ..KtstrTestEntry::DEFAULT
1534 };
1535 let (_topo, mem) = resolve_vm_topology(&entry, None);
1536 assert_eq!(
1537 mem,
1538 crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
1539 "entry.memory_mib=0 with wprof=true must floor to \
1540 WPROF_MIN_MEMORY_MIB ({}); got {mem}",
1541 crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
1542 );
1543 }
1544
1545 #[cfg(feature = "wprof")]
1546 #[test]
1547 fn derive_test_memory_mib_helper_applies_wprof_floor() {
1548 // Direct test of the derivation helper used by BOTH
1549 // resolve_vm_topology AND the dispatch.rs sites that
1550 // construct TopoOverride from CLI / preset topology
1551 // (run_ktstr_test_with_topo_str, run_gauntlet_test).
1552 // Pins that the helper applies the wprof floor — a
1553 // regression that re-inlined the formula at the dispatch
1554 // sites without the wprof check would silently bypass
1555 // the floor when `cargo ktstr test --ktstr-topo` runs
1556 // against a wprof-tagged test.
1557 let entry = KtstrTestEntry {
1558 name: "helper",
1559 memory_mib: 0,
1560 wprof: true,
1561 ..KtstrTestEntry::DEFAULT
1562 };
1563 let mem = derive_test_memory_mib(2, &entry);
1564 assert_eq!(
1565 mem,
1566 crate::vmm::wprof::WPROF_MIN_MEMORY_MIB,
1567 "helper must floor wprof memory regardless of caller; got {mem}"
1568 );
1569
1570 // wprof=false: derivation returns the raw formula
1571 // without any floor.
1572 let entry_no_wprof = KtstrTestEntry {
1573 wprof: false,
1574 ..entry
1575 };
1576 let mem = derive_test_memory_mib(2, &entry_no_wprof);
1577 assert_eq!(
1578 mem, 256,
1579 "helper with wprof=false must NOT apply the floor; \
1580 expected max(2*64, 256, 0)=256, got {mem}"
1581 );
1582 }
1583
1584 #[cfg(feature = "wprof")]
1585 #[test]
1586 fn resolve_vm_topology_override_with_wprof_honors_override_verbatim() {
1587 // The override-is-verbatim contract: a TopoOverride with
1588 // memory_mib below WPROF_MIN_MEMORY_MIB is honored as the
1589 // operator's explicit choice. A warn-level log fires (not
1590 // verified in this unit test — tracing capture is out of
1591 // scope here) but the boot memory matches the override.
1592 let entry = KtstrTestEntry {
1593 name: "override_wprof",
1594 wprof: true,
1595 ..KtstrTestEntry::DEFAULT
1596 };
1597 let over = super::super::topo::TopoOverride {
1598 numa_nodes: 1,
1599 llcs: 1,
1600 cores: 1,
1601 threads: 1,
1602 memory_mib: 512,
1603 };
1604 let (_topo, mem) = resolve_vm_topology(&entry, Some(&over));
1605 assert_eq!(
1606 mem, 512,
1607 "TopoOverride.memory_mib must be honored verbatim even \
1608 with wprof enabled, got {mem}"
1609 );
1610 }
1611
1612 // -- append_base_sched_args --
1613
1614 #[test]
1615 fn append_base_sched_args_empty_when_none_set() {
1616 let entry = KtstrTestEntry {
1617 name: "nosched",
1618 ..KtstrTestEntry::DEFAULT
1619 };
1620 let mut args = Vec::new();
1621 append_base_sched_args(&entry, &mut args);
1622 assert!(args.is_empty(), "no sched args expected: {args:?}");
1623 }
1624
1625 /// `cgroup_parent` does NOT auto-inject `--cell-parent-cgroup`
1626 /// into the scheduler argv — the two concerns are decoupled.
1627 /// The scheduler-def `sched_args` and the per-test
1628 /// `extra_sched_args` flow through unchanged; the `cgroup_parent`
1629 /// setting controls the framework's cgroup root but never
1630 /// modifies the scheduler's CLI invocation.
1631 #[test]
1632 fn append_base_sched_args_does_not_auto_inject_cell_parent_cgroup() {
1633 static SCHED: Scheduler = Scheduler::named("s")
1634 .cgroup_parent("/sys/fs/cgroup/ktstr")
1635 .sched_args(&["-v", "--flag"]);
1636 let entry = KtstrTestEntry {
1637 name: "sched",
1638 scheduler: &SCHED,
1639 extra_sched_args: &["--extra"],
1640 ..KtstrTestEntry::DEFAULT
1641 };
1642 let mut args = Vec::new();
1643 append_base_sched_args(&entry, &mut args);
1644 assert_eq!(
1645 args,
1646 vec![
1647 "-v".to_string(),
1648 "--flag".to_string(),
1649 "--extra".to_string(),
1650 ],
1651 "cgroup_parent must not auto-inject --cell-parent-cgroup; \
1652 only sched_args + extra_sched_args reach the scheduler"
1653 );
1654 }
1655
1656 /// User-passed `--cell-parent-cgroup /user` via `extra_sched_args`
1657 /// suppresses the auto-inject so clap inside the scheduler binary
1658 /// doesn't reject the duplicate.
1659 #[test]
1660 fn append_base_sched_args_dedupes_extra_split_form() {
1661 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1662 let entry = KtstrTestEntry {
1663 name: "sched",
1664 scheduler: &SCHED,
1665 extra_sched_args: &["--cell-parent-cgroup", "/user"],
1666 ..KtstrTestEntry::DEFAULT
1667 };
1668 let mut args = Vec::new();
1669 append_base_sched_args(&entry, &mut args);
1670 assert_eq!(
1671 args,
1672 vec!["--cell-parent-cgroup".to_string(), "/user".to_string()],
1673 "auto-inject must be skipped when extra_sched_args carries \
1674 --cell-parent-cgroup in two-token form"
1675 );
1676 }
1677
1678 /// Combined form (`--cell-parent-cgroup=/user`) must also suppress
1679 /// the auto-inject.
1680 #[test]
1681 fn append_base_sched_args_dedupes_extra_combined_form() {
1682 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1683 let entry = KtstrTestEntry {
1684 name: "sched",
1685 scheduler: &SCHED,
1686 extra_sched_args: &["--cell-parent-cgroup=/user"],
1687 ..KtstrTestEntry::DEFAULT
1688 };
1689 let mut args = Vec::new();
1690 append_base_sched_args(&entry, &mut args);
1691 assert_eq!(
1692 args,
1693 vec!["--cell-parent-cgroup=/user".to_string()],
1694 "auto-inject must be skipped when extra_sched_args carries \
1695 --cell-parent-cgroup in combined `=` form"
1696 );
1697 }
1698
1699 /// Scheduler-def `sched_args` carrying `--cell-parent-cgroup`
1700 /// also suppresses the auto-inject.
1701 #[test]
1702 fn append_base_sched_args_dedupes_scheduler_sched_args() {
1703 static SCHED: Scheduler = Scheduler::named("s")
1704 .cgroup_parent("/sys/fs/cgroup/ktstr")
1705 .sched_args(&["--cell-parent-cgroup", "/user"]);
1706 let entry = KtstrTestEntry {
1707 name: "sched",
1708 scheduler: &SCHED,
1709 ..KtstrTestEntry::DEFAULT
1710 };
1711 let mut args = Vec::new();
1712 append_base_sched_args(&entry, &mut args);
1713 assert_eq!(
1714 args,
1715 vec!["--cell-parent-cgroup".to_string(), "/user".to_string()],
1716 "auto-inject must be skipped when scheduler.sched_args carries \
1717 --cell-parent-cgroup"
1718 );
1719 }
1720
1721 /// Scheduler-def `sched_args` carrying the combined `=` form also
1722 /// suppresses the auto-inject — completes the {source × form}
1723 /// 2×2 matrix.
1724 #[test]
1725 fn append_base_sched_args_dedupes_scheduler_sched_args_combined_form() {
1726 static SCHED: Scheduler = Scheduler::named("s")
1727 .cgroup_parent("/sys/fs/cgroup/ktstr")
1728 .sched_args(&["--cell-parent-cgroup=/user"]);
1729 let entry = KtstrTestEntry {
1730 name: "sched",
1731 scheduler: &SCHED,
1732 ..KtstrTestEntry::DEFAULT
1733 };
1734 let mut args = Vec::new();
1735 append_base_sched_args(&entry, &mut args);
1736 assert_eq!(
1737 args,
1738 vec!["--cell-parent-cgroup=/user".to_string()],
1739 "auto-inject must be skipped when scheduler.sched_args carries \
1740 --cell-parent-cgroup in combined `=` form"
1741 );
1742 }
1743
1744 /// When BOTH scheduler.sched_args AND extra_sched_args carry
1745 /// `--cell-parent-cgroup`, the framework's auto-inject is
1746 /// suppressed (`.any()` short-circuits on first match) but the
1747 /// user's duplicates flow through unchanged. The framework does
1748 /// not dedupe user-supplied duplicates — clap inside the
1749 /// scheduler binary will reject them with "cannot be used
1750 /// multiple times", as it should. Pin: the framework correctly
1751 /// avoids ADDING a third copy.
1752 #[test]
1753 fn append_base_sched_args_does_not_dedupe_user_dupes() {
1754 static SCHED: Scheduler = Scheduler::named("s")
1755 .cgroup_parent("/sys/fs/cgroup/ktstr")
1756 .sched_args(&["--cell-parent-cgroup", "/sched"]);
1757 let entry = KtstrTestEntry {
1758 name: "sched",
1759 scheduler: &SCHED,
1760 extra_sched_args: &["--cell-parent-cgroup", "/extra"],
1761 ..KtstrTestEntry::DEFAULT
1762 };
1763 let mut args = Vec::new();
1764 append_base_sched_args(&entry, &mut args);
1765 assert_eq!(
1766 args,
1767 vec![
1768 "--cell-parent-cgroup".to_string(),
1769 "/sched".to_string(),
1770 "--cell-parent-cgroup".to_string(),
1771 "/extra".to_string(),
1772 ],
1773 "framework auto-inject is suppressed; both user-supplied \
1774 entries flow through unchanged (user owns the dup)"
1775 );
1776 }
1777
1778 /// Empty combined value (`--cell-parent-cgroup=`) is rejected at
1779 /// the framework gate with an actionable panic that names the
1780 /// offending test and points the operator at the right fix.
1781 /// Empty values would resolve to `/sys/fs/cgroup` (the host
1782 /// cgroup root) downstream — guaranteed to corrupt unrelated
1783 /// cgroup state — so the framework rejects rather than letting
1784 /// clap surface a generic "value required" error after the
1785 /// cgroup hierarchy has already been built.
1786 #[test]
1787 #[should_panic(expected = "that does not start with `/`")]
1788 fn append_base_sched_args_panics_on_empty_combined_value_via_extra() {
1789 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1790 let entry = KtstrTestEntry {
1791 name: "sched",
1792 scheduler: &SCHED,
1793 extra_sched_args: &["--cell-parent-cgroup="],
1794 ..KtstrTestEntry::DEFAULT
1795 };
1796 let mut args = Vec::new();
1797 append_base_sched_args(&entry, &mut args);
1798 }
1799
1800 /// Two-token form with an empty value as the second token
1801 /// (`["--cell-parent-cgroup", ""]`) is rejected by the same gate.
1802 /// Covers the second route into `parse_cell_parent_cgroup` so a
1803 /// future refactor that switches the empty-detection logic on
1804 /// only one form gets caught.
1805 #[test]
1806 #[should_panic(expected = "that does not start with `/`")]
1807 fn append_base_sched_args_panics_on_empty_two_token_value_via_extra() {
1808 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1809 let entry = KtstrTestEntry {
1810 name: "sched_two_token",
1811 scheduler: &SCHED,
1812 extra_sched_args: &["--cell-parent-cgroup", ""],
1813 ..KtstrTestEntry::DEFAULT
1814 };
1815 let mut args = Vec::new();
1816 append_base_sched_args(&entry, &mut args);
1817 }
1818
1819 /// Bad value via the scheduler-def's own `sched_args` rather than
1820 /// the test's `extra_sched_args` — the chain at the parser site
1821 /// covers both sources, so the gate fires regardless of origin.
1822 /// Pins both the combined form and the scheduler origin.
1823 #[test]
1824 #[should_panic(expected = "that does not start with `/`")]
1825 fn append_base_sched_args_panics_on_empty_combined_value_via_scheduler_sched_args() {
1826 static SCHED: Scheduler = Scheduler::named("s")
1827 .cgroup_parent("/sys/fs/cgroup/ktstr")
1828 .sched_args(&["--cell-parent-cgroup="]);
1829 let entry = KtstrTestEntry {
1830 name: "sched_in_def",
1831 scheduler: &SCHED,
1832 ..KtstrTestEntry::DEFAULT
1833 };
1834 let mut args = Vec::new();
1835 append_base_sched_args(&entry, &mut args);
1836 }
1837
1838 /// Two-token form via the scheduler-def origin — completes the
1839 /// 2-source × 2-form matrix together with the three siblings.
1840 #[test]
1841 #[should_panic(expected = "that does not start with `/`")]
1842 fn append_base_sched_args_panics_on_empty_two_token_value_via_scheduler_sched_args() {
1843 static SCHED: Scheduler = Scheduler::named("s")
1844 .cgroup_parent("/sys/fs/cgroup/ktstr")
1845 .sched_args(&["--cell-parent-cgroup", ""]);
1846 let entry = KtstrTestEntry {
1847 name: "sched_in_def_two_token",
1848 scheduler: &SCHED,
1849 ..KtstrTestEntry::DEFAULT
1850 };
1851 let mut args = Vec::new();
1852 append_base_sched_args(&entry, &mut args);
1853 }
1854
1855 /// Empty-value gate fires even when the scheduler-def has no
1856 /// `cgroup_parent` default. Without the universal gate the empty
1857 /// value would slip through and corrupt unrelated host cgroup
1858 /// state at the downstream `resolve_cgroup_root` interpolation.
1859 #[test]
1860 #[should_panic(expected = "that does not start with `/`")]
1861 fn append_base_sched_args_panics_on_empty_combined_value_no_scheduler_cgroup_parent() {
1862 static SCHED: Scheduler = Scheduler::named("s");
1863 let entry = KtstrTestEntry {
1864 name: "no_default_cgroup",
1865 scheduler: &SCHED,
1866 extra_sched_args: &["--cell-parent-cgroup="],
1867 ..KtstrTestEntry::DEFAULT
1868 };
1869 let mut args = Vec::new();
1870 append_base_sched_args(&entry, &mut args);
1871 }
1872
1873 /// Two-token form, no scheduler default — completes the
1874 /// no-default matrix together with the combined-form sibling.
1875 #[test]
1876 #[should_panic(expected = "that does not start with `/`")]
1877 fn append_base_sched_args_panics_on_empty_two_token_value_no_scheduler_cgroup_parent() {
1878 static SCHED: Scheduler = Scheduler::named("s");
1879 let entry = KtstrTestEntry {
1880 name: "no_default_cgroup_two_token",
1881 scheduler: &SCHED,
1882 extra_sched_args: &["--cell-parent-cgroup", ""],
1883 ..KtstrTestEntry::DEFAULT
1884 };
1885 let mut args = Vec::new();
1886 append_base_sched_args(&entry, &mut args);
1887 }
1888
1889 /// Relative path (no leading `/`) is rejected by the same gate.
1890 /// Pins the broader contract (the message explicitly promises
1891 /// "absolute path under `/`"); empty is just one case of
1892 /// non-absolute.
1893 #[test]
1894 #[should_panic(expected = "that does not start with `/`")]
1895 fn append_base_sched_args_panics_on_relative_path_value() {
1896 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1897 let entry = KtstrTestEntry {
1898 name: "relative_path",
1899 scheduler: &SCHED,
1900 extra_sched_args: &["--cell-parent-cgroup=my_test"],
1901 ..KtstrTestEntry::DEFAULT
1902 };
1903 let mut args = Vec::new();
1904 append_base_sched_args(&entry, &mut args);
1905 }
1906
1907 /// Two-token form of the relative-path case. Closes the matrix
1908 /// gap: combined-form was pinned by the sibling above but a
1909 /// future refactor that split path validation between the
1910 /// combined and two-token branches could regress one form
1911 /// without test catching.
1912 #[test]
1913 #[should_panic(expected = "that does not start with `/`")]
1914 fn append_base_sched_args_panics_on_relative_path_value_two_token() {
1915 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1916 let entry = KtstrTestEntry {
1917 name: "relative_path_two_token",
1918 scheduler: &SCHED,
1919 extra_sched_args: &["--cell-parent-cgroup", "my_test"],
1920 ..KtstrTestEntry::DEFAULT
1921 };
1922 let mut args = Vec::new();
1923 append_base_sched_args(&entry, &mut args);
1924 }
1925
1926 /// `/.` is absolute and has more than one character, so a naive
1927 /// `starts_with('/') && len > 1` check passes — but the kernel
1928 /// canonicalizes `/sys/fs/cgroup/.` back to `/sys/fs/cgroup`
1929 /// (host cgroup root), corrupting unrelated cgroup state.
1930 /// `Path::components` strips the trailing `.`, yielding `[RootDir]`
1931 /// — the validator rejects via the "has no Normal component"
1932 /// check, not the CurDir arm (see `cell_parent_path_is_valid`).
1933 #[test]
1934 #[should_panic(expected = "contains `.`/`..` segments")]
1935 fn append_base_sched_args_panics_on_dot_normalizing_to_root() {
1936 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1937 let entry = KtstrTestEntry {
1938 name: "dot_normalize",
1939 scheduler: &SCHED,
1940 extra_sched_args: &["--cell-parent-cgroup=/."],
1941 ..KtstrTestEntry::DEFAULT
1942 };
1943 let mut args = Vec::new();
1944 append_base_sched_args(&entry, &mut args);
1945 }
1946
1947 /// `/foo/..` canonicalizes back to `/` → `/sys/fs/cgroup`. Same
1948 /// host-root corruption risk as the empty/bare-slash cases. The
1949 /// component-based gate rejects any `..` (ParentDir) segment.
1950 #[test]
1951 #[should_panic(expected = "contains `.`/`..` segments")]
1952 fn append_base_sched_args_panics_on_parent_dir_normalizing_to_root() {
1953 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1954 let entry = KtstrTestEntry {
1955 name: "parent_dir_normalize",
1956 scheduler: &SCHED,
1957 extra_sched_args: &["--cell-parent-cgroup=/foo/.."],
1958 ..KtstrTestEntry::DEFAULT
1959 };
1960 let mut args = Vec::new();
1961 append_base_sched_args(&entry, &mut args);
1962 }
1963
1964 /// Mixed `/./bar/..` — both kinds of normalizing segment in one
1965 /// path. `Path::components` strips the leading `/.`, yielding
1966 /// `[RootDir, Normal("bar"), ParentDir]`; the validator reaches
1967 /// the `ParentDir` and rejects via that arm. The `/.` never
1968 /// surfaces as a CurDir component.
1969 #[test]
1970 #[should_panic(expected = "contains `.`/`..` segments")]
1971 fn append_base_sched_args_panics_on_mixed_normalize_segments() {
1972 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1973 let entry = KtstrTestEntry {
1974 name: "mixed_normalize",
1975 scheduler: &SCHED,
1976 extra_sched_args: &["--cell-parent-cgroup=/./bar/.."],
1977 ..KtstrTestEntry::DEFAULT
1978 };
1979 let mut args = Vec::new();
1980 append_base_sched_args(&entry, &mut args);
1981 }
1982
1983 /// `/foo/./bar` is ACCEPTED — `Path::components` normalizes away
1984 /// every `CurDir` segment (see `cell_parent_path_is_valid` for
1985 /// the full per-position behavior); the canonical form
1986 /// `/foo/bar` is a real non-root path. Pin the accept path so a
1987 /// future refactor to a stricter `.contains("/./")` text check
1988 /// is caught. Also assert the user value flows through verbatim
1989 /// — a regression that canonicalized the path before forwarding
1990 /// would silently rewrite `/foo/./bar` to `/foo/bar`.
1991 #[test]
1992 fn append_base_sched_args_accepts_embedded_dot_segment() {
1993 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
1994 let entry = KtstrTestEntry {
1995 name: "embedded_dot_ok",
1996 scheduler: &SCHED,
1997 extra_sched_args: &["--cell-parent-cgroup=/foo/./bar"],
1998 ..KtstrTestEntry::DEFAULT
1999 };
2000 let mut args = Vec::new();
2001 append_base_sched_args(&entry, &mut args);
2002 assert!(
2003 args.iter().any(|a| a == "--cell-parent-cgroup=/foo/./bar"),
2004 "user value must pass through verbatim (no canonicalization); args: {args:?}",
2005 );
2006 }
2007
2008 /// Bare `/..` is the most damaging path-normalize edge:
2009 /// downstream interpolation `/sys/fs/cgroup/..` canonicalizes to
2010 /// `/sys/fs` — escapes the cgroup hierarchy entirely. The
2011 /// component walk hits `ParentDir` immediately after `RootDir`
2012 /// (no Normal segment between them) and rejects via the
2013 /// ParentDir arm.
2014 #[test]
2015 #[should_panic(expected = "contains `.`/`..` segments")]
2016 fn append_base_sched_args_panics_on_bare_parent_dir() {
2017 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
2018 let entry = KtstrTestEntry {
2019 name: "bare_parent_dir",
2020 scheduler: &SCHED,
2021 extra_sched_args: &["--cell-parent-cgroup=/.."],
2022 ..KtstrTestEntry::DEFAULT
2023 };
2024 let mut args = Vec::new();
2025 append_base_sched_args(&entry, &mut args);
2026 }
2027
2028 /// Mid-path `/foo/../bar` — ParentDir sits BETWEEN Normal
2029 /// segments. Different shape from `/foo/..` (trailing
2030 /// ParentDir): a regression that bailed only on
2031 /// `path.ends_with("/..")` would slip this past. Downstream
2032 /// interpolation `/sys/fs/cgroup/foo/../bar` canonicalizes to
2033 /// `/sys/fs/cgroup/bar` — an unintended sibling directory the
2034 /// test author didn't ask for. Component walk catches ParentDir
2035 /// in any position.
2036 #[test]
2037 #[should_panic(expected = "contains `.`/`..` segments")]
2038 fn append_base_sched_args_panics_on_mid_path_parent_dir() {
2039 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
2040 let entry = KtstrTestEntry {
2041 name: "mid_path_parent_dir",
2042 scheduler: &SCHED,
2043 extra_sched_args: &["--cell-parent-cgroup=/foo/../bar"],
2044 ..KtstrTestEntry::DEFAULT
2045 };
2046 let mut args = Vec::new();
2047 append_base_sched_args(&entry, &mut args);
2048 }
2049
2050 /// Bare `/` slips a naive `starts_with('/')` check but resolves
2051 /// downstream to `/sys/fs/cgroup/` — semantically the host cgroup
2052 /// root, same corruption risk as the empty case. The gate mirrors
2053 /// `CgroupPath::new`'s const-eval contract (rejects both
2054 /// no-leading-slash AND `"/"` alone) so runtime values share the
2055 /// same validation as compile-time declarations.
2056 #[test]
2057 #[should_panic(expected = "is `/` alone")]
2058 fn append_base_sched_args_panics_on_bare_slash_value() {
2059 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
2060 let entry = KtstrTestEntry {
2061 name: "bare_slash",
2062 scheduler: &SCHED,
2063 extra_sched_args: &["--cell-parent-cgroup=/"],
2064 ..KtstrTestEntry::DEFAULT
2065 };
2066 let mut args = Vec::new();
2067 append_base_sched_args(&entry, &mut args);
2068 }
2069
2070 /// Combined-form empty value via scheduler-def `sched_args`
2071 /// when the scheduler also has NO `cgroup_parent` default. Closes
2072 /// the matrix intersection: a future refactor that gates the
2073 /// scheduler-def-source check on `cgroup_parent.is_some()` would
2074 /// pass the other 6 empty tests but regress this cell.
2075 #[test]
2076 #[should_panic(expected = "that does not start with `/`")]
2077 fn append_base_sched_args_panics_on_empty_combined_value_in_scheduler_sched_args_no_default() {
2078 static SCHED: Scheduler = Scheduler::named("s").sched_args(&["--cell-parent-cgroup="]);
2079 let entry = KtstrTestEntry {
2080 name: "scheduler_def_origin_no_default",
2081 scheduler: &SCHED,
2082 ..KtstrTestEntry::DEFAULT
2083 };
2084 let mut args = Vec::new();
2085 append_base_sched_args(&entry, &mut args);
2086 }
2087
2088 /// Two-token-form sibling of the above — completes the
2089 /// 2-form coverage for the scheduler-def-origin × no-default
2090 /// intersection.
2091 #[test]
2092 #[should_panic(expected = "that does not start with `/`")]
2093 fn append_base_sched_args_panics_on_empty_two_token_value_in_scheduler_sched_args_no_default() {
2094 static SCHED: Scheduler = Scheduler::named("s").sched_args(&["--cell-parent-cgroup", ""]);
2095 let entry = KtstrTestEntry {
2096 name: "scheduler_def_origin_two_token_no_default",
2097 scheduler: &SCHED,
2098 ..KtstrTestEntry::DEFAULT
2099 };
2100 let mut args = Vec::new();
2101 append_base_sched_args(&entry, &mut args);
2102 }
2103
2104 /// Bare `--cell-parent-cgroup` flag with no following token
2105 /// (two-token form, trailing in argv) is rejected at the
2106 /// framework gate via the `CellParentCgroupArg::MissingValue`
2107 /// arm. Previously this shape parsed as "absent", triggered the
2108 /// auto-inject, and produced two copies of the flag in the final
2109 /// argv that clap then rejected with a confused "cannot be used
2110 /// multiple times" diagnostic. The gate intercepts here so the
2111 /// operator gets a "missing value" message anchored to their
2112 /// declaration.
2113 #[test]
2114 #[should_panic(expected = "supplies a bare `--cell-parent-cgroup`")]
2115 fn append_base_sched_args_panics_on_missing_value_via_extra() {
2116 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
2117 let entry = KtstrTestEntry {
2118 name: "missing_value_extra",
2119 scheduler: &SCHED,
2120 extra_sched_args: &["--cell-parent-cgroup"],
2121 ..KtstrTestEntry::DEFAULT
2122 };
2123 let mut args = Vec::new();
2124 append_base_sched_args(&entry, &mut args);
2125 }
2126
2127 /// Bare flag preceded by an unrelated trailing token still trips
2128 /// the MissingValue arm — the parser walks the chain in order,
2129 /// hits the bare flag, and `iter.next()` returns None at end of
2130 /// stream regardless of which unrelated tokens came before it.
2131 #[test]
2132 #[should_panic(expected = "supplies a bare `--cell-parent-cgroup`")]
2133 fn append_base_sched_args_panics_on_missing_value_after_other_flag() {
2134 static SCHED: Scheduler = Scheduler::named("s").cgroup_parent("/sys/fs/cgroup/ktstr");
2135 let entry = KtstrTestEntry {
2136 name: "missing_value_after_other",
2137 scheduler: &SCHED,
2138 extra_sched_args: &["--other-flag", "--cell-parent-cgroup"],
2139 ..KtstrTestEntry::DEFAULT
2140 };
2141 let mut args = Vec::new();
2142 append_base_sched_args(&entry, &mut args);
2143 }
2144
2145 /// Bare flag in the scheduler-def's `sched_args` also trips
2146 /// MissingValue — the parser chains both sources and the
2147 /// universal gate handles them identically.
2148 #[test]
2149 #[should_panic(expected = "supplies a bare `--cell-parent-cgroup`")]
2150 fn append_base_sched_args_panics_on_missing_value_in_scheduler_sched_args() {
2151 static SCHED: Scheduler = Scheduler::named("s")
2152 .cgroup_parent("/sys/fs/cgroup/ktstr")
2153 .sched_args(&["--cell-parent-cgroup"]);
2154 let entry = KtstrTestEntry {
2155 name: "missing_value_scheduler_def",
2156 scheduler: &SCHED,
2157 ..KtstrTestEntry::DEFAULT
2158 };
2159 let mut args = Vec::new();
2160 append_base_sched_args(&entry, &mut args);
2161 }
2162
2163 /// Bare flag with no scheduler default `cgroup_parent`. The
2164 /// universal gate must still fire — the panic message in this
2165 /// case omits the "let the framework auto-inject" suggestion
2166 /// (no default to inject) and adds a hint that an absolute path
2167 /// is required for cell-aware schedulers without a declared
2168 /// default.
2169 #[test]
2170 #[should_panic(expected = "supplies a bare `--cell-parent-cgroup`")]
2171 fn append_base_sched_args_panics_on_missing_value_no_scheduler_cgroup_parent() {
2172 static SCHED: Scheduler = Scheduler::named("s");
2173 let entry = KtstrTestEntry {
2174 name: "missing_value_no_default",
2175 scheduler: &SCHED,
2176 extra_sched_args: &["--cell-parent-cgroup"],
2177 ..KtstrTestEntry::DEFAULT
2178 };
2179 let mut args = Vec::new();
2180 append_base_sched_args(&entry, &mut args);
2181 }
2182
2183 /// Bare flag via scheduler-def `sched_args` with no default
2184 /// `cgroup_parent`. Closes the matrix intersection: a future
2185 /// refactor that gated the MissingValue check on
2186 /// `cgroup_parent.is_some()` (mirroring an earlier regression
2187 /// fixed for Value-invalid) would pass the other 4 MissingValue
2188 /// tests but regress this cell.
2189 #[test]
2190 #[should_panic(expected = "supplies a bare `--cell-parent-cgroup`")]
2191 fn append_base_sched_args_panics_on_missing_value_in_scheduler_sched_args_no_default() {
2192 static SCHED: Scheduler = Scheduler::named("s").sched_args(&["--cell-parent-cgroup"]);
2193 let entry = KtstrTestEntry {
2194 name: "missing_value_scheduler_def_no_default",
2195 scheduler: &SCHED,
2196 ..KtstrTestEntry::DEFAULT
2197 };
2198 let mut args = Vec::new();
2199 append_base_sched_args(&entry, &mut args);
2200 }
2201
2202 /// Bare flag after another flag, with no scheduler default.
2203 /// Completes the after-other-flag × default matrix together with
2204 /// the sibling test that has a default.
2205 #[test]
2206 #[should_panic(expected = "supplies a bare `--cell-parent-cgroup`")]
2207 fn append_base_sched_args_panics_on_missing_value_after_other_flag_no_default() {
2208 static SCHED: Scheduler = Scheduler::named("s");
2209 let entry = KtstrTestEntry {
2210 name: "missing_value_after_other_no_default",
2211 scheduler: &SCHED,
2212 extra_sched_args: &["--other-flag", "--cell-parent-cgroup"],
2213 ..KtstrTestEntry::DEFAULT
2214 };
2215 let mut args = Vec::new();
2216 append_base_sched_args(&entry, &mut args);
2217 }
2218
2219 // -- build_vm_builder_base --
2220
2221 /// Kernel-path surfaces in the builder's "kernel not found" error.
2222 /// Proves the `kernel()` setter is wired through the helper.
2223 #[test]
2224 fn build_vm_builder_base_propagates_kernel_path() {
2225 // build()'s no-perf path reads KTSTR_BYPASS_LLC_LOCKS + KTSTR_CPU_CAP
2226 // before the validation checks. Under the shared env lock, pin
2227 // bypass=1 + cpu_cap unset so build() short-circuits the slot/LLC
2228 // acquire path (no acquire_llc_plan contention; cpu_cap=None avoids
2229 // the bypass+cpu_cap bail), leaving the asserted error the only outcome.
2230 let _l = lock_env();
2231 let _g = EnvVarGuard::set(crate::KTSTR_BYPASS_LLC_LOCKS_ENV, "1");
2232 let _c = EnvVarGuard::remove(crate::KTSTR_CPU_CAP_ENV);
2233 let entry = KtstrTestEntry {
2234 name: "vmb_kernel_path",
2235 ..KtstrTestEntry::DEFAULT
2236 };
2237 let exe = crate::resolve_current_exe().unwrap();
2238 let missing_kernel =
2239 PathBuf::from("/nonexistent/build_vm_builder_base_test_kernel.bzImage");
2240 let result = build_vm_builder_base(
2241 &entry,
2242 &missing_kernel,
2243 &exe,
2244 None,
2245 &[],
2246 crate::vmm::topology::Topology::new(1, 1, 1, 1),
2247 256,
2248 "",
2249 &["run".to_string()],
2250 true,
2251 )
2252 .build();
2253 // `KtstrVm` does not implement Debug, so `.unwrap_err()` is not
2254 // available — collapse Ok into a panic to extract the error by hand.
2255 let err = match result {
2256 Ok(_) => panic!("builder.build() unexpectedly succeeded for missing kernel"),
2257 Err(e) => e,
2258 };
2259 let msg = format!("{err}");
2260 assert!(
2261 msg.contains("kernel not found"),
2262 "expected kernel not found error, got: {msg}",
2263 );
2264 assert!(
2265 msg.contains("build_vm_builder_base_test_kernel"),
2266 "expected the fake kernel path to appear in the error, got: {msg}",
2267 );
2268 }
2269
2270 /// A zero-`llcs` topology is forwarded to the builder and surfaces
2271 /// as a validation error. Proves `topology()` is wired through.
2272 #[test]
2273 fn build_vm_builder_base_propagates_topology_validation() {
2274 // See build_vm_builder_base_propagates_kernel_path: pin bypass=1 +
2275 // cpu_cap unset under the shared env lock so build() short-circuits
2276 // the no-perf slot/LLC path and the asserted error is deterministic.
2277 let _l = lock_env();
2278 let _g = EnvVarGuard::set(crate::KTSTR_BYPASS_LLC_LOCKS_ENV, "1");
2279 let _c = EnvVarGuard::remove(crate::KTSTR_CPU_CAP_ENV);
2280 let entry = KtstrTestEntry {
2281 name: "vmb_topology",
2282 ..KtstrTestEntry::DEFAULT
2283 };
2284 let exe = crate::resolve_current_exe().unwrap();
2285 let bad_topology = crate::vmm::topology::Topology {
2286 llcs: 0,
2287 cores_per_llc: 1,
2288 threads_per_core: 1,
2289 numa_nodes: 1,
2290 nodes: None,
2291 distances: None,
2292 };
2293 let result = build_vm_builder_base(
2294 &entry,
2295 &exe,
2296 &exe,
2297 None,
2298 &[],
2299 bad_topology,
2300 256,
2301 "",
2302 &["run".to_string()],
2303 true,
2304 )
2305 .build();
2306 let err = match result {
2307 Ok(_) => panic!("builder.build() unexpectedly succeeded for zero-llcs topology"),
2308 Err(e) => e,
2309 };
2310 let msg = format!("{err}");
2311 assert!(
2312 msg.contains("llcs must be > 0"),
2313 "expected topology validation error, got: {msg}",
2314 );
2315 }
2316
2317 /// An optional scheduler binary is attached when `Some(path)`
2318 /// is supplied, surfacing as a "scheduler binary not found"
2319 /// error when the path is missing.
2320 #[test]
2321 fn build_vm_builder_base_propagates_scheduler_binary() {
2322 // See build_vm_builder_base_propagates_kernel_path: pin bypass=1 +
2323 // cpu_cap unset under the shared env lock so build() short-circuits
2324 // the no-perf slot/LLC path and the asserted error is deterministic.
2325 let _l = lock_env();
2326 let _g = EnvVarGuard::set(crate::KTSTR_BYPASS_LLC_LOCKS_ENV, "1");
2327 let _c = EnvVarGuard::remove(crate::KTSTR_CPU_CAP_ENV);
2328 let entry = KtstrTestEntry {
2329 name: "vmb_scheduler",
2330 ..KtstrTestEntry::DEFAULT
2331 };
2332 let exe = crate::resolve_current_exe().unwrap();
2333 let missing_scheduler = PathBuf::from("/nonexistent/build_vm_builder_base_test_scheduler");
2334 let result = build_vm_builder_base(
2335 &entry,
2336 &exe,
2337 &exe,
2338 Some(&missing_scheduler),
2339 &[],
2340 crate::vmm::topology::Topology::new(1, 1, 1, 1),
2341 256,
2342 "",
2343 &["run".to_string()],
2344 true,
2345 )
2346 .build();
2347 let err = match result {
2348 Ok(_) => panic!("builder.build() unexpectedly succeeded for missing scheduler"),
2349 Err(e) => e,
2350 };
2351 let msg = format!("{err}");
2352 assert!(
2353 msg.contains("scheduler binary not found"),
2354 "expected scheduler binary error, got: {msg}",
2355 );
2356 assert!(
2357 msg.contains("build_vm_builder_base_test_scheduler"),
2358 "expected the fake scheduler path to appear, got: {msg}",
2359 );
2360 }
2361
2362 // -- vm_timeout_from_entry tests --
2363
2364 #[test]
2365 fn vm_timeout_from_entry_uses_watchdog_when_largest() {
2366 // DEFAULT topology = 2 vCPUs → sys_rdy_budget_ms = 10_300
2367 // (10_000 base + 2×150) → vm_boot_headroom = 20.3 s; base =
2368 // max(60s, 30s, 1s) = 60s. Pin a wide cpuset so 2 vCPUs are
2369 // never oversubscribed (overcommit_ratio floors to 1.0).
2370 let _pin = AllowedCpusPin::new((0..256).collect());
2371 let entry = KtstrTestEntry {
2372 name: "wdog",
2373 watchdog_timeout: Duration::from_secs(60),
2374 duration: Duration::from_secs(30),
2375 ..KtstrTestEntry::DEFAULT
2376 };
2377 assert_eq!(
2378 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2379 Duration::from_millis(80_300)
2380 );
2381 }
2382
2383 #[test]
2384 fn vm_timeout_from_entry_uses_duration_when_largest() {
2385 let _pin = AllowedCpusPin::new((0..256).collect());
2386 let entry = KtstrTestEntry {
2387 name: "dur",
2388 watchdog_timeout: Duration::from_secs(5),
2389 duration: Duration::from_secs(120),
2390 ..KtstrTestEntry::DEFAULT
2391 };
2392 // base = max(5s, 120s, 1s) = 120s; vm_boot_headroom(2) = 20.3 s.
2393 assert_eq!(
2394 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2395 Duration::from_millis(140_300)
2396 );
2397 }
2398
2399 #[test]
2400 fn vm_timeout_from_entry_floor_when_both_small() {
2401 // base floors at 1 s; vm_boot_headroom for 2 vCPUs is 20.3 s.
2402 let _pin = AllowedCpusPin::new((0..256).collect());
2403 let entry = KtstrTestEntry {
2404 name: "tiny",
2405 watchdog_timeout: Duration::from_millis(10),
2406 duration: Duration::from_millis(50),
2407 ..KtstrTestEntry::DEFAULT
2408 };
2409 assert_eq!(
2410 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2411 Duration::from_millis(21_300)
2412 );
2413 }
2414
2415 #[test]
2416 fn vm_timeout_from_default_entry() {
2417 // DEFAULT watchdog = 5 s, duration = 12 s → base = 12 s.
2418 // vm_boot_headroom for 2 vCPUs = 20.3 s → 32.3 s total.
2419 let _pin = AllowedCpusPin::new((0..256).collect());
2420 let entry = KtstrTestEntry {
2421 name: "default",
2422 ..KtstrTestEntry::DEFAULT
2423 };
2424 assert_eq!(
2425 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2426 Duration::from_millis(32_300)
2427 );
2428 }
2429
2430 #[test]
2431 fn vm_timeout_from_entry_scales_headroom_with_topology() {
2432 // A reported case: numa=1, llcs=7, cores=9, threads=2 → 126 vCPUs.
2433 // sys_rdy_budget_ms(126) = 28_900 ms (10_000 base + 126×150) →
2434 // vm_boot_headroom = 38.9 s. base = max(5 s watchdog, 12 s
2435 // duration, 1 s) = 12 s → total = 50.9 s.
2436 // Pins the `entry.topology.total_cpus()` → `vm_boot_headroom` wiring.
2437 // Pin a 256-CPU cpuset so 126 vCPUs are not oversubscribed
2438 // (overcommit_ratio = 126/256 floors to 1.0); the oversub
2439 // multiplier is exercised separately below.
2440 let _pin = AllowedCpusPin::new((0..256).collect());
2441 let entry = KtstrTestEntry {
2442 name: "large_topo",
2443 topology: crate::vmm::topology::Topology {
2444 llcs: 7,
2445 cores_per_llc: 9,
2446 threads_per_core: 2,
2447 numa_nodes: 1,
2448 nodes: None,
2449 distances: None,
2450 },
2451 ..KtstrTestEntry::DEFAULT
2452 };
2453 assert_eq!(
2454 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2455 Duration::from_millis(50_900)
2456 );
2457 }
2458
2459 #[test]
2460 fn vm_timeout_from_entry_scales_on_booted_not_declared_vcpus() {
2461 // Under a TopoOverride the VM boots a different vCPU count than
2462 // entry.topology declares; the boot-headroom deadline must scale
2463 // to the BOOTED count passed in, not entry.topology. A default
2464 // 2-vCPU entry "booted" at 126 vCPUs must get the 126-vCPU
2465 // headroom (50.9 s, matching the declared-126 case above), not
2466 // the declared 2-vCPU headroom (32.3 s). Pin a wide cpuset so
2467 // neither count is oversubscribed (multiplier = 1.0). The
2468 // declared-vs-booted gap is otherwise untested — every other
2469 // vm_timeout test passes entry.topology.total_cpus() (declared ==
2470 // booted).
2471 let _pin = AllowedCpusPin::new((0..256).collect());
2472 let entry = KtstrTestEntry {
2473 name: "booted_override",
2474 ..KtstrTestEntry::DEFAULT
2475 };
2476 assert_eq!(
2477 vm_timeout_from_entry(&entry, 126),
2478 Duration::from_millis(50_900),
2479 );
2480 assert_eq!(
2481 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2482 Duration::from_millis(32_300),
2483 );
2484 assert_ne!(
2485 vm_timeout_from_entry(&entry, 126),
2486 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2487 "the deadline must key on the booted count, not the declared entry.topology",
2488 );
2489 }
2490
2491 // -- overcommit_ratio / oversub-scaled vm_timeout --
2492
2493 #[test]
2494 fn overcommit_ratio_floors_at_one_for_fitting_host() {
2495 // vCPUs <= allowed → not oversubscribed → 1.0 (never < 1).
2496 assert_eq!(overcommit_ratio(8, 192, None), 1.0);
2497 assert_eq!(overcommit_ratio(192, 192, None), 1.0);
2498 }
2499
2500 #[test]
2501 fn overcommit_ratio_auto_collapse_uses_allowed_cpuset() {
2502 // No explicit cpu_budget: the vCPU threads collapse onto the
2503 // whole allowed cpuset. 256 vCPUs on 192 allowed = the CI
2504 // wide-SMP case (~1.33x).
2505 let r = overcommit_ratio(256, 192, None);
2506 assert!((r - 256.0 / 192.0).abs() < 1e-9, "got {r}");
2507 }
2508
2509 #[test]
2510 fn overcommit_ratio_explicit_budget_collapses_onto_min_budget_allowed() {
2511 // Explicit cpu_budget caps the host CPUs the vCPU threads land
2512 // on (the deliberate _overcommit test): 256 / min(64, 192) = 4x.
2513 assert_eq!(overcommit_ratio(256, 192, Some(64)), 4.0);
2514 // A budget wider than the allowed set clamps to allowed.
2515 let r = overcommit_ratio(256, 192, Some(1000));
2516 assert!((r - 256.0 / 192.0).abs() < 1e-9, "got {r}");
2517 }
2518
2519 #[test]
2520 fn overcommit_ratio_guards_empty_cpuset() {
2521 // An unenumerable cpuset (allowed_cpus = 0) must not divide by
2522 // zero — treat it as a 1-CPU host.
2523 assert_eq!(overcommit_ratio(8, 0, None), 8.0);
2524 }
2525
2526 #[test]
2527 fn overcommit_skip_reason_skips_severe_auto_collapse() {
2528 // 256 vCPUs auto-collapse onto 8 host CPUs = 32x ≥ 6x → skip.
2529 // (boot-only path: expect_auto_repro = false.)
2530 let r = overcommit_skip_reason(256, 8, None, false);
2531 assert!(
2532 r.as_deref()
2533 .is_some_and(|m| m.contains("host topology insufficient")),
2534 "32x auto-collapse must skip with the typed reason, got {r:?}",
2535 );
2536 }
2537
2538 #[test]
2539 fn overcommit_skip_reason_runs_ci_wide_smp_ratio() {
2540 // 256 vCPUs on a 192-CPU CI runner = 1.33x < 6x → RUNS (None),
2541 // so wide-SMP boot is validated there, never masked.
2542 assert_eq!(overcommit_skip_reason(256, 192, None, false), None);
2543 }
2544
2545 #[test]
2546 fn overcommit_skip_reason_never_skips_explicit_budget() {
2547 // An explicit cpu_budget is a deliberate oversubscription opt-in
2548 // (contention testing): even 256 vCPUs on 8 host CPUs runs.
2549 assert_eq!(overcommit_skip_reason(256, 8, Some(4), false), None);
2550 }
2551
2552 #[test]
2553 fn overcommit_skip_reason_runs_on_empty_cpuset() {
2554 // An unenumerable cpuset (allowed = 0) cannot compute a ratio →
2555 // does not skip; the overcommit warning is the sole signal there.
2556 assert_eq!(overcommit_skip_reason(256, 0, None, false), None);
2557 }
2558
2559 #[test]
2560 fn overcommit_skip_reason_boundary_is_inclusive_at_cap() {
2561 // ≥ cap skips, just-below runs. 48 vCPUs on 8 = exactly 6.0x → skip;
2562 // 47 on 8 = 5.875x < 6.0 → run. (boot-only path.)
2563 assert!(overcommit_skip_reason(48, 8, None, false).is_some());
2564 assert_eq!(overcommit_skip_reason(47, 8, None, false), None);
2565 }
2566
2567 #[test]
2568 fn overcommit_skip_reason_expect_auto_repro_uses_stricter_cap() {
2569 // The expect_auto_repro inversion chain skips at a much lower
2570 // ratio (EXPECT_AUTO_REPRO_SKIP_RATIO = 2.0x) than a boot-only
2571 // wide test. Pins the failing CI case: 256 vCPUs on a 96-CPU host
2572 // = 2.67x.
2573 // - 2.67x WITH expect_auto_repro -> SKIP (the two-VM wprof chain
2574 // cannot run cleanly under that time-slicing).
2575 let skip = overcommit_skip_reason(256, 96, None, true);
2576 assert!(
2577 skip.as_deref().is_some_and(
2578 |m| m.contains("host topology insufficient") && m.contains("expect_auto_repro")
2579 ),
2580 "2.67x with expect_auto_repro must skip naming the chain, got {skip:?}",
2581 );
2582 // - same 2.67x WITHOUT expect_auto_repro -> RUNS (a single-VM
2583 // wide-SMP boot test still validates boot at 2.67x < 6.0x).
2584 assert_eq!(overcommit_skip_reason(256, 96, None, false), None);
2585 // - the 192-CPU design-target runner (256/192 = 1.33x) RUNS the
2586 // auto-repro hop even with expect_auto_repro (1.33x < 2.0x), so
2587 // the >255 inversion is still validated there.
2588 assert_eq!(overcommit_skip_reason(256, 192, None, true), None);
2589 // - an explicit cpu_budget stays a deliberate opt-in: no skip
2590 // even with expect_auto_repro.
2591 assert_eq!(overcommit_skip_reason(256, 8, Some(4), true), None);
2592 // - boundary: exactly 2.0x with expect_auto_repro skips (>= is
2593 // inclusive); just below runs.
2594 assert!(overcommit_skip_reason(16, 8, None, true).is_some());
2595 assert_eq!(overcommit_skip_reason(15, 8, None, true), None);
2596 }
2597
2598 #[test]
2599 fn vm_timeout_scales_boot_headroom_by_overcommit_ratio() {
2600 // 256 vCPUs (16 LLCs × 16 cores) on a 64-CPU allowed cpuset =
2601 // 4x auto-collapse. The boot headroom (58.4 s) scales by 4x;
2602 // base = max(5 s watchdog, 12 s duration, 1 s) = 12 s →
2603 // 12 + 58.4×4 = 245.6 s. Pins that the ratio multiplies ONLY
2604 // the headroom, not base.
2605 let _pin = AllowedCpusPin::new((0..64).collect());
2606 let entry = KtstrTestEntry {
2607 name: "oversub",
2608 topology: crate::vmm::topology::Topology {
2609 llcs: 16,
2610 cores_per_llc: 16,
2611 threads_per_core: 1,
2612 numa_nodes: 1,
2613 nodes: None,
2614 distances: None,
2615 },
2616 ..KtstrTestEntry::DEFAULT
2617 };
2618 // 12_000 + 58_400 × 4 = 245_600 ms.
2619 assert_eq!(
2620 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2621 Duration::from_millis(245_600)
2622 );
2623 }
2624
2625 #[test]
2626 fn vm_timeout_overcommit_multiplier_clamps_at_cap() {
2627 // 256 vCPUs on an 8-CPU cpuset = 32x, but the headroom
2628 // multiplier clamps at OVERCOMMIT_HEADROOM_CAP (6x) so the
2629 // deadline stays bounded: 12 + 58.4×6 = 362.4 s. (Such a host
2630 // auto-SKIPS upstream; this pins the clamp independently.)
2631 let _pin = AllowedCpusPin::new((0..8).collect());
2632 let entry = KtstrTestEntry {
2633 name: "clamp",
2634 topology: crate::vmm::topology::Topology {
2635 llcs: 16,
2636 cores_per_llc: 16,
2637 threads_per_core: 1,
2638 numa_nodes: 1,
2639 nodes: None,
2640 distances: None,
2641 },
2642 ..KtstrTestEntry::DEFAULT
2643 };
2644 // 12_000 + 58_400 × 6 = 362_400 ms.
2645 assert_eq!(
2646 vm_timeout_from_entry(&entry, entry.topology.total_cpus()),
2647 Duration::from_millis(362_400)
2648 );
2649 }
2650
2651 // -- sys_rdy_budget_ms / vm_boot_headroom --
2652
2653 #[test]
2654 fn sys_rdy_budget_ms_base_plus_linear_per_vcpu() {
2655 // Additive: 10_000 ms base + vcpus × 150. Every topology gets
2656 // the base PLUS its per-vCPU term — no dead floor below 67
2657 // vCPUs (the bug that gave a 64-vCPU VM the same 10 s as a
2658 // 1-vCPU VM).
2659 assert_eq!(sys_rdy_budget_ms(1), 10_150);
2660 assert_eq!(sys_rdy_budget_ms(32), 14_800);
2661 assert_eq!(sys_rdy_budget_ms(66), 19_900);
2662 }
2663
2664 #[test]
2665 fn sys_rdy_budget_ms_scales_linearly_in_band() {
2666 // 10_000 ms base + vcpus × 150, in the band below the 90 s cap.
2667 assert_eq!(sys_rdy_budget_ms(67), 20_050);
2668 // The 126-vCPU case lands at 28.9 s.
2669 assert_eq!(sys_rdy_budget_ms(126), 28_900);
2670 // 256-vCPU wide-SMP gets its FULL additive budget (48.4 s) — the
2671 // case the old 30 s cap truncated to 30 s, starving the boot.
2672 assert_eq!(sys_rdy_budget_ms(256), 48_400);
2673 }
2674
2675 #[test]
2676 fn sys_rdy_budget_ms_caps_at_ninety_seconds() {
2677 // The 512-vCPU MAX_VCPUS topology gets its full additive budget
2678 // (10_000 + 512×150 = 86_800 ms), comfortably under the cap.
2679 assert_eq!(sys_rdy_budget_ms(512), 86_800);
2680 // 533 vCPUs is the last under the 90 s cap (10_000 + 533×150 =
2681 // 89_950); 534 is the first clipped (10_000 + 534×150 = 90_100
2682 // → 90_000). Only pathological >533-vCPU counts clip.
2683 assert_eq!(sys_rdy_budget_ms(533), 89_950);
2684 assert_eq!(sys_rdy_budget_ms(534), 90_000);
2685 assert_eq!(sys_rdy_budget_ms(u32::MAX), 90_000);
2686 }
2687
2688 #[test]
2689 fn sys_rdy_budget_ms_zero_returns_base() {
2690 // Guest fallback when /sys/devices/system/cpu/online is missing:
2691 // 0 vCPUs → the bare 10_000 ms base (no per-vCPU term).
2692 assert_eq!(sys_rdy_budget_ms(0), 10_000);
2693 }
2694
2695 #[test]
2696 fn vm_boot_headroom_is_ten_plus_sys_rdy_budget() {
2697 // KERNEL_INIT_HEADROOM (10 s) + sys_rdy_budget_ms(vcpus).
2698 assert_eq!(vm_boot_headroom(1), Duration::from_millis(20_150));
2699 assert_eq!(vm_boot_headroom(126), Duration::from_millis(38_900));
2700 // 256-vCPU wide-SMP: 10 s + 48.4 s = 58.4 s (un-oversubscribed
2701 // headroom; vm_timeout_from_entry scales THIS by the host
2702 // overcommit ratio).
2703 assert_eq!(vm_boot_headroom(256), Duration::from_millis(58_400));
2704 // 512-vCPU MAX_VCPUS budget (86.8 s) → 96.8 s headroom, uncapped
2705 // under the 90 s ceiling.
2706 assert_eq!(vm_boot_headroom(512), Duration::from_millis(96_800));
2707 }
2708
2709 /// Two calls to `content_hash` with the same input must return
2710 /// the same u64. Pins the within-process determinism invariant
2711 /// against a future regression that swaps in a per-call-seeded
2712 /// hasher — e.g. `std::hash::RandomState::new().build_hasher()`,
2713 /// which increments its keys per call within a process, or any
2714 /// time/thread-id-seeded scheme. Note: swapping to std's
2715 /// `DefaultHasher::new()` would NOT regress this test —
2716 /// `DefaultHasher` is itself `SipHasher13::new_with_keys(0, 0)`
2717 /// and therefore deterministic; the cross-rustc-version
2718 /// stability regression class is caught by the value-pin
2719 /// follow-up, not this assertion.
2720 #[test]
2721 fn content_hash_is_deterministic_across_calls() {
2722 let input = "scheduler config payload";
2723 assert_eq!(content_hash(input), content_hash(input));
2724 }
2725
2726 /// Distinct inputs must produce distinct hashes. Catches a trivial
2727 /// regression (constant-returning hasher) that the determinism
2728 /// test alone would silently accept.
2729 #[test]
2730 fn content_hash_differs_for_distinct_inputs() {
2731 assert_ne!(content_hash("alpha"), content_hash("beta"));
2732 }
2733
2734 /// Cross-toolchain stability pin: every `content_hash` output must
2735 /// equal the SipHasher13(keys=0,0) value emitted at commit time.
2736 /// Pins the algorithm choice — a future swap to a different
2737 /// stable hasher (e.g. xxhash, fxhash) would silently regenerate
2738 /// every content-addressed cache filename on disk, breaking cache
2739 /// hit rates without surfacing as a failed test. The companion
2740 /// `content_hash_is_deterministic_across_calls` pin guards
2741 /// within-process determinism; this pin guards cross-process /
2742 /// cross-toolchain / cross-machine stability.
2743 #[test]
2744 fn content_hash_value_pin() {
2745 // SipHasher13(keys=0,0) over the four corpora below. If any
2746 // assertion fails, the algorithm or its seeding changed —
2747 // STOP. `content_hash` names the inline-config tempfile in
2748 // `config_content_parts` at src/test_support/runtime.rs and
2749 // the export-config tempfile in `export.rs`; flipping the
2750 // hashes silently regenerates those filenames on every
2751 // process, breaking any future scheme that tries to dedup
2752 // across runs and breaking intra-run reproducibility if a
2753 // caller comes to depend on stable byte equality across
2754 // identical inputs. Update only after intentional algorithm
2755 // migration. The four corpora — empty + two short ASCII +
2756 // one realistic config payload — span the cases the
2757 // algorithm needs to handle correctly.
2758 assert_eq!(content_hash(""), 0x30406ea523c53def);
2759 assert_eq!(content_hash("alpha"), 0x3c87f3c3317bd39a);
2760 assert_eq!(content_hash("beta"), 0xbb8fd2aa1487d7ac);
2761 assert_eq!(content_hash("scheduler config payload"), 0xc678971ba48d5f80);
2762 }
2763
2764 /// Per-content-hash inline-config files MUST land inside the
2765 /// per-process `scratch_dir()` subtree, NOT bare
2766 /// `std::env::temp_dir()`. The 0o700 process-owned subdirectory
2767 /// blocks the cross-uid symlink-replacement attack on
2768 /// predictable content-addressed filenames in shared `/tmp`. A
2769 /// future "simplification" that reverts the path to bare
2770 /// `std::env::temp_dir().join(...)` silently restores the
2771 /// attack surface; this test fails loudly first.
2772 #[test]
2773 fn config_content_parts_writes_inside_process_scratch_dir() {
2774 use crate::assert::Assert;
2775 use crate::scenario::Ctx;
2776 use crate::test_support::entry::{
2777 KtstrTestEntry, Scheduler, SchedulerSpec, TopologyConstraints,
2778 };
2779 use crate::vmm::topology::Topology;
2780
2781 static SCHED: Scheduler = Scheduler {
2782 name: "config_parts_test_sched",
2783 binary: SchedulerSpec::Discover("nope"),
2784 sysctls: &[],
2785 kargs: &[],
2786 assert: Assert::NO_OVERRIDES,
2787 cgroup_parent: None,
2788 sched_args: &[],
2789 topology: Topology {
2790 llcs: 1,
2791 cores_per_llc: 1,
2792 threads_per_core: 1,
2793 numa_nodes: 1,
2794 nodes: None,
2795 distances: None,
2796 },
2797 constraints: TopologyConstraints::DEFAULT,
2798 config_file: None,
2799 config_file_def: Some(("--config={file}", "/include-files/p.json")),
2800 kernels: &[],
2801 };
2802 fn func(_: &Ctx) -> anyhow::Result<crate::assert::AssertResult> {
2803 Ok(crate::assert::AssertResult::pass())
2804 }
2805 let entry = KtstrTestEntry {
2806 name: "scratch_dir_path_test",
2807 func,
2808 scheduler: &SCHED,
2809 config_content: Some("{\"sentinel\":42}"),
2810 ..KtstrTestEntry::DEFAULT
2811 };
2812 let (_, host_path, _, _) =
2813 config_content_parts(&entry).expect("config_content_parts returns Some");
2814 assert!(
2815 host_path.starts_with(scratch_dir()),
2816 "config tempfile must live inside the process-owned scratch dir, \
2817 not bare std::env::temp_dir(): got host_path={host_path:?}, \
2818 scratch_dir={:?}",
2819 scratch_dir()
2820 );
2821 }
2822
2823 /// Two same-content calls produce the SAME canonical path
2824 /// (content-addressed naming idempotence). Callers using the
2825 /// returned PathBuf for downstream dedup decisions rely on this
2826 /// — a regression that breaks the content-hash → path mapping
2827 /// would silently spam the scratch dir with per-call distinct
2828 /// names instead of reusing the canonical entry.
2829 #[test]
2830 fn config_content_parts_same_content_same_canonical_path() {
2831 use crate::assert::Assert;
2832 use crate::scenario::Ctx;
2833 use crate::test_support::entry::{
2834 KtstrTestEntry, Scheduler, SchedulerSpec, TopologyConstraints,
2835 };
2836 use crate::vmm::topology::Topology;
2837
2838 static SCHED: Scheduler = Scheduler {
2839 name: "config_parts_idempotent_sched",
2840 binary: SchedulerSpec::Discover("nope"),
2841 sysctls: &[],
2842 kargs: &[],
2843 assert: Assert::NO_OVERRIDES,
2844 cgroup_parent: None,
2845 sched_args: &[],
2846 topology: Topology {
2847 llcs: 1,
2848 cores_per_llc: 1,
2849 threads_per_core: 1,
2850 numa_nodes: 1,
2851 nodes: None,
2852 distances: None,
2853 },
2854 constraints: TopologyConstraints::DEFAULT,
2855 config_file: None,
2856 config_file_def: Some(("--config={file}", "/include-files/p.json")),
2857 kernels: &[],
2858 };
2859 fn func(_: &Ctx) -> anyhow::Result<crate::assert::AssertResult> {
2860 Ok(crate::assert::AssertResult::pass())
2861 }
2862 let entry = KtstrTestEntry {
2863 name: "idempotent_path_test",
2864 func,
2865 scheduler: &SCHED,
2866 config_content: Some("{\"idempotent\":true}"),
2867 ..KtstrTestEntry::DEFAULT
2868 };
2869 let (_, p1, _, _) = config_content_parts(&entry).expect("first call returns Some");
2870 let (_, p2, _, _) = config_content_parts(&entry).expect("second call returns Some");
2871 assert_eq!(
2872 p1, p2,
2873 "same content_content -> same canonical path; content-addressed naming \
2874 must be idempotent across calls"
2875 );
2876 // The filename component encodes the content hash via the
2877 // `ktstr-config-{hash:016x}.json` template; verify the prefix
2878 // so a future filename-template change is caught.
2879 let name = p1.file_name().and_then(|n| n.to_str()).unwrap_or("");
2880 assert!(
2881 name.starts_with("ktstr-config-") && name.ends_with(".json"),
2882 "canonical filename must follow `ktstr-config-{{hash}}.json` template, got: {name}"
2883 );
2884 }
2885}