ktstr/test_support/eval/scheduler.rs
//! Scheduler-binary resolution: maps a `SchedulerSpec` to a path plus a
//! `ResolveSource` provenance (the discovery cascade, PATH lookup,
//! staged-scheduler ordering) and deduplicates include-file lists. Split
//! out of eval/mod.rs to keep the module under the size ceiling.
5
6use super::*;
7
8/// Dedupe a resolved include-file list produced by unioning the
9/// per-payload `include_files` specs through
10/// [`crate::cli::resolve_include_files`] and appending the scheduler
11/// config file entry. Each input tuple carries an `origin` label
12/// (e.g. `"declarative"`, `"scheduler config_file"`) that is
13/// surfaced in conflict diagnostics so the operator can trace which
14/// declaration contributed each side of a collision.
15///
16/// Policy:
17///
18/// - Identical `(archive_path, host_path)` pairs collapse silently
19/// (the same host file declared twice is harmless). Comparison
20/// uses [`Path::canonicalize`] so two spellings of the same real
21/// file (e.g. `./fio` vs `/usr/bin/fio` when `./fio` is a
22/// symlink) are treated as equal. Canonicalization failure
23/// (missing path, permission denied) falls back to byte-for-byte
24/// PathBuf comparison; literal duplicates still collapse, and a
25/// genuine conflict still surfaces.
26/// - Two entries sharing an `archive_path` but resolving to
27/// different canonical `host_path`s are a genuine ambiguity — a
28/// scheduler's and a payload's `include_files` both claiming
29/// `include-files/config.json` but pointing at different host
30/// paths means one of the two would silently overwrite the other
31/// in the initramfs. Bail with a diagnostic naming both host
32/// paths AND their origin labels so the author can rename one
33/// archive slot.
34///
35/// Case-sensitivity: `archive_path` keys are compared
36/// byte-for-byte (via `BTreeMap<String, _>`), so on a case-
37/// insensitive host filesystem (macOS HFS+, NTFS with the
38/// `case-insensitive` mount flag) two archive paths spelled
39/// `include-files/Helper` and `include-files/helper` are treated
40/// as distinct here even though the host filesystem would
41/// conflate them. This is intentional: `archive_path` is the
42/// path inside the guest initramfs, which is tmpfs / ext4-
43/// equivalent (always case-sensitive), so the guest-side
44/// identity is what governs.
45///
46/// Order is stabilized via `BTreeMap`'s sorted iteration so the
47/// emitted slice is deterministic regardless of which caller
48/// appended first. Extracted from `run_ktstr_test_inner` so the
49/// policy can be unit-tested without constructing a whole
50/// KtstrTestEntry + VmBuilder.
51pub(crate) fn dedupe_include_files(
52 resolved: &[(String, std::path::PathBuf, &'static str)],
53) -> Result<Vec<(String, std::path::PathBuf)>> {
54 let mut seen: std::collections::BTreeMap<String, (std::path::PathBuf, &'static str)> =
55 std::collections::BTreeMap::new();
56 for (archive, host, origin) in resolved {
57 if let Some((existing, existing_origin)) = seen.get(archive) {
58 // Canonicalize both sides before comparing so
59 // symlink-equivalent spellings collapse. A failed
60 // canonicalize (missing path, permission denied) falls
61 // back to the uncanonicalized value so the structural
62 // compare still runs — literal duplicates still collapse
63 // and genuine conflicts still surface.
64 let existing_canon = existing.canonicalize().unwrap_or_else(|_| existing.clone());
65 let host_canon = host.canonicalize().unwrap_or_else(|_| host.clone());
66 if existing_canon != host_canon {
67 anyhow::bail!(
68 "include_files conflict for archive path '{archive}': sources disagree \
69 on host path ({} [origin: {existing_origin}] vs {} [origin: {origin}]). \
70 Remove the duplicate declaration or rename one of the archive entries.",
71 existing.display(),
72 host.display(),
73 );
74 }
75 } else {
76 seen.insert(archive.clone(), (host.clone(), origin));
77 }
78 }
79 Ok(seen
80 .into_iter()
81 .map(|(archive, (host, _origin))| (archive, host))
82 .collect())
83}
84
/// Which discovery branch of [`resolve_scheduler`] produced a scheduler
/// binary.
///
/// Downstream tooling (sidecar, cache-key construction, log lines)
/// uses the variant to tell "a pre-built binary was found in a target
/// directory whose git hash we don't control" apart from "this binary
/// was just built from HEAD in the current workspace, so its source
/// commit is the workspace HEAD."
///
/// [`AutoBuilt`](Self::AutoBuilt) is the only variant with an honest
/// source-commit guarantee. Every other branch locates an *existing*
/// file whose provenance is outside this process's knowledge, so
/// callers stamping a sidecar with a scheduler-specific commit must
/// discard the hash for every non-`AutoBuilt` resolution — a stale
/// `target/debug/` binary looks identical to a fresh `AutoBuilt` one
/// but can be arbitrarily old.
///
/// Specs that bypass the discovery cascade:
/// - `Eevdf` / `KernelBuiltin` → [`NotFound`](Self::NotFound): no
///   user-space binary is involved and the tuple's `Option<PathBuf>`
///   is `None`.
/// - `Path(p)` → [`Path`](Self::Path): the caller named the binary
///   explicitly in the test entry, so no env-var or filesystem search
///   runs.
///
/// Variants are declared in the discovery cascade order of
/// [`resolve_scheduler`] so a reviewer can scan both lists in lockstep.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolveSource {
    /// The literal path supplied as `SchedulerSpec::Path(p)`. It
    /// arrived in the test entry directly; no env-var or filesystem
    /// search was involved. Trusted only as far as the caller trusts
    /// the argument — git-hash provenance is UNKNOWN to this process.
    Path,
    /// An environment-variable override on the `SchedulerSpec::Discover`
    /// arm: the global `KTSTR_SCHEDULER`, or the per-name
    /// `KTSTR_SCHEDULER_BIN_<NAME>` override that is consulted ahead of
    /// it. Trusted only as far as the caller trusts the variable —
    /// git-hash provenance is UNKNOWN to this process.
    EnvVar,
    /// A `$PATH` lookup. Produced only when `KTSTR_CARGO_TEST_MODE` is
    /// active and a binary with the requested name was found on the
    /// user's `$PATH`, ahead of the sibling-dir / target-dir cascade.
    /// Git-hash provenance UNKNOWN — the binary may be a system-wide
    /// install, a prior build, or a custom one staged for this run.
    PathLookup,
    /// A sibling of `crate::resolve_current_exe`: the same directory,
    /// or the directory above a `deps/` directory (integration tests /
    /// nextest). Git-hash provenance UNKNOWN — the binary may come
    /// from any previous build.
    SiblingDir,
    /// The `target/debug/` fallback search. Git-hash provenance
    /// UNKNOWN — a stale binary from an older tree passes this check
    /// exactly like a fresh one.
    TargetDebug,
    /// The `target/release/` fallback search. Git-hash provenance
    /// UNKNOWN — same stale-binary hazard as
    /// [`TargetDebug`](Self::TargetDebug).
    TargetRelease,
    /// Built on demand inside this process by
    /// [`crate::build_and_find_binary`]. The build targets the current
    /// workspace's HEAD by construction, making this the ONLY variant
    /// whose source commit is known to match the workspace tree the
    /// tests run from.
    AutoBuilt,
    /// No user-space binary path was produced. Returned for
    /// `SchedulerSpec::Eevdf` and `SchedulerSpec::KernelBuiltin`, where
    /// the kernel supplies the scheduler and there is nothing to
    /// locate. The tuple's `Option<PathBuf>` is always `None` here.
    NotFound,
}
158
159impl ResolveSource {
160 /// Stable snake_case tag for the sidecar `resolve_source` field and
161 /// the `stats` `--resolve-source` filter — the string analog of the
162 /// variant, mirroring the `run_source` tag convention so the
163 /// persisted JSON shape does not depend on this enum's Rust
164 /// representation. Variant order matches the discovery cascade.
165 pub const fn as_str(&self) -> &'static str {
166 match self {
167 Self::Path => "path",
168 Self::EnvVar => "env_var",
169 Self::PathLookup => "path_lookup",
170 Self::SiblingDir => "sibling_dir",
171 Self::TargetDebug => "target_debug",
172 Self::TargetRelease => "target_release",
173 Self::AutoBuilt => "auto_built",
174 Self::NotFound => "not_found",
175 }
176 }
177}
178
/// Search the `$PATH` directories, in order, for an executable called
/// `name`.
///
/// A candidate matches when it is a regular file with at least one
/// execute permission bit set; the first match wins. Returns `None`
/// when `PATH` is unset, empty, or holds no matching executable.
///
/// Mirrors the semantics of `which(1)` and the
/// `crate::export::search_path_for` helper without pulling in a new
/// crate dependency. [`resolve_scheduler`] calls this only when
/// `KTSTR_CARGO_TEST_MODE` is active, so the existing nextest /
/// `cargo ktstr test` discovery cascade stays in front of any
/// system-wide install on PATH for the production test path.
fn find_on_path(name: &str) -> Option<PathBuf> {
    use std::os::unix::fs::PermissionsExt;

    // Owner, group, and other execute bits.
    const ANY_EXEC_BIT: u32 = 0o111;

    let search_path = std::env::var_os("PATH")?;
    std::env::split_paths(&search_path)
        .map(|dir| dir.join(name))
        .find(|candidate| {
            // A metadata failure (missing entry, unreadable directory)
            // simply means "not a match"; keep scanning.
            candidate.metadata().is_ok_and(|meta| {
                meta.is_file() && (meta.permissions().mode() & ANY_EXEC_BIT) != 0
            })
        })
}
208
209/// Resolve every entry in `entry.staged_schedulers` via a caller-
210/// supplied resolver, propagating resolver errors strictly (suitable
211/// for the primary-dispatch path where a missing staged binary is a
212/// hard failure operator should see at dispatch time, not later at
213/// Op-dispatch inside the VM). KernelBuiltin / Eevdf staged entries
214/// — whose resolver returns `Ok(None)` — are silently dropped:
215/// they have no binary to stage and the lifecycle ops resolve them
216/// via shell-script slots instead.
217///
218/// Returns `(name, resolved_host_path, sched_args)` tuples in the
219/// SAME order as `entry.staged_schedulers` iteration. Ordering is
220/// load-bearing: the initramfs packer iterates the result
221/// to emit per-scheduler `/staging/schedulers/<name>/` archive
222/// entries, and parent-directory dependencies are encounter-order
223/// sensitive. Tests pin the order-preservation against a future
224/// refactor that uses `.collect::<HashMap<_,_>>().into_iter()`
225/// (would silently scramble).
226///
227/// `resolver` is a closure rather than a direct call to
228/// [`resolve_scheduler`] so unit tests can drive the order-
229/// preservation contract with a synthetic resolver that returns
230/// known paths without touching the host filesystem.
231pub(crate) fn resolve_staged_schedulers_strict<F>(
232 entry: &KtstrTestEntry,
233 mut resolver: F,
234) -> Result<Vec<(String, PathBuf, Vec<String>)>>
235where
236 F: FnMut(&SchedulerSpec) -> Result<Option<PathBuf>>,
237{
238 let mut out = Vec::with_capacity(entry.staged_schedulers.len());
239 for staged in entry.staged_schedulers {
240 let Some(host_path) = resolver(&staged.binary)? else {
241 continue;
242 };
243 out.push((
244 staged.name.to_string(),
245 host_path,
246 staged.sched_args.iter().map(|s| s.to_string()).collect(),
247 ));
248 }
249 Ok(out)
250}
251
252/// True when `KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK` is set to a
253/// NON-EMPTY value — the knowing-operator opt-out that lets a failed
254/// orchestrated `cargo build -p <sched>` fall back to a pre-built
255/// sibling / `target/{debug,release}/` binary AS-IS instead of failing
256/// the test. Default (unset / empty) refuses the stale fallback so a
257/// build that fails for a new reason cannot silently validate against an
258/// old scheduler. Empty-string rejection mirrors
259/// [`crate::cargo_test_mode::cargo_test_mode_active`] so a stray
260/// `KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK=` from a CI shell cannot
261/// re-enable the hazard.
262fn allow_stale_scheduler_fallback() -> bool {
263 std::env::var(crate::KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK_ENV)
264 .map(|v| !v.is_empty())
265 .unwrap_or(false)
266}
267
268/// Resolve a scheduler binary from a `SchedulerSpec`.
269///
270/// Returns the resolved path (if any) paired with the
271/// [`ResolveSource`] naming the discovery branch that produced it.
272/// The source is load-bearing for downstream provenance: only
273/// [`ResolveSource::AutoBuilt`] guarantees the binary matches the
274/// current workspace tree; every other variant locates a
275/// pre-existing file whose git hash is UNKNOWN to this process.
276///
277/// Variant mapping:
278/// - `Eevdf` / `KernelBuiltin { .. }` → `(None, NotFound)` (no
279/// user-space binary).
280/// - `Path(p)` → `(Some(p), Path)` (explicit caller-named path;
281/// validated for existence).
282/// - `Discover(name)` → cascade through `KTSTR_SCHEDULER` env
283/// ([`EnvVar`](ResolveSource::EnvVar)), `$PATH` lookup when
284/// `KTSTR_CARGO_TEST_MODE` is active
285/// ([`PathLookup`](ResolveSource::PathLookup)), sibling of
286/// `current_exe` ([`SiblingDir`](ResolveSource::SiblingDir)),
287/// `target/debug/` ([`TargetDebug`](ResolveSource::TargetDebug)),
288/// `target/release/` ([`TargetRelease`](ResolveSource::TargetRelease)),
289/// on-demand build ([`AutoBuilt`](ResolveSource::AutoBuilt)). In the
290/// orchestrated (non-cargo-test) flow the on-demand build runs FIRST
291/// and a build FAILURE REFUSES (returns the error) rather than serving
292/// a stale pre-built binary, unless
293/// [`KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK`](crate::KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK_ENV)
294/// is set. Exhausting every branch is a hard error. The PATH lookup is
295/// only enabled in cargo-test mode so the existing nextest /
296/// `cargo ktstr test` discovery cascade remains canonical
297/// (sibling-of-test-binary first) — pulling a system-wide
298/// `scx_layered` ahead of a workspace-built one would corrupt
299/// gauntlet runs whose results must reflect the in-tree
300/// scheduler revision.
301pub fn resolve_scheduler(spec: &SchedulerSpec) -> Result<(Option<PathBuf>, ResolveSource)> {
302 match spec {
303 SchedulerSpec::Eevdf | SchedulerSpec::KernelBuiltin { .. } => {
304 Ok((None, ResolveSource::NotFound))
305 }
306 SchedulerSpec::Path(p) => {
307 let path = PathBuf::from(p);
308 anyhow::ensure!(
309 path.exists(),
310 "scheduler binary at '{p}' does not exist on disk. \
311 SchedulerSpec::Path treats its argument as an \
312 already-built binary — build the scheduler first \
313 (e.g. cargo build -p scx_<name>) and pass its \
314 target/debug/scx_<name> path, or correct the path if \
315 it has shifted."
316 );
317 Ok((Some(path), ResolveSource::Path))
318 }
319 SchedulerSpec::Discover(name) => {
320 // 0. Per-name override KTSTR_SCHEDULER_BIN_<NAME>. Checked FIRST
321 // so a test declaring multiple distinct Discover schedulers
322 // can point each at its own binary; the global
323 // KTSTR_SCHEDULER below collapses them all to one path. A
324 // set-but-missing path falls through to the global + cascade
325 // (lenient, matching the global's own behavior).
326 if let Ok(p) = std::env::var(crate::per_name_scheduler_env(name)) {
327 let path = PathBuf::from(&p);
328 if path.exists() {
329 return Ok((Some(path), ResolveSource::EnvVar));
330 }
331 }
332
333 // 1. KTSTR_SCHEDULER env var (global / coarse fallback —
334 // applies to every Discover scheduler regardless of name).
335 if let Ok(p) = std::env::var(crate::KTSTR_SCHEDULER_ENV) {
336 let path = PathBuf::from(&p);
337 if path.exists() {
338 return Ok((Some(path), ResolveSource::EnvVar));
339 }
340 }
341
342 // 1b. KTSTR_CARGO_TEST_MODE: try $PATH lookup so a user
343 // who installed scx_layered (or scx-ktstr) on PATH can
344 // run the test without going through the cargo-ktstr
345 // wrapper or having a target/debug/ build of the
346 // scheduler. Only active in cargo-test mode — outside
347 // that mode the sibling-dir / target-dir cascade below
348 // remains authoritative so gauntlet runs land on the
349 // workspace-built scheduler revision.
350 if crate::cargo_test_mode::cargo_test_mode_active()
351 && let Some(found) = find_on_path(name)
352 {
353 return Ok((Some(found), ResolveSource::PathLookup));
354 }
355
356 // 1c. Orchestrated (non-cargo-test-mode) flow: prefer a
357 // fresh workspace build. `cargo build -p {name}` rebuilds
358 // the scheduler when its sources (incl. src/bpf/*.bpf.c via
359 // its build.rs) changed and is a fast no-op when
360 // up-to-date, so an edited scheduler never runs stale. The
361 // sibling / target-dir cascade below returns a pre-built
362 // binary AS-IS with no staleness check, so serving it after
363 // a build that was expected to succeed would silently
364 // validate the test against a stale scheduler. Therefore a
365 // build FAILURE here REFUSES by default — it returns the
366 // error, which propagates to a hard test failure on the
367 // result surface (not a swallowed eprintln). The cascade
368 // below is reached on this path ONLY when
369 // KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK is set (the knowing-
370 // operator opt-out for a momentarily-broken cargo). build_*
371 // also errors when cargo is absent or no bin artifact is
372 // produced; the refusal covers all three — a run that cannot
373 // produce a fresh binary must not silently use a stale one.
374 // cargo-test-mode is excluded entirely: it targets an
375 // installed scheduler (PATH lookup above) without a
376 // workspace build, so its cascade is legitimate.
377 if !crate::cargo_test_mode::cargo_test_mode_active() {
378 match crate::build_and_find_binary(name) {
379 Ok(path) => return Ok((Some(path), ResolveSource::AutoBuilt)),
380 Err(e) => {
381 if !allow_stale_scheduler_fallback() {
382 // Attach the SchedulerBuildRefused marker (inner) so
383 // dispatch forces a hard FAIL even under expect_err,
384 // then the operator-facing message (outer, shown first
385 // by {e:#}). build_and_find_binary's cargo-stderr stays
386 // innermost in the chain.
387 return Err(e
388 .context(crate::test_support::eval::SchedulerBuildRefused)
389 .context(format!(
390 "ktstr_test: workspace build of scheduler \
391 '{name}' failed; refusing to validate against \
392 a possibly-stale pre-built binary. Fix the \
393 build, or set \
394 KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK=1 to fall \
395 back to a pre-built sibling/target-dir binary \
396 AS-IS."
397 )));
398 }
399 eprintln!(
400 "ktstr_test: workspace build of scheduler '{name}' \
401 failed ({e:#}); KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK \
402 set — falling back to a pre-built binary if present"
403 );
404 }
405 }
406 }
407
408 // 2. Sibling of current executable (or parent of deps/)
409 if let Ok(exe) = crate::resolve_current_exe()
410 && let Some(dir) = exe.parent()
411 {
412 let candidate = dir.join(name);
413 if candidate.exists() {
414 return Ok((Some(candidate), ResolveSource::SiblingDir));
415 }
416 // Integration tests and nextest place test binaries in
417 // target/{debug,release}/deps/. The scheduler binary is
418 // one level up in target/{debug,release}/.
419 if dir.file_name().is_some_and(|d| d == "deps")
420 && let Some(parent) = dir.parent()
421 {
422 let candidate = parent.join(name);
423 if candidate.exists() {
424 return Ok((Some(candidate), ResolveSource::SiblingDir));
425 }
426 }
427 }
428
429 // 3-4. target/{debug,release}/ pre-built fallbacks (reached
430 // only when the build-first step could not run). Probe the
431 // profile-matching dir FIRST: the scheduler defaults to the
432 // release profile (see `build_and_find_binary`), so prefer
433 // target/release/ over a possibly-stale target/debug/ binary
434 // unless KTSTR_SCHEDULER_PROFILE=dev explicitly selects the
435 // debug tree.
436 let prefer_release = crate::scheduler_profile_name() != "dev";
437 for (dir, source) in target_dir_probe_order(prefer_release) {
438 let candidate = PathBuf::from(dir).join(name);
439 if candidate.exists() {
440 return Ok((Some(candidate), source));
441 }
442 }
443
444 // 5. Build the scheduler package on demand — ONLY in
445 // cargo-test-mode, which skips the build-first step 1c, so this
446 // is its FIRST build attempt when the PATH / sibling / target-dir
447 // lookups all miss. The non-cargo-test flow already ran the build
448 // in step 1c (returning Ok, refusing on failure, or — under the
449 // opt-out — falling through here intending a PRE-BUILT binary), so
450 // re-running the build here would be redundant; skip straight to
451 // the bail.
452 if crate::cargo_test_mode::cargo_test_mode_active() {
453 match crate::build_and_find_binary(name) {
454 Ok(path) => return Ok((Some(path), ResolveSource::AutoBuilt)),
455 Err(e) => {
456 eprintln!("ktstr_test: auto-build scheduler '{name}' failed: {e:#}")
457 }
458 }
459 }
460
461 anyhow::bail!(
462 "scheduler '{name}' not found. Set KTSTR_SCHEDULER or \
463 place it next to the test binary or in target/{{debug,release}}/"
464 )
465 }
466 }
467}
468
469/// Order to probe the pre-built `target/{debug,release}/` scheduler
470/// binaries in the `Discover` cascade fallback: the profile-matching
471/// directory first. With `prefer_release` (the release-profile default —
472/// see [`crate::scheduler_profile_name`] — unless `KTSTR_SCHEDULER_PROFILE=dev`)
473/// the release build is the intended one, so `target/release/` is probed
474/// before a possibly-stale opposite-profile `target/debug/` binary. Pure +
475/// `pub(crate)` so the reorder is unit-testable without staging a CWD.
476pub(crate) fn target_dir_probe_order(prefer_release: bool) -> [(&'static str, ResolveSource); 2] {
477 if prefer_release {
478 [
479 ("target/release", ResolveSource::TargetRelease),
480 ("target/debug", ResolveSource::TargetDebug),
481 ]
482 } else {
483 [
484 ("target/debug", ResolveSource::TargetDebug),
485 ("target/release", ResolveSource::TargetRelease),
486 ]
487 }
488}