ktstr/test_support/eval/
scheduler.rs

1//! Scheduler-binary resolution: maps a `SchedulerSpec` to a path plus a
2//! `ResolveSource` provenance (the discovery cascade, PATH lookup,
3//! staged-scheduler ordering) and dedups include-file lists. Split out
4//! of eval/mod.rs to keep the module under the size ceiling.
5
6use super::*;
7
8/// Dedupe a resolved include-file list produced by unioning the
9/// per-payload `include_files` specs through
10/// [`crate::cli::resolve_include_files`] and appending the scheduler
11/// config file entry. Each input tuple carries an `origin` label
12/// (e.g. `"declarative"`, `"scheduler config_file"`) that is
13/// surfaced in conflict diagnostics so the operator can trace which
14/// declaration contributed each side of a collision.
15///
16/// Policy:
17///
18/// - Identical `(archive_path, host_path)` pairs collapse silently
19///   (the same host file declared twice is harmless). Comparison
20///   uses [`Path::canonicalize`] so two spellings of the same real
21///   file (e.g. `./fio` vs `/usr/bin/fio` when `./fio` is a
22///   symlink) are treated as equal. Canonicalization failure
23///   (missing path, permission denied) falls back to byte-for-byte
24///   PathBuf comparison; literal duplicates still collapse, and a
25///   genuine conflict still surfaces.
26/// - Two entries sharing an `archive_path` but resolving to
27///   different canonical `host_path`s are a genuine ambiguity — a
28///   scheduler's and a payload's `include_files` both claiming
29///   `include-files/config.json` but pointing at different host
30///   paths means one of the two would silently overwrite the other
31///   in the initramfs. Bail with a diagnostic naming both host
32///   paths AND their origin labels so the author can rename one
33///   archive slot.
34///
35/// Case-sensitivity: `archive_path` keys are compared
36/// byte-for-byte (via `BTreeMap<String, _>`), so on a case-
37/// insensitive host filesystem (macOS HFS+, NTFS with the
38/// `case-insensitive` mount flag) two archive paths spelled
39/// `include-files/Helper` and `include-files/helper` are treated
40/// as distinct here even though the host filesystem would
41/// conflate them. This is intentional: `archive_path` is the
42/// path inside the guest initramfs, which is tmpfs / ext4-
43/// equivalent (always case-sensitive), so the guest-side
44/// identity is what governs.
45///
46/// Order is stabilized via `BTreeMap`'s sorted iteration so the
47/// emitted slice is deterministic regardless of which caller
48/// appended first. Extracted from `run_ktstr_test_inner` so the
49/// policy can be unit-tested without constructing a whole
50/// KtstrTestEntry + VmBuilder.
51pub(crate) fn dedupe_include_files(
52    resolved: &[(String, std::path::PathBuf, &'static str)],
53) -> Result<Vec<(String, std::path::PathBuf)>> {
54    let mut seen: std::collections::BTreeMap<String, (std::path::PathBuf, &'static str)> =
55        std::collections::BTreeMap::new();
56    for (archive, host, origin) in resolved {
57        if let Some((existing, existing_origin)) = seen.get(archive) {
58            // Canonicalize both sides before comparing so
59            // symlink-equivalent spellings collapse. A failed
60            // canonicalize (missing path, permission denied) falls
61            // back to the uncanonicalized value so the structural
62            // compare still runs — literal duplicates still collapse
63            // and genuine conflicts still surface.
64            let existing_canon = existing.canonicalize().unwrap_or_else(|_| existing.clone());
65            let host_canon = host.canonicalize().unwrap_or_else(|_| host.clone());
66            if existing_canon != host_canon {
67                anyhow::bail!(
68                    "include_files conflict for archive path '{archive}': sources disagree \
69                     on host path ({} [origin: {existing_origin}] vs {} [origin: {origin}]). \
70                     Remove the duplicate declaration or rename one of the archive entries.",
71                    existing.display(),
72                    host.display(),
73                );
74            }
75        } else {
76            seen.insert(archive.clone(), (host.clone(), origin));
77        }
78    }
79    Ok(seen
80        .into_iter()
81        .map(|(archive, (host, _origin))| (archive, host))
82        .collect())
83}
84
/// Which discovery branch of [`resolve_scheduler`] produced a
/// scheduler binary.
///
/// Downstream tooling (sidecar, cache-key construction, log lines)
/// uses this to tell "we found a pre-built binary whose git hash we
/// don't control" apart from "we just built this binary from the
/// current workspace and therefore know its source commit."
///
/// Only [`AutoBuilt`](Self::AutoBuilt) carries an honest
/// source-commit guarantee: every other branch locates an *existing*
/// file whose provenance is outside this process's knowledge.
/// Callers that stamp a sidecar with a scheduler-specific commit
/// must discard the hash for every non-`AutoBuilt` resolution — a
/// stale `target/debug/` binary looks identical to a fresh
/// `AutoBuilt` one but can be arbitrarily old.
///
/// Specs that bypass the discovery cascade:
/// - `Eevdf` / `KernelBuiltin` → [`NotFound`](Self::NotFound) (no
///   user-space binary involved; the tuple's `Option<PathBuf>` is
///   `None`).
/// - `Path(p)` → [`Path`](Self::Path) (the caller named the binary
///   explicitly in the test entry — no env-var or filesystem search
///   runs).
///
/// Variants are declared roughly in cascade order, but declaration
/// order is not the exact runtime order: outside cargo-test mode
/// [`resolve_scheduler`] attempts the workspace build
/// ([`AutoBuilt`](Self::AutoBuilt)) *before* the sibling /
/// target-dir probes, and the two `target/` directories are probed
/// profile-matching first (see `target_dir_probe_order`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolveSource {
    /// The literal path the caller supplied as
    /// `SchedulerSpec::Path(p)`. No env-var or filesystem search is
    /// involved. Trusted to the extent the caller trusts the
    /// argument; git-hash provenance is UNKNOWN to this process.
    Path,
    /// An environment-variable override on the
    /// `SchedulerSpec::Discover` arm: either the per-name
    /// `KTSTR_SCHEDULER_BIN_<NAME>` variable or the global
    /// `KTSTR_SCHEDULER` variable. Trusted to the extent the caller
    /// trusts the variable; git-hash provenance is UNKNOWN to this
    /// process.
    EnvVar,
    /// A `$PATH` lookup. Only produced when `KTSTR_CARGO_TEST_MODE`
    /// is active and a binary by the requested name was found on
    /// `$PATH`, which is consulted ahead of the sibling-dir /
    /// target-dir probes. Git-hash provenance UNKNOWN — the binary
    /// on PATH may be a system-wide install, a prior build, or a
    /// custom one the user staged for this run.
    PathLookup,
    /// A file next to `crate::resolve_current_exe` (same directory,
    /// or the parent of a `deps/` directory for integration tests /
    /// nextest). Git-hash provenance UNKNOWN — the binary may be
    /// from any previous build.
    SiblingDir,
    /// A pre-built file found under `target/debug/`. Git-hash
    /// provenance UNKNOWN — a stale binary from an older tree passes
    /// this check identically to a fresh one.
    TargetDebug,
    /// A pre-built file found under `target/release/`. Git-hash
    /// provenance UNKNOWN — same stale-binary hazard as
    /// [`TargetDebug`](Self::TargetDebug).
    TargetRelease,
    /// Built on demand by [`crate::build_and_find_binary`] inside
    /// this process. The build targets the current workspace by
    /// construction — the ONLY variant where the source commit is
    /// known to match the workspace tree the tests run from.
    AutoBuilt,
    /// No user-space binary path was produced. Returned for
    /// `SchedulerSpec::Eevdf` and `SchedulerSpec::KernelBuiltin` (the
    /// kernel supplies the scheduler — no binary to locate). The
    /// tuple's `Option<PathBuf>` is always `None` for this variant.
    NotFound,
}

impl ResolveSource {
    /// Stable snake_case tag for this variant.
    ///
    /// Used for the sidecar `resolve_source` field and the `stats`
    /// `--resolve-source` filter, mirroring the `run_source` tag
    /// convention so the persisted JSON shape does not depend on
    /// this enum's Rust representation. These strings are part of
    /// the persisted format: add a tag for a new variant, but never
    /// rename an existing one.
    pub const fn as_str(&self) -> &'static str {
        // Exhaustive on purpose (no `_` arm): adding a variant must
        // fail to compile until it is given a tag.
        match *self {
            ResolveSource::Path => "path",
            ResolveSource::EnvVar => "env_var",
            ResolveSource::PathLookup => "path_lookup",
            ResolveSource::SiblingDir => "sibling_dir",
            ResolveSource::TargetDebug => "target_debug",
            ResolveSource::TargetRelease => "target_release",
            ResolveSource::AutoBuilt => "auto_built",
            ResolveSource::NotFound => "not_found",
        }
    }
}
178
/// Search the directories listed in `$PATH`, in order, for an
/// executable called `name`.
///
/// A candidate `dir/name` matches when it is a regular file
/// (symlinks are followed) and its mode has at least one execute
/// bit set. Only the mode bits are inspected; whether the *current*
/// user is actually allowed to execute the file is not checked.
///
/// Returns the first match, or `None` when `PATH` is unset, empty,
/// or contains no matching executable.
///
/// Modeled on `which(1)` and the `crate::export::search_path_for`
/// helper, without pulling in a new crate dependency. Used by
/// [`resolve_scheduler`] only when `KTSTR_CARGO_TEST_MODE` is
/// active, so the nextest / `cargo ktstr test` discovery cascade
/// stays in front of any system-wide install on PATH for the
/// production test path.
fn find_on_path(name: &str) -> Option<PathBuf> {
    use std::os::unix::fs::PermissionsExt;

    // Any of the user / group / other execute bits qualifies.
    const ANY_EXEC_BIT: u32 = 0o111;

    let search_path = std::env::var_os("PATH")?;
    std::env::split_paths(&search_path)
        .map(|dir| dir.join(name))
        .find(|candidate| {
            candidate.is_file()
                && candidate
                    .metadata()
                    // A metadata error (e.g. the file vanished
                    // between the two checks) counts as "not
                    // executable" rather than aborting the search.
                    .map(|meta| meta.permissions().mode() & ANY_EXEC_BIT != 0)
                    .unwrap_or(false)
        })
}
208
209/// Resolve every entry in `entry.staged_schedulers` via a caller-
210/// supplied resolver, propagating resolver errors strictly (suitable
211/// for the primary-dispatch path where a missing staged binary is a
212/// hard failure operator should see at dispatch time, not later at
213/// Op-dispatch inside the VM). KernelBuiltin / Eevdf staged entries
214/// — whose resolver returns `Ok(None)` — are silently dropped:
215/// they have no binary to stage and the lifecycle ops resolve them
216/// via shell-script slots instead.
217///
218/// Returns `(name, resolved_host_path, sched_args)` tuples in the
219/// SAME order as `entry.staged_schedulers` iteration. Ordering is
220/// load-bearing: the initramfs packer iterates the result
221/// to emit per-scheduler `/staging/schedulers/<name>/` archive
222/// entries, and parent-directory dependencies are encounter-order
223/// sensitive. Tests pin the order-preservation against a future
224/// refactor that uses `.collect::<HashMap<_,_>>().into_iter()`
225/// (would silently scramble).
226///
227/// `resolver` is a closure rather than a direct call to
228/// [`resolve_scheduler`] so unit tests can drive the order-
229/// preservation contract with a synthetic resolver that returns
230/// known paths without touching the host filesystem.
231pub(crate) fn resolve_staged_schedulers_strict<F>(
232    entry: &KtstrTestEntry,
233    mut resolver: F,
234) -> Result<Vec<(String, PathBuf, Vec<String>)>>
235where
236    F: FnMut(&SchedulerSpec) -> Result<Option<PathBuf>>,
237{
238    let mut out = Vec::with_capacity(entry.staged_schedulers.len());
239    for staged in entry.staged_schedulers {
240        let Some(host_path) = resolver(&staged.binary)? else {
241            continue;
242        };
243        out.push((
244            staged.name.to_string(),
245            host_path,
246            staged.sched_args.iter().map(|s| s.to_string()).collect(),
247        ));
248    }
249    Ok(out)
250}
251
252/// True when `KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK` is set to a
253/// NON-EMPTY value — the knowing-operator opt-out that lets a failed
254/// orchestrated `cargo build -p <sched>` fall back to a pre-built
255/// sibling / `target/{debug,release}/` binary AS-IS instead of failing
256/// the test. Default (unset / empty) refuses the stale fallback so a
257/// build that fails for a new reason cannot silently validate against an
258/// old scheduler. Empty-string rejection mirrors
259/// [`crate::cargo_test_mode::cargo_test_mode_active`] so a stray
260/// `KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK=` from a CI shell cannot
261/// re-enable the hazard.
262fn allow_stale_scheduler_fallback() -> bool {
263    std::env::var(crate::KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK_ENV)
264        .map(|v| !v.is_empty())
265        .unwrap_or(false)
266}
267
/// Resolve a scheduler binary from a `SchedulerSpec`.
///
/// Returns the resolved path (if any) paired with the
/// [`ResolveSource`] naming the discovery branch that produced it.
/// The source is load-bearing for downstream provenance: only
/// [`ResolveSource::AutoBuilt`] guarantees the binary matches the
/// current workspace tree; every other variant locates a
/// pre-existing file whose git hash is UNKNOWN to this process.
///
/// Variant mapping:
/// - `Eevdf` / `KernelBuiltin { .. }` → `(None, NotFound)` (no
///   user-space binary).
/// - `Path(p)` → `(Some(p), Path)` (explicit caller-named path;
///   validated for existence only).
/// - `Discover(name)` → the first step below that yields a path:
///   - step 0 — per-name env override `KTSTR_SCHEDULER_BIN_<NAME>`
///     (variable name produced by `crate::per_name_scheduler_env`)
///     → [`EnvVar`](ResolveSource::EnvVar).
///   - step 1 — global `KTSTR_SCHEDULER` env var
///     → [`EnvVar`](ResolveSource::EnvVar). For both env steps a
///     set-but-nonexistent path is ignored and the cascade
///     continues.
///   - step 1b — cargo-test mode only: `$PATH` lookup
///     → [`PathLookup`](ResolveSource::PathLookup).
///   - step 1c — orchestrated (non-cargo-test) flow only: on-demand
///     workspace build → [`AutoBuilt`](ResolveSource::AutoBuilt). A
///     build FAILURE REFUSES (returns the error) rather than
///     serving a stale pre-built binary, unless
///     [`KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK`](crate::KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK_ENV)
///     is set, in which case the failure is printed to stderr and
///     the cascade continues.
///   - step 2 — sibling of `current_exe`, or of its parent when the
///     executable lives in a `deps/` directory
///     → [`SiblingDir`](ResolveSource::SiblingDir).
///   - steps 3-4 — `target/release/` and `target/debug/` (relative
///     paths), probed in the order given by
///     [`target_dir_probe_order`]
///     → [`TargetRelease`](ResolveSource::TargetRelease) /
///     [`TargetDebug`](ResolveSource::TargetDebug).
///   - step 5 — cargo-test mode only: on-demand build
///     → [`AutoBuilt`](ResolveSource::AutoBuilt); a failure here is
///     printed to stderr and resolution falls through to the final
///     error.
///
/// The PATH lookup is only enabled in cargo-test mode so the
/// existing nextest / `cargo ktstr test` discovery cascade remains
/// canonical — pulling a system-wide `scx_layered` ahead of a
/// workspace-built one would corrupt gauntlet runs whose results
/// must reflect the in-tree scheduler revision.
///
/// # Errors
///
/// - `Path(p)` where `p` does not exist on disk.
/// - `Discover(name)` in the orchestrated flow when the workspace
///   build fails and the stale-fallback opt-out is not set; the
///   error chain carries the `SchedulerBuildRefused` marker.
/// - `Discover(name)` when every branch is exhausted.
pub fn resolve_scheduler(spec: &SchedulerSpec) -> Result<(Option<PathBuf>, ResolveSource)> {
    match spec {
        // Kernel-supplied schedulers: nothing to locate.
        SchedulerSpec::Eevdf | SchedulerSpec::KernelBuiltin { .. } => {
            Ok((None, ResolveSource::NotFound))
        }
        // Explicit path: no search, only an existence check.
        SchedulerSpec::Path(p) => {
            let path = PathBuf::from(p);
            anyhow::ensure!(
                path.exists(),
                "scheduler binary at '{p}' does not exist on disk. \
                 SchedulerSpec::Path treats its argument as an \
                 already-built binary — build the scheduler first \
                 (e.g. cargo build -p scx_<name>) and pass its \
                 target/debug/scx_<name> path, or correct the path if \
                 it has shifted."
            );
            Ok((Some(path), ResolveSource::Path))
        }
        SchedulerSpec::Discover(name) => {
            // 0. Per-name override KTSTR_SCHEDULER_BIN_<NAME>. Checked FIRST
            // so a test declaring multiple distinct Discover schedulers
            // can point each at its own binary; the global
            // KTSTR_SCHEDULER below collapses them all to one path. A
            // set-but-missing path falls through to the global + cascade
            // (lenient, matching the global's own behavior).
            if let Ok(p) = std::env::var(crate::per_name_scheduler_env(name)) {
                let path = PathBuf::from(&p);
                if path.exists() {
                    return Ok((Some(path), ResolveSource::EnvVar));
                }
            }

            // 1. KTSTR_SCHEDULER env var (global / coarse fallback —
            // applies to every Discover scheduler regardless of name).
            // A set-but-missing path is ignored and the cascade
            // continues.
            if let Ok(p) = std::env::var(crate::KTSTR_SCHEDULER_ENV) {
                let path = PathBuf::from(&p);
                if path.exists() {
                    return Ok((Some(path), ResolveSource::EnvVar));
                }
            }

            // 1b. KTSTR_CARGO_TEST_MODE: try $PATH lookup so a user
            // who installed scx_layered (or scx-ktstr) on PATH can
            // run the test without going through the cargo-ktstr
            // wrapper or having a target/debug/ build of the
            // scheduler. Only active in cargo-test mode — outside
            // that mode the sibling-dir / target-dir cascade below
            // remains authoritative so gauntlet runs land on the
            // workspace-built scheduler revision.
            if crate::cargo_test_mode::cargo_test_mode_active()
                && let Some(found) = find_on_path(name)
            {
                return Ok((Some(found), ResolveSource::PathLookup));
            }

            // 1c. Orchestrated (non-cargo-test-mode) flow: prefer a
            // fresh workspace build. `cargo build -p {name}` rebuilds
            // the scheduler when its sources (incl. src/bpf/*.bpf.c via
            // its build.rs) changed and is a fast no-op when
            // up-to-date, so an edited scheduler never runs stale. The
            // sibling / target-dir cascade below returns a pre-built
            // binary AS-IS with no staleness check, so serving it after
            // a build that was expected to succeed would silently
            // validate the test against a stale scheduler. Therefore a
            // build FAILURE here REFUSES by default — it returns the
            // error, which propagates to a hard test failure on the
            // result surface (not a swallowed eprintln). The cascade
            // below is reached on this path ONLY when
            // KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK is set (the knowing-
            // operator opt-out for a momentarily-broken cargo). build_*
            // also errors when cargo is absent or no bin artifact is
            // produced; the refusal covers all three — a run that cannot
            // produce a fresh binary must not silently use a stale one.
            // cargo-test-mode is excluded entirely: it targets an
            // installed scheduler (PATH lookup above) without a
            // workspace build, so its cascade is legitimate.
            if !crate::cargo_test_mode::cargo_test_mode_active() {
                match crate::build_and_find_binary(name) {
                    Ok(path) => return Ok((Some(path), ResolveSource::AutoBuilt)),
                    Err(e) => {
                        if !allow_stale_scheduler_fallback() {
                            // Attach the SchedulerBuildRefused marker (inner) so
                            // dispatch forces a hard FAIL even under expect_err,
                            // then the operator-facing message (outer, shown first
                            // by {e:#}). build_and_find_binary's cargo-stderr stays
                            // innermost in the chain.
                            return Err(e
                                .context(crate::test_support::eval::SchedulerBuildRefused)
                                .context(format!(
                                    "ktstr_test: workspace build of scheduler \
                                     '{name}' failed; refusing to validate against \
                                     a possibly-stale pre-built binary. Fix the \
                                     build, or set \
                                     KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK=1 to fall \
                                     back to a pre-built sibling/target-dir binary \
                                     AS-IS."
                                )));
                        }
                        // Opt-out set: report the build failure on stderr
                        // and continue to the pre-built probes below.
                        eprintln!(
                            "ktstr_test: workspace build of scheduler '{name}' \
                             failed ({e:#}); KTSTR_SCHEDULER_ALLOW_STALE_FALLBACK \
                             set — falling back to a pre-built binary if present"
                        );
                    }
                }
            }

            // 2. Sibling of current executable (or parent of deps/)
            if let Ok(exe) = crate::resolve_current_exe()
                && let Some(dir) = exe.parent()
            {
                let candidate = dir.join(name);
                if candidate.exists() {
                    return Ok((Some(candidate), ResolveSource::SiblingDir));
                }
                // Integration tests and nextest place test binaries in
                // target/{debug,release}/deps/. The scheduler binary is
                // one level up in target/{debug,release}/.
                if dir.file_name().is_some_and(|d| d == "deps")
                    && let Some(parent) = dir.parent()
                {
                    let candidate = parent.join(name);
                    if candidate.exists() {
                        return Ok((Some(candidate), ResolveSource::SiblingDir));
                    }
                }
            }

            // 3-4. target/{debug,release}/ pre-built fallbacks. Reached
            // only in cargo-test mode (which skips the build-first step
            // 1c) or, in the orchestrated flow, after a failed build with
            // the stale-fallback opt-out set. Probe the profile-matching
            // dir FIRST: the scheduler defaults to the release profile
            // (see `build_and_find_binary`), so prefer target/release/
            // over a possibly-stale target/debug/ binary unless
            // KTSTR_SCHEDULER_PROFILE=dev explicitly selects the debug
            // tree. The probed paths are relative ("target/..."), so they
            // are interpreted against the process's current directory.
            let prefer_release = crate::scheduler_profile_name() != "dev";
            for (dir, source) in target_dir_probe_order(prefer_release) {
                let candidate = PathBuf::from(dir).join(name);
                if candidate.exists() {
                    return Ok((Some(candidate), source));
                }
            }

            // 5. Build the scheduler package on demand — ONLY in
            // cargo-test-mode, which skips the build-first step 1c, so this
            // is its FIRST build attempt when the PATH / sibling / target-dir
            // lookups all miss. The non-cargo-test flow already ran the build
            // in step 1c (returning Ok, refusing on failure, or — under the
            // opt-out — falling through here intending a PRE-BUILT binary), so
            // re-running the build here would be redundant; skip straight to
            // the bail.
            if crate::cargo_test_mode::cargo_test_mode_active() {
                match crate::build_and_find_binary(name) {
                    Ok(path) => return Ok((Some(path), ResolveSource::AutoBuilt)),
                    Err(e) => {
                        // Best-effort: log the failure, then fall through
                        // to the generic "not found" error below.
                        eprintln!("ktstr_test: auto-build scheduler '{name}' failed: {e:#}")
                    }
                }
            }

            // Every branch missed.
            anyhow::bail!(
                "scheduler '{name}' not found. Set KTSTR_SCHEDULER or \
                 place it next to the test binary or in target/{{debug,release}}/"
            )
        }
    }
}
468
469/// Order to probe the pre-built `target/{debug,release}/` scheduler
470/// binaries in the `Discover` cascade fallback: the profile-matching
471/// directory first. With `prefer_release` (the release-profile default —
472/// see [`crate::scheduler_profile_name`] — unless `KTSTR_SCHEDULER_PROFILE=dev`)
473/// the release build is the intended one, so `target/release/` is probed
474/// before a possibly-stale opposite-profile `target/debug/` binary. Pure +
475/// `pub(crate)` so the reorder is unit-testable without staging a CWD.
476pub(crate) fn target_dir_probe_order(prefer_release: bool) -> [(&'static str, ResolveSource); 2] {
477    if prefer_release {
478        [
479            ("target/release", ResolveSource::TargetRelease),
480            ("target/debug", ResolveSource::TargetDebug),
481        ]
482    } else {
483        [
484            ("target/debug", ResolveSource::TargetDebug),
485            ("target/release", ResolveSource::TargetRelease),
486        ]
487    }
488}