ktstr/test_support/eval/
kernel.rs

1//! Kernel-image resolution and KVM preflight: /dev/kvm accessibility
2//! check, KernelUnavailable error, resolve_test_kernel discovery, and
3//! the cache reader-lock acquisition. Split out of eval/mod.rs to keep
4//! the module under the size ceiling.
5
6use super::*;
7
8/// Check that `/dev/kvm` is accessible for read+write.
9///
10/// Pre-flight check for VM-booting test runs: every ktstr test needs
11/// a KVM fd, and failing fast here yields an actionable error
12/// ("add your user to the kvm group") before the VM builder starts
13/// allocating memory / fetching kernels.
14///
15/// Errno classification on open failure (two branches):
16/// - Transient host pressure (`ENOMEM` / `EBUSY` / `EMFILE` / `ENFILE`
17///   / `EAGAIN`, mirroring the `TRANSIENT_HOST_ERRNOS` set used by
18///   [`crate::vmm::map_transient_to_contention`]): kernel memory
19///   allocator under load, the kvm misc-device's per-CPU init
20///   contended, the calling process exhausting its `RLIMIT_NOFILE`
21///   (`EMFILE`), the system fd table full (`ENFILE`), or a kernel
22///   subsystem signalling "try again" (`EAGAIN`). Routed through
23///   [`crate::vmm::host_topology::ResourceContention`] so the
24///   `#[ktstr_test]` macro SKIPs the run instead of failing it. The
25///   `EMFILE` / `ENFILE` arms specifically prevent fd-table pressure
26///   on `/dev/kvm` open from surfacing as a hard error with a
27///   misleading "kvm group" hint.
28/// - Everything else (`EACCES` / `ENOENT` / `EINVAL` / etc.):
29///   infrastructure misconfiguration or a real fault — the device is
30///   missing, the user lacks permission, or the kernel returned an
31///   unexpected errno. Surfaced as a hard error with the actionable
32///   "kvm group" hint; SKIP-classifying these would silently mask a
33///   misconfigured runner.
34pub(crate) fn ensure_kvm() -> Result<()> {
35    match std::fs::OpenOptions::new()
36        .read(true)
37        .write(true)
38        .open("/dev/kvm")
39    {
40        Ok(_) => Ok(()),
41        Err(e) => {
42            let errno = e.raw_os_error();
43            if matches!(
44                errno,
45                Some(libc::ENOMEM)
46                    | Some(libc::EBUSY)
47                    | Some(libc::EMFILE)
48                    | Some(libc::ENFILE)
49                    | Some(libc::EAGAIN)
50            ) {
51                let snapshot = vmm::host_resource_snapshot();
52                let errno_label = match errno {
53                    Some(libc::ENOMEM) => "ENOMEM",
54                    Some(libc::EBUSY) => "EBUSY",
55                    Some(libc::EMFILE) => "EMFILE",
56                    Some(libc::ENFILE) => "ENFILE",
57                    Some(libc::EAGAIN) => "EAGAIN",
58                    _ => unreachable!(),
59                };
60                Err(anyhow::Error::new(
61                    crate::vmm::host_topology::ResourceContention {
62                        reason: format!(
63                            "/dev/kvm open: transient host errno {errno_label}: \
64                             host resources: {snapshot}\n  \
65                             hint: KVM device open failed with a host-resource \
66                             errno; another peer may be holding the budget. \
67                             nextest will not retry; the SKIP banner records \
68                             this attempt for stats tooling.",
69                        ),
70                    },
71                ))
72            } else {
73                Err(anyhow::Error::new(e).context(
74                    "/dev/kvm not accessible — KVM is required for ktstr_test. \
75                     Check that KVM is enabled and your user is in the kvm group.",
76                ))
77            }
78        }
79    }
80}
81
82// ---------------------------------------------------------------------------
83// Scheduler resolution
84// ---------------------------------------------------------------------------
85
86// ---------------------------------------------------------------------------
87// Kernel resolution
88// ---------------------------------------------------------------------------
89
/// Typed marker for "no bootable kernel image could be located for
/// the test VM".
///
/// Carries the actionable diagnostic that [`resolve_test_kernel`]
/// produces when neither `KTSTR_TEST_KERNEL` nor any standard cache /
/// sysroot location yielded an image.
///
/// It is a dedicated type rather than a plain `anyhow::bail!` so the
/// `#[ktstr_test]` macro's wrapper can downcast to it and print a SKIP
/// banner instead of panicking — the usual symptom of running under
/// `cargo nextest run` rather than `cargo ktstr test`. The macro's
/// predicate goes through
/// [`crate::test_support::is_kernel_unavailable`]; downcast directly
/// when adding new SKIP arms.
#[derive(Debug)]
pub struct KernelUnavailable {
    /// Human-readable explanation, rendered verbatim by `Display`.
    pub diagnostic: String,
}

impl std::fmt::Display for KernelUnavailable {
    /// Writes the stored diagnostic unchanged; formatter width / fill
    /// flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.diagnostic)
    }
}

// No underlying cause to expose: the default `source()` is sufficient.
impl std::error::Error for KernelUnavailable {}
115
116/// Find a kernel image for running tests.
117///
118/// Checks `KTSTR_TEST_KERNEL` env var first (direct image path),
119/// then delegates to [`crate::find_kernel()`] for cache and
120/// filesystem discovery. Returns a typed [`KernelUnavailable`] on
121/// failure so the `#[ktstr_test]` macro wrapper can map it onto a
122/// clean SKIP banner — generic `anyhow` errors propagate to the
123/// panic arm and surface as confusing test failures when the
124/// binary runs outside `cargo ktstr test`.
125pub fn resolve_test_kernel() -> Result<PathBuf> {
126    // Check environment variable first. A set-but-missing
127    // `KTSTR_TEST_KERNEL` is an OPERATOR mistake (they pointed at
128    // a path that doesn't exist), not a "harness not configured"
129    // situation — surface it as a regular anyhow error so the
130    // panic arm catches it. Skipping on a typo would silently mask
131    // the bad path.
132    if let Ok(path) = std::env::var(crate::KTSTR_TEST_KERNEL_ENV) {
133        let p = PathBuf::from(&path);
134        anyhow::ensure!(p.exists(), "KTSTR_TEST_KERNEL not found: {path}");
135        return Ok(p);
136    }
137
138    // Standard locations.
139    if let Some(p) = crate::find_kernel()? {
140        return Ok(p);
141    }
142
143    let image_name = if cfg!(target_arch = "aarch64") {
144        "Image"
145    } else {
146        "bzImage"
147    };
148    Err(anyhow::Error::new(KernelUnavailable {
149        diagnostic: format!(
150            "no kernel found — the test harness was likely invoked \
151             outside `cargo ktstr test` (which builds and injects a \
152             kernel automatically).\n  \
153             hint: run `cargo ktstr test --kernel <path-or-version>` \
154             to drive this test, or set KTSTR_TEST_KERNEL=/path/to/{image_name} \
155             to point at a pre-built bootable image directly.\n  \
156             hint: {kernel_hint}",
157            kernel_hint = crate::KTSTR_KERNEL_HINT,
158        ),
159    }))
160}
161
/// Classifier for the timeout-bail message emitted by the
/// [`crate::flock`] helper.
///
/// Yields `true` only when `rendered` contains *both* of the
/// substrings `"timed out after"` and `"flock LOCK_"`. Their joint
/// presence is the helper's internal contract for a flock-acquisition
/// timeout — see the bail format in `flock/acquire.rs`:
/// `"flock {LOCK_EX|LOCK_SH} on {context} timed out after ..."`.
///
/// The unit test `flock_timeout_substring_classification_pins_seam`
/// pins this seam, so rewording the bail message in a way that drops
/// either substring fails at test time rather than letting
/// [`acquire_test_kernel_lock_if_cached`] misclassify timeouts as
/// plain anyhow errors.
pub(crate) fn is_flock_timeout_message(rendered: &str) -> bool {
    // Every fragment must appear; order and position are irrelevant.
    const REQUIRED_FRAGMENTS: [&str; 2] = ["timed out after", "flock LOCK_"];
    REQUIRED_FRAGMENTS
        .iter()
        .all(|&fragment| rendered.contains(fragment))
}
180
181/// If `kernel_path` resolves to an image inside a cache entry, hold a
182/// `LOCK_SH` on that entry's coordination lockfile for the duration of
183/// the returned guard. Prevents a concurrent
184/// `cargo ktstr kernel build` from swapping the entry's directory
185/// (see [`crate::cache::CacheDir::store`]) under the VM while the test
186/// reads from it.
187///
188/// Returns `Ok(None)` when `kernel_path` is not shaped like a cache
189/// entry — explicit `KTSTR_TEST_KERNEL=/path/to/bzImage`,
190/// `/lib/modules/.../vmlinuz`, `/boot/vmlinuz-*`, or any path whose
191/// two-level parent does not match the resolved cache root. Such
192/// paths do not need coordination because the build pipeline never
193/// touches them.
194///
195/// Detection: the image is expected at `{root}/{key}/{image_name}`.
196/// Walk `kernel_path` up by two components (image_name, key) to
197/// produce a candidate root and canonicalize both sides before
198/// comparing — symlinks, redundant `./` segments, and `..` traversals
199/// must all reduce to the same inode path or the entry is treated as
200/// non-cache.
201pub(crate) fn acquire_test_kernel_lock_if_cached(
202    kernel_path: &Path,
203) -> Result<Option<crate::cache::SharedLockGuard>> {
204    // Peel the image filename. Fail → not a cache entry.
205    let Some(entry_dir) = kernel_path.parent() else {
206        return Ok(None);
207    };
208    // Peel the entry directory name (this is the candidate cache
209    // key). Fail → not a cache entry.
210    let Some(key_os) = entry_dir.file_name() else {
211        return Ok(None);
212    };
213    let Some(cache_key) = key_os.to_str() else {
214        return Ok(None);
215    };
216    // The directory above the entry is the candidate cache root.
217    let Some(candidate_root) = entry_dir.parent() else {
218        return Ok(None);
219    };
220
221    // Canonicalize both the candidate root and the resolved cache
222    // root so symlinks / `.` / `..` reduce to the same inode path
223    // before comparing. A non-cache path (e.g. /lib/modules/...)
224    // simply canonicalizes to itself and will not match.
225    let candidate_root_canon = match candidate_root.canonicalize() {
226        Ok(p) => p,
227        Err(_) => return Ok(None),
228    };
229    let resolved_root = match crate::cache::CacheDir::default_root() {
230        Ok(p) => p,
231        // Cache root unresolvable (no HOME and no XDG_CACHE_HOME, or
232        // non-UTF-8 KTSTR_CACHE_DIR): no cache exists, so `kernel_path`
233        // cannot be an entry. (A cache root that resolves but is absent
234        // on disk is handled by the canonicalize() arm below.)
235        Err(_) => return Ok(None),
236    };
237    let resolved_root_canon = match resolved_root.canonicalize() {
238        Ok(p) => p,
239        // Cache root resolves but does not exist on disk yet (fresh
240        // developer checkout). `kernel_path` is not inside a cache
241        // entry, so no lock needed.
242        Err(_) => return Ok(None),
243    };
244
245    if candidate_root_canon != resolved_root_canon {
246        return Ok(None);
247    }
248
249    // The path is shaped as a cache entry under the resolved root.
250    // Acquire the reader lock. The flock helper polls on
251    // `EAGAIN`/`EWOULDBLOCK` until either the lock is granted or its
252    // wall-clock timeout elapses. A timeout means a peer (concurrent
253    // `cargo ktstr kernel build` or another reader-blocking writer)
254    // is holding the lock — that is host-resource contention, not a
255    // kernel fault, so route it through
256    // [`crate::vmm::host_topology::ResourceContention`] so the
257    // `#[ktstr_test]` macro SKIPs cleanly and stats tooling records
258    // the attempt via the per-site sidecar. Non-timeout failures
259    // (parent-directory creation failure, an unexpected `try_flock`
260    // errno other than `EAGAIN`/`EWOULDBLOCK`) propagate as hard
261    // errors — they indicate filesystem corruption or a programming
262    // fault that SKIP-skipping would silently mask.
263    //
264    // Detection seam: the flock helper's bail format starts with
265    // `flock LOCK_SH on` (or `LOCK_EX`) and contains `timed out
266    // after`. Both substrings are pinned by the helper's internal
267    // contract and embedded in the rendered message together; the
268    // message also contains the lockfile path and the holder PID
269    // list parsed from `/proc/locks`, which we forward verbatim into
270    // the `ResourceContention` reason so the operator sees the
271    // identical triage information either way.
272    let cache = crate::cache::CacheDir::with_root(resolved_root_canon);
273    match cache.acquire_shared_lock(cache_key) {
274        Ok(guard) => Ok(Some(guard)),
275        Err(e) => {
276            let rendered = format!("{e:#}");
277            if is_flock_timeout_message(&rendered) {
278                let snapshot = crate::vmm::host_resource_snapshot();
279                Err(anyhow::Error::new(
280                    crate::vmm::host_topology::ResourceContention {
281                        reason: format!(
282                            "test kernel cache lock: {rendered}. host resources: \
283                             {snapshot}\n  \
284                             hint: a concurrent `cargo ktstr kernel build` or \
285                             another lockholder is preventing the test VM from \
286                             reading the cached kernel image. nextest will not \
287                             retry; the SKIP banner records this attempt for \
288                             stats tooling. Wait for the holder PIDs above to \
289                             finish, or kill them, then retry.",
290                        ),
291                    },
292                ))
293            } else {
294                Err(e)
295            }
296        }
297    }
298}