ktstr/test_support/eval/kernel.rs
1//! Kernel-image resolution and KVM preflight: /dev/kvm accessibility
2//! check, KernelUnavailable error, resolve_test_kernel discovery, and
3//! the cache reader-lock acquisition. Split out of eval/mod.rs to keep
4//! the module under the size ceiling.
5
6use super::*;
7
8/// Check that `/dev/kvm` is accessible for read+write.
9///
10/// Pre-flight check for VM-booting test runs: every ktstr test needs
11/// a KVM fd, and failing fast here yields an actionable error
12/// ("add your user to the kvm group") before the VM builder starts
13/// allocating memory / fetching kernels.
14///
15/// Errno classification on open failure (two branches):
16/// - Transient host pressure (`ENOMEM` / `EBUSY` / `EMFILE` / `ENFILE`
17/// / `EAGAIN`, mirroring the `TRANSIENT_HOST_ERRNOS` set used by
18/// [`crate::vmm::map_transient_to_contention`]): kernel memory
19/// allocator under load, the kvm misc-device's per-CPU init
20/// contended, the calling process exhausting its `RLIMIT_NOFILE`
21/// (`EMFILE`), the system fd table full (`ENFILE`), or a kernel
22/// subsystem signalling "try again" (`EAGAIN`). Routed through
23/// [`crate::vmm::host_topology::ResourceContention`] so the
24/// `#[ktstr_test]` macro SKIPs the run instead of failing it. The
25/// `EMFILE` / `ENFILE` arms specifically prevent fd-table pressure
26/// on `/dev/kvm` open from surfacing as a hard error with a
27/// misleading "kvm group" hint.
28/// - Everything else (`EACCES` / `ENOENT` / `EINVAL` / etc.):
29/// infrastructure misconfiguration or a real fault — the device is
30/// missing, the user lacks permission, or the kernel returned an
31/// unexpected errno. Surfaced as a hard error with the actionable
32/// "kvm group" hint; SKIP-classifying these would silently mask a
33/// misconfigured runner.
34pub(crate) fn ensure_kvm() -> Result<()> {
35 match std::fs::OpenOptions::new()
36 .read(true)
37 .write(true)
38 .open("/dev/kvm")
39 {
40 Ok(_) => Ok(()),
41 Err(e) => {
42 let errno = e.raw_os_error();
43 if matches!(
44 errno,
45 Some(libc::ENOMEM)
46 | Some(libc::EBUSY)
47 | Some(libc::EMFILE)
48 | Some(libc::ENFILE)
49 | Some(libc::EAGAIN)
50 ) {
51 let snapshot = vmm::host_resource_snapshot();
52 let errno_label = match errno {
53 Some(libc::ENOMEM) => "ENOMEM",
54 Some(libc::EBUSY) => "EBUSY",
55 Some(libc::EMFILE) => "EMFILE",
56 Some(libc::ENFILE) => "ENFILE",
57 Some(libc::EAGAIN) => "EAGAIN",
58 _ => unreachable!(),
59 };
60 Err(anyhow::Error::new(
61 crate::vmm::host_topology::ResourceContention {
62 reason: format!(
63 "/dev/kvm open: transient host errno {errno_label}: \
64 host resources: {snapshot}\n \
65 hint: KVM device open failed with a host-resource \
66 errno; another peer may be holding the budget. \
67 nextest will not retry; the SKIP banner records \
68 this attempt for stats tooling.",
69 ),
70 },
71 ))
72 } else {
73 Err(anyhow::Error::new(e).context(
74 "/dev/kvm not accessible — KVM is required for ktstr_test. \
75 Check that KVM is enabled and your user is in the kvm group.",
76 ))
77 }
78 }
79 }
80}
81
82// ---------------------------------------------------------------------------
83// Scheduler resolution
84// ---------------------------------------------------------------------------
85
86// ---------------------------------------------------------------------------
87// Kernel resolution
88// ---------------------------------------------------------------------------
89
/// Typed marker error meaning "the test harness could not locate a
/// kernel image to boot the VM against".
///
/// Carries the actionable diagnostic that [`resolve_test_kernel`]
/// produces when neither `KTSTR_TEST_KERNEL` nor any standard cache /
/// sysroot location yielded a bootable image.
///
/// Kept separate from a generic `anyhow::bail!` so the
/// `#[ktstr_test]` macro's wrapper can downcast to this type and emit
/// a SKIP banner instead of panicking — the canonical symptom of
/// running under `cargo nextest run` rather than `cargo ktstr test`.
/// The macro's predicate goes through
/// [`crate::test_support::is_kernel_unavailable`]; downcast directly
/// when adding new SKIP arms.
#[derive(Debug)]
pub struct KernelUnavailable {
    /// Complete operator-facing message. The `Display` impl emits this
    /// string verbatim, with nothing added around it.
    pub diagnostic: String,
}

impl std::fmt::Display for KernelUnavailable {
    /// Writes `diagnostic` unchanged. Width, fill and precision flags
    /// on the caller's format spec are not applied to the text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.diagnostic)
    }
}

// No `source()`: this error is a leaf that only carries its message.
impl std::error::Error for KernelUnavailable {}
115
116/// Find a kernel image for running tests.
117///
118/// Checks `KTSTR_TEST_KERNEL` env var first (direct image path),
119/// then delegates to [`crate::find_kernel()`] for cache and
120/// filesystem discovery. Returns a typed [`KernelUnavailable`] on
121/// failure so the `#[ktstr_test]` macro wrapper can map it onto a
122/// clean SKIP banner — generic `anyhow` errors propagate to the
123/// panic arm and surface as confusing test failures when the
124/// binary runs outside `cargo ktstr test`.
125pub fn resolve_test_kernel() -> Result<PathBuf> {
126 // Check environment variable first. A set-but-missing
127 // `KTSTR_TEST_KERNEL` is an OPERATOR mistake (they pointed at
128 // a path that doesn't exist), not a "harness not configured"
129 // situation — surface it as a regular anyhow error so the
130 // panic arm catches it. Skipping on a typo would silently mask
131 // the bad path.
132 if let Ok(path) = std::env::var(crate::KTSTR_TEST_KERNEL_ENV) {
133 let p = PathBuf::from(&path);
134 anyhow::ensure!(p.exists(), "KTSTR_TEST_KERNEL not found: {path}");
135 return Ok(p);
136 }
137
138 // Standard locations.
139 if let Some(p) = crate::find_kernel()? {
140 return Ok(p);
141 }
142
143 let image_name = if cfg!(target_arch = "aarch64") {
144 "Image"
145 } else {
146 "bzImage"
147 };
148 Err(anyhow::Error::new(KernelUnavailable {
149 diagnostic: format!(
150 "no kernel found — the test harness was likely invoked \
151 outside `cargo ktstr test` (which builds and injects a \
152 kernel automatically).\n \
153 hint: run `cargo ktstr test --kernel <path-or-version>` \
154 to drive this test, or set KTSTR_TEST_KERNEL=/path/to/{image_name} \
155 to point at a pre-built bootable image directly.\n \
156 hint: {kernel_hint}",
157 kernel_hint = crate::KTSTR_KERNEL_HINT,
158 ),
159 }))
160}
161
/// Recognise the rendered text of a [`crate::flock`] acquisition
/// timeout.
///
/// A message counts as a flock timeout only when it contains BOTH
/// `"timed out after"` and `"flock LOCK_"`. Matching is by
/// case-sensitive substring and the two markers may appear in either
/// order. Together they are the helper's internal contract for a
/// timeout bail — see `flock/acquire.rs`'s format
/// `"flock {LOCK_EX|LOCK_SH} on {context} timed out after ..."`.
///
/// The unit test `flock_timeout_substring_classification_pins_seam`
/// pins this seam, so a rewording of the bail message that drops
/// either marker fails at test time instead of letting
/// [`acquire_test_kernel_lock_if_cached`] misclassify timeouts as
/// plain anyhow errors.
pub(crate) fn is_flock_timeout_message(rendered: &str) -> bool {
    // Every marker must be present; a single match is not enough.
    const REQUIRED_MARKERS: [&str; 2] = ["timed out after", "flock LOCK_"];
    REQUIRED_MARKERS
        .iter()
        .all(|&marker| rendered.contains(marker))
}
180
181/// If `kernel_path` resolves to an image inside a cache entry, hold a
182/// `LOCK_SH` on that entry's coordination lockfile for the duration of
183/// the returned guard. Prevents a concurrent
184/// `cargo ktstr kernel build` from swapping the entry's directory
185/// (see [`crate::cache::CacheDir::store`]) under the VM while the test
186/// reads from it.
187///
188/// Returns `Ok(None)` when `kernel_path` is not shaped like a cache
189/// entry — explicit `KTSTR_TEST_KERNEL=/path/to/bzImage`,
190/// `/lib/modules/.../vmlinuz`, `/boot/vmlinuz-*`, or any path whose
191/// two-level parent does not match the resolved cache root. Such
192/// paths do not need coordination because the build pipeline never
193/// touches them.
194///
195/// Detection: the image is expected at `{root}/{key}/{image_name}`.
196/// Walk `kernel_path` up by two components (image_name, key) to
197/// produce a candidate root and canonicalize both sides before
198/// comparing — symlinks, redundant `./` segments, and `..` traversals
199/// must all reduce to the same inode path or the entry is treated as
200/// non-cache.
201pub(crate) fn acquire_test_kernel_lock_if_cached(
202 kernel_path: &Path,
203) -> Result<Option<crate::cache::SharedLockGuard>> {
204 // Peel the image filename. Fail → not a cache entry.
205 let Some(entry_dir) = kernel_path.parent() else {
206 return Ok(None);
207 };
208 // Peel the entry directory name (this is the candidate cache
209 // key). Fail → not a cache entry.
210 let Some(key_os) = entry_dir.file_name() else {
211 return Ok(None);
212 };
213 let Some(cache_key) = key_os.to_str() else {
214 return Ok(None);
215 };
216 // The directory above the entry is the candidate cache root.
217 let Some(candidate_root) = entry_dir.parent() else {
218 return Ok(None);
219 };
220
221 // Canonicalize both the candidate root and the resolved cache
222 // root so symlinks / `.` / `..` reduce to the same inode path
223 // before comparing. A non-cache path (e.g. /lib/modules/...)
224 // simply canonicalizes to itself and will not match.
225 let candidate_root_canon = match candidate_root.canonicalize() {
226 Ok(p) => p,
227 Err(_) => return Ok(None),
228 };
229 let resolved_root = match crate::cache::CacheDir::default_root() {
230 Ok(p) => p,
231 // Cache root unresolvable (no HOME and no XDG_CACHE_HOME, or
232 // non-UTF-8 KTSTR_CACHE_DIR): no cache exists, so `kernel_path`
233 // cannot be an entry. (A cache root that resolves but is absent
234 // on disk is handled by the canonicalize() arm below.)
235 Err(_) => return Ok(None),
236 };
237 let resolved_root_canon = match resolved_root.canonicalize() {
238 Ok(p) => p,
239 // Cache root resolves but does not exist on disk yet (fresh
240 // developer checkout). `kernel_path` is not inside a cache
241 // entry, so no lock needed.
242 Err(_) => return Ok(None),
243 };
244
245 if candidate_root_canon != resolved_root_canon {
246 return Ok(None);
247 }
248
249 // The path is shaped as a cache entry under the resolved root.
250 // Acquire the reader lock. The flock helper polls on
251 // `EAGAIN`/`EWOULDBLOCK` until either the lock is granted or its
252 // wall-clock timeout elapses. A timeout means a peer (concurrent
253 // `cargo ktstr kernel build` or another reader-blocking writer)
254 // is holding the lock — that is host-resource contention, not a
255 // kernel fault, so route it through
256 // [`crate::vmm::host_topology::ResourceContention`] so the
257 // `#[ktstr_test]` macro SKIPs cleanly and stats tooling records
258 // the attempt via the per-site sidecar. Non-timeout failures
259 // (parent-directory creation failure, an unexpected `try_flock`
260 // errno other than `EAGAIN`/`EWOULDBLOCK`) propagate as hard
261 // errors — they indicate filesystem corruption or a programming
262 // fault that SKIP-skipping would silently mask.
263 //
264 // Detection seam: the flock helper's bail format starts with
265 // `flock LOCK_SH on` (or `LOCK_EX`) and contains `timed out
266 // after`. Both substrings are pinned by the helper's internal
267 // contract and embedded in the rendered message together; the
268 // message also contains the lockfile path and the holder PID
269 // list parsed from `/proc/locks`, which we forward verbatim into
270 // the `ResourceContention` reason so the operator sees the
271 // identical triage information either way.
272 let cache = crate::cache::CacheDir::with_root(resolved_root_canon);
273 match cache.acquire_shared_lock(cache_key) {
274 Ok(guard) => Ok(Some(guard)),
275 Err(e) => {
276 let rendered = format!("{e:#}");
277 if is_flock_timeout_message(&rendered) {
278 let snapshot = crate::vmm::host_resource_snapshot();
279 Err(anyhow::Error::new(
280 crate::vmm::host_topology::ResourceContention {
281 reason: format!(
282 "test kernel cache lock: {rendered}. host resources: \
283 {snapshot}\n \
284 hint: a concurrent `cargo ktstr kernel build` or \
285 another lockholder is preventing the test VM from \
286 reading the cached kernel image. nextest will not \
287 retry; the SKIP banner records this attempt for \
288 stats tooling. Wait for the holder PIDs above to \
289 finish, or kill them, then retry.",
290 ),
291 },
292 ))
293 } else {
294 Err(e)
295 }
296 }
297 }
298}