ktstr/cache/cache_dir.rs
1//! [`CacheDir`] handle, lock guards, and cache-lock timeout policy.
2//!
3//! Public surface: [`CacheDir`] (the operator-facing handle exposed
4//! via `crate::cache::CacheDir`), [`SharedLockGuard`] /
5//! [`ExclusiveLockGuard`] (RAII wrappers around per-key flock
6//! acquisitions), and the [`CacheDir::store`] /
7//! [`CacheDir::lookup`] / [`CacheDir::list`] /
8//! [`CacheDir::clean_all`] / [`CacheDir::clean_keep`] lifecycle methods.
9//! The internal `warn_if_unstripped_vmlinux` and `should_warn_unstripped`
10//! helpers gate a per-lookup warning on entries whose vmlinux
11//! sidecar took the strip-failure fallback in
12//! `super::vmlinux_strip::strip_vmlinux_debug`.
13//!
14//! Sibling modules:
15//! - `super::metadata` — pure types ([`super::KernelSource`],
16//! [`super::KernelMetadata`], [`super::CacheArtifacts`], [`super::KconfigStatus`],
17//! [`super::CacheEntry`], [`super::ListedEntry`]) plus the
18//! `super::metadata::classify_corrupt_reason` dispatcher and
19//! `super::metadata::format_image_missing_reason` helper that
20//! `list` uses to emit corrupt-entry reason strings.
21//! - `super::housekeeping` — atomic-rename install primitives
22//! (`atomic_swap_dirs`, `TmpDirGuard`), cache-key /
23//! filename validators, the JSON metadata reader
24//! (`read_metadata`), and the cross-PID
25//! orphan-tempdir sweep (`clean_orphaned_tmp_dirs`).
26//! - `super::vmlinux_strip` — the ELF strip pipeline
27//! (`strip_vmlinux_debug`) `store()`
28//! invokes when an artifact carries a vmlinux sidecar.
29//! - [`super::resolve`] — env-cascade root resolution that
30//! `CacheDir::new` and `CacheDir::default_root` flow through.
31//!
//! Reader/writer asymmetry: the shared (reader) lock blocks for at most
//! 10 s, and that timeout is fixed — it is not operator-tunable. The
//! exclusive (writer) lock blocks for up to 5 minutes by default and is
//! the ONLY one that can be overridden, via the
//! [`STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV`] environment variable. The writer
//! must be able to outlast every concurrent test reader, whereas a
//! reader bails out quickly when a writer is stuck. See
//! [`SHARED_LOCK_DEFAULT_TIMEOUT`] and
//! [`STORE_EXCLUSIVE_LOCK_DEFAULT_TIMEOUT`] for the literal
//! durations and their rationale.
41//!
42//! Tests live in a sibling file `cache_dir_tests.rs`, pulled in
43//! below via `#[path]` so they remain the `cache_dir::tests`
44//! submodule. That preserves access to private items
45//! (`lookup_silent`, `should_emit_unstripped_warn`,
46//! `store_exclusive_lock_timeout`, the `STORE_EXCLUSIVE_LOCK_*`
47//! constants) and `super::*` resolution; the split is purely a
48//! file-size measure.
49
50use std::collections::HashSet;
51use std::fs;
52use std::path::{Path, PathBuf};
53use std::sync::{Mutex, OnceLock};
54
55use crate::sync::MutexExt;
56use anyhow::Context;
57
58use super::housekeeping::{
59 TmpDirGuard, atomic_swap_dirs, clean_orphaned_tmp_dirs, fsync_parent, fsync_staging_dir,
60 read_metadata, validate_cache_key, validate_filename,
61};
62#[cfg(test)]
63use super::metadata::KconfigStatus;
64use super::metadata::{
65 CacheArtifacts, CacheEntry, KernelMetadata, ListedEntry, format_image_missing_reason,
66};
67use super::resolve::resolve_cache_root;
68use super::vmlinux_strip::strip_vmlinux_debug;
69use super::{LOCK_DIR_NAME, TMP_DIR_PREFIX};
70use crate::flock::{FlockMode, acquire_flock_with_timeout};
71
/// Default wall-clock timeout for [`CacheDir::acquire_shared_lock`].
///
/// Fixed at 10 seconds. Unlike the exclusive (writer) timeout there is
/// no environment override: [`CacheDir::acquire_shared_lock`] always
/// passes this constant, so a reader gives up quickly when a writer is
/// stuck instead of waiting out the writer's much longer budget.
const SHARED_LOCK_DEFAULT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
74
/// How long [`CacheDir::store`] waits for its internal `LOCK_EX` when
/// [`STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV`] is unset: five minutes.
///
/// The budget has to cover a peer's entire critical section in the
/// worst case. Under heavy parallelism N runners can contend on the
/// SAME `cache_key`; the head writer keeps `LOCK_EX` while it copies
/// the boot image, runs the two-stage vmlinux strip pipeline
/// ([`super::vmlinux_strip::strip_vmlinux_debug`]), writes
/// `metadata.json`, and completes the
/// [`super::housekeeping::atomic_swap_dirs`] swap. Stripping a
/// debug-symbol-rich vmlinux can take tens of seconds on its own, and
/// every peer queued behind that producer adds to the wait linearly.
///
/// The earlier 60 s value was tight enough that 5–10 contending peers
/// reliably timed out before the head writer finished. Five minutes
/// leaves headroom for roughly 50 peers behind a slow strip while
/// keeping the "fail loud rather than block forever" property of a
/// finite timeout.
const STORE_EXCLUSIVE_LOCK_DEFAULT_TIMEOUT: std::time::Duration =
    std::time::Duration::from_secs(5 * 60);
95
/// Environment variable name that overrides
/// [`STORE_EXCLUSIVE_LOCK_DEFAULT_TIMEOUT`]. Parsed via
/// [`humantime::parse_duration`] so operators can tune with
/// human-readable units (`30s`, `2m`, `10min`, `1h`). An invalid
/// value falls back to the default with a `warn!` so a typo never
/// silently disables the lock — the operator can see the
/// fall-through in their tracing output and fix the setting.
///
/// An empty value is treated the same as an unset variable (default
/// timeout, no warning); see `store_exclusive_lock_timeout`.
///
/// NOTE: the variable name is also spelled out literally in the timeout
/// hint passed by [`CacheDir::acquire_exclusive_lock_blocking`]; keep
/// the two in sync when renaming.
const STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV: &str = "KTSTR_CACHE_STORE_LOCK_TIMEOUT";
104
105/// Resolve the per-store `LOCK_EX` acquire timeout, honoring the
106/// [`STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV`] override. Pure function so
107/// tests can exercise the parse/fall-through branches without
108/// driving a full `store()` cycle.
109fn store_exclusive_lock_timeout() -> std::time::Duration {
110 match std::env::var(STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV) {
111 Ok(v) if !v.is_empty() => match humantime::parse_duration(&v) {
112 Ok(d) => d,
113 Err(e) => {
114 tracing::warn!(
115 env = %STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV,
116 value = %v,
117 err = %e,
118 "invalid cache-store lock timeout env value; \
119 falling back to default timeout",
120 );
121 STORE_EXCLUSIVE_LOCK_DEFAULT_TIMEOUT
122 }
123 },
124 _ => STORE_EXCLUSIVE_LOCK_DEFAULT_TIMEOUT,
125 }
126}
127
/// Handle to the kernel image cache directory.
///
/// The handle only stores the cache root path. Everything else is
/// derived from it on demand: entries live at `<root>/<cache_key>`,
/// per-key lockfiles under `<root>/<LOCK_DIR_NAME>/`, and `store()`
/// staging directories at `<root>/<TMP_DIR_PREFIX><key>-<pid>`.
#[derive(Debug)]
#[non_exhaustive]
pub struct CacheDir {
    // Cache root; every entry, lockfile and staging path is joined onto it.
    root: PathBuf,
}
134
/// Process-wide record of cache keys that have already triggered the
/// unstripped-vmlinux warning.
///
/// `lookup()` is the user-visible entry point and can run many times
/// per CLI invocation against the same cache_key (a multi-kernel
/// gauntlet, for example, looks up the same stale entry once per
/// scenario). Without this set each of those lookups would repeat the
/// strip-fallback warning and drown out unrelated diagnostics. Once a
/// key is in the set the warn helper stays silent for it.
///
/// The set is created lazily on first use through an explicit
/// `OnceLock` (rather than `LazyLock`). The mutex only ever guards a
/// single O(1) `HashSet` insert, so contention is negligible.
fn warned_keys() -> &'static Mutex<HashSet<String>> {
    static WARNED: OnceLock<Mutex<HashSet<String>>> = OnceLock::new();
    WARNED.get_or_init(Mutex::default)
}
153
154/// Pure dedup-gate logic for [`warn_if_unstripped_vmlinux`].
155///
156/// Returns `true` iff a fresh `tracing::warn!` should fire for this
157/// entry: `should_warn_unstripped` accepts the entry AND the entry's
158/// cache_key is being recorded in `set` for the first time. Returns
159/// `false` if the entry does not need warning at all OR if the key
160/// was already in the set (already-warned suppression).
161///
162/// Takes `&Mutex<HashSet<String>>` rather than reaching into the
163/// process-wide [`warned_keys`] static so tests can drive the gate
164/// against a fresh per-test mutex without polluting (or being
165/// polluted by) the global set. Production callers pass
166/// `warned_keys()`; the bool return decouples the side effect (the
167/// `tracing::warn!`) from the decision so the latter is unit-testable.
168fn should_emit_unstripped_warn(entry: &CacheEntry, set: &Mutex<HashSet<String>>) -> bool {
169 if !should_warn_unstripped(entry) {
170 return false;
171 }
172 let mut guard = set.lock_unpoisoned();
173 guard.insert(entry.key.clone())
174}
175
176/// Emit a per-lookup warning when a cache entry was created with an
177/// unstripped vmlinux.
178///
179/// **Once per cache_key per process.** A `static` HashSet (see
180/// [`warned_keys`]) records every key for which the warn has already
181/// fired; subsequent calls for the same key are silent. Suppression
182/// covers callers that lookup the same stale entry repeatedly within
183/// one CLI invocation (e.g. multi-kernel gauntlet). The dedup
184/// decision is delegated to [`should_emit_unstripped_warn`], which
185/// is independently unit-tested.
186///
187/// Uses [`tracing::warn!`] so the message routes through the same
188/// observability pipeline as every other cache-layer diagnostic
189/// (the cargo-ktstr binary's `tracing_subscriber::fmt` writes warns
190/// to stderr; library consumers can subscribe a different layer).
191/// `eprintln!` would bypass that pipeline and force every consumer
192/// to live with raw-stderr output regardless of their tracing
193/// configuration.
194///
195/// The mutex is held only across the O(1) HashSet insert inside
196/// `should_emit_unstripped_warn`; the `tracing::warn!` macro fires
197/// AFTER lock release so a slow tracing subscriber cannot serialise
198/// concurrent lookups.
199fn warn_if_unstripped_vmlinux(entry: &CacheEntry) {
200 if should_emit_unstripped_warn(entry, warned_keys()) {
201 tracing::warn!(
202 cache_key = %entry.key,
203 "cache: using unstripped vmlinux (strip failed on a prior build; \
204 re-run with a clean cache to retry)",
205 );
206 }
207}
208
209/// Pure decision logic for [`warn_if_unstripped_vmlinux`].
210pub(crate) fn should_warn_unstripped(entry: &CacheEntry) -> bool {
211 entry.metadata.has_vmlinux() && !entry.metadata.vmlinux_stripped()
212}
213
214/// Whether the existing `cached` cache entry already satisfies a
215/// caller's intent to `store` an artifact under the same cache key.
216///
217/// Pure decision logic for [`CacheDir::store`]'s in-lock re-lookup
218/// (step 3 of the docs). When N concurrent peers race on the same
219/// `cache_key` they all miss the pre-lock cache check, serialise
220/// behind `LOCK_EX`, and would otherwise each repeat the head
221/// writer's copy / strip / atomic-publish work. This predicate
222/// answers the post-lock question: "is the head writer's output
223/// byte-equivalent to what I'd publish?" If yes, the late peers
224/// short-circuit — only the head writer pays the publish cost.
225///
226/// Compares only the metadata fields that drive the on-disk bytes
227/// `store()` would write:
228///
229/// - `config_hash` (CRC32 of the final `.config`) — pins the
230/// kernel image identity.
231/// - `ktstr_kconfig_hash` (CRC32 of `ktstr.kconfig`) — kconfig
232/// fragment that produced the build.
233/// - `extra_kconfig_hash` (CRC32 of the user `--extra-kconfig`
234/// fragment) — same.
235/// - `caller_has_vmlinux` — whether the caller passed a vmlinux
236/// sidecar in `CacheArtifacts`. This is the actual switch
237/// `store()` keys on (it overwrites `metadata.has_vmlinux`
238/// from the artifacts argument), so the predicate compares
239/// against the artifacts shape, not the caller's metadata
240/// field.
241///
242/// Excludes:
243///
244/// - `built_at` — wall-clock timestamp that drifts every build;
245/// pinning it would break the early-return and serialise every
246/// peer through a redundant publish.
247/// - `version` — display-only string, not a byte-difference.
248/// - `source` — acquire-time provenance (Tarball / Git / Local +
249/// payload). Two peers may publish the same image under
250/// different `source` payloads (e.g. one from a tarball mirror,
251/// one from a git checkout) and still produce byte-equivalent
252/// bytes. The kconfig hash is the authoritative content key.
253/// - `arch`, `image_name` — fixed by the cache key shape.
254/// - `vmlinux_stripped` — set by `store()` based on
255/// strip pipeline success/failure, not caller intent. The head
256/// writer either succeeded (stripped) or fell back (unstripped);
257/// late peers would just observe the head writer's outcome.
258/// - `source_vmlinux_size`, `source_vmlinux_mtime_secs` —
259/// DWARF-routing hints, not cached content.
260///
261/// Pure function so a unit test can pin every accept/reject branch
262/// without driving a full `store()` cycle through a temp cache.
263pub(crate) fn cache_content_matches(
264 cached: &KernelMetadata,
265 caller: &KernelMetadata,
266 caller_has_vmlinux: bool,
267) -> bool {
268 cached.config_hash == caller.config_hash
269 && cached.ktstr_kconfig_hash == caller.ktstr_kconfig_hash
270 && cached.extra_kconfig_hash == caller.extra_kconfig_hash
271 && cached.has_vmlinux() == caller_has_vmlinux
272}
273
274impl CacheDir {
275 /// Open a cache directory at the resolved root path.
276 pub fn new() -> anyhow::Result<Self> {
277 let root = resolve_cache_root()?;
278 Ok(CacheDir { root })
279 }
280
281 /// Open a cache directory at a specific path.
282 pub fn with_root(root: PathBuf) -> Self {
283 CacheDir { root }
284 }
285
/// Resolve the default cache root path without side effects.
///
/// Returns the same path [`Self::new`] would anchor at, without
/// constructing a handle. Both go through `resolve_cache_root`.
///
/// # Errors
///
/// Propagates any error returned by `resolve_cache_root`.
pub fn default_root() -> anyhow::Result<PathBuf> {
    resolve_cache_root()
}
290
291 /// Root directory this `CacheDir` is anchored at.
292 pub fn root(&self) -> &Path {
293 &self.root
294 }
295
296 /// Look up a cached kernel by cache key.
297 ///
298 /// On hit, emits a `tracing::warn!` via
299 /// `warn_if_unstripped_vmlinux` when the cached entry took the
300 /// strip-failure fallback (see `should_warn_unstripped` for the
301 /// exact predicate). Caller-facing call sites want the warning;
302 /// internal call sites that look the entry up only to compare
303 /// against caller intent (notably [`Self::store`]'s in-lock
304 /// recheck) use `Self::lookup_silent` to avoid double-emitting
305 /// the same warning the caller will see on its next `lookup`.
306 pub fn lookup(&self, cache_key: &str) -> Option<CacheEntry> {
307 let entry = self.lookup_silent(cache_key)?;
308 warn_if_unstripped_vmlinux(&entry);
309 Some(entry)
310 }
311
312 /// Look up a cached kernel without emitting the unstripped-vmlinux
313 /// warning. Internal callers that consume the entry's metadata
314 /// without surfacing it to the user — specifically the in-lock
315 /// recheck inside [`Self::store`] — use this variant so a recheck
316 /// hit on a strip-fallback entry does not log a duplicate warning
317 /// that the user-facing [`Self::lookup`] will already log on their
318 /// next call.
319 fn lookup_silent(&self, cache_key: &str) -> Option<CacheEntry> {
320 if let Err(e) = validate_cache_key(cache_key) {
321 tracing::warn!("invalid cache key: {e}");
322 return None;
323 }
324 let entry_dir = self.root.join(cache_key);
325 if !entry_dir.is_dir() {
326 return None;
327 }
328 let metadata = read_metadata(&entry_dir).ok()?;
329 if !entry_dir.join(&metadata.image_name).exists() {
330 return None;
331 }
332 Some(CacheEntry {
333 key: cache_key.to_string(),
334 path: entry_dir,
335 metadata,
336 })
337 }
338
339 /// List all cached kernel entries, sorted by build time (newest
340 /// first).
341 pub fn list(&self) -> anyhow::Result<Vec<ListedEntry>> {
342 let mut entries: Vec<ListedEntry> = Vec::new();
343 let read_dir = match fs::read_dir(&self.root) {
344 Ok(rd) => rd,
345 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(entries),
346 Err(e) => return Err(e.into()),
347 };
348 for dir_entry in read_dir {
349 let dir_entry = dir_entry?;
350 let path = dir_entry.path();
351 let name = match dir_entry.file_name().into_string() {
352 Ok(n) => n,
353 Err(_) => continue,
354 };
355 // Skip every dotfile child — ktstr reserves all
356 // dot-prefixed names (current uses: `.locks/`, `.tmp-*`).
357 // `validate_cache_key` rejects leading-dot inputs, so a
358 // dotfile in the cache root is either ktstr bookkeeping or
359 // an external artifact; either way `list()` must not
360 // surface it as a cache entry.
361 if name.starts_with('.') {
362 continue;
363 }
364 if !path.is_dir() {
365 continue;
366 }
367 match read_metadata(&path) {
368 Ok(metadata) => {
369 let image_path = path.join(&metadata.image_name);
370 if image_path.exists() {
371 entries.push(ListedEntry::Valid(Box::new(CacheEntry {
372 key: name,
373 path,
374 metadata,
375 })));
376 } else {
377 entries.push(ListedEntry::Corrupt {
378 key: name,
379 path,
380 reason: format_image_missing_reason(&metadata.image_name),
381 });
382 }
383 }
384 Err(reason) => {
385 tracing::info!(
386 entry = %name,
387 path = %path.display(),
388 %reason,
389 "cache entry corrupt at list-time",
390 );
391 entries.push(ListedEntry::Corrupt {
392 key: name,
393 path,
394 reason,
395 });
396 }
397 }
398 }
399 entries.sort_by(|a, b| {
400 let a_time = a.as_valid().map(|e| e.metadata.built_at.as_str());
401 let b_time = b.as_valid().map(|e| e.metadata.built_at.as_str());
402 b_time.cmp(&a_time)
403 });
404 Ok(entries)
405 }
406
407 /// Store a kernel image (and optional vmlinux sidecar) in the
408 /// cache under `cache_key`. Atomic install via temp directory +
409 /// `renameat2(RENAME_EXCHANGE)`, so a concurrent reader never
410 /// observes a partially-written entry.
411 ///
412 /// # Steps (in order)
413 ///
414 /// 1. **Validate inputs.** `validate_cache_key` rejects
415 /// `..`, slashes, NUL, leading-dot keys (the `TMP_DIR_PREFIX`
416 /// reservation plus any other dotfile-shaped key, since
417 /// `list()` skips every dotfile child);
418 /// `validate_filename` rejects path-separator characters in
419 /// the image basename. Invalid input fails before any I/O.
420 /// 2. **Acquire the per-key store lock.** `LOCK_EX` on
421 /// `<root>/.locks/<cache_key>.lock`. Timeout defaults to
422 /// `STORE_EXCLUSIVE_LOCK_DEFAULT_TIMEOUT` (5 minutes) and
423 /// can be overridden via `STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV`
424 /// for environments where a slow vmlinux strip stacks many
425 /// contending peers behind the head writer. The lock
426 /// excludes other writers for the same key while letting
427 /// readers and writers for unrelated keys proceed. Timeout
428 /// produces an error rather than blocking forever — a hung
429 /// writer cannot indefinitely block a fresh rebuild attempt.
430 /// 3. **Double-checked re-lookup inside the lock.** After
431 /// acquiring `LOCK_EX`, re-run `Self::lookup_silent` for
432 /// `cache_key`. When N peers race to publish the same key
433 /// they all miss the pre-lock cache check, queue on
434 /// `LOCK_EX`, and serialise behind the head writer. Without
435 /// this recheck, every peer re-runs the full copy + strip +
436 /// publish steps in series even though the head writer's
437 /// output already satisfies them. The recheck early-returns
438 /// when the existing cached entry's content-defining metadata
439 /// fields (`cache_content_matches` — config_hash,
440 /// ktstr_kconfig_hash, extra_kconfig_hash, has_vmlinux) match
441 /// the caller's intent for this publish, so only the head
442 /// writer pays the strip/copy/rename cost. Cache-relevant
443 /// differences (a fresh kconfig hash, a different vmlinux
444 /// presence) bypass the early-return and proceed to a real
445 /// overwrite-publish. Cache-irrelevant differences (a fresh
446 /// `built_at` timestamp, a different `version` display
447 /// string) trigger the early-return — the on-disk bytes the
448 /// overwrite would write are byte-equivalent to what's
449 /// already cached, so the publish is redundant.
450 /// 4. **Stage into a temp directory.** `<root>/.tmp-<key>-<pid>`
451 /// is created (or pruned and recreated if a previous attempt
452 /// by the same PID exists), with `TmpDirGuard` enrolling the
453 /// path for cleanup on any subsequent error. A best-effort
454 /// `clean_orphaned_tmp_dirs` pass also runs here so dead
455 /// sibling temp directories from crashed PIDs are GC'd before
456 /// we add another one.
457 /// 5. **Copy the boot image.** `metadata.image_name` lands at
458 /// `tmp/<image_name>` via `reflink::reflink_or_copy` (copy-on-write
459 /// when the cache filesystem supports it, else a plain byte copy).
460 /// 6. **Strip and copy vmlinux (if supplied).** When
461 /// `artifacts.vmlinux` is `Some`, `strip_vmlinux_debug`
462 /// runs the two-stage strip pipeline and the result is written
463 /// to `tmp/vmlinux`. **Strip-fallback rationale:** if the
464 /// strip pipeline returns an error (e.g. an unrecognised ELF
465 /// layout from a future toolchain or an exotic config), the
466 /// write does NOT abort — it falls back to copying the raw
467 /// unstripped vmlinux and records `vmlinux_stripped: false`
468 /// in metadata. The cache trades a much larger on-disk
469 /// payload for "still usable for monitoring/probes," and
470 /// `cargo ktstr kernel list --json` exposes the
471 /// `vmlinux_stripped` field so operators can spot entries
472 /// that need rebuilding once the strip-failure root cause is
473 /// fixed. A hard failure here would be worse: it would
474 /// effectively brick the cache for that build.
475 /// 7. **Write `metadata.json`.** A pretty-printed serde dump of
476 /// `KernelMetadata` (with `has_vmlinux` and `vmlinux_stripped`
477 /// set from step 6) at `tmp/metadata.json`. Pretty-print is
478 /// intentional — operators inspect this file directly when
479 /// debugging cache state.
480 /// 8. **Atomic publish.** `fs::rename(tmp → final)` if `final`
481 /// does not exist; otherwise `atomic_swap_dirs` uses
482 /// `renameat2(RENAME_EXCHANGE)` to swap the two directories
483 /// in a single atomic syscall. Either way, no reader observes
484 /// a partial entry; the swap path also cleans up the
485 /// now-stale prior version under the temp name.
pub fn store(
    &self,
    cache_key: &str,
    artifacts: &CacheArtifacts<'_>,
    metadata: &KernelMetadata,
) -> anyhow::Result<CacheEntry> {
    // Step 1: reject a malformed key or image basename before any I/O.
    validate_cache_key(cache_key)?;
    validate_filename(&metadata.image_name)?;

    // Step 2: take the per-key exclusive lock. The guard is bound to a
    // named `_store_lock` (not `_`) so it stays alive until this
    // function returns, covering every step below.
    let _store_lock =
        self.acquire_exclusive_lock_blocking(cache_key, store_exclusive_lock_timeout())?;

    // Double-checked re-lookup inside LOCK_EX: when N peers race
    // on the same cache_key they all miss the pre-lock cache
    // check, queue on the lock, and would otherwise repeat the
    // head writer's copy/strip/publish work in series. The
    // recheck early-returns when the existing entry's
    // content-defining metadata fields match what we'd publish
    // (see [`cache_content_matches`] for the predicate). The
    // matched entry is returned to the caller verbatim — its
    // on-disk bytes are byte-equivalent to what we would write,
    // so no overwrite-publish is needed.
    //
    // The recheck-hit early-return BYPASSES the orphan tempdir
    // sweep at step 4. That is intentional: every orphan-sweep
    // call costs an opendir + readdir + N kill(pid, 0) probes,
    // and the recheck-hit path is the hot path for serialised
    // peer fan-out — adding the sweep here would charge every
    // late peer a syscall budget the head writer already paid.
    // Orphans accumulate only on the cache-miss / overwrite
    // path, which is also where new tempdirs are created, so
    // the GC runs proportionally to tempdir creation. Uses the
    // private `lookup_silent` variant (no warn) so the recheck
    // does not double-emit the unstripped-vmlinux warn that
    // store()'s caller would see again on its next lookup().
    if let Some(existing) = self.lookup_silent(cache_key)
        && cache_content_matches(&existing.metadata, metadata, artifacts.vmlinux.is_some())
    {
        tracing::debug!(
            cache_key = cache_key,
            "cache.store: in-lock recheck hit; skipping copy/strip/publish",
        );
        return Ok(existing);
    }

    // Step 4: the staging directory name combines the reserved temp
    // prefix, the cache key, and this process's PID.
    let final_dir = self.root.join(cache_key);
    let tmp_dir = self.root.join(format!(
        "{TMP_DIR_PREFIX}{}-{}",
        cache_key,
        std::process::id(),
    ));

    // A staging dir with this exact name can only be a leftover from an
    // earlier attempt by this same PID; start from a clean slate.
    if tmp_dir.exists() {
        fs::remove_dir_all(&tmp_dir)?;
    }
    // Orphan sweep is best-effort: a failure is logged and the store
    // carries on.
    if let Err(e) = clean_orphaned_tmp_dirs(&self.root) {
        tracing::warn!(err = %format!("{e:#}"), "clean_orphaned_tmp_dirs failed; continuing store");
    }
    fs::create_dir_all(&tmp_dir)?;

    // Bound to a named variable so the guard lives to the end of the
    // function. Per the method docs it enrolls the staging path for
    // cleanup on any subsequent error.
    let _guard = TmpDirGuard(&tmp_dir);

    // Step 5: stage the boot image under its metadata-declared name.
    let image_dest = tmp_dir.join(&metadata.image_name);
    crate::reflink::reflink_or_copy(artifacts.image, &image_dest)
        .context("copy kernel image to cache")?;

    // Step 6: stage the vmlinux sidecar when one was supplied. The
    // tuple is (has_vmlinux, vmlinux_stripped). A strip failure is NOT
    // fatal: the raw vmlinux is cached instead and recorded as
    // unstripped. Only a failed copy aborts the store.
    let (has_vmlinux, vmlinux_stripped) = if let Some(vmlinux) = artifacts.vmlinux {
        let vmlinux_dest = tmp_dir.join("vmlinux");
        match strip_vmlinux_debug(vmlinux) {
            Ok(stripped) => {
                crate::reflink::reflink_or_copy(stripped.path(), &vmlinux_dest)
                    .context("copy stripped vmlinux to cache")?;
                (true, true)
            }
            Err(e) => {
                tracing::warn!(
                    cache_key = cache_key,
                    err = %format!("{e:#}"),
                    "vmlinux strip failed, caching unstripped \
                     (larger on-disk payload). See \
                     `cargo ktstr kernel list --json` \
                     vmlinux_stripped field.",
                );
                crate::reflink::reflink_or_copy(vmlinux, &vmlinux_dest)
                    .context("copy vmlinux to cache")?;
                (true, false)
            }
        }
    } else {
        (false, false)
    };

    // Step 7: persist a copy of the caller's metadata with the two
    // vmlinux flags overwritten by what step 6 actually did.
    let mut meta = metadata.clone();
    meta.set_has_vmlinux(has_vmlinux);
    meta.set_vmlinux_stripped(vmlinux_stripped);
    let meta_json = serde_json::to_string_pretty(&meta)?;
    fs::write(tmp_dir.join("metadata.json"), meta_json)
        .map_err(|e| anyhow::anyhow!("write cache metadata: {e}"))?;

    // Durability: fsync the staged files + tmp dir before the
    // publish rename so a host crash is less likely to leave the
    // cache pointing at a torn entry. Best-effort — lookup re-parses
    // metadata.json on read (and checks the image file exists), so a
    // torn metadata.json re-detects as a cache miss and rebuilds. A
    // torn kernel image or vmlinux with intact metadata is not
    // content-checked here; it surfaces at the consumer (kernel boot
    // fails, or monitor/probe reads of vmlinux fail) — a loud
    // failure, never silent corruption of a shipped artifact.
    if let Err(e) = fsync_staging_dir(&tmp_dir) {
        tracing::warn!(
            cache_key = cache_key,
            err = %e,
            "fsync of staged cache entry before publish failed; \
             publishing without the durability barrier (validate-on-read \
             remains the correctness backstop)",
        );
    }

    // Step 8: try a plain rename first. ENOTEMPTY / EEXIST mean a
    // populated entry already sits at `final_dir`, so fall back to the
    // directory swap. Any other rename error is fatal.
    match fs::rename(&tmp_dir, &final_dir) {
        Ok(()) => {}
        Err(e)
            if e.raw_os_error() == Some(libc::ENOTEMPTY)
                || e.raw_os_error() == Some(libc::EEXIST) =>
        {
            atomic_swap_dirs(&tmp_dir, &final_dir)?;
        }
        Err(e) => {
            return Err(anyhow::anyhow!("atomic rename cache entry: {e}"));
        }
    }

    // Persist the publish rename itself: fsync the cache root so the
    // new entry name survives a crash. Best-effort, same rationale.
    if let Err(e) = fsync_parent(&final_dir) {
        tracing::warn!(
            cache_key = cache_key,
            err = %e,
            "fsync of cache root after publish failed; the rename may \
             not survive a crash (validate-on-read backstop)",
        );
    }

    // Report the metadata as written (flags from step 6), not the
    // caller's input.
    Ok(CacheEntry {
        key: cache_key.to_string(),
        path: final_dir,
        metadata: meta,
    })
}
634
635 /// Remove every cached entry. Returns the number of entries
636 /// removed. Preserves the `.locks/` subdirectory.
637 pub fn clean_all(&self) -> anyhow::Result<usize> {
638 self.remove_entries(self.list()?)
639 }
640
641 /// Remove every cached entry except the `keep` most recent ones
642 /// (by `built_at` timestamp). Preserves the `.locks/`
643 /// subdirectory.
644 pub fn clean_keep(&self, keep: usize) -> anyhow::Result<usize> {
645 self.remove_entries(self.list()?.into_iter().skip(keep))
646 }
647
648 fn remove_entries<I: IntoIterator<Item = ListedEntry>>(
649 &self,
650 iter: I,
651 ) -> anyhow::Result<usize> {
652 let to_remove: Vec<_> = iter.into_iter().collect();
653 let count = to_remove.len();
654 for entry in &to_remove {
655 fs::remove_dir_all(entry.path())?;
656 }
657 Ok(count)
658 }
659
660 // ---------------- Per-entry coordination locks ----------------
661
662 /// Absolute path to the coordination lockfile for `cache_key`.
663 pub(crate) fn lock_path(&self, cache_key: &str) -> PathBuf {
664 self.root
665 .join(LOCK_DIR_NAME)
666 .join(format!("{cache_key}.lock"))
667 }
668
669 /// Create the `{cache_root}/.locks/` subdirectory if absent.
670 pub(crate) fn ensure_lock_dir(&self) -> anyhow::Result<()> {
671 let dir = self.root.join(LOCK_DIR_NAME);
672 fs::create_dir_all(&dir)
673 .with_context(|| format!("create lock subdirectory {}", dir.display()))
674 }
675
676 /// Acquire `LOCK_SH` on the cache-entry lockfile.
677 pub fn acquire_shared_lock(&self, cache_key: &str) -> anyhow::Result<SharedLockGuard> {
678 validate_cache_key(cache_key)?;
679 let path = self.lock_path(cache_key);
680 let fd = acquire_flock_with_timeout(
681 &path,
682 FlockMode::Shared,
683 SHARED_LOCK_DEFAULT_TIMEOUT,
684 &format!("cache entry {cache_key:?}"),
685 None,
686 )?;
687 Ok(SharedLockGuard { fd })
688 }
689
690 /// Acquire `LOCK_EX` on the cache-entry lockfile, blocking up
691 /// to `timeout`. On timeout, the error message surfaces the
692 /// `STORE_EXCLUSIVE_LOCK_TIMEOUT_ENV` override so an operator
693 /// hitting a contended `store()` discovers the env-var
694 /// remediation without reading the docs.
695 pub fn acquire_exclusive_lock_blocking(
696 &self,
697 cache_key: &str,
698 timeout: std::time::Duration,
699 ) -> anyhow::Result<ExclusiveLockGuard> {
700 validate_cache_key(cache_key)?;
701 let path = self.lock_path(cache_key);
702 let fd = acquire_flock_with_timeout(
703 &path,
704 FlockMode::Exclusive,
705 timeout,
706 &format!("cache entry {cache_key:?}"),
707 Some(
708 "override the timeout via KTSTR_CACHE_STORE_LOCK_TIMEOUT (humantime: 30s, 2m, 1h)",
709 ),
710 )?;
711 Ok(ExclusiveLockGuard { fd })
712 }
713
714 /// Non-blocking `LOCK_EX` attempt on the cache-entry lockfile.
715 pub fn try_acquire_exclusive_lock(
716 &self,
717 cache_key: &str,
718 ) -> anyhow::Result<ExclusiveLockGuard> {
719 validate_cache_key(cache_key)?;
720 // try_flock doesn't lazily create the parent directory like
721 // acquire_flock_with_timeout does — must materialise .locks/
722 // here so the open(O_CREAT) inside try_flock has a parent.
723 self.ensure_lock_dir()?;
724 let path = self.lock_path(cache_key);
725 match crate::flock::try_flock(&path, crate::flock::FlockMode::Exclusive)? {
726 Some(fd) => Ok(ExclusiveLockGuard { fd }),
727 None => {
728 let holders = crate::flock::read_holders(&path).unwrap_or_default();
729 anyhow::bail!(
730 "cache entry {cache_key:?} is locked by active test runs \
731 (lockfile {lockfile}, holders: {holders}). Wait for \
732 those tests to finish, or kill them, then retry.",
733 lockfile = path.display(),
734 holders = crate::flock::format_holder_list(&holders),
735 );
736 }
737 }
738 }
739}
740
/// RAII guard for a `LOCK_SH` hold on a cache-entry lockfile.
///
/// The guard owns the locked file descriptor. Dropping the guard drops
/// the [`std::os::fd::OwnedFd`], which closes the descriptor; with
/// `flock` semantics that is what gives the lock up. Bind the guard to
/// a named variable (`let _lock = ...`) for as long as the lock must
/// be held — `let _ = ...` or an unbound call drops it at once.
#[derive(Debug)]
#[must_use = "the lock is released as soon as the guard is dropped"]
pub struct SharedLockGuard {
    // Never read: the descriptor is held purely for its `Drop`, which
    // closes it when the guard goes away.
    #[allow(dead_code)]
    fd: std::os::fd::OwnedFd,
}
747
/// RAII guard for a `LOCK_EX` hold on a cache-entry lockfile.
///
/// The guard owns the locked file descriptor. Dropping the guard drops
/// the [`std::os::fd::OwnedFd`], which closes the descriptor; with
/// `flock` semantics that is what gives the lock up. Bind the guard to
/// a named variable (`let _lock = ...`) for as long as the lock must
/// be held — `let _ = ...` or an unbound call drops it at once.
#[derive(Debug)]
#[must_use = "the lock is released as soon as the guard is dropped"]
pub struct ExclusiveLockGuard {
    // Never read: the descriptor is held purely for its `Drop`, which
    // closes it when the guard goes away.
    #[allow(dead_code)]
    fd: std::os::fd::OwnedFd,
}
754
755#[cfg(test)]
756#[path = "cache_dir_tests.rs"]
757mod tests;