ktstr/
remote_cache.rs

1//! Remote cache backend for GHA runners via opendal.
2//!
3//! When `KTSTR_GHA_CACHE=1` and `ACTIONS_CACHE_URL` are set, cache
4//! operations transparently extend to a remote GHA cache. Local cache
5//! is always authoritative: lookups check local first, stores write to
6//! both. Remote failures are non-fatal (logged as warnings).
7//!
8//! Cache entries are serialized as tar archives containing the kernel
9//! image, vmlinux (if present), and metadata.json, stored as a single
10//! blob per cache key in the GHA cache service.
11//!
12//! Tar payloads are zstd-compressed before upload and decompressed on
13//! download. Decompression is bounded by
14//! [`MAX_DECOMPRESSED_REMOTE_CACHE_BYTES`] to guard against a hostile
15//! zstd payload (zstd compresses pathologically well on repeated
16//! bytes, so a few-KiB blob can decompress to gigabytes). A blob that
17//! does not start with the zstd magic number is rejected.
18
19use std::io::{Read, Write};
20use std::path::Path;
21use std::sync::LazyLock;
22
23use crate::cache::{CacheDir, CacheEntry, KernelMetadata};
24
25/// Tokio runtime for opendal async operations.
26///
27/// opendal's `Operator` is async. cargo-ktstr is synchronous, so we
28/// provide a dedicated single-threaded runtime and call `block_on()`
29/// for each remote cache operation. Created lazily on first use;
30/// never created when remote cache is disabled.
31///
32/// # Serialization
33///
34/// `new_current_thread()` plus synchronous callers means every
35/// `block_on(op.read | op.write)` runs to completion on the calling
36/// thread before the next remote operation can start — there is no
37/// task scheduler driving multiple futures concurrently. Today's
38/// callers ([`remote_lookup`] and [`remote_store`]) issue exactly
39/// one I/O per invocation and the surrounding `cargo-ktstr` flow
40/// does not parallelise cache lookups, so the serial pattern is
41/// correct for the current workload. If a future caller needs
42/// concurrent remote ops (e.g. a parallel pre-fetch over many cache
43/// keys), this runtime configuration must change — either to a
44/// multi-thread runtime, or to a single explicit `block_on(async {
45/// join!(...) })` that drives futures concurrently within the
46/// current_thread runtime.
47///
48/// Calling `block_on` from inside an existing tokio async context
49/// panics — this runtime must only be entered from synchronous call
50/// sites.
51static RUNTIME: LazyLock<tokio::runtime::Runtime> = LazyLock::new(|| {
52    tokio::runtime::Builder::new_current_thread()
53        .enable_all()
54        .build()
55        .expect("failed to create tokio runtime for remote cache")
56});
57
58/// Check if remote GHA cache is enabled.
59///
60/// Requires both `KTSTR_GHA_CACHE=1` and `ACTIONS_CACHE_URL` to be
61/// set. Returns false silently when either is absent (normal for
62/// local dev).
63pub fn is_enabled() -> bool {
64    std::env::var(crate::KTSTR_GHA_CACHE_ENV)
65        .ok()
66        .is_some_and(|v| v == "1")
67        && std::env::var("ACTIONS_CACHE_URL")
68            .ok()
69            .is_some_and(|v| !v.is_empty())
70}
71
/// Namespace string passed to opendal's `Ghac::version` builder (see
/// [`create_operator`]). It serves two purposes:
///
/// 1. Isolates ktstr cache entries from other tools sharing the
///    same GHA cache service.
/// 2. Carries a `-vN` suffix so format changes invalidate stale
///    entries without colliding with the previous wire shape.
///
/// **Bump the version suffix whenever the on-the-wire format changes
/// in a way old readers cannot interpret.** Examples that require
/// a bump:
/// - Compression format change (e.g. zstd → zstd+dict, or zstd → lz4).
/// - Removal of a fallback path readers used to depend on (e.g. the
///   v2 bump went out alongside dropping the raw-tar fallback that
///   pre-zstd entries relied on — see [`decompress_payload`]).
/// - Tar layout change (filenames, structure, additional required
///   members).
/// - Metadata schema change that breaks deserialization of older
///   entries.
///
/// Additive changes that older readers can still parse (e.g. a new
/// optional field in metadata) do NOT require a bump.
const REMOTE_CACHE_NAMESPACE: &str = "ktstr-v2";
95
96/// Create an opendal operator for the GHA cache service.
97///
98/// Relies on opendal's Ghac service, which reads `ACTIONS_CACHE_URL`
99/// and `ACTIONS_RUNTIME_TOKEN` from the environment (set automatically
100/// by the GHA runner); ktstr itself does not touch either variable.
101/// The `version` field is set to [`REMOTE_CACHE_NAMESPACE`] —
102/// namespaces ktstr entries against other tools sharing the cache
103/// AND invalidates stale entries when ktstr's wire format changes.
104fn create_operator() -> Result<opendal::Operator, String> {
105    let builder = opendal::services::Ghac::default()
106        .root("/")
107        .version(REMOTE_CACHE_NAMESPACE);
108
109    opendal::Operator::new(builder)
110        .map_err(|e| format!("create ghac operator: {e}"))
111        .map(|b| b.finish())
112}
113
/// Zstd frame magic: the first 4 bytes of any zstd frame, i.e. the
/// little-endian encoding of `0xFD2FB528` (`28 B5 2F FD` on the wire).
const ZSTD_MAGIC: [u8; 4] = 0xFD2F_B528_u32.to_le_bytes();
116
/// Upper bound, in bytes, on what `decompress_payload` will inflate a
/// zstd payload to (1 GiB).
///
/// zstd compresses pathologically well on repeated bytes, so a
/// few-KiB blob from the GHA cache service can expand to gigabytes;
/// this ceiling bounds the allocation a malicious or corrupted payload
/// can force. 1 GiB leaves room for any realistic cache entry: a
/// bzImage is ~15 MiB, a stripped vmlinux ~45 MiB, and an unstripped
/// debug vmlinux with BTF can reach ~500 MiB.
///
/// Public so a downstream consumer can size buffers against the same
/// ceiling without hardcoding the value.
pub const MAX_DECOMPRESSED_REMOTE_CACHE_BYTES: u64 = 1 << 30;
128
129/// Pack a cache entry directory into a tar archive in memory.
130///
131/// The tar contains the kernel image, vmlinux (if present), and
132/// metadata.json from the cache entry directory. Paths inside the
133/// tar are relative filenames (no directory prefix).
134///
135/// The tar is then compressed with zstd before upload.
136/// [`unpack_and_store`] verifies the zstd magic number on download
137/// and decompresses; a payload missing the magic is rejected
138/// (the on-the-wire format is zstd-only).
139fn pack_entry(entry_dir: &Path, metadata: &KernelMetadata) -> Result<Vec<u8>, String> {
140    let mut archive = tar::Builder::new(Vec::new());
141
142    // Null out source_tree_path before serializing — it contains
143    // local filesystem paths that must not leak to remote storage.
144    // For non-Local source variants there's nothing to sanitize.
145    let mut meta_sanitized = metadata.clone();
146    if let crate::cache::KernelSource::Local {
147        source_tree_path, ..
148    } = &mut meta_sanitized.source
149    {
150        *source_tree_path = None;
151    }
152
153    // Add metadata.json.
154    let meta_json = serde_json::to_string_pretty(&meta_sanitized)
155        .map_err(|e| format!("serialize metadata: {e}"))?;
156    let meta_bytes = meta_json.as_bytes();
157    crate::tar_util::pack_tar_entry(
158        &mut archive,
159        "metadata.json",
160        0o644,
161        meta_bytes.len() as u64,
162        meta_bytes,
163    )
164    .map_err(|e| format!("tar append metadata: {e}"))?;
165
166    // Add kernel image.
167    let image_path = entry_dir.join(&metadata.image_name);
168    let mut image_file = std::fs::File::open(&image_path)
169        .map_err(|e| format!("open image {}: {e}", image_path.display()))?;
170    let image_size = image_file
171        .metadata()
172        .map_err(|e| format!("image metadata: {e}"))?
173        .len();
174    crate::tar_util::pack_tar_entry(
175        &mut archive,
176        &metadata.image_name,
177        0o644,
178        image_size,
179        &mut image_file,
180    )
181    .map_err(|e| format!("tar append image: {e}"))?;
182
183    // Add vmlinux if present (BTF source for build.rs).
184    let vmlinux_path = entry_dir.join("vmlinux");
185    if let Ok(mut vmlinux_file) = std::fs::File::open(&vmlinux_path) {
186        let vmlinux_size = vmlinux_file
187            .metadata()
188            .map_err(|e| format!("vmlinux metadata: {e}"))?
189            .len();
190        crate::tar_util::pack_tar_entry(
191            &mut archive,
192            "vmlinux",
193            0o644,
194            vmlinux_size,
195            &mut vmlinux_file,
196        )
197        .map_err(|e| format!("tar append vmlinux: {e}"))?;
198    }
199
200    let tar_bytes = archive
201        .into_inner()
202        .map_err(|e| format!("finalize tar: {e}"))?;
203
204    // Compress with zstd (level 3: good ratio at fast speed).
205    zstd::encode_all(tar_bytes.as_slice(), 3).map_err(|e| format!("zstd compress: {e}"))
206}
207
208/// Decompress a zstd-compressed cache blob. Rejects payloads that
209/// do not start with the zstd magic number — the on-the-wire format
210/// is zstd-only since the encoder ([`pack_entry`]) always compresses.
211/// The magic-number precondition catches truncated downloads (any
212/// payload < 4 bytes) and non-zstd content with a clearer error
213/// than the zstd library's "invalid header" diagnostic.
214///
215/// Bounded by [`MAX_DECOMPRESSED_REMOTE_CACHE_BYTES`] — a payload
216/// that would expand past that ceiling surfaces an error rather than
217/// allocating unbounded memory, guarding against a hostile zstd
218/// payload from the GHA cache service.
219fn decompress_payload(data: &[u8]) -> Result<Vec<u8>, String> {
220    if data.len() < 4 || data[..4] != ZSTD_MAGIC {
221        return Err("remote cache entry missing zstd magic".to_string());
222    }
223    decompress_capped(data, MAX_DECOMPRESSED_REMOTE_CACHE_BYTES)
224        .map_err(|e| format!("zstd decompress: {e}"))
225}
226
227/// Decompress a zstd payload into a `Vec<u8>` capped at
228/// `max_decompressed` bytes — bombing out with an error if the
229/// payload would expand past the ceiling. Reads through
230/// `Read::take(cap + 1)` so a payload that decompresses to
231/// exactly `cap` bytes is accepted while one that produces
232/// `cap + 1` bytes (or more) is rejected — the +1 sentinel
233/// distinguishes "EOF coincided with the cap" from "more data
234/// behind the cap".
235fn decompress_capped(bytes: &[u8], max_decompressed: u64) -> Result<Vec<u8>, String> {
236    let decoder =
237        zstd::stream::read::Decoder::new(bytes).map_err(|e| format!("zstd decoder init: {e}"))?;
238    let mut out = Vec::new();
239    decoder
240        .take(max_decompressed.saturating_add(1))
241        .read_to_end(&mut out)
242        .map_err(|e| format!("zstd decompress read: {e}"))?;
243    if out.len() as u64 > max_decompressed {
244        return Err(format!(
245            "zstd-decompressed payload exceeds the {max_decompressed}-byte cap (decompression-bomb guard)",
246        ));
247    }
248    Ok(out)
249}
250
251/// Unpack a tar archive into a cache directory via CacheDir::store.
252///
253/// Extracts metadata.json, the kernel image, and vmlinux (if present)
254/// from the tar blob, writes them to temp files, then stores via the
255/// local cache API for atomic placement. The unpacked vmlinux was
256/// already stripped by the producer; `CacheDir::store` re-runs the
257/// strip pipeline (idempotent — the keep-list partition produces the
258/// same layout) and falls back to copying verbatim on error.
259///
260/// Decompression is bounded by [`MAX_DECOMPRESSED_REMOTE_CACHE_BYTES`].
261fn unpack_and_store(cache: &CacheDir, cache_key: &str, data: &[u8]) -> Result<CacheEntry, String> {
262    let tar_bytes = decompress_payload(data)?;
263    let mut archive = tar::Archive::new(tar_bytes.as_slice());
264    let entries = archive
265        .entries()
266        .map_err(|e| format!("read tar entries: {e}"))?;
267
268    let mut metadata: Option<KernelMetadata> = None;
269    let mut image_data: Option<(String, Vec<u8>)> = None;
270    let mut vmlinux_data: Option<Vec<u8>> = None;
271
272    for entry_result in entries {
273        let mut entry = entry_result.map_err(|e| format!("tar entry: {e}"))?;
274        let path = entry
275            .path()
276            .map_err(|e| format!("tar entry path: {e}"))?
277            .to_string_lossy()
278            .into_owned();
279
280        if path == "metadata.json" {
281            let mut content = String::new();
282            entry
283                .read_to_string(&mut content)
284                .map_err(|e| format!("read metadata from tar: {e}"))?;
285            metadata = Some(
286                serde_json::from_str(&content)
287                    .map_err(|e| format!("parse metadata from tar: {e}"))?,
288            );
289        } else if path == "vmlinux" {
290            let mut data = Vec::new();
291            entry
292                .read_to_end(&mut data)
293                .map_err(|e| format!("read vmlinux from tar: {e}"))?;
294            vmlinux_data = Some(data);
295        } else {
296            let mut data = Vec::new();
297            entry
298                .read_to_end(&mut data)
299                .map_err(|e| format!("read image from tar: {e}"))?;
300            image_data = Some((path, data));
301        }
302    }
303
304    let meta = metadata.ok_or_else(|| "tar archive missing metadata.json".to_string())?;
305    let (_, img_bytes) =
306        image_data.ok_or_else(|| "tar archive missing kernel image".to_string())?;
307
308    // Write image and vmlinux to temp files for CacheDir::store.
309    let tmp_dir = tempfile::TempDir::new().map_err(|e| format!("create temp dir: {e}"))?;
310    let tmp_image = tmp_dir.path().join(&meta.image_name);
311    let mut f = std::fs::File::create(&tmp_image).map_err(|e| format!("create temp image: {e}"))?;
312    f.write_all(&img_bytes)
313        .map_err(|e| format!("write temp image: {e}"))?;
314    drop(f);
315
316    let tmp_vmlinux_path;
317    let vmlinux_ref = if let Some(ref vml_bytes) = vmlinux_data {
318        tmp_vmlinux_path = tmp_dir.path().join("vmlinux");
319        let mut vf = std::fs::File::create(&tmp_vmlinux_path)
320            .map_err(|e| format!("create temp vmlinux: {e}"))?;
321        vf.write_all(vml_bytes)
322            .map_err(|e| format!("write temp vmlinux: {e}"))?;
323        drop(vf);
324        Some(tmp_vmlinux_path.as_path())
325    } else {
326        None
327    };
328
329    let mut artifacts = crate::cache::CacheArtifacts::new(&tmp_image);
330    if let Some(v) = vmlinux_ref {
331        artifacts = artifacts.with_vmlinux(v);
332    }
333    cache
334        .store(cache_key, &artifacts, &meta)
335        .map_err(|e| format!("local cache store: {e}"))
336}
337
338/// Look up a cache key in the remote GHA cache.
339///
340/// On hit, downloads the tar blob and unpacks it into the local
341/// cache via `CacheDir::store`. Returns the local `CacheEntry` on
342/// success. Returns `None` on remote miss. Logs warnings on errors
343/// and returns `None` (non-fatal).
344///
345/// `cli_label` prefixes diagnostic output (e.g. `"ktstr"` or
346/// `"cargo ktstr"`).
347pub fn remote_lookup(cache: &CacheDir, cache_key: &str, cli_label: &str) -> Option<CacheEntry> {
348    let op = match create_operator() {
349        Ok(op) => op,
350        Err(e) => {
351            eprintln!("{cli_label}: remote cache warning: {e}");
352            return None;
353        }
354    };
355
356    let data = match RUNTIME.block_on(op.read(cache_key)) {
357        Ok(buf) => buf.to_vec(),
358        Err(e) => {
359            if e.kind() == opendal::ErrorKind::NotFound {
360                return None;
361            }
362            eprintln!("{cli_label}: remote cache read warning: {e}");
363            return None;
364        }
365    };
366
367    match unpack_and_store(cache, cache_key, &data) {
368        Ok(entry) => {
369            eprintln!("{cli_label}: fetched from remote cache: {cache_key}");
370            Some(entry)
371        }
372        Err(e) => {
373            eprintln!("{cli_label}: remote cache unpack warning ({cache_key}): {e}");
374            None
375        }
376    }
377}
378
379/// Store a cache entry in the remote GHA cache.
380///
381/// Packs the entry directory as a tar blob and uploads it. Failures
382/// are non-fatal (logged as warnings).
383///
384/// `cli_label` prefixes diagnostic output (e.g. `"ktstr"` or
385/// `"cargo ktstr"`).
386pub fn remote_store(entry: &CacheEntry, cli_label: &str) {
387    // CacheEntry guarantees metadata presence; no need to branch.
388    let meta = &entry.metadata;
389
390    let op = match create_operator() {
391        Ok(op) => op,
392        Err(e) => {
393            eprintln!("{cli_label}: remote cache warning: {e}");
394            return;
395        }
396    };
397
398    let data = match pack_entry(&entry.path, meta) {
399        Ok(d) => d,
400        Err(e) => {
401            eprintln!("{cli_label}: remote cache pack warning: {e}");
402            return;
403        }
404    };
405
406    match RUNTIME.block_on(op.write(&entry.key, data)) {
407        Ok(_) => {
408            eprintln!("{cli_label}: stored to remote cache: {}", entry.key);
409        }
410        Err(e) => {
411            eprintln!("{cli_label}: remote cache write warning: {e}");
412        }
413    }
414}
415
416#[cfg(test)]
417mod tests {
418    use super::*;
419    use crate::cache::{CacheArtifacts, CacheDir, KernelMetadata, KernelSource};
420
421    fn test_metadata() -> KernelMetadata {
422        KernelMetadata::new(
423            KernelSource::Tarball,
424            "x86_64",
425            "bzImage",
426            "2026-04-12T10:00:00Z",
427        )
428        .with_version("6.14.2")
429    }
430
431    fn create_fake_image(dir: &std::path::Path) -> std::path::PathBuf {
432        let image = dir.join("bzImage");
433        std::fs::write(&image, b"fake kernel image data for testing").unwrap();
434        image
435    }
436
437    // -- is_enabled --
438
439    #[test]
440    fn remote_cache_disabled_by_default() {
441        let _g1 = EnvVarGuard::remove(crate::KTSTR_GHA_CACHE_ENV);
442        let _g2 = EnvVarGuard::remove("ACTIONS_CACHE_URL");
443        assert!(!is_enabled());
444    }
445
446    #[test]
447    fn remote_cache_disabled_without_cache_url() {
448        let _g1 = EnvVarGuard::set(crate::KTSTR_GHA_CACHE_ENV, "1");
449        let _g2 = EnvVarGuard::remove("ACTIONS_CACHE_URL");
450        assert!(!is_enabled());
451    }
452
453    #[test]
454    fn remote_cache_disabled_without_gha_flag() {
455        let _g1 = EnvVarGuard::remove(crate::KTSTR_GHA_CACHE_ENV);
456        let _g2 = EnvVarGuard::set("ACTIONS_CACHE_URL", "https://example.com");
457        assert!(!is_enabled());
458    }
459
460    #[test]
461    fn remote_cache_disabled_with_empty_url() {
462        let _g1 = EnvVarGuard::set(crate::KTSTR_GHA_CACHE_ENV, "1");
463        let _g2 = EnvVarGuard::set("ACTIONS_CACHE_URL", "");
464        assert!(!is_enabled());
465    }
466
467    #[test]
468    fn remote_cache_disabled_with_wrong_flag() {
469        let _g1 = EnvVarGuard::set(crate::KTSTR_GHA_CACHE_ENV, "0");
470        let _g2 = EnvVarGuard::set("ACTIONS_CACHE_URL", "https://example.com");
471        assert!(!is_enabled());
472    }
473
474    #[test]
475    fn remote_cache_enabled_when_both_set() {
476        let _g1 = EnvVarGuard::set(crate::KTSTR_GHA_CACHE_ENV, "1");
477        let _g2 = EnvVarGuard::set("ACTIONS_CACHE_URL", "https://example.com");
478        assert!(is_enabled());
479    }
480
481    // -- pack/unpack roundtrip --
482
483    #[test]
484    fn remote_cache_pack_unpack_roundtrip() {
485        let tmp = tempfile::TempDir::new().unwrap();
486        let cache = CacheDir::with_root(tmp.path().join("cache"));
487
488        let src = tempfile::TempDir::new().unwrap();
489        let image = create_fake_image(src.path());
490        let meta = test_metadata();
491        let entry = cache
492            .store("test-key", &CacheArtifacts::new(&image), &meta)
493            .unwrap();
494
495        let packed = pack_entry(&entry.path, &entry.metadata).unwrap();
496        assert!(!packed.is_empty());
497
498        let tmp2 = tempfile::TempDir::new().unwrap();
499        let cache2 = CacheDir::with_root(tmp2.path().join("cache"));
500        let restored = unpack_and_store(&cache2, "test-key", &packed).unwrap();
501
502        assert_eq!(restored.key, "test-key");
503        let restored_meta = &restored.metadata;
504        assert_eq!(restored_meta.version.as_deref(), Some("6.14.2"));
505        assert_eq!(restored_meta.arch, "x86_64");
506        assert_eq!(restored_meta.image_name, "bzImage");
507        assert_eq!(restored_meta.source, KernelSource::Tarball);
508
509        let restored_image = restored.path.join("bzImage");
510        let original_content = std::fs::read(&image).unwrap();
511        let restored_content = std::fs::read(&restored_image).unwrap();
512        assert_eq!(original_content, restored_content);
513    }
514
515    #[test]
516    fn remote_cache_pack_entry_excludes_config_sidecar() {
517        // .config is not cached any more (IKCONFIG covers CONFIG_HZ
518        // for ktstr-built kernels). Even if an entry directory has a
519        // leftover .config on disk (e.g. from an older cache version),
520        // pack_entry must not include it — the tar carries only
521        // metadata.json + image + optional vmlinux.
522        let tmp = tempfile::TempDir::new().unwrap();
523        let cache = CacheDir::with_root(tmp.path().join("cache"));
524        let src = tempfile::TempDir::new().unwrap();
525        let image = create_fake_image(src.path());
526        let meta = test_metadata();
527        let entry = cache
528            .store("legacy-config", &CacheArtifacts::new(&image), &meta)
529            .unwrap();
530        // Simulate a leftover .config from an older cache version.
531        std::fs::write(entry.path.join(".config"), b"CONFIG_HZ=1000\n").unwrap();
532
533        let packed = pack_entry(&entry.path, &entry.metadata).unwrap();
534        let tar_bytes = decompress_payload(&packed).unwrap();
535        let mut archive = tar::Archive::new(tar_bytes.as_slice());
536        let paths: Vec<String> = archive
537            .entries()
538            .unwrap()
539            .map(|e| e.unwrap().path().unwrap().to_string_lossy().into_owned())
540            .collect();
541        assert!(
542            !paths.iter().any(|p| p == ".config"),
543            "pack_entry should not include .config, got {paths:?}"
544        );
545    }
546
547    #[test]
548    fn remote_cache_pack_produces_valid_tar() {
549        let tmp = tempfile::TempDir::new().unwrap();
550        let cache = CacheDir::with_root(tmp.path().join("cache"));
551
552        let src = tempfile::TempDir::new().unwrap();
553        let image = create_fake_image(src.path());
554        let meta = test_metadata();
555        let entry = cache
556            .store("valid-tar", &CacheArtifacts::new(&image), &meta)
557            .unwrap();
558
559        let packed = pack_entry(&entry.path, &entry.metadata).unwrap();
560
561        // pack_entry returns zstd-compressed data; decompress before
562        // validating tar contents.
563        let tar_bytes = decompress_payload(&packed).unwrap();
564        let mut archive = tar::Archive::new(tar_bytes.as_slice());
565        let entries: Vec<_> = archive.entries().unwrap().collect();
566        assert_eq!(entries.len(), 2);
567    }
568
569    #[test]
570    fn remote_cache_pack_is_zstd_compressed() {
571        let tmp = tempfile::TempDir::new().unwrap();
572        let cache = CacheDir::with_root(tmp.path().join("cache"));
573
574        let src = tempfile::TempDir::new().unwrap();
575        let image = create_fake_image(src.path());
576        let meta = test_metadata();
577        let entry = cache
578            .store("zstd-key", &CacheArtifacts::new(&image), &meta)
579            .unwrap();
580
581        let packed = pack_entry(&entry.path, &entry.metadata).unwrap();
582        assert!(
583            packed.len() >= 4 && packed[..4] == ZSTD_MAGIC,
584            "packed data should start with zstd magic"
585        );
586    }
587
588    /// Rejection test for a raw (non-zstd) tar blob — the
589    /// on-the-wire format is zstd-only, so a payload without the
590    /// magic is either corruption or hostile content and
591    /// `unpack_and_store` must surface a "zstd magic" diagnostic
592    /// rather than try to parse the bytes as tar. Replaces the
593    /// previous `remote_cache_unpack_handles_raw_tar` backward-compat
594    /// test (the raw-tar fallback was deleted as part of the
595    /// pre-1.0 cleanup).
596    #[test]
597    fn remote_cache_unpack_rejects_raw_tar() {
598        let tmp = tempfile::TempDir::new().unwrap();
599        let cache = CacheDir::with_root(tmp.path().join("cache"));
600
601        let mut archive = tar::Builder::new(Vec::new());
602        let meta = test_metadata();
603        let meta_json = serde_json::to_string_pretty(&meta).unwrap();
604        let meta_bytes = meta_json.as_bytes();
605        crate::tar_util::pack_tar_entry(
606            &mut archive,
607            "metadata.json",
608            0o644,
609            meta_bytes.len() as u64,
610            meta_bytes,
611        )
612        .unwrap();
613        let raw_tar = archive.into_inner().unwrap();
614
615        // Raw tar should not start with zstd magic.
616        assert!(raw_tar.len() < 4 || raw_tar[..4] != ZSTD_MAGIC);
617
618        let err = unpack_and_store(&cache, "raw-tar-key", &raw_tar).unwrap_err();
619        assert!(
620            err.contains("zstd magic"),
621            "non-zstd payload must be rejected with a `zstd magic` \
622             diagnostic from the precondition check, got: {err}",
623        );
624    }
625
626    /// Short-input boundary: payloads of 0..=3 bytes cannot carry
627    /// the 4-byte zstd magic sentinel, so the precondition check in
628    /// `decompress_payload` must reject all of them with the same
629    /// "zstd magic" diagnostic. Pins that the `data.len() < 4` half
630    /// of the guard fires independently of the magic-bytes
631    /// comparison, so a truncated download is rejected instead of
632    /// triggering an out-of-bounds slice or feeding an ill-formed
633    /// header to the zstd decoder.
634    #[test]
635    fn remote_cache_decompress_payload_rejects_short_inputs() {
636        for len in 0..=3 {
637            let bytes = vec![0u8; len];
638            let err = super::decompress_payload(&bytes).unwrap_err();
639            assert!(
640                err.contains("zstd magic"),
641                "{len}-byte payload must be rejected by the magic-number \
642                 precondition, got: {err}",
643            );
644        }
645    }
646
647    #[test]
648    fn remote_cache_unpack_rejects_missing_metadata() {
649        let tmp = tempfile::TempDir::new().unwrap();
650        let cache = CacheDir::with_root(tmp.path().join("cache"));
651
652        let mut archive = tar::Builder::new(Vec::new());
653        let data = b"kernel image";
654        crate::tar_util::pack_tar_entry(
655            &mut archive,
656            "bzImage",
657            0o644,
658            data.len() as u64,
659            data.as_slice(),
660        )
661        .unwrap();
662        let raw_tar = archive.into_inner().unwrap();
663        let packed = zstd::encode_all(raw_tar.as_slice(), 3).unwrap();
664
665        let result = unpack_and_store(&cache, "no-meta", &packed);
666        assert!(result.is_err());
667        assert!(
668            result.unwrap_err().contains("missing metadata"),
669            "expected metadata error"
670        );
671    }
672
673    #[test]
674    fn remote_cache_unpack_rejects_missing_image() {
675        let tmp = tempfile::TempDir::new().unwrap();
676        let cache = CacheDir::with_root(tmp.path().join("cache"));
677
678        let mut archive = tar::Builder::new(Vec::new());
679        let meta = test_metadata();
680        let meta_json = serde_json::to_string_pretty(&meta).unwrap();
681        let meta_bytes = meta_json.as_bytes();
682        crate::tar_util::pack_tar_entry(
683            &mut archive,
684            "metadata.json",
685            0o644,
686            meta_bytes.len() as u64,
687            meta_bytes,
688        )
689        .unwrap();
690        let raw_tar = archive.into_inner().unwrap();
691        let packed = zstd::encode_all(raw_tar.as_slice(), 3).unwrap();
692
693        let result = unpack_and_store(&cache, "no-image", &packed);
694        assert!(result.is_err());
695        assert!(
696            result.unwrap_err().contains("missing kernel image"),
697            "expected image error"
698        );
699    }
700
701    // -- remote_lookup skipped when disabled --
702
703    #[test]
704    fn remote_cache_remote_lookup_returns_none_when_disabled() {
705        let _g1 = EnvVarGuard::remove(crate::KTSTR_GHA_CACHE_ENV);
706        let _g2 = EnvVarGuard::remove("ACTIONS_CACHE_URL");
707        assert!(!is_enabled());
708    }
709
710    // -- remote_store with disabled remote --
711
712    #[test]
713    fn remote_cache_remote_store_when_disabled() {
714        let _g1 = EnvVarGuard::remove(crate::KTSTR_GHA_CACHE_ENV);
715        let _g2 = EnvVarGuard::remove("ACTIONS_CACHE_URL");
716
717        let tmp = tempfile::TempDir::new().unwrap();
718        let cache = CacheDir::with_root(tmp.path().join("cache"));
719        let src = tempfile::TempDir::new().unwrap();
720        let image = create_fake_image(src.path());
721        let meta = test_metadata();
722        let entry = cache
723            .store("test-entry", &CacheArtifacts::new(&image), &meta)
724            .unwrap();
725
726        let packed = pack_entry(&entry.path, &entry.metadata);
727        assert!(packed.is_ok());
728    }
729
730    // -- pack with various metadata --
731
732    #[test]
733    fn remote_cache_source_tree_path_sanitized_on_roundtrip() {
734        let tmp = tempfile::TempDir::new().unwrap();
735        let cache = CacheDir::with_root(tmp.path().join("cache"));
736
737        let src = tempfile::TempDir::new().unwrap();
738        let image = create_fake_image(src.path());
739        let meta = KernelMetadata::new(
740            KernelSource::Local {
741                source_tree_path: Some(std::path::PathBuf::from("/tmp/linux-src")),
742                git_hash: Some("deadbee".to_string()),
743            },
744            "x86_64",
745            "bzImage",
746            "2026-04-12T10:00:00Z",
747        );
748        assert!(matches!(
749            meta.source,
750            KernelSource::Local {
751                source_tree_path: Some(_),
752                git_hash: Some(_),
753            }
754        ));
755
756        let entry = cache
757            .store("stp-key", &CacheArtifacts::new(&image), &meta)
758            .unwrap();
759
760        let packed = pack_entry(&entry.path, &entry.metadata).unwrap();
761
762        let tmp2 = tempfile::TempDir::new().unwrap();
763        let cache2 = CacheDir::with_root(tmp2.path().join("cache"));
764        let restored = unpack_and_store(&cache2, "stp-key", &packed).unwrap();
765
766        let restored_meta = &restored.metadata;
767        assert!(
768            matches!(
769                &restored_meta.source,
770                KernelSource::Local {
771                    source_tree_path: None,
772                    git_hash: Some(h),
773                } if h == "deadbee"
774            ),
775            "source_tree_path must be stripped during pack, git_hash must survive"
776        );
777    }
778
779    #[test]
780    fn remote_cache_pack_with_git_metadata() {
781        let tmp = tempfile::TempDir::new().unwrap();
782        let cache = CacheDir::with_root(tmp.path().join("cache"));
783
784        let src = tempfile::TempDir::new().unwrap();
785        let image = create_fake_image(src.path());
786        let meta = KernelMetadata::new(
787            KernelSource::git("a1b2c3d", "v6.15-rc3"),
788            "x86_64",
789            "bzImage",
790            "2026-04-12T12:00:00Z",
791        );
792
793        let entry = cache
794            .store("git-key", &CacheArtifacts::new(&image), &meta)
795            .unwrap();
796        let packed = pack_entry(&entry.path, &entry.metadata).unwrap();
797
798        let tmp2 = tempfile::TempDir::new().unwrap();
799        let cache2 = CacheDir::with_root(tmp2.path().join("cache"));
800        let restored = unpack_and_store(&cache2, "git-key", &packed).unwrap();
801
802        let rmeta = &restored.metadata;
803        assert!(matches!(
804            rmeta.source,
805            KernelSource::Git {
806                git_hash: Some(ref h),
807                git_ref: Some(ref r),
808            }
809            if h == "a1b2c3d" && r == "v6.15-rc3"
810        ));
811    }
812
813    /// Decompression-bomb guard: a zstd payload that decompresses
814    /// past the configured cap surfaces an error tagged with
815    /// "decompression-bomb guard" — `decompress_payload` must not
816    /// allocate past the ceiling. Test uses a small synthetic
817    /// payload (8 KiB of zeros, which compresses to a tiny blob
818    /// but decompresses to 8192 bytes) routed through the private
819    /// `decompress_capped` helper against a 1024-byte cap so the
820    /// test runs in microseconds rather than allocating a
821    /// production-sized buffer.
822    #[test]
823    fn remote_cache_decompress_capped_rejects_decompression_bomb() {
824        let payload = vec![0u8; 8192];
825        let compressed = zstd::encode_all(payload.as_slice(), 3).unwrap();
826        let cap: u64 = 1024;
827        let err = super::decompress_capped(&compressed, cap).unwrap_err();
828        assert!(
829            err.contains("decompression-bomb guard"),
830            "expected decompression-bomb guard error, got: {err}",
831        );
832    }
833
834    /// Boundary case: a payload whose decompressed length is
835    /// exactly `cap` bytes is accepted (the cap is inclusive).
836    /// Pins the `>` (not `>=`) discriminator at the cap boundary
837    /// so a future refactor that flips the comparison surfaces
838    /// here rather than turning a legal cache entry into a
839    /// false-positive bomb rejection.
840    #[test]
841    fn remote_cache_decompress_capped_accepts_payload_at_cap_boundary() {
842        let payload = b"hello world".to_vec();
843        let compressed = zstd::encode_all(payload.as_slice(), 3).unwrap();
844        let out = super::decompress_capped(&compressed, payload.len() as u64).unwrap();
845        assert_eq!(
846            out, payload,
847            "payload exactly at the cap must round-trip — \
848             cap is inclusive (`>` not `>=`)",
849        );
850    }
851
852    /// Pin the shape of [`super::REMOTE_CACHE_NAMESPACE`]: non-empty,
853    /// keeps the `ktstr-v` prefix that namespaces ktstr entries
854    /// against other tools sharing the GHA cache, and carries a
855    /// numeric version suffix that bumps invalidate stale entries.
856    /// Without this pin, a refactor that dropped the prefix would
857    /// silently start sharing the namespace with another tool, and
858    /// a bump that landed `ktstr-v2a` would still pass any
859    /// substring-only check while breaking the suffix-as-version
860    /// contract.
861    #[test]
862    fn remote_cache_namespace_has_version_suffix() {
863        let ns = super::REMOTE_CACHE_NAMESPACE;
864        assert!(!ns.is_empty(), "namespace must not be empty");
865        assert!(
866            ns.starts_with("ktstr-v"),
867            "namespace must keep `ktstr-v` prefix; got: {ns}",
868        );
869        let suffix = ns.strip_prefix("ktstr-v").unwrap();
870        assert!(
871            suffix.parse::<u32>().is_ok(),
872            "version suffix must be numeric; got: {suffix:?}",
873        );
874    }
875
876    use crate::test_support::test_helpers::EnvVarGuard;
877}