ktstr/fetch.rs
1//! Kernel source acquisition: tarball download, GitHub codeload
2//! snapshot, git clone, local tree.
3//!
4//! The acquisition entry points each return an [`AcquiredSource`]
5//! carrying the source directory, cache key, and metadata the caller
6//! needs to proceed to configuration and build: [`download_tarball`]
7//! (kernel.org stable/RC), `download_github_archive` (a GitHub codeload
8//! commit snapshot), `git_clone_kinded` (a kind-directed shallow clone
9//! that dispatches to `git_clone_tag` / [`git_clone`]), and
10//! [`local_source`] (an on-disk tree).
11
12use std::io::Read;
13use std::num::NonZeroU32;
14use std::path::{Path, PathBuf};
15use std::sync::OnceLock;
16use std::time::{Duration, Instant};
17
18use anyhow::{Context, Result, anyhow};
19use reqwest::blocking::Client;
20use sha2::{Digest, Sha256};
21
22/// Process-wide [`reqwest::blocking::Client`] lazily initialized on
23/// first access via [`shared_client`]. Keeping a single `Client`
24/// instance across the fetch-family reuses its TCP connection pool
25/// and TLS session cache across repeated calls to the same host
26/// within a CLI run. Cross-host fetches in the same run still
27/// re-handshake because reqwest's connection pool keys on host.
28static SHARED_CLIENT: OnceLock<Client> = OnceLock::new();
29
/// Upper bound on the connect phase (TCP + TLS handshake) for the
/// client handed out by [`shared_client`].
///
/// This covers the dead-route case — a CDN edge that accepts the SYN
/// but then stalls the handshake, or a route that blackholes outright.
/// It puts no ceiling on how long the response body may stream once
/// the connection is up.
///
/// A total request `.timeout()` is deliberately NOT configured on the
/// client. The same client serves short requests (releases.json,
/// sha256sums.asc) and large tarball streams
/// ([`download_stable_tarball`], [`download_rc_tarball`]), and a
/// 130–180 MiB compressed payload over a slow uplink can take minutes
/// of wall-clock time to arrive. A per-request cap would abort those
/// legitimate downloads; bounding only the connect phase keeps the
/// dead-route guarantee while letting the body stream for as long as
/// the upstream keeps making forward progress.
const SHARED_CLIENT_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
48
49/// Return the process-wide shared [`reqwest::blocking::Client`]. First
50/// call constructs it via `Client::builder()` with
51/// `SHARED_CLIENT_CONNECT_TIMEOUT` applied; every subsequent call
52/// returns a reference to the same instance. This helper is for
53/// top-level CLI entries that want the default client.
54///
55/// Tests that need to verify a network round-trip (rather than a
56/// cache hit) must NOT pass `shared_client()` to a cache-routed
57/// helper (`cached_releases`, `cached_releases_with`,
58/// [`fetch_latest_stable_version`], [`fetch_version_for_prefix`]) —
59/// `RELEASES_CACHE` may already be populated by a peer test, in
60/// which case the helper returns cached data and the network is
61/// never touched. Construct a local `Client` and pass it to the
62/// cache-routed helper to skip the cache; the pointer-equality gate
63/// in `cached_releases_with` routes a non-singleton client to a
64/// direct `fetch_releases` call against `RELEASES_URL` (the
65/// production URL — the bypass skips the cache, NOT the URL). For
66/// full URL injection (e.g. localhost mock server testing), call
67/// either `fetch_releases` directly with the mock URL — see
68/// `fetch_releases_against_localhost_mock_returns_parsed` — or use
69/// the cache-aware seam `cached_releases_with_url`, which routes
70/// the non-singleton bypass branch through the supplied URL while
71/// preserving the singleton/cache routing identical to
72/// `cached_releases_with`.
73///
74/// # Panics
75///
76/// Panics on the first call if `Client::builder().build()` fails to
77/// construct a client. Documented failure modes include TLS backend
78/// initialization (e.g. rustls/native-tls subsystem unreachable) and
79/// system-resolver config load failure; both are treated as setup
80/// bugs rather than runtime errors. The
81/// `expect` here, rather than propagating the error, mirrors the
82/// inherited behavior of `reqwest::blocking::Client::new()` (which
83/// is itself an infallible wrapper around `builder().build().expect`).
84pub fn shared_client() -> &'static Client {
85 SHARED_CLIENT.get_or_init(|| {
86 Client::builder()
87 .connect_timeout(SHARED_CLIENT_CONNECT_TIMEOUT)
88 .build()
89 .expect("build shared reqwest client")
90 })
91}
92
93/// Process-wide cache of the parsed `releases.json` payload.
94/// Populated by [`cached_releases_with`] on its first successful
95/// singleton-path fetch; every subsequent singleton call returns a
96/// clone of the cached vector without re-issuing the HTTP request.
97/// Lifetime matches the process — `releases.json` does not change
98/// underneath a single CLI invocation, so a per-process cache
99/// cannot serve stale data in any way the user would notice.
100///
101/// Failures are NOT cached: a transient kernel.org outage that
102/// errors the first call must allow a later caller to retry, since
103/// the underlying network condition may have cleared. Storing
104/// `Vec<Release>` rather than `Result<Vec<Release>>` enforces this
105/// at the type level — there's no way to populate the cache with
106/// a failure.
107///
108/// Companion to [`SHARED_CLIENT`]: both amortize per-invocation
109/// network cost across the resolve pipeline. Without this cache,
110/// `cargo ktstr test --kernel 6.10..6.12 --kernel 6.14..6.16`
111/// fetches `releases.json` twice — once per Range spec — under
112/// the rayon par_iter that drives `resolve_kernel_set`. With
113/// the cache the first Range to reach `expand_kernel_range`
114/// populates the slot; the second observes the populated slot
115/// and skips the network entirely.
116static RELEASES_CACHE: OnceLock<Vec<Release>> = OnceLock::new();
117
/// Process-wide cache of the gregkh stable-mirror release tags: the
/// `X.Y.Z` version strings parsed out of the mirror's
/// `refs/tags/vX.Y.Z` advertisement.
///
/// Companion to [`RELEASES_CACHE`]. `--include-eol` may expand several
/// `A..B` ranges under one `resolve_kernel_set`, and without this slot
/// each of them would ls-remote the mirror again. The slot is filled on
/// the first successful enumeration; a failed ls-remote leaves it empty
/// so the next caller retries. As with `RELEASES_CACHE`, the payload is
/// a plain `Vec`, not a `Result`.
static STABLE_TAGS_CACHE: OnceLock<Vec<String>> = OnceLock::new();
126
127/// Fetch `releases.json` via the process-wide [`shared_client`],
128/// routing through [`RELEASES_CACHE`].
129///
130/// Thin wrapper for callers that don't already thread a `&Client`
131/// — top-level CLI entries like [`crate::cli::expand_kernel_range`]
132/// (under the rayon-driven `cargo ktstr` resolve pipeline) and
133/// `crate::cli::fetch_active_prefixes` (the EOL-annotation pass).
134/// Caching, race semantics, and fault-injection routing are all
135/// documented on [`cached_releases_with`].
136pub(crate) fn cached_releases() -> Result<Vec<Release>> {
137 cached_releases_with(shared_client())
138}
139
140/// Pointer-equality against the [`OnceLock`]-backed
141/// [`shared_client`] singleton is the correct predicate because
142/// `shared_client()` returns a stable `&'static Client` address.
143/// The [`cached_releases_with`] gate uses this predicate to
144/// decide whether to consult [`RELEASES_CACHE`]: the singleton
145/// hits the cache, every other (test-constructed) `Client`
146/// bypasses it and exercises the underlying [`fetch_releases`]
147/// path.
148///
149/// Caveat: `shared_client().clone()` produces a distinct
150/// `Client` at a different address even though it shares the
151/// singleton's connection pool via the inner `Arc`, so the
152/// clone bypasses the cache. Always pass `shared_client()`
153/// directly — never a clone — when cache routing is desired.
154///
155/// Side-effect-free when [`SHARED_CLIENT`] is uninitialized:
156/// no client can equal a not-yet-allocated singleton, so we
157/// return `false` without triggering `get_or_init` — tests
158/// that pass a local `Client` before any production code path
159/// has touched the singleton skip the construction entirely.
160fn is_shared_client(client: &Client) -> bool {
161 match SHARED_CLIENT.get() {
162 Some(singleton) => std::ptr::eq(client, singleton),
163 None => false,
164 }
165}
166
167/// Unified cache-aware entry point for `releases.json`. Routes
168/// the process-wide [`shared_client`] singleton through
169/// [`RELEASES_CACHE`]; any other (test-constructed) `Client`
170/// bypasses [`RELEASES_CACHE`] and calls [`fetch_releases`] with
171/// [`RELEASES_URL`] directly — the cache is skipped but the
172/// production URL is used.
173///
174/// Used by every in-file caller that already threads a `&Client`
175/// — [`fetch_latest_stable_version`], [`fetch_version_for_prefix`],
176/// [`latest_in_series`] — so production callers reuse
177/// [`RELEASES_CACHE`] and tests still get cache-bypass via the
178/// pointer-equality gate. [`cached_releases`] is the no-`Client`
179/// wrapper for top-level CLI entries.
180///
181/// Tests that need URL injection on the bypass branch (e.g.
182/// localhost mock server testing) call
183/// [`cached_releases_with_url`] directly with their mock URL —
184/// the URL-injectable form preserves identical routing
185/// semantics. This wrapper is the production entry point and
186/// pins the URL to [`RELEASES_URL`]; production code MUST go
187/// through this wrapper. A singleton call with a non-RELEASES_URL
188/// would otherwise populate [`RELEASES_CACHE`] with
189/// non-production data and corrupt every later production
190/// call — the singleton-path branch in
191/// [`cached_releases_with_url`] guards against this in both
192/// dev (`debug_assert!`) and release builds (fall back to
193/// bypass), but routing every production call through this
194/// wrapper makes the misuse impossible by construction.
195/// Caching, race semantics, and the bypass-vs-cache routing
196/// are fully documented on [`cached_releases_with_url`].
197fn cached_releases_with(client: &Client) -> Result<Vec<Release>> {
198 cached_releases_with_url(client, RELEASES_URL)
199}
200
201/// URL-injectable form of [`cached_releases_with`]. Production
202/// always reaches this through the [`cached_releases_with`]
203/// wrapper, which pins `url` to [`RELEASES_URL`]; the explicit
204/// `url` parameter exists so the bypass-branch test can route
205/// the non-singleton path through a localhost
206/// [`std::net::TcpListener`]-backed mock instead of hitting real
207/// kernel.org. Without this seam, the bypass test would either
208/// (a) require a real network round-trip on every run, or
209/// (b) accept a 5s timeout penalty on offline hosts to surface
210/// `Err` as a bypass-confirmation signal — both costs the seam
211/// eliminates.
212///
213/// Cache contract is identical to [`cached_releases_with`]:
214/// non-singleton clients bypass [`RELEASES_CACHE`] and call
215/// [`fetch_releases`] with `url`; the singleton routes through
216/// the cache only when `url == RELEASES_URL` (consulting via
217/// `OnceLock::get`, populating via `OnceLock::set` on miss). A
218/// singleton call with a non-RELEASES_URL trips the
219/// `debug_assert!` in dev builds and falls back to the bypass
220/// behavior in release builds — fetches directly via `url`,
221/// returns the result, never touches [`RELEASES_CACHE`]. The
222/// cache only ever stores data fetched from the singleton +
223/// RELEASES_URL combination, so a test that injects a mock URL
224/// on either branch cannot pollute the production cache.
225///
226/// Failures are propagated without populating [`RELEASES_CACHE`],
227/// so a transient kernel.org outage on the first call lets the
228/// next caller retry. Storing `Vec<Release>` (not
229/// `Result<Vec<Release>>`) enforces this at the type level.
230///
231/// Concurrent population on the singleton path is safe via the
232/// `OnceLock::set` race: the loser's `set` returns `Err(clone)`
233/// (the cloned vector that was passed in is moved back), the
234/// returned `Err` is discarded via `let _ = …`, and the loser
235/// returns its own original `fresh` vector. Both winner and
236/// loser return content-equivalent data since both fetched the
237/// same `releases.json`. Worst case under concurrent first
238/// calls: both callers issue the network round-trip, only one
239/// populates [`RELEASES_CACHE`]; every later call — from any
240/// thread — observes the populated slot via the `get` fast-path
241/// and skips the network.
242fn cached_releases_with_url(client: &Client, url: &str) -> Result<Vec<Release>> {
243 // Non-singleton clients bypass the cache (test fault injection).
244 if !is_shared_client(client) {
245 return fetch_releases(client, url);
246 }
247 // Cache-poison guard: the singleton path populates
248 // RELEASES_CACHE on miss. A test author that mistakenly
249 // passes a non-production URL with shared_client() would
250 // fill the cache with non-production data and corrupt every
251 // later production call (which reaches the cache via
252 // get-fast-path). Catch the misuse at debug-build time —
253 // production callers always thread RELEASES_URL through the
254 // `cached_releases_with` wrapper, so the assertion is a
255 // no-op for them; only a future test author wiring this
256 // function up with shared_client() and a mock URL would trip
257 // it.
258 debug_assert!(
259 url == RELEASES_URL,
260 "cached_releases_with_url: shared_client() must use RELEASES_URL \
261 to avoid RELEASES_CACHE pollution — got url={url:?}, expected \
262 RELEASES_URL ({RELEASES_URL:?}). Tests that need URL injection \
263 must pass a non-singleton Client (which takes the bypass branch \
264 above and never touches the cache).",
265 );
266 // Release-build guard: `debug_assert!` is stripped in
267 // optimized builds, so a non-RELEASES_URL on the singleton
268 // path would otherwise reach the populate-on-miss path below
269 // and persistently poison RELEASES_CACHE for every later
270 // production caller. Mirror the bypass-branch behavior
271 // (fetch directly, do not touch the cache) so the misuse
272 // degrades to a slow per-call fetch instead of a permanently
273 // wrong cache. The debug_assert above still fires loudly in
274 // dev builds; this branch only catches the misuse that
275 // slipped through to release.
276 if url != RELEASES_URL {
277 return fetch_releases(client, url);
278 }
279 if let Some(cached) = RELEASES_CACHE.get() {
280 return Ok(cached.clone());
281 }
282 let fresh = fetch_releases(client, url)?;
283 // Race-loss: `set` returns `Err(clone)` carrying back the
284 // clone we passed in; we discard it and return the original
285 // `fresh` below. See the rustdoc above for full semantics.
286 let _ = RELEASES_CACHE.set(fresh.clone());
287 Ok(fresh)
288}
289
290/// Downloaded/cloned kernel source ready for building.
291#[non_exhaustive]
292pub struct AcquiredSource {
293 /// Path to the kernel source directory.
294 pub source_dir: PathBuf,
295 /// Cache key for this source (e.g. "6.14.2-tarball-x86_64-kc{kconfig_hash}").
296 pub cache_key: String,
297 /// Version string if known (e.g. "6.14.2", "6.15-rc3").
298 pub version: Option<String>,
299 /// How the source was acquired, with per-variant payload
300 /// (git hash/ref for `Git`, source tree path and git hash for
301 /// `Local`).
302 pub kernel_source: crate::cache::KernelSource,
303 /// Whether the source is a temporary directory that should be
304 /// cleaned up after building.
305 pub is_temp: bool,
306 /// For local sources: whether the working tree is dirty.
307 /// Dirty trees must not be cached.
308 pub is_dirty: bool,
309 /// For local sources: whether the source is an actual git
310 /// repository. `true` when `gix::discover` succeeded and the
311 /// crate could compute index + worktree dirty state; `false`
312 /// for non-git source trees (tarball-extracted, rsync'd,
313 /// hand-assembled) where dirty detection is impossible and
314 /// the source is always cache-skipped pessimistically. Lets
315 /// the cache-skip hint branch on whether `commit` / `stash`
316 /// are actionable remediations (they aren't for non-git
317 /// sources).
318 ///
319 /// For non-local sources (tarball, git clone) the field is
320 /// set to `true` by convention — these paths are always
321 /// `is_dirty = false`, so the cache-skip branch that reads
322 /// `is_git` is never reached and the value is inert. Pinning
323 /// to `true` (rather than leaving the field meaningless)
324 /// keeps the invariant "is_git is meaningful only when
325 /// is_dirty is true, but always set" so a future code path
326 /// that reaches `is_git` outside the cache-skip context does
327 /// not trip on an `is_git = false` under a known-good source.
328 pub is_git: bool,
329}
330
/// Return `(architecture string, boot image name)` for the target this
/// crate was compiled for: `("x86_64", "bzImage")` on x86_64 and
/// `("aarch64", "Image")` on aarch64.
pub fn arch_info() -> (&'static str, &'static str) {
    // Exactly one of these definitions survives cfg-stripping on a
    // supported target.
    #[cfg(target_arch = "x86_64")]
    const ARCH_AND_IMAGE: (&str, &str) = ("x86_64", "bzImage");
    #[cfg(target_arch = "aarch64")]
    const ARCH_AND_IMAGE: (&str, &str) = ("aarch64", "Image");

    ARCH_AND_IMAGE
}
342
343/// Parse a version string into its major version for URL construction.
344///
345/// "6.14.2" -> 6, "6.15-rc3" -> 6.
346fn major_version(version: &str) -> Result<u32> {
347 let major_str = version
348 .split('.')
349 .next()
350 .ok_or_else(|| anyhow!("invalid version: {version}"))?;
351 major_str
352 .parse::<u32>()
353 .with_context(|| format!("invalid major version in {version}"))
354}
355
/// Report whether `version` names a release candidate, i.e. contains
/// the `-rc` marker.
///
/// RC releases are fetched with a different URL pattern and are
/// gzip-compressed (stable releases use xz).
fn is_rc(version: &str) -> bool {
    const RC_MARKER: &str = "-rc";
    version.contains(RC_MARKER)
}
363
/// A single (`moniker`, `version`) row from kernel.org's
/// `releases.json`.
///
/// This is a named struct rather than a bare `(String, String)` tuple
/// so each call site reads its field by name (`r.moniker`,
/// `r.version`) instead of destructuring positionally. The two strings
/// are trivially swappable at a tuple-destructure site, and a silent
/// swap would mis-drive `is_skippable_release_moniker` while the
/// now-misnamed "moniker" string flows into `version_prefix`
/// downstream. Named fields remove that class of bug at the
/// type-checker level and surface in IDE hints at every iteration
/// site.
///
/// Both fields are owned `String`s (not `&str`): the values are parsed
/// out of a `reqwest::Response` body whose lifetime ends when
/// `fetch_releases` returns, and downstream callers iterate the vector
/// long after such a borrow would dangle.
#[derive(Clone, Debug)]
pub(crate) struct Release {
    /// The `moniker` field of releases.json — stable / longterm /
    /// mainline / linux-next / etc. Read by
    /// [`is_skippable_release_moniker`] and by the stable/longterm
    /// filter in [`fetch_latest_stable_version`].
    pub moniker: String,
    /// The `version` field of releases.json — e.g. `"6.14.2"`,
    /// `"6.15-rc3"`, `"6.16-rc2-next-20260420"`. Read by
    /// [`version_tuple`], [`patch_level`], and `cli::version_prefix`.
    pub version: String,
}
393
/// Decide whether the version-resolution pipeline should skip releases
/// carrying this releases.json moniker.
///
/// `linux-next` is a rolling integration branch whose version strings
/// carry a date suffix instead of a stable tag, so it does not fit the
/// major.minor.patch resolution model used by `latest_in_series`,
/// `fetch_version_for_prefix`, and `cli::fetch_active_prefixes`. All
/// three iterate releases and filter it out. This helper is the single
/// point of truth for that decision, so a future moniker that also
/// warrants skipping can be added in one place.
pub(crate) fn is_skippable_release_moniker(moniker: &str) -> bool {
    matches!(moniker, "linux-next")
}
407
408/// Find the latest version in the same major.minor series from releases.json.
409///
410/// Returns `Some("6.14.10")` for prefix `"6.14"` if that series exists in
411/// releases.json. Returns `None` if the series is not found (EOL or invalid).
412fn latest_in_series(client: &Client, version: &str) -> Option<String> {
413 let prefix = {
414 let parts: Vec<&str> = version.split('.').collect();
415 if parts.len() >= 2 {
416 format!("{}.{}", parts[0], parts[1])
417 } else {
418 return None;
419 }
420 };
421
422 // Routes through [`RELEASES_CACHE`] for the singleton; see
423 // [`cached_releases_with`] for the bypass gate.
424 let releases = cached_releases_with(client).ok()?;
425 let mut best: Option<(String, (u32, u32, u32))> = None;
426 for r in &releases {
427 if is_skippable_release_moniker(&r.moniker) {
428 continue;
429 }
430 if !r.version.starts_with(&prefix) {
431 continue;
432 }
433 if r.version.len() != prefix.len() && r.version.as_bytes()[prefix.len()] != b'.' {
434 continue;
435 }
436 if let Some(tuple) = version_tuple(&r.version)
437 && (best.is_none() || tuple > best.as_ref().unwrap().1)
438 {
439 best = Some((r.version.clone(), tuple));
440 }
441 }
442 best.map(|(v, _)| v)
443}
444
445/// Build a user-facing error message for a version that was not found.
446///
447/// Suggests the latest version in the same major.minor series when
448/// releases.json contains one.
449fn version_not_found_msg(client: &Client, version: &str) -> String {
450 let parts: Vec<&str> = version.split('.').collect();
451 let prefix = if parts.len() >= 2 {
452 format!("{}.{}", parts[0], parts[1])
453 } else {
454 version.to_string()
455 };
456 match latest_in_series(client, version) {
457 Some(latest) if latest != version => {
458 format!("version {version} not found. latest {prefix}.x: {latest}")
459 }
460 _ => format!("version {version} not found"),
461 }
462}
463
464/// Reject responses where the server returned HTML instead of a binary
465/// archive. Some CDN error pages return 200 with text/html.
466fn reject_html_response(response: &reqwest::blocking::Response, url: &str) -> Result<()> {
467 if let Some(ct) = response.headers().get(reqwest::header::CONTENT_TYPE)
468 && let Ok(ct_str) = ct.to_str()
469 && ct_str.contains("text/html")
470 {
471 anyhow::bail!(
472 "download {url}: server returned HTML instead of tarball (URL may be invalid)"
473 );
474 }
475 Ok(())
476}
477
478/// Print download size from Content-Length header if available.
479///
480/// `cli_label` prefixes the diagnostic line so the message matches the
481/// binary the user invoked (`"ktstr"` vs `"cargo ktstr"`).
482fn print_download_size(
483 response: &reqwest::blocking::Response,
484 url: &str,
485 cli_label: &str,
486 mp: Option<&crate::cli::FetchProgress>,
487) {
488 let line = if let Some(len) = response.content_length() {
489 let mib = len as f64 / (1024.0 * 1024.0);
490 format!("{cli_label}: downloading {url} ({mib:.1} MiB)")
491 } else {
492 format!("{cli_label}: downloading {url}")
493 };
494 // Route through the progress group so the line coordinates with
495 // concurrent bars on a TTY (and still reaches piped/CI stderr when
496 // the group is hidden); raw `eprintln!` when no group is present.
497 match mp {
498 Some(fp) => fp.println(&line),
499 None => eprintln!("{line}"),
500 }
501}
502
/// Longest tolerated stretch with no body bytes received before a
/// streaming download is declared stalled.
///
/// Targets a TCP connection that completed its handshake (so the
/// connect timeout does not fire) but then silently stops delivering
/// body data — a common CDN failure mode where keepalive holds the
/// socket open while the upstream origin is unreachable. 60s is
/// generous enough that a genuinely slow uplink delivering chunks every
/// few seconds never triggers it, yet tight enough that a wedged
/// connection surfaces before the run's overall test timeout.
const DOWNLOAD_NO_PROGRESS_TIMEOUT: Duration = Duration::from_secs(60);
513
514/// Streaming `Read` adapter for kernel tarball downloads.
515///
516/// Wraps the [`reqwest::blocking::Response`] body to do two things
517/// the bare response cannot:
518///
519/// 1. **Body-progress watchdog.** Tracks `last_progress` (the
520/// instant of the last successful read with `n > 0`) and errors
521/// when more than [`DOWNLOAD_NO_PROGRESS_TIMEOUT`] elapses
522/// between byte-producing reads. Without this, a CDN edge that
523/// keepalives the socket but stops delivering body bytes would
524/// only surface after reqwest's per-request read timeout
525/// ([`DOWNLOAD_REQUEST_READ_TIMEOUT`], 300s), which bounds a
526/// single stalled `read()`; the watchdog applies the tighter
527/// 60s no-progress bound across successive reads. The check fires
528/// BEFORE the inner `read()` so a stalled inner reader cannot
529/// out-block the watchdog.
530///
531/// 2. **Streaming SHA-256.** Updates a [`Sha256`] hasher with every
532/// byte that flows past, so the caller can verify the finalized
533/// digest against an expected value (parsed out of
534/// `sha256sums.asc`) without a second pass over the data. The
535/// hasher only sees bytes that were actually consumed by the
536/// decoder + tar extractor, which is the same set of bytes that
537/// landed on disk — so a partial download that errored midway
538/// produces a hash over only what we successfully streamed,
539/// preventing false-positive verifications on truncated input.
540///
541/// Sits between [`reqwest::blocking::Response`] and the
542/// decompression layer (`XzDecoder` / `GzDecoder`); both
543/// decompressors expose `into_inner()` so the wrapper can be
544/// recovered after extraction completes (see
545/// [`Self::finalize`]).
546struct DownloadStream<R: Read> {
547 /// Underlying reqwest response body. Owned because `XzDecoder`
548 /// and `GzDecoder` take ownership of their inner reader, so
549 /// the wrapper must hold the response by value rather than by
550 /// reference.
551 inner: R,
552 /// Running SHA-256 hasher updated on every byte-producing read.
553 /// Consumed by [`DownloadStream::finalize`] (which takes `self`
554 /// by value); the call site recovers the wrapper from inside
555 /// the decoder + tar archive chain via `into_inner` before
556 /// finalizing.
557 hasher: Sha256,
558 /// Total body bytes read so far. Surfaced in the watchdog
559 /// error message so an operator triaging "no progress" can see
560 /// how many bytes did arrive before the stall — distinguishing
561 /// "connection dropped after a few bytes" from "connection
562 /// dropped after most of the payload".
563 bytes_total: u64,
564 /// `Instant` of the last successful read with `n > 0`. Set at
565 /// construction (not on first read) so a connection that wins
566 /// the handshake but never delivers any body bytes still
567 /// trips the watchdog after [`DOWNLOAD_NO_PROGRESS_TIMEOUT`]
568 /// rather than waiting for an indeterminate pre-data window.
569 last_progress: Instant,
570 /// Tolerated stretch of zero-progress time. Pinned at
571 /// construction from [`DOWNLOAD_NO_PROGRESS_TIMEOUT`]; held in
572 /// the struct rather than read from the constant on every
573 /// `read()` so a future per-call override (e.g. shorter
574 /// timeouts in tests) lands without touching the watchdog
575 /// logic.
576 no_progress_timeout: Duration,
577 /// Optional indicatif download bar, advanced by `inc(n)` on
578 /// every byte-producing read in lockstep with `bytes_total`.
579 /// `None` is the no-bar path (non-TTY, or no progress group
580 /// threaded in) and carries zero per-read overhead beyond the
581 /// `Option` check. Advancing here — the single byte-accounting
582 /// site — guarantees `bar.position() == finalize().1`, so the
583 /// bar can never drift from the bytes the hasher and watchdog
584 /// observed.
585 progress: Option<indicatif::ProgressBar>,
586}
587
588impl<R: Read> DownloadStream<R> {
589 /// Construct a streaming wrapper around `inner` with the production
590 /// no-progress budget, optionally attaching an indicatif progress
591 /// bar. `last_progress` is set to "now" so the watchdog clock starts
592 /// at construction; the downstream decoder may take an indeterminate
593 /// time before the first `read()`, but any actual progress resets
594 /// the clock. The optional bar is advanced by `inc(n)` on every
595 /// byte-producing read (see the `progress` field); `progress = None`
596 /// is the non-TTY / no-group path (no bar). The bar is a pure
597 /// observer — it never affects the watchdog gate or the streaming
598 /// sha256, so a stalled or truncated download still surfaces its
599 /// error unchanged.
600 fn with_progress(inner: R, progress: Option<indicatif::ProgressBar>) -> Self {
601 Self {
602 inner,
603 hasher: Sha256::new(),
604 bytes_total: 0,
605 last_progress: Instant::now(),
606 no_progress_timeout: DOWNLOAD_NO_PROGRESS_TIMEOUT,
607 progress,
608 }
609 }
610
611 /// Consume the wrapper and return `(hex_digest, bytes_total)`.
612 /// Lowercase hex matches the format kernel.org publishes in
613 /// `sha256sums.asc`, so the caller can do a direct
614 /// `eq_ignore_ascii_case` comparison without re-encoding.
615 fn finalize(self) -> (String, u64) {
616 (hex::encode(self.hasher.finalize()), self.bytes_total)
617 }
618}
619
620impl<R: Read> Read for DownloadStream<R> {
621 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
622 // Watchdog gate: trip BEFORE delegating to the inner reader
623 // so a stalled inner read does not get a fresh chance to
624 // run after the no-progress window has already expired. The
625 // wrapper cannot interrupt a `read()` that is currently
626 // blocked in a syscall — that protection comes from the
627 // per-request timeout configured via
628 // `RequestBuilder::timeout` — but it can refuse to issue
629 // the next call once the cumulative no-progress window
630 // crosses the bound.
631 let elapsed = self.last_progress.elapsed();
632 if elapsed > self.no_progress_timeout {
633 return Err(std::io::Error::new(
634 std::io::ErrorKind::TimedOut,
635 format!(
636 "download stalled: no body bytes for {}s after {} bytes received",
637 elapsed.as_secs(),
638 self.bytes_total,
639 ),
640 ));
641 }
642 match self.inner.read(buf) {
643 Ok(0) => {
644 // EOF: do NOT update last_progress — a 0-byte read
645 // is not progress, and updating here would let a
646 // decoder that polls past EOF reset the watchdog
647 // indefinitely.
648 Ok(0)
649 }
650 Ok(n) => {
651 self.hasher.update(&buf[..n]);
652 self.bytes_total += n as u64;
653 self.last_progress = Instant::now();
654 // Advance the bar in lockstep with `bytes_total` (same
655 // `n`, same reads) so `position()` and `finalize().1`
656 // never diverge. No-op when no bar is attached.
657 if let Some(pb) = &self.progress {
658 pb.inc(n as u64);
659 }
660 Ok(n)
661 }
662 Err(e) => Err(e),
663 }
664 }
665}
666
/// Per-request body-stream timeout handed to
/// [`reqwest::blocking::RequestBuilder::timeout`] for tarball
/// downloads.
///
/// The blocking client treats this as a per-`read()` deadline (reset on
/// every successful read), so it complements the [`DownloadStream`]
/// watchdog: reqwest's deadline kills a single stalled syscall, while
/// the watchdog observes the cumulative no-progress window across
/// multiple reads. The value is generous (5 minutes) because a slow but
/// progressing connection can legitimately take that long for a single
/// read on a large CDN chunk; the watchdog supplies the tighter 60s
/// no-progress bound.
const DOWNLOAD_REQUEST_READ_TIMEOUT: Duration = Duration::from_secs(5 * 60);
678
/// Total request timeout for [`fetch_sha256sums_from_url`]: the
/// wall-clock budget for the single small-body GET that retrieves the
/// cleartext-signed checksum manifest.
///
/// The body is the `sha256sums.asc` cleartext block — typically a few
/// KiB of `<hash> <filename>` lines plus a PGP signature trailer — so a
/// tight 30 s ceiling comfortably fits the realistic case (sub-second
/// on a healthy CDN edge) while still bounding the failure this guards
/// against: a stalled CDN that accepts the connection but never
/// delivers bytes. Without a per-request timeout the shared client
/// carries only [`SHARED_CLIENT_CONNECT_TIMEOUT`] (handshake-only), so
/// a stalled body read would hang the build indefinitely.
///
/// The caller treats any error from that function as "no expected hash
/// available" and downgrades verification to a warning, so a timeout
/// firing on a hung CDN shows up as an unverified-but-progressing
/// download rather than a wedged build.
const SHA256SUMS_REQUEST_TIMEOUT: Duration = Duration::from_secs(30);
696
/// Build the URL of cdn.kernel.org's `sha256sums.asc` manifest for the
/// `v{major}.x` series directory:
/// `https://cdn.kernel.org/pub/linux/kernel/v{major}.x/sha256sums.asc`.
///
/// [`resolve_expected_sha256`] derives its manifest URL from here, so
/// the URL shape lives in exactly one place and cannot drift between
/// production and the URL-injection test seam.
fn sha256sums_url(major: u32) -> String {
    const KERNEL_PUB_ROOT: &str = "https://cdn.kernel.org/pub/linux/kernel";
    format!("{KERNEL_PUB_ROOT}/v{major}.x/sha256sums.asc")
}
706
707/// GET the cleartext SHA-256 manifest at `url` and return its body.
708///
709/// Returns the file body as a `String` on success. Any error
710/// (transport failure, non-2xx status, non-UTF-8 body) is
711/// propagated; the caller treats failure as "no expected hash
712/// available" and downgrades verification to a warning.
713///
714/// Takes the full `url` rather than a `major` so the GET-and-status
715/// mechanics are reachable with an injected URL (a localhost mock)
716/// without a real cdn.kernel.org round-trip — mirrors the
717/// [`fetch_releases`] / [`cached_releases_with_url`] seam. Production
718/// reaches this only via [`resolve_expected_sha256_from_url`], whose
719/// URL is pinned by [`sha256sums_url`].
720fn fetch_sha256sums_from_url(client: &Client, url: &str) -> Result<String> {
721 tracing::info!(%url, "fetching kernel tarball sha256sums (requires network)");
722 let response = client
723 .get(url)
724 .timeout(SHA256SUMS_REQUEST_TIMEOUT)
725 .send()
726 .with_context(|| format!("fetch {url}"))?;
727 if !response.status().is_success() {
728 anyhow::bail!("fetch {url}: HTTP {}", response.status());
729 }
730 response
731 .text()
732 .with_context(|| format!("read body of {url}"))
733}
734
/// Extract the SHA-256 hex digest for `target_filename` from the
/// cleartext-signed `sha256sums.asc` body.
///
/// kernel.org publishes `sha256sums.asc` as a PGP-cleartext-signed
/// document: a `-----BEGIN PGP SIGNED MESSAGE-----` header, an
/// optional `Hash:` line, a blank line, the cleartext body
/// (`<64-hex-chars> <filename>` per line), then a
/// `-----BEGIN PGP SIGNATURE-----` block. We only need the
/// cleartext body — signature verification is a separate concern
/// (the user-facing instruction is "If no expected hash available,
/// log warning", not "require signature").
///
/// A line is accepted only when it has EXACTLY two whitespace-separated
/// tokens, the first is 64 ASCII hex characters, and the second is
/// `target_filename` (optionally prefixed with the `*` that GNU
/// `sha256sum` emits in binary mode). Anything else — PGP armor
/// headers, blank lines, lines with trailing junk, entries for other
/// files, or a matching name with a malformed digest — is skipped and
/// the scan continues.
///
/// Returns `Some(lowercase_hex)` on first match. Returns `None` if
/// the target filename does not appear in the manifest (e.g. the
/// upstream rotated or removed the entry).
fn parse_sha256_for_file(manifest: &str, target_filename: &str) -> Option<String> {
    // Strip the PGP signature trailer if present. Everything after
    // the signature marker is armor noise that never contains
    // checksum lines.
    let body = manifest
        .split_once("-----BEGIN PGP SIGNATURE-----")
        .map_or(manifest, |(before, _)| before);
    body.lines().find_map(|line| {
        // sha256sum format: `<64-hex-chars><whitespace><filename>`.
        // Require exactly two tokens: a third token means the line is
        // not a well-formed checksum entry, so it is not trusted.
        let mut tokens = line.split_whitespace();
        let (Some(hash), Some(name), None) = (tokens.next(), tokens.next(), tokens.next()) else {
            return None;
        };
        // GNU `sha256sum` marks binary-mode entries as `<hash> *<name>`.
        let name_matches =
            name == target_filename || name.strip_prefix('*') == Some(target_filename);
        let is_sha256_hex = hash.len() == 64 && hash.bytes().all(|b| b.is_ascii_hexdigit());
        (name_matches && is_sha256_hex).then(|| hash.to_ascii_lowercase())
    })
}
776
777/// Verify `actual_hex` against `expected_hex` (case-insensitive).
778/// Returns `Ok(())` on match, `Err` with a diagnostic message on
779/// mismatch. Pulled out of the call site so the comparison logic
780/// has one home and the diagnostic carries both digests in lowercase
781/// hex for direct copy-paste reuse.
782fn verify_sha256(actual_hex: &str, expected_hex: &str, url: &str) -> Result<()> {
783 if actual_hex.eq_ignore_ascii_case(expected_hex) {
784 Ok(())
785 } else {
786 anyhow::bail!(
787 "sha256 mismatch for {url}: expected {}, got {}. \
788 If cdn.kernel.org updated this tarball in-place, \
789 retry with --skip-sha256 to bypass verification.",
790 expected_hex.to_ascii_lowercase(),
791 actual_hex.to_ascii_lowercase(),
792 );
793 }
794}
795
796/// Resolve the expected SHA-256 digest for a stable tarball from
797/// cdn.kernel.org's `sha256sums.asc` manifest.
798///
799/// Three outcomes:
800/// - `Some(hex)` — manifest fetched and the entry for `tarball_name`
801/// was parsed cleanly.
802/// - `None` with no warning (only when `skip_sha256 = true`) —
803/// operator explicitly opted out of verification; emits a single
804/// security-sensitive bypass warning instead.
805/// - `None` with a per-cause warning (manifest fetch failed, or
806/// manifest fetched but entry missing) — best-effort fallback so
807/// a transient cdn.kernel.org outage / schema drift does not
808/// gate the whole download.
809///
810/// The fallback path is deliberately permissive: we trade strict
811/// authentication for build availability. A network-path attacker
812/// who can deny `sha256sums.asc` while serving a poisoned
813/// `linux-{version}.tar.xz` could exploit this; operators who
814/// require strict verification should pin the source via a
815/// `--kernel <path>` or `--kernel git+…` source rather than the
816/// download path. The bypass warnings
817/// surface on the operator's diagnostic stream so the lost
818/// guarantee is visible to ops triage.
819///
820/// Extracted from [`download_stable_tarball`] so the gate is
821/// directly unit-testable without mocking network calls — the
822/// caller-supplied `client` reaches a `Client::get` only when
823/// `skip_sha256 == false`, so a `skip_sha256 = true` test does not
824/// need a configured `Client`.
825fn resolve_expected_sha256(
826 client: &Client,
827 major: u32,
828 tarball_name: &str,
829 skip_sha256: bool,
830) -> Option<String> {
831 resolve_expected_sha256_from_url(client, &sha256sums_url(major), tarball_name, skip_sha256)
832}
833
834/// URL-injectable core of [`resolve_expected_sha256`]: the skip-gate,
835/// fetch-then-parse, and per-cause warn-and-downgrade logic, against
836/// an arbitrary `sha256sums_url`. Production reaches this only via
837/// [`resolve_expected_sha256`], which pins the URL to
838/// [`sha256sums_url`]; the seam exists so the no-skip arm's
839/// fetch-and-parse path is testable against a localhost mock without a
840/// real cdn.kernel.org round-trip — mirrors [`cached_releases_with_url`].
841fn resolve_expected_sha256_from_url(
842 client: &Client,
843 sha256sums_url: &str,
844 tarball_name: &str,
845 skip_sha256: bool,
846) -> Option<String> {
847 if skip_sha256 {
848 tracing::warn!(
849 tarball = %tarball_name,
850 "--skip-sha256: bypassing checksum verification — the \
851 downloaded tarball will not be authenticated against \
852 cdn.kernel.org's sha256sums.asc manifest. Use only when \
853 upstream has updated a tarball in-place and the manifest \
854 is mismatched.",
855 );
856 return None;
857 }
858 // Best-effort expected-hash lookup: any failure (network,
859 // status, parse, missing entry) downgrades to a warning so the
860 // download still proceeds. The warning surfaces the cause so an
861 // operator triaging "kernel build went weird" can spot that
862 // verification was skipped.
863 match fetch_sha256sums_from_url(client, sha256sums_url) {
864 Ok(manifest) => match parse_sha256_for_file(&manifest, tarball_name) {
865 Some(hex) => Some(hex),
866 None => {
867 tracing::warn!(
868 tarball = %tarball_name,
869 "sha256sums.asc fetched but no entry for {tarball_name}; \
870 download will proceed without checksum verification. \
871 Pass --skip-sha256 to bypass the manifest fetch when \
872 the entry is known to be absent.",
873 );
874 None
875 }
876 },
877 Err(err) => {
878 tracing::warn!(
879 error = %format!("{err:#}"),
880 "failed to fetch sha256sums.asc; download will proceed \
881 without checksum verification. Pass --skip-sha256 to \
882 bypass the manifest fetch when the manifest is known \
883 to be unavailable.",
884 );
885 None
886 }
887 }
888}
889
/// GitHub mirror of the linux-stable tree — comprehensive (stable +
/// base-release `vX.Y.Z` tags back to v2.6) and the authoritative
/// source for tags whose `.tar.xz` is no longer on cdn.kernel.org.
///
/// Why a mirror is needed: cdn.kernel.org keeps only the LATEST
/// tarball of each series currently in `releases.json`; every
/// superseded point release AND every tag of an EOL series is pruned
/// (a GET for the tarball 404s, verified empirically — and HEAD is not
/// a dependable existence probe on the CDN). The gregkh mirror still
/// carries every `vX.Y.Z` tag, and codeload serves each tag's snapshot
/// as a `tar.gz`, so a codeload download recovers the source a pruned
/// tarball would have provided — no clone.
///
/// Why this mirror: its `ls-refs` advertises every release tag, which
/// `--include-eol` enumerates to surface EOL series absent from
/// `releases.json` (see [`cached_stable_tags`]) and which
/// [`fetch_version_for_prefix`] resolves for an EOL/unreleased series.
/// github.com advertises allow-sha + a ref-prefix filter and a codeload
/// CDN; git.kernel.org offers neither.
///
/// Used by [`download_tarball`]'s [`TarballNotFound`] fallback and the
/// prefix resolver.
const STABLE_MIRROR_URL: &str = "https://github.com/gregkh/linux";
909
/// Marker error returned by the stable-tarball download when
/// cdn.kernel.org answers HTTP 404.
///
/// A 404 means the tarball has been pruned: either the series is EOL
/// (absent from `releases.json`) or the point release was superseded
/// (the CDN retains only each maintained series' latest).
/// [`download_tarball`] recognises the marker with `downcast_ref` —
/// the context-aware anyhow accessor; a `chain().any(..is::<T>())`
/// walk would MISS a context-wrapped marker — and falls back to a
/// codeload snapshot of the tag from the gregkh mirror
/// ([`STABLE_MIRROR_URL`]). Every other HTTP status is a hard error
/// with no fallback.
#[derive(Debug)]
struct TarballNotFound;

impl std::fmt::Display for TarballNotFound {
    /// Writes the fixed, human-readable description of the marker.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const DESCRIPTION: &str =
            "stable tarball pruned from cdn.kernel.org (EOL or superseded point release)";
        out.write_str(DESCRIPTION)
    }
}

/// No underlying cause: the marker is a leaf error.
impl std::error::Error for TarballNotFound {}
931
932/// Download a stable kernel tarball (.tar.xz) from cdn.kernel.org.
933///
934/// Returns a [`TarballNotFound`] error (downcast-detectable) when the
935/// CDN 404s the tarball — see that type for the pruning semantics and
936/// [`download_tarball`] for the git-tag fallback it triggers.
937///
938/// Streams the body through a [`DownloadStream`] watchdog so a
939/// stalled connection (no body bytes for
940/// [`DOWNLOAD_NO_PROGRESS_TIMEOUT`]) surfaces as an error rather
941/// than blocking indefinitely. Computes SHA-256 over the streamed
942/// bytes and verifies against the digest in
943/// `sha256sums.asc` for the matching `linux-{version}.tar.xz`
944/// entry; if the manifest fetch / parse fails (transient outage,
945/// schema drift, missing entry), logs a warning and continues
946/// without verification rather than failing the whole download.
947///
948/// `skip_sha256 = true` bypasses the manifest fetch entirely and
949/// emits a single bypass warning. Intended for the case where
950/// cdn.kernel.org has updated a tarball in-place (a new point
951/// release reusing the same URL) and the manifest is stale or
952/// mismatched. Unverified downloads are a security-sensitive
953/// fallback — the bypass warning surfaces the lost guarantee on
954/// the operator's diagnostic stream.
955fn download_stable_tarball(
956 client: &Client,
957 version: &str,
958 dest_dir: &Path,
959 cli_label: &str,
960 skip_sha256: bool,
961 mp: Option<&crate::cli::FetchProgress>,
962) -> Result<PathBuf> {
963 let major = major_version(version)?;
964 let url = format!("https://cdn.kernel.org/pub/linux/kernel/v{major}.x/linux-{version}.tar.xz");
965 download_stable_tarball_from_url(client, &url, version, dest_dir, cli_label, skip_sha256, mp)
966}
967
/// URL-injectable core of [`download_stable_tarball`]: the GET, the
/// 404→[`TarballNotFound`] / other-status→hard-error status gate, and
/// the stream→extract→verify→promote pipeline, against an arbitrary
/// tarball `url`. Production reaches this only via
/// [`download_stable_tarball`], which pins the cdn.kernel.org URL; the
/// seam exists so the status routing (404 marker vs hard error) is
/// unit-testable against a localhost mock without a real cdn
/// round-trip — mirrors [`resolve_expected_sha256_from_url`] /
/// [`fetch_releases`].
///
/// Only the tarball GET uses the injected `url`. The expected digest
/// is still resolved through [`resolve_expected_sha256`], i.e. from
/// the manifest URL derived from `version`'s major series, unless
/// `skip_sha256` is set.
///
/// Pipeline order, as implemented below:
/// 1. resolve the expected digest (may be `None`),
/// 2. GET `url` and gate on the HTTP status,
/// 3. stream the body through the watchdog + xz + tar into a staging
///    directory created inside `dest_dir`,
/// 4. compare the streamed digest with the expected one, if any,
/// 5. promote `linux-{version}` out of staging via
///    [`promote_staged_kernel_tree`] and return that path.
///
/// # Errors
///
/// - the major version cannot be derived from `version`;
/// - the request fails to send, or the status is not a success
///   (404 → [`TarballNotFound`], anything else → a plain error);
/// - [`reject_html_response`] rejects the response;
/// - the staging directory cannot be created or extraction fails;
/// - an expected digest is available and [`verify_sha256`] reports a
///   mismatch;
/// - [`promote_staged_kernel_tree`] rejects the staged layout or the
///   rename fails.
fn download_stable_tarball_from_url(
    client: &Client,
    url: &str,
    version: &str,
    dest_dir: &Path,
    cli_label: &str,
    skip_sha256: bool,
    mp: Option<&crate::cli::FetchProgress>,
) -> Result<PathBuf> {
    let major = major_version(version)?;
    let tarball_name = format!("linux-{version}.tar.xz");

    // Resolved up front, before the large GET: `None` means either the
    // operator opted out or the manifest lookup failed (each path has
    // already logged its own warning).
    let expected_sha256 = resolve_expected_sha256(client, major, &tarball_name, skip_sha256);

    tracing::info!(%url, "downloading stable kernel tarball (requires network)");
    let response = client
        .get(url)
        .timeout(DOWNLOAD_REQUEST_READ_TIMEOUT)
        .send()
        .with_context(|| format!("download {url}"))?;
    if !response.status().is_success() {
        if response.status() == reqwest::StatusCode::NOT_FOUND {
            // Pruned tarball (EOL series or superseded point release).
            // Return the downcast-detectable marker so `download_tarball`
            // falls back to a codeload snapshot of the tag from the
            // gregkh mirror (`STABLE_MIRROR_URL`) rather than failing
            // outright.
            return Err(anyhow::Error::new(TarballNotFound));
        }
        anyhow::bail!("download {url}: HTTP {}", response.status());
    }
    reject_html_response(&response, url)?;
    print_download_size(&response, url, cli_label, mp);
    // Capture the total before `response` is moved into the stream so a
    // determinate (percent + ETA) bar can be built; `None` when the
    // server sent no Content-Length, in which case the bar degrades to
    // a live byte counter.
    let total = response.content_length();

    // Route status lines through the progress group (see
    // `print_download_size`); `eprintln!` when no group is threaded in.
    let status = |line: &str| match mp {
        Some(fp) => fp.println(line),
        None => eprintln!("{line}"),
    };
    status(&format!("{cli_label}: extracting tarball (xz)"));
    // Stage extraction inside `dest_dir` (same filesystem) so the
    // final `fs::rename` into place is atomic and a verification
    // failure leaves `dest_dir` untouched. A bad mirror that serves
    // a wrong-version archive — or sneaks stray top-level entries
    // alongside `linux-{version}/` — gets caught after extraction
    // but before anything lands in `dest_dir`. The TempDir's Drop
    // sweeps every entry the malicious archive deposited.
    let staging =
        tempfile::TempDir::new_in(dest_dir).with_context(|| "create extraction staging dir")?;
    let download_bar = mp.map(|fp| fp.download_bar(version, total));
    // Reader chain: HTTP body → watchdog/hasher → xz decoder → tar.
    // Download and extraction therefore happen in one streaming pass.
    let stream = DownloadStream::with_progress(response, download_bar.as_ref().map(|b| b.bar()));
    let decoder = xz2::read::XzDecoder::new(stream);
    let mut archive = tar::Archive::new(decoder);
    archive
        .unpack(staging.path())
        .with_context(|| "extract tarball")?;

    // Recover the watchdog wrapper from inside the decoder/archive
    // chain to read the streaming digest. `into_inner` on tar +
    // xz2 each peel one layer of the chain. Done after a successful
    // unpack so we don't compute over a partial stream.
    // NOTE(review): the digest covers the bytes read through the
    // watchdog; this presumes the decoder chain (or `finalize`, which
    // is defined elsewhere) has consumed the whole response body by
    // this point — confirm.
    let stream = archive.into_inner().into_inner();
    let (actual_hex, bytes_total) = stream.finalize();
    // Download is complete (every byte streamed) — clear the bar
    // before emitting the verification status so the two don't overlap.
    if let Some(bar) = &download_bar {
        bar.finish();
    }
    if let Some(expected) = expected_sha256.as_deref() {
        // A mismatch returns here, before promotion: the staged tree is
        // dropped with `staging` and never reaches `dest_dir`.
        verify_sha256(&actual_hex, expected, url)?;
        status(&format!(
            "{cli_label}: sha256 verified ({bytes_total} bytes, hash {actual_hex})"
        ));
    } else if !skip_sha256 {
        // Skip path already emitted its bespoke bypass warning
        // before the download; firing again here under "no
        // expected sha256 available" would mislead — that wording
        // implies a fallback, not an explicit operator opt-out.
        tracing::warn!(
            url = %url,
            bytes = bytes_total,
            sha256 = %actual_hex,
            "no expected sha256 available for {url}; computed digest \
             {actual_hex} over {bytes_total} bytes is unverified",
        );
    }

    let source_dir = promote_staged_kernel_tree(&staging, dest_dir, version)?;
    Ok(source_dir)
}
1072
1073/// Verify a kernel tarball's staged extraction contains exactly one
1074/// top-level entry named `linux-{version}/` and atomically rename it
1075/// into `dest_dir/linux-{version}`. Bails — leaving `dest_dir`
1076/// untouched — when the staging dir holds a stray entry, when the
1077/// expected inner directory is missing, or when the rename fails.
1078/// The caller's `TempDir` outlives this helper, so its Drop sweeps
1079/// any residual staging contents whether this returns Ok or Err.
1080fn promote_staged_kernel_tree(
1081 staging: &tempfile::TempDir,
1082 dest_dir: &Path,
1083 version: &str,
1084) -> Result<PathBuf> {
1085 let expected_name = format!("linux-{version}");
1086 let mut found_inner = false;
1087 for entry in std::fs::read_dir(staging.path()).with_context(|| "read staging dir entries")? {
1088 let entry = entry.with_context(|| "iterate staging dir entry")?;
1089 let name = entry.file_name();
1090 if name == std::ffi::OsStr::new(&expected_name) {
1091 found_inner = true;
1092 } else {
1093 anyhow::bail!(
1094 "tarball contains unexpected top-level entry {name:?}; \
1095 expected only {expected_name}/"
1096 );
1097 }
1098 }
1099 if !found_inner {
1100 anyhow::bail!("expected directory {expected_name} after extraction");
1101 }
1102 let inner = staging.path().join(&expected_name);
1103 let source_dir = dest_dir.join(&expected_name);
1104 std::fs::rename(&inner, &source_dir)
1105 .with_context(|| format!("rename {} -> {}", inner.display(), source_dir.display()))?;
1106 Ok(source_dir)
1107}
1108
1109/// Promote the single top-level directory a codeload archive extracts
1110/// out of `staging` into `dest_dir/{canonical}`, so it survives
1111/// `staging`'s `Drop`.
1112///
1113/// Unlike [`promote_staged_kernel_tree`], the top-dir name is not
1114/// `linux-{version}` — GitHub derives it from the ref (`linux-6.11.11`
1115/// for a tag, `linux-{sha}` for a commit, `linux-{branch}` for a
1116/// branch), so this promotes the SOLE entry by structure rather than by
1117/// a fixed name, renaming it to a caller-supplied `canonical` name that
1118/// keys off the resolved commit (collision-free across refs). A hostile
1119/// or malformed snapshot that deposits zero or several top-level
1120/// entries — or a top-level entry that is not a plain directory (a
1121/// regular file, or a symlink, which the directory-entry file-type
1122/// check rejects rather than following) — is rejected before anything
1123/// lands in `dest_dir`; the `TempDir`'s `Drop` sweeps every entry the
1124/// archive left.
1125fn promote_single_kernel_tree(
1126 staging: &tempfile::TempDir,
1127 dest_dir: &Path,
1128 canonical: &str,
1129) -> Result<PathBuf> {
1130 let mut entries = Vec::new();
1131 for entry in std::fs::read_dir(staging.path()).with_context(|| "read staging dir entries")? {
1132 entries.push(entry.with_context(|| "iterate staging dir entry")?);
1133 }
1134 if entries.len() != 1 {
1135 anyhow::bail!(
1136 "codeload archive must contain exactly one top-level entry; found {}",
1137 entries.len()
1138 );
1139 }
1140 let inner = entries[0].path();
1141 // Use the DIRECTORY-ENTRY file type (does NOT follow symlinks) so a
1142 // top-level symlink-to-directory is rejected rather than promoted:
1143 // `Path::is_dir()` would follow the link and accept an
1144 // attacker-chosen target, and `fs::rename` moves the symlink itself
1145 // (it never dereferences), leaving the build reading through it.
1146 let entry_type = entries[0]
1147 .file_type()
1148 .with_context(|| "stat codeload top-level entry")?;
1149 if !entry_type.is_dir() {
1150 anyhow::bail!(
1151 "codeload archive top-level entry is not a plain directory: {}",
1152 inner.display()
1153 );
1154 }
1155 let source_dir = dest_dir.join(canonical);
1156 std::fs::rename(&inner, &source_dir)
1157 .with_context(|| format!("rename {} -> {}", inner.display(), source_dir.display()))?;
1158 Ok(source_dir)
1159}
1160
/// Download an RC kernel tarball (.tar.gz) from git.kernel.org.
///
/// Fetches `https://git.kernel.org/torvalds/t/linux-{version}.tar.gz`,
/// extracts it into a staging directory inside `dest_dir`, and returns
/// the path [`promote_staged_kernel_tree`] promotes it to
/// (`dest_dir/linux-{version}`).
///
/// Streams the body through a [`DownloadStream`] watchdog so a
/// stalled connection surfaces as an error rather than blocking
/// indefinitely. RC tarballs are dynamically generated by gitweb
/// at request time and have no published `sha256sums` manifest, so
/// this path always logs a warning that the digest is unverified —
/// it is computed and surfaced for diagnostic value (operators can
/// pin it manually) but never compared to an authoritative source.
///
/// # Errors
///
/// - the request fails to send;
/// - the server answers 404 (reported with a hint that RC releases are
///   removed once the stable version ships) or any other non-success
///   status;
/// - [`reject_html_response`] rejects the response;
/// - the staging directory cannot be created or extraction fails;
/// - [`promote_staged_kernel_tree`] rejects the staged layout or the
///   rename fails.
fn download_rc_tarball(
    client: &Client,
    version: &str,
    dest_dir: &Path,
    cli_label: &str,
    mp: Option<&crate::cli::FetchProgress>,
) -> Result<PathBuf> {
    let url = format!("https://git.kernel.org/torvalds/t/linux-{version}.tar.gz");
    tracing::info!(%url, "downloading RC kernel tarball (requires network)");

    let response = client
        .get(&url)
        .timeout(DOWNLOAD_REQUEST_READ_TIMEOUT)
        .send()
        .with_context(|| format!("download {url}"))?;
    // 404 gets its own message; unlike the stable path there is no
    // fallback source here, so it is a hard error.
    if response.status() == reqwest::StatusCode::NOT_FOUND {
        anyhow::bail!(
            "RC tarball not found: {url}\n \
             RC releases are removed from git.kernel.org after the stable version ships."
        );
    }
    if !response.status().is_success() {
        anyhow::bail!("download {url}: HTTP {}", response.status());
    }
    reject_html_response(&response, &url)?;
    print_download_size(&response, &url, cli_label, mp);
    // RC tarballs are gitweb-generated and often arrive without a
    // Content-Length, so `total` is frequently `None` and the bar
    // degrades to a live byte counter (rate, no ETA).
    let total = response.content_length();

    // Status lines go through the progress group when one is threaded
    // in, otherwise straight to stderr.
    let status = |line: &str| match mp {
        Some(fp) => fp.println(line),
        None => eprintln!("{line}"),
    };
    status(&format!("{cli_label}: extracting tarball (gzip)"));
    // Stage extraction inside `dest_dir` (same filesystem) so the
    // final atomic rename keeps `dest_dir` clean when a bad mirror
    // serves a wrong-version archive or sneaks stray top-level
    // entries past the archive boundary. RC tarballs have no
    // upstream sha256 manifest, so structural verification is the
    // only defence against a hostile gitweb response.
    let staging =
        tempfile::TempDir::new_in(dest_dir).with_context(|| "create extraction staging dir")?;
    let download_bar = mp.map(|fp| fp.download_bar(version, total));
    // Reader chain: HTTP body → watchdog/hasher → gzip decoder → tar.
    let stream = DownloadStream::with_progress(response, download_bar.as_ref().map(|b| b.bar()));
    let decoder = flate2::read::GzDecoder::new(stream);
    let mut archive = tar::Archive::new(decoder);
    archive
        .unpack(staging.path())
        .with_context(|| "extract tarball")?;

    // Surface the streamed digest as a warning. RC tarballs have
    // no upstream manifest, so verification is impossible — but
    // emitting the hash gives an operator a value they can
    // capture for offline pinning if they want to detect drift on
    // re-fetch.
    let stream = archive.into_inner().into_inner();
    let (actual_hex, bytes_total) = stream.finalize();
    if let Some(bar) = &download_bar {
        bar.finish();
    }
    tracing::warn!(
        url = %url,
        bytes = bytes_total,
        sha256 = %actual_hex,
        "no expected sha256 available for {url} (RC tarballs are \
         dynamically generated by git.kernel.org and have no \
         published manifest); computed digest {actual_hex} over \
         {bytes_total} bytes is unverified",
    );

    let source_dir = promote_staged_kernel_tree(&staging, dest_dir, version)?;
    Ok(source_dir)
}
1245
/// Download a GitHub source snapshot for `git_ref` as a codeload
/// `tar.gz` and extract it, returning an [`AcquiredSource`] keyed
/// identically to the clone path ([`git_cache_key`] over the resolved
/// `commit_hash`) so a codeload-acquired kernel and a clone-acquired
/// one of the same commit share the cache entry.
///
/// GitHub serves a gzip snapshot for any tag/branch/commit via
/// codeload; the caller supplies the `archive_url`
/// ([`github_archive_url`]) and the pre-resolved `commit_hash`
/// ([`resolve_ref_commit`]) — the snapshot has no `.git`, so the
/// commit cannot be read back from the tree. Modeled on
/// [`download_rc_tarball`] (gzip decode; codeload carries no sha256
/// manifest, so extraction is structurally verified —
/// [`promote_single_kernel_tree`] rejects any top level that is not a
/// single plain directory (multi-entry, a file, or a symlink) — and
/// the streamed digest is logged, not compared).
///
/// How the arguments are used:
/// - `archive_url` — the URL that is fetched.
/// - `git_ref` — labels the progress bar and feeds both the cache key
///   and the recorded [`crate::cache::KernelSource`].
/// - `commit_hash` — feeds the cache key; its first 7 characters name
///   the promoted tree (`dest_dir/linux-git-{short}`) and are recorded
///   in the `KernelSource`.
/// - `dest_dir` — hosts the staging directory and the promoted tree.
/// - `cli_label` — prefix for status lines.
/// - `mp` — optional progress group; `None` means status lines go to
///   stderr and no bar is drawn.
///
/// The returned `version` is whatever [`read_makefile_version`]
/// reports for the promoted tree.
///
/// # Errors
///
/// - the request fails to send;
/// - the server answers 404 (reported with a hint that the ref may not
///   exist or the repo is private) or any other non-success status;
/// - [`reject_html_response`] rejects the response;
/// - the staging directory cannot be created or extraction fails;
/// - [`promote_single_kernel_tree`] rejects the staged layout or the
///   rename fails.
pub(crate) fn download_github_archive(
    client: &Client,
    archive_url: &str,
    git_ref: &str,
    commit_hash: &str,
    dest_dir: &Path,
    cli_label: &str,
    mp: Option<&crate::cli::FetchProgress>,
) -> Result<AcquiredSource> {
    tracing::info!(%archive_url, "downloading GitHub codeload snapshot (requires network)");
    let response = client
        .get(archive_url)
        .timeout(DOWNLOAD_REQUEST_READ_TIMEOUT)
        .send()
        .with_context(|| format!("download {archive_url}"))?;
    if response.status() == reqwest::StatusCode::NOT_FOUND {
        anyhow::bail!(
            "codeload snapshot not found: {archive_url}\n \
             the ref may not exist on the remote, or the repo is private"
        );
    }
    if !response.status().is_success() {
        anyhow::bail!("download {archive_url}: HTTP {}", response.status());
    }
    reject_html_response(&response, archive_url)?;
    print_download_size(&response, archive_url, cli_label, mp);
    // codeload responses are dynamically generated and often arrive
    // without a Content-Length, so `total` is frequently `None` and the
    // bar degrades to a live byte counter.
    let total = response.content_length();

    // Status lines go through the progress group when one is threaded
    // in, otherwise straight to stderr.
    let status = |line: &str| match mp {
        Some(fp) => fp.println(line),
        None => eprintln!("{line}"),
    };
    status(&format!("{cli_label}: extracting snapshot (gzip)"));
    // Stage extraction inside `dest_dir` (same filesystem) so the final
    // atomic rename keeps `dest_dir` clean when a bad response serves a
    // malformed archive or sneaks stray top-level entries. codeload
    // snapshots have no upstream sha256 manifest, so structural
    // verification (single top-level dir) is the only defence against a
    // hostile response.
    let staging =
        tempfile::TempDir::new_in(dest_dir).with_context(|| "create extraction staging dir")?;
    // First 7 characters of the resolved commit; used for the promoted
    // directory name and the recorded `KernelSource`.
    let short_hash: String = commit_hash.chars().take(7).collect();
    let download_bar = mp.map(|fp| fp.download_bar(git_ref, total));
    // Reader chain: HTTP body → watchdog/hasher → gzip decoder → tar.
    let stream = DownloadStream::with_progress(response, download_bar.as_ref().map(|b| b.bar()));
    let decoder = flate2::read::GzDecoder::new(stream);
    let mut archive = tar::Archive::new(decoder);
    archive
        .unpack(staging.path())
        .with_context(|| "extract snapshot")?;

    // Recover the watchdog to read the streamed digest. codeload has no
    // published manifest, so the digest cannot be verified — log it so
    // an operator can capture it for offline pinning. `into_inner` peels
    // the tar then the gz layer, recovering the `DownloadStream`.
    let stream = archive.into_inner().into_inner();
    let (actual_hex, bytes_total) = stream.finalize();
    if let Some(bar) = &download_bar {
        bar.finish();
    }
    tracing::info!(
        url = %archive_url,
        bytes = bytes_total,
        sha256 = %actual_hex,
        "codeload snapshot extracted (unverified: codeload archives have \
         no published sha256 manifest)",
    );

    // Name the promoted tree by the resolved commit so distinct refs
    // never collide in `dest_dir` (the tree is temporary — `is_temp`).
    let canonical = format!("linux-git-{short_hash}");
    let source_dir = promote_single_kernel_tree(&staging, dest_dir, &canonical)?;
    let version = read_makefile_version(&source_dir);

    Ok(AcquiredSource {
        source_dir,
        cache_key: git_cache_key(git_ref, commit_hash),
        version,
        kernel_source: crate::cache::KernelSource::git(short_hash, git_ref),
        is_temp: true,
        is_dirty: false,
        is_git: true,
    })
}
1348
/// Download a kernel tarball (stable or RC) and extract it.
///
/// Dispatch: an RC `version` (per [`is_rc`]) goes to
/// `download_rc_tarball`; everything else goes to
/// `download_stable_tarball`. When the stable download reports
/// [`TarballNotFound`] (the CDN 404'd a pruned tarball), the source is
/// recovered from a codeload snapshot of the `v{version}` tag on the
/// gregkh mirror ([`STABLE_MIRROR_URL`]) instead. Whichever path
/// produced the tree, the returned [`AcquiredSource`] carries the
/// `{version}-tarball-{arch}-kc…` cache key and
/// `KernelSource::Tarball`.
///
/// `cli_label` prefixes diagnostic status output (e.g. `"ktstr"` or
/// `"cargo ktstr"`).
///
/// `skip_sha256` propagates to `download_stable_tarball` only —
/// stable tarballs publish a `sha256sums.asc` manifest the flag
/// bypasses. RC tarballs (`download_rc_tarball`) have no published
/// manifest so verification is impossible regardless of the flag;
/// the RC path always runs unverified and emits its own warning,
/// so `skip_sha256` is a no-op on the RC arm. `--kernel <path>` and
/// `--kernel git+…` sources do not reach this function at all.
///
/// `mp` is the progress group the determinate download bar is added
/// to; `None` disables the bar (the single-shot `kernel build` paths
/// and unit tests pass `None`).
///
/// # Errors
///
/// Propagates any error from the RC or stable download. On the
/// pruned-tarball fallback, bails with [`version_not_found_msg`] when
/// the `v{version}` tag cannot be resolved on the mirror, and
/// propagates any error from [`download_github_archive`].
///
/// # Panics
///
/// Panics if [`github_archive_url`] returns `None` for
/// [`STABLE_MIRROR_URL`], which the code treats as an invariant
/// violation (the constant is a github.com URL).
pub fn download_tarball(
    client: &Client,
    version: &str,
    dest_dir: &Path,
    cli_label: &str,
    skip_sha256: bool,
    mp: Option<&crate::cli::FetchProgress>,
) -> Result<AcquiredSource> {
    let (arch, _) = arch_info();
    let source_dir = if is_rc(version) {
        download_rc_tarball(client, version, dest_dir, cli_label, mp)?
    } else {
        match download_stable_tarball(client, version, dest_dir, cli_label, skip_sha256, mp) {
            Ok(dir) => dir,
            // Pruned tarball (EOL series or superseded point release):
            // cdn.kernel.org keeps only each maintained series' latest
            // .tar.xz. Recover the source from a codeload snapshot of
            // the `v{version}` tag on the gregkh mirror. The kernel
            // built from this source is cached by the caller under the
            // SAME `{version}-tarball-...` key returned below, so a
            // re-run hits that cache and never re-downloads.
            Err(e) if e.downcast_ref::<TarballNotFound>().is_some() => {
                let tag = format!("v{version}");
                // A 404 says the tarball is gone, not why. cdn.kernel.org
                // keeps only the latest tarball per series, but the gregkh
                // GitHub mirror carries every `vX.Y.Z` release tag and
                // codeload serves the tag's snapshot as a tar.gz — no
                // clone, and a commit-pinned snapshot. Resolve the tag to
                // its commit first (kind-directed, so a tag never aliases
                // a same-named branch); a tag absent there means the
                // version simply does not exist — surface the friendly
                // "not found" suggestion (with the latest in-series patch)
                // instead of a cryptic fetch failure.
                let Some(commit_hash) = resolve_ref_commit(
                    STABLE_MIRROR_URL,
                    &tag,
                    crate::kernel_path::GitRefKind::Tag,
                ) else {
                    anyhow::bail!("{}", version_not_found_msg(client, version));
                };
                let archive_url = github_archive_url(STABLE_MIRROR_URL, &commit_hash)
                    .expect("STABLE_MIRROR_URL is a github.com URL");
                let msg = format!(
                    "{cli_label}: {version} not on cdn.kernel.org (pruned/EOL); \
                     fetching gregkh mirror tag {tag}"
                );
                match mp {
                    Some(fp) => fp.println(&msg),
                    None => eprintln!("{msg}"),
                }
                // Only the extracted tree is kept from the snapshot's
                // `AcquiredSource`; its git-style cache key and metadata
                // are discarded in favour of the tarball-keyed value
                // built below.
                download_github_archive(
                    client,
                    &archive_url,
                    &tag,
                    &commit_hash,
                    dest_dir,
                    cli_label,
                    mp,
                )?
                .source_dir
            }
            // Any other failure has no fallback.
            Err(e) => return Err(e),
        }
    };

    Ok(AcquiredSource {
        source_dir,
        cache_key: format!("{version}-tarball-{arch}-kc{}", crate::cache_key_suffix()),
        version: Some(version.to_string()),
        kernel_source: crate::cache::KernelSource::Tarball,
        is_temp: true,
        is_dirty: false,
        // NOTE(review): `is_git: true` alongside `KernelSource::Tarball`
        // — the field's definition is not visible here; confirm this is
        // the intended value for a tarball-acquired tree.
        is_git: true,
    })
}
1440
/// Extract the patch component of a dotted kernel version.
///
/// A two-segment version has an implicit patch level of 0; a
/// three-segment version yields its third segment when that parses as
/// a `u32`. Any other segment count, or a non-numeric third segment,
/// yields `None`.
///
/// "6.12.8" → Some(8), "7.0" → Some(0), "abc" → None.
fn patch_level(version: &str) -> Option<u32> {
    match version.split('.').collect::<Vec<_>>().as_slice() {
        // "7.0": no explicit patch segment.
        [_, _] => Some(0),
        [_, _, patch] => patch.parse().ok(),
        _ => None,
    }
}
1451
/// Production URL for `releases.json`. Tests call [`fetch_releases`]
/// directly with a localhost mock URL.
pub(crate) const RELEASES_URL: &str = "https://www.kernel.org/releases.json";
1454
1455/// Fetch `releases.json` from `url` and return a vector of
1456/// [`Release`] records. Issues an HTTP GET unconditionally — no
1457/// cache consultation.
1458///
1459/// Production callers reach this function via
1460/// [`cached_releases_with`] (or [`cached_releases`]) which pass
1461/// [`RELEASES_URL`]; the cache helper only invokes
1462/// `fetch_releases` on a cache miss for the singleton path or on
1463/// the bypass branch for non-singleton clients. Tests that need
1464/// to exercise the underlying GET directly — without the cache
1465/// layer — call this function with a locally-constructed `Client`
1466/// and a localhost URL pointed at a TcpListener-backed mock that
1467/// returns canned `releases.json` content.
1468pub(crate) fn fetch_releases(client: &Client, url: &str) -> Result<Vec<Release>> {
1469 tracing::info!(%url, "fetching kernel.org releases index (requires network)");
1470 let response = client
1471 .get(url)
1472 .send()
1473 .with_context(|| format!("fetch {url}"))?;
1474 if !response.status().is_success() {
1475 anyhow::bail!("fetch {url}: HTTP {}", response.status());
1476 }
1477 let body = response.text().with_context(|| "read response body")?;
1478 parse_releases_body(&body)
1479}
1480
1481fn parse_releases_body(body: &str) -> Result<Vec<Release>> {
1482 let json: serde_json::Value =
1483 serde_json::from_str(body).with_context(|| "parse releases.json")?;
1484 let releases = json
1485 .get("releases")
1486 .and_then(|r| r.as_array())
1487 .ok_or_else(|| anyhow!("releases.json: missing releases array"))?;
1488 let input_rows = releases.len();
1489 let parsed: Vec<Release> = releases
1490 .iter()
1491 .filter_map(|r| {
1492 let moniker = r.get("moniker")?.as_str()?;
1493 let version = r.get("version")?.as_str()?;
1494 Some(Release {
1495 moniker: moniker.to_string(),
1496 version: version.to_string(),
1497 })
1498 })
1499 .collect();
1500 // Per-row tolerance: a corrupt row is silently dropped via the
1501 // filter_map `?` chain so a single bad entry does not abort the
1502 // whole fetch (see `fetch_releases_row_missing_moniker_drops_row`
1503 // and siblings). The drop is also a hazard: the truncated vector
1504 // gets cached in [`RELEASES_CACHE`] for the rest of the process
1505 // lifetime via the singleton path, so a transient malformed row
1506 // at fetch time persists as a partial snapshot for every later
1507 // cache-hit caller. Surface the drop count so an operator
1508 // tailing logs sees that releases.json arrived partial — without
1509 // this, the symptom (a missing version on resolve) is invisible
1510 // until it propagates as "version not found" elsewhere.
1511 let dropped = input_rows - parsed.len();
1512 if dropped > 0 {
1513 tracing::warn!(
1514 input_rows,
1515 parsed_rows = parsed.len(),
1516 dropped,
1517 "releases.json: dropped {dropped} of {input_rows} row(s) \
1518 missing moniker/version (or non-string values); cached \
1519 snapshot will reflect this for the process lifetime"
1520 );
1521 }
1522 Ok(parsed)
1523}
1524
1525/// Fetch the latest stable kernel version from kernel.org.
1526///
1527/// Selects from the `releases` array (moniker "stable" or "longterm"),
1528/// requiring patch version >= 8 to avoid brand-new major versions
1529/// that may have build issues on CI runners.
1530///
1531/// When `client` is the process-wide [`shared_client`] singleton,
1532/// routes through `RELEASES_CACHE`; other clients bypass the
1533/// cache via pointer-equality and exercise `fetch_releases`
1534/// directly — see `cached_releases_with` for details.
1535///
1536/// `cli_label` prefixes diagnostic status output (e.g. `"ktstr"` or
1537/// `"cargo ktstr"`).
1538pub fn fetch_latest_stable_version(client: &Client, cli_label: &str) -> Result<String> {
1539 eprintln!("{cli_label}: fetching latest kernel version");
1540 let releases = cached_releases_with(client)?;
1541
1542 let mut best: Option<&str> = None;
1543 for r in &releases {
1544 if r.moniker != "stable" && r.moniker != "longterm" {
1545 continue;
1546 }
1547 if patch_level(&r.version).unwrap_or(0) < 8 {
1548 continue;
1549 }
1550 // Pick the first matching release — releases.json is ordered
1551 // newest first, so the first stable with patch >= 8 is the best.
1552 best = Some(r.version.as_str());
1553 break;
1554 }
1555
1556 let version =
1557 best.ok_or_else(|| anyhow!("no stable kernel with patch >= 8 found in releases.json"))?;
1558 eprintln!("{cli_label}: latest stable kernel: {version}");
1559 Ok(version.to_string())
1560}
1561
/// Split a dotted version into a `(major, minor, patch)` tuple that
/// orders numerically.
///
/// A two-segment version gets patch 0. Every segment must parse as a
/// `u32`; any other segment count or a non-numeric segment yields
/// `None`.
///
/// "6.14.2" → Some((6, 14, 2)), "6.14" → Some((6, 14, 0)),
/// "7.0" → Some((7, 0, 0)).
fn version_tuple(version: &str) -> Option<(u32, u32, u32)> {
    let number = |segment: &str| segment.parse::<u32>().ok();
    let segments: Vec<&str> = version.split('.').collect();
    match segments[..] {
        [major, minor] => Some((number(major)?, number(minor)?, 0)),
        [major, minor, patch] => Some((number(major)?, number(minor)?, number(patch)?)),
        _ => None,
    }
}
1582
/// Report whether `s` looks like a kernel major.minor prefix such as
/// `"6.14"`, rather than a full patch version (`"6.14.2"`) or an rc tag
/// (`"6.15-rc3"`). Callers use this to decide whether the input needs
/// prefix resolution via [`fetch_version_for_prefix`].
///
/// The test is deliberately loose: anything containing fewer than two
/// dots and no `-rc` substring qualifies, so `"7"` (single-segment) and
/// `""` both return true. This matches the historical inline check used
/// by kernel-build dispatchers.
pub fn is_major_minor_prefix(s: &str) -> bool {
    if s.contains("-rc") {
        return false;
    }
    let dots = s.bytes().filter(|&b| b == b'.').count();
    dots < 2
}
1594
1595/// Resolve the highest version matching a prefix.
1596///
1597/// E.g., "6.12" → "6.12.81", "6" → "6.19.12" (highest 6.x.y).
1598///
1599/// Scans all monikers in releases.json except linux-next. On no active
1600/// match (an EOL or unreleased series, absent from releases.json),
1601/// resolves the highest `vX.Y.z` stable patch from the gregkh mirror's
1602/// git tags; if the series has NO stable point release yet (only a base
1603/// tag), falls back to the bare `{prefix}` mainline base — see
1604/// `latest_patch_from_git_tags`.
1605///
1606/// When `client` is the process-wide [`shared_client`] singleton,
1607/// routes through `RELEASES_CACHE`; other clients bypass the
1608/// cache via pointer-equality and exercise `fetch_releases`
1609/// directly — see `cached_releases_with` for details. Cache
1610/// scope is releases.json only; the EOL-series git-tag fallback in
1611/// `latest_patch_from_git_tags` always hits the network.
1612///
1613/// `cli_label` prefixes diagnostic status output (e.g. `"ktstr"` or
1614/// `"cargo ktstr"`).
1615pub fn fetch_version_for_prefix(client: &Client, prefix: &str, cli_label: &str) -> Result<String> {
1616 eprintln!("{cli_label}: fetching latest {prefix}.x kernel version");
1617 let releases = cached_releases_with(client)?;
1618
1619 let mut best: Option<(&str, (u32, u32, u32))> = None;
1620 for r in &releases {
1621 if is_skippable_release_moniker(&r.moniker) {
1622 continue;
1623 }
1624 if !r.version.starts_with(prefix) {
1625 continue;
1626 }
1627 if r.version.len() != prefix.len() && r.version.as_bytes()[prefix.len()] != b'.' {
1628 continue;
1629 }
1630 let Some(tuple) = version_tuple(&r.version) else {
1631 continue;
1632 };
1633 if best.is_none() || tuple > best.unwrap().1 {
1634 best = Some((r.version.as_str(), tuple));
1635 }
1636 }
1637
1638 if let Some((version, _)) = best {
1639 eprintln!("{cli_label}: latest {prefix}.x kernel: {version}");
1640 return Ok(version.to_string());
1641 }
1642
1643 eprintln!(
1644 "{cli_label}: {prefix}.x not in releases.json (EOL or unreleased series); \
1645 resolving latest patch via the gregkh mirror tags"
1646 );
1647 match latest_patch_from_git_tags(STABLE_MIRROR_URL, prefix, cli_label)? {
1648 Some(version) => Ok(version),
1649 None => {
1650 // No stable point release for this series — fall back to the
1651 // mainline base (the `{prefix}` release itself, e.g. a series
1652 // just cut with no `.1` yet, per the "only if there is no
1653 // X.Y.z stable use X.Y mainline" rule). The base tarball is
1654 // fetched by the normal download path (cdn.kernel.org,
1655 // falling back to the gregkh mirror snapshot); torvalds is
1656 // the mainline authority the gregkh mirror tracks.
1657 eprintln!(
1658 "{cli_label}: no {prefix}.x stable point release; using {prefix} mainline base"
1659 );
1660 Ok(prefix.to_string())
1661 }
1662 }
1663}
1664
1665/// Resolve a series' latest stable patch by ls-remote-ing the gregkh
1666/// GitHub mirror's `refs/tags/v{prefix}.{patch}` tags and taking the
1667/// highest patch. Returns `Ok(None)` when the series has NO stable
1668/// point release (no `v{prefix}.N` tag) — the caller then falls back to
1669/// the mainline base.
1670///
1671/// The gregkh mirror is the RELIABLE EOL-resolution source: it carries
1672/// every `vX.Y.Z` release tag (back to v2.6) and its codeload CDN
1673/// serves each tag's tarball, so resolution and the pruned-tarball
1674/// download (see [`download_tarball`]'s fallback) share ONE
1675/// comprehensive mirror. cdn.kernel.org cannot be used here: its
1676/// `v{major}.x/` directory index 404s, and its `sha256sums.asc` is
1677/// served inconsistently per CDN edge (200 from some nodes, 404 from
1678/// others — the 404 nodes break CI runners while the tarball fetch on
1679/// those same nodes still succeeds).
1680fn latest_patch_from_git_tags(url: &str, prefix: &str, cli_label: &str) -> Result<Option<String>> {
1681 eprintln!("{cli_label}: resolving {prefix}.x release tags via {url}");
1682 let refs = ls_remote_refs(url)
1683 .with_context(|| format!("ls-remote {url} for {prefix}.x release tags"))?;
1684 match max_tag_patch(refs.iter().map(ref_full_name), prefix) {
1685 Some(patch) => {
1686 let version = format!("{prefix}.{patch}");
1687 eprintln!("{cli_label}: latest {prefix}.x kernel (from git tags): {version}");
1688 Ok(Some(version))
1689 }
1690 None => Ok(None),
1691 }
1692}
1693
1694/// The advertised full ref name (`refs/...`), as raw bytes, of a
1695/// protocol handshake ref.
1696fn ref_full_name(r: &gix::protocol::handshake::Ref) -> &[u8] {
1697 use gix::protocol::handshake::Ref::{Direct, Peeled, Symbolic, Unborn};
1698 match r {
1699 Peeled { full_ref_name, .. }
1700 | Direct { full_ref_name, .. }
1701 | Symbolic { full_ref_name, .. }
1702 | Unborn { full_ref_name, .. } => full_ref_name.as_ref(),
1703 }
1704}
1705
/// Largest `{patch}` found among ref names of the form
/// `refs/tags/v{prefix}.{patch}`, or `None` when no name qualifies.
///
/// gix folds an annotated tag's peeled entry into a single
/// `Ref::Peeled` whose `full_ref_name` is the BASE name — no `^{}`
/// suffix — and a lightweight tag arrives as a `Ref::Direct` with the
/// base name too, so every tag advertises its base
/// `refs/tags/v{prefix}.{patch}` name for the needle to match. The
/// `^{}` strip below is therefore a defensive no-op on real gix output
/// (it only affects a raw wire ref name gix never emits; the base
/// entry supplies the patch regardless). Pure (no network) so it is
/// unit-testable with synthetic ref names.
///
/// The trailing `.` in the `refs/tags/v{prefix}.` needle keeps a
/// `6.14` prefix from matching a `6.140` series, and requiring the
/// tail to parse as a `u32` rejects `-rc` and other non-release tags.
fn max_tag_patch<'a>(ref_names: impl Iterator<Item = &'a [u8]>, prefix: &str) -> Option<u32> {
    let needle = format!("refs/tags/v{prefix}.");
    ref_names
        .filter_map(|name| {
            let tail = name.strip_prefix(needle.as_bytes())?;
            let tail = tail.strip_suffix(b"^{}").unwrap_or(tail);
            std::str::from_utf8(tail).ok()?.parse::<u32>().ok()
        })
        .max()
}
1737
1738/// ls-remote the gregkh stable mirror ([`STABLE_MIRROR_URL`]) once and
1739/// cache the release version strings (`X.Y.Z`) parsed from its
1740/// `refs/tags/vX.Y.Z` advertisement, for `--include-eol` range
1741/// expansion. Returns EVERY release-tag version verbatim (including
1742/// `-rc*` and old series); the caller
1743/// (`crate::cli::select_series_latest_in_range`) does the
1744/// range / rc / per-series filtering. `None` on ls-remote failure —
1745/// not cached, so the next caller retries. gregkh/linux mirrors
1746/// linux-stable comprehensively (tags back to v2.6), so this surfaces
1747/// EOL series that `releases.json` has dropped.
1748pub(crate) fn cached_stable_tags() -> Option<&'static [String]> {
1749 if let Some(tags) = STABLE_TAGS_CACHE.get() {
1750 return Some(tags.as_slice());
1751 }
1752 let refs = ls_remote_refs(STABLE_MIRROR_URL)?;
1753 let tags: Vec<String> = refs
1754 .iter()
1755 .filter_map(|r| {
1756 // Base tag name only: gix folds an annotated tag's peeled
1757 // entry into one `Ref::Peeled` carrying the base name, and a
1758 // lightweight tag is a `Ref::Direct` with the base name, so
1759 // `^{}` never appears on real gix output — the strip is a
1760 // defensive no-op. Non-`refs/tags/v*` refs are skipped.
1761 let name = ref_full_name(r);
1762 let v = name.strip_prefix(b"refs/tags/v")?;
1763 let v = v.strip_suffix(b"^{}").unwrap_or(v);
1764 std::str::from_utf8(v).ok().map(|s| s.to_string())
1765 })
1766 .collect();
1767 // Loser of a concurrent race discards its clone (both fetched the
1768 // same advertisement, so the cached content is equivalent).
1769 let _ = STABLE_TAGS_CACHE.set(tags);
1770 STABLE_TAGS_CACHE.get().map(|v| v.as_slice())
1771}
1772
1773/// Cache key for a git-cloned kernel: the raw user ref verbatim, the
1774/// resolved commit's FULL hash, the target arch, and the
1775/// kconfig-fragment suffix. The SINGLE construction site, shared by all
1776/// three sharers of a commit's cache entry: [`git_clone`] (post-clone,
1777/// from `head_id`), `download_github_archive` (post-download, keyed on
1778/// the resolved commit), and the pre-fetch ls-remote cache probe in
1779/// `resolve_git_kernel` — a drift between any of them would make the
1780/// probe miss the entry the fetch wrote and defeat the fetch-skip, and
1781/// split the codeload and clone paths onto separate entries for one
1782/// commit.
1783///
1784/// The FULL 40-hex commit hash keys the entry (not a 7-hex prefix): a
1785/// branch/tag tip moves over time, so the `{git_ref}` segment alone
1786/// cannot distinguish successive commits — only the hash does. A 7-hex
1787/// (28-bit) prefix would let a moved tip whose new commit shares the
1788/// first 7 hex with the cached old commit hit the stale entry and serve
1789/// the wrong kernel build under the new ref. The full id removes that
1790/// collision class; the probe and clone both render full lowercase hex
1791/// before any truncation, so keying on it is drift-free.
1792pub(crate) fn git_cache_key(git_ref: &str, commit_hash: &str) -> String {
1793 let (arch, _) = arch_info();
1794 // Sanitize the ref segment so no ref can produce a key
1795 // validate_cache_key (cache::housekeeping) rejects: it rejects `/`,
1796 // `\`, `..`, a NUL byte, and a leading `.`. A slashed branch ref
1797 // (e.g. `for-next/core`) or a dot-prefixed ref (`.foo`) would
1798 // otherwise be uncacheable verbatim and break both the pre-fetch
1799 // probe lookup and the store. The full commit_hash already makes
1800 // the key unique, so collapsing several refs onto one sanitized
1801 // prefix is safe — two refs at the same commit want the same build;
1802 // two at different commits differ in the hash segment.
1803 let safe_ref: String = git_ref
1804 .chars()
1805 .map(|c| {
1806 if c == '/' || c == '\\' || c == '\0' {
1807 '_'
1808 } else {
1809 c
1810 }
1811 })
1812 .collect();
1813 let safe_ref = safe_ref.replace("..", "__");
1814 // A leading `.` (hidden entry, `.` / `..`) is rejected by
1815 // validate_cache_key; prefix `_` so a `.foo` ref stays cacheable.
1816 let safe_ref = if safe_ref.starts_with('.') {
1817 format!("_{safe_ref}")
1818 } else {
1819 safe_ref
1820 };
1821 format!(
1822 "{safe_ref}-git-{commit_hash}-{arch}-kc{}",
1823 crate::cache_key_suffix()
1824 )
1825}
1826
/// Map a GitHub remote URL plus a resolved `commit_hash` to the archive
/// URL `https://github.com/OWNER/REPO/archive/<commit>.tar.gz` (302 →
/// codeload.github.com, its CDN), which serves a gzip source snapshot
/// for any commit — verified empirically. This lets a GitHub source's
/// commit be fetched over HTTP (no clone, no server-side allow-sha
/// requirement) rather than cloned. Returns `None` for a non-GitHub
/// URL (self-hosted / GitLab / …) — those take the gix clone path —
/// and for a GitHub URL whose path is not exactly `OWNER/REPO`.
///
/// The caller resolves the ref to `commit_hash` FIRST (a kind-directed
/// ls-remote; a sha is already the commit), so the download fetches the
/// EXACT commit the cache entry is keyed on — a branch tip that
/// advances between the ls-remote probe and this GET cannot mislabel
/// the entry the way a ref-name snapshot would. `commit_hash` is
/// lowercased to align with `git_cache_key`'s hash segment.
///
/// Accepts the https/http/ssh/git and scp-style GitHub remotes, each
/// with an optional trailing `/` and `.git`; the scheme+host prefix is
/// matched case-insensitively (DNS hostnames are case-insensitive)
/// while the OWNER/REPO path is kept verbatim.
pub(crate) fn github_archive_url(url: &str, commit_hash: &str) -> Option<String> {
    // Recognized scheme+host heads (with an optional `git@` userinfo for
    // ssh) plus the scp-style `git@github.com:OWNER/REPO` form.
    const GITHUB_PREFIXES: [&str; 6] = [
        "https://github.com/",
        "http://github.com/",
        "ssh://git@github.com/",
        "ssh://github.com/",
        "git://github.com/",
        "git@github.com:",
    ];

    // First prefix that matches wins. `get` (not slicing) keeps a short
    // URL or a non-char-boundary cut from panicking.
    let repo_path = GITHUB_PREFIXES.iter().find_map(|prefix| {
        let head = url.get(..prefix.len())?;
        head.eq_ignore_ascii_case(prefix)
            .then(|| &url[prefix.len()..])
    })?;

    // Trim trailing slashes (a common copy-paste artifact) before the
    // `.git` strip so `OWNER/REPO/` and `OWNER/REPO.git/` still resolve
    // to codeload rather than misrouting to the clone path.
    let repo_path = repo_path.trim_end_matches('/');
    let repo_path = repo_path.strip_suffix(".git").unwrap_or(repo_path);

    // Exactly OWNER/REPO — reject empty segments and deeper paths (a
    // stray extra segment is not a repo root, so fall through to the
    // clone path).
    let (owner, repo) = repo_path.split_once('/')?;
    if owner.is_empty() || repo.is_empty() || repo.contains('/') {
        return None;
    }

    // Always the resolved COMMIT (lowercased) — never a ref-name
    // snapshot — so the extracted tree matches git_cache_key's commit
    // exactly regardless of a concurrent branch-tip move. codeload
    // serves any commit case-insensitively.
    let commit = commit_hash.to_ascii_lowercase();
    Some(format!(
        "https://github.com/{owner}/{repo}/archive/{commit}.tar.gz"
    ))
}
1891
1892/// The object id the advertised ref named exactly `target` points at,
1893/// or `None` if no ref matches. For an annotated tag (`Ref::Peeled`)
1894/// this is the PEELED commit (`object`), never the tag object;
1895/// `Ref::Unborn` carries no commit and never matches. Used by the
1896/// kind-directed [`resolve_ref_commit`] so tag-peeling and
1897/// unborn-skipping stay consistent.
1898fn pick_ref_object(
1899 refs: &[gix::protocol::handshake::Ref],
1900 target: &str,
1901) -> Option<gix::hash::ObjectId> {
1902 refs.iter().find_map(|r| {
1903 let (name, object) = match r {
1904 gix::protocol::handshake::Ref::Peeled {
1905 full_ref_name,
1906 object,
1907 ..
1908 }
1909 | gix::protocol::handshake::Ref::Direct {
1910 full_ref_name,
1911 object,
1912 }
1913 | gix::protocol::handshake::Ref::Symbolic {
1914 full_ref_name,
1915 object,
1916 ..
1917 } => (full_ref_name, object),
1918 gix::protocol::handshake::Ref::Unborn { .. } => return None,
1919 };
1920 (*name == target).then_some(*object)
1921 })
1922}
1923
1924/// Resolve `git_ref` to its full commit hash under `ref_kind`, via a
1925/// kind-directed ls-remote. Unlike the clone path, the codeload
1926/// download has no checked-out `.git` to read `head_id` from, so it
1927/// resolves the commit here — [`git_cache_key`] needs it to key the
1928/// entry a clone of the same ref would write (shared cache).
1929///
1930/// A `Sha` ref IS the commit (lowercased to match `git_clone`'s
1931/// rendering) and resolves offline — no handshake. `Tag`/`Branch`
1932/// match ONLY the fully-qualified `refs/tags/{ref}` / `refs/heads/{ref}`
1933/// so a tag never aliases a same-named branch (a bare-name DWIM lookup
1934/// would resolve either). `None` on
1935/// ls-remote failure, no match, or `Unknown` (rejected by
1936/// [`crate::kernel_path::KernelId::validate`] upstream, so it is never
1937/// resolved).
1938pub(crate) fn resolve_ref_commit(
1939 url: &str,
1940 git_ref: &str,
1941 ref_kind: crate::kernel_path::GitRefKind,
1942) -> Option<String> {
1943 use crate::kernel_path::GitRefKind;
1944 let target = match ref_kind {
1945 GitRefKind::Sha => return Some(git_ref.to_ascii_lowercase()),
1946 GitRefKind::Tag => format!("refs/tags/{git_ref}"),
1947 GitRefKind::Branch => format!("refs/heads/{git_ref}"),
1948 GitRefKind::Unknown => return None,
1949 };
1950 pick_ref_object(&ls_remote_refs(url)?, &target).map(|object| format!("{object}"))
1951}
1952
/// Report whether `git_ref` has the shape of a full commit id: exactly
/// 40 bytes, all ASCII hex digits — recognizable as a sha without a
/// remote handshake. A 39/41-char ref, or any 40-char ref carrying a
/// non-hex byte, is a name (branch/tag) and falls through to ls-remote.
/// Case is not normalized here (the caller lowercases the full hash to
/// match `git_clone`'s rendering).
fn is_full_sha(git_ref: &str) -> bool {
    let raw = git_ref.as_bytes();
    if raw.len() != 40 {
        return false;
    }
    raw.iter().all(u8::is_ascii_hexdigit)
}
1961
1962/// ls-remote `url` and return EVERY advertised ref WITHOUT fetching a
1963/// pack. Best-effort: `None` on any failure (network, auth). Shared by
1964/// [`resolve_ref_commit`] (resolve one kind-directed ref → commit),
1965/// [`cached_stable_tags`], and [`latest_patch_from_git_tags`] (highest
1966/// `v{prefix}.{patch}` tag).
1967///
1968/// The ad-hoc repo (`init_opts` on a tempdir, with repo-local git config
1969/// only — see `anon_open_opts`) carries no working tree and fetches no
1970/// pack. Remote-side ref-prefix filtering is
1971/// DISABLED: gix's default (`prefix_from_spec_as_filter_on_remote =
1972/// true`) derives protocol-v2 `ls-refs` `ref-prefix` filters from the
1973/// remote's fetch refspecs; an anonymous `remote_at` has none, and
1974/// `fetch_tags = Included` injects only `refs/tags/*`, so the server
1975/// would return TAGS ONLY and `refs/heads/*` would never arrive.
1976/// Disabling the filter returns all refs, so a branch, tag, or HEAD
1977/// all resolve.
1978fn ls_remote_refs(url: &str) -> Option<Vec<gix::protocol::handshake::Ref>> {
1979 let tmp = tempfile::TempDir::new().ok()?;
1980 let repo = gix::ThreadSafeRepository::init_opts(
1981 tmp.path(),
1982 gix::create::Kind::WithWorktree,
1983 gix::create::Options::default(),
1984 anon_open_opts(),
1985 )
1986 .ok()?
1987 .to_thread_local();
1988 let remote = repo.remote_at(url).ok()?;
1989 let conn = remote.connect(gix::remote::Direction::Fetch).ok()?;
1990 let (refmap, _handshake) = conn
1991 .ref_map(
1992 gix::progress::Discard,
1993 gix::remote::ref_map::Options {
1994 prefix_from_spec_as_filter_on_remote: false,
1995 ..Default::default()
1996 },
1997 )
1998 .ok()?;
1999 Some(refmap.remote_refs)
2000}
2001
2002/// Open options for ktstr's git fetches: load ONLY repo-local git
2003/// config, never the user (`~/.gitconfig`), XDG, system
2004/// (`/etc/gitconfig`), or `GIT_CONFIG_*` env sources. This neutralizes a
2005/// `url.<base>.insteadOf` rewrite (e.g. a developer rule mapping
2006/// `https://github.com/` to `git@github.com:`) that would otherwise
2007/// route an anonymous public fetch through SSH and prompt for the key
2008/// passphrase once per operation — several at once under the concurrent
2009/// intra-range kernel resolution. Environment permissions stay at the
2010/// Full-trust default so an `http(s)_proxy` env var still applies.
2011///
2012/// SCOPE: these opts govern EVERY gix remote path — the internal version
2013/// resolution (`ls_remote_refs` and its callers) AND every user-supplied
2014/// `git+URL` clone via `git_clone_inner`, including a self-hosted
2015/// `git+https://...` source. The tradeoff is deliberate: a PUBLIC source
2016/// (the common case — kernel.org / gregkh / torvalds mirrors) fetches
2017/// anonymously with no credential prompt, and a PRIVATE source must use a
2018/// `git+ssh://user@host/repo` URL (SSH authenticates via `~/.ssh`,
2019/// independent of gitconfig). gitconfig-driven auth (an `insteadOf`
2020/// HTTPS->SSH rewrite plus credential/`git_binary` config) is
2021/// intentionally NOT honored, so it can never silently reroute an
2022/// anonymous fetch through SSH. The ad-hoc temp repos carry no local
2023/// config, so the effective URL-rewrite set is empty: the passed URL is
2024/// used verbatim.
2025fn anon_open_opts() -> gix::open::Options {
2026 use gix::sec::trust::DefaultForLevel;
2027 let mut opts = gix::open::Options::default_for_level(gix::sec::Trust::Full);
2028 opts.permissions.config.system = false;
2029 opts.permissions.config.git = false;
2030 opts.permissions.config.user = false;
2031 opts.permissions.config.env = false;
2032 opts.permissions.config.git_binary = false;
2033 opts
2034}
2035
2036/// Shallow-clone a git repository at a BRANCH ref.
2037///
2038/// `cli_label` prefixes diagnostic status output (e.g. `"ktstr"` or
2039/// `"cargo ktstr"`).
2040///
2041/// `mp` is the progress group a determinate clone bar is added to;
2042/// `None` disables the bar and passes `gix::progress::Discard` to gix
2043/// exactly as before (the single-shot `kernel build` paths and unit
2044/// tests pass `None`). The bar shows real object/file counts + ETA
2045/// during the receiving / resolving / checkout phases that gix reports
2046/// a bounded total for; see the `crate::cli::progress` module.
2047///
2048/// For a TAG ref use `git_clone_tag`: gix's shallow clone only
2049/// resolves branches via `with_ref_name` — see `git_clone_inner`.
2050pub fn git_clone(
2051 url: &str,
2052 git_ref: &str,
2053 dest_dir: &Path,
2054 cli_label: &str,
2055 mp: Option<&crate::cli::FetchProgress>,
2056) -> Result<AcquiredSource> {
2057 git_clone_inner(url, git_ref, dest_dir, cli_label, mp, None)
2058}
2059
2060/// Shallow-clone a git repository at a TAG ref (e.g. `v6.14.11`).
2061///
2062/// gix's shallow clone routes the ref through `Category::LocalBranch`
2063/// (`refs/heads/`) in its single-branch-shallow path
2064/// (`gix::clone::fetch`), so a tag never matches on the remote and the
2065/// fetch fails with "None of the refspec(s) matched". This appends a
2066/// `+refs/tags/{tag}:refs/heads/{tag}` refspec so the tag is fetched
2067/// into the local branch ref the checkout resolves. The `#tag=` git
2068/// source (via [`git_clone_kinded`]) uses this for a non-GitHub remote;
2069/// a GitHub remote takes the codeload path instead. (The pruned/EOL
2070/// tarball recovery no longer clones — [`download_tarball`]'s
2071/// `TarballNotFound` fallback fetches a gregkh codeload snapshot.)
2072pub(crate) fn git_clone_tag(
2073 url: &str,
2074 tag: &str,
2075 dest_dir: &Path,
2076 cli_label: &str,
2077 mp: Option<&crate::cli::FetchProgress>,
2078) -> Result<AcquiredSource> {
2079 let extra_refspec = format!("+refs/tags/{tag}:refs/heads/{tag}");
2080 git_clone_inner(url, tag, dest_dir, cli_label, mp, Some(extra_refspec))
2081}
2082
2083/// Clone a git source at `git_ref`, dispatching on `ref_kind` to the
2084/// correct clone path. A well-formed `github.com/OWNER/REPO` source
2085/// normally takes the codeload path ([`download_github_archive`], via
2086/// [`crate::cli::resolve_git_kernel`]) and reaches here only as a
2087/// fallback when the pre-fetch ls-remote resolution failed (no commit →
2088/// no codeload URL). A `github.com` URL whose path is not exactly
2089/// `OWNER/REPO` (so `github_archive_url` returns `None`) can still reach
2090/// the `Sha` arm below.
2091///
2092/// - `Tag` → [`git_clone_tag`] (adds the `refs/tags/*` refspec gix's
2093/// shallow path omits).
2094/// - `Branch` → [`git_clone`] (the plain shallow single-branch clone).
2095/// - `Sha` → a hard error: gix cannot fetch a bare commit, and a
2096/// self-hosted server generally lacks allow-sha-in-want. The
2097/// actionable message points at GitHub (codeload serves any sha) or a
2098/// tag/branch.
2099/// - `Unknown` → a hard error; [`crate::kernel_path::KernelId::validate`]
2100/// rejects it upstream, so this is a defensive backstop.
2101pub(crate) fn git_clone_kinded(
2102 url: &str,
2103 git_ref: &str,
2104 ref_kind: crate::kernel_path::GitRefKind,
2105 dest_dir: &Path,
2106 cli_label: &str,
2107 mp: Option<&crate::cli::FetchProgress>,
2108) -> Result<AcquiredSource> {
2109 use crate::kernel_path::GitRefKind;
2110 match ref_kind {
2111 GitRefKind::Tag => git_clone_tag(url, git_ref, dest_dir, cli_label, mp),
2112 GitRefKind::Branch => git_clone(url, git_ref, dest_dir, cli_label, mp),
2113 GitRefKind::Sha => anyhow::bail!(
2114 "git+{url}#sha={git_ref}: fetching this source by commit sha is \
2115 not supported — gix cannot fetch a bare commit and the remote \
2116 lacks allow-sha-in-want. Use a github.com/OWNER/REPO URL \
2117 (codeload serves any commit) or pin a #tag= / #branch= instead."
2118 ),
2119 GitRefKind::Unknown => anyhow::bail!(
2120 "git+{url}: ref kind could not be determined; use #tag=NAME, \
2121 #branch=NAME, or #sha=<40-hex>"
2122 ),
2123 }
2124}
2125
2126/// Shared shallow-clone implementation for [`git_clone`] (branch) and
2127/// [`git_clone_tag`] (tag).
2128///
2129/// `extra_refspec`, when `Some`, is appended to the remote's fetch
2130/// refspecs via `configure_remote` before the fetch (the tag path uses
2131/// it to fetch `refs/tags/*`). `None` leaves the branch clone
2132/// byte-identical to the historical behavior.
2133fn git_clone_inner(
2134 url: &str,
2135 git_ref: &str,
2136 dest_dir: &Path,
2137 cli_label: &str,
2138 mp: Option<&crate::cli::FetchProgress>,
2139 extra_refspec: Option<String>,
2140) -> Result<AcquiredSource> {
2141 // Any 40-hex `git_ref` cannot be cloned here, whatever kind the
2142 // operator meant it as: gix's `with_ref_name(<40-hex>)` treats it as
2143 // an object-id (its own `# Panics` doc: "an object-id as hex-hash"
2144 // panics at `fetch_then_checkout`, gix `clone/access.rs`), and
2145 // fetching a bare commit needs server-side allow-sha-in-want this
2146 // path does not implement. Reject with an actionable error rather
2147 // than panic. Placed at the single clone entry so every caller is
2148 // covered — including a `#branch=`/`#tag=` whose NAME is 40 hex.
2149 if is_full_sha(git_ref) {
2150 anyhow::bail!(
2151 "git+{url}#{git_ref}: cannot fetch a kernel by a raw commit SHA — \
2152 gix's shallow clone treats any 40-hex ref as a commit id (even a \
2153 branch/tag named 40 hex chars). Use a branch or tag name that is \
2154 not 40 hex chars, or on github.com `#sha=<40-hex>` (codeload \
2155 fetches the commit)."
2156 );
2157 }
2158 let cloning = format!("{cli_label}: cloning {url} (ref: {git_ref}, depth: 1)");
2159 match mp {
2160 Some(fp) => fp.println(&cloning),
2161 None => eprintln!("{cloning}"),
2162 }
2163
2164 let clone_dir = dest_dir.join("linux");
2165
2166 // Build the clone with anon_open_opts() (repo-local config only)
2167 // rather than gix::prepare_clone, whose open opts load the user's
2168 // gitconfig and would apply an `insteadOf` HTTPS->SSH rewrite,
2169 // prompting for a key passphrase. Mirrors gix::prepare_clone's
2170 // (WithWorktree, default create opts) otherwise.
2171 let mut prep = gix::clone::PrepareFetch::new(
2172 url,
2173 &clone_dir,
2174 gix::create::Kind::WithWorktree,
2175 gix::create::Options::default(),
2176 anon_open_opts(),
2177 )
2178 .with_context(|| "prepare clone")?
2179 .with_shallow(gix::remote::fetch::Shallow::DepthAtRemote(
2180 NonZeroU32::new(1).expect("1 is nonzero"),
2181 ))
2182 .with_ref_name(Some(git_ref))
2183 .with_context(|| "set ref name")?;
2184
2185 // Tag path only: gix's single-branch-shallow fetch derives its
2186 // refspec from `with_ref_name` via Category::LocalBranch
2187 // (`refs/heads/{ref}`), which never matches a `refs/tags/*` ref.
2188 // Append the caller's `+refs/tags/{tag}:refs/heads/{tag}` so the
2189 // tag is fetched into the branch ref the checkout resolves.
2190 // `with_refspecs` APPENDS (keeping gix's own single-branch spec),
2191 // so a branch clone that reaches here would still match its spec —
2192 // but the branch path passes `None` and skips this entirely.
2193 if let Some(spec) = extra_refspec {
2194 prep = prep.configure_remote(move |remote| {
2195 Ok(remote.with_refspecs(Some(spec.as_str()), gix::remote::Direction::Fetch)?)
2196 });
2197 }
2198
2199 // Drive a determinate clone bar from gix's progress tree (see
2200 // [`crate::cli::progress::CloneProgress`]). `None` when no progress
2201 // group is threaded in; the gix calls then pass `Discard` exactly
2202 // as before. One interrupt flag (never set) is shared by both
2203 // phases, matching the prior per-call `AtomicBool::new(false)`.
2204 let clone_progress = mp.map(|fp| fp.clone_progress(git_ref));
2205 let interrupt = std::sync::atomic::AtomicBool::new(false);
2206
2207 let (mut checkout, _outcome) = match &clone_progress {
2208 Some(cp) => prep
2209 .fetch_then_checkout(cp.item(), &interrupt)
2210 .with_context(|| "clone fetch")?,
2211 None => prep
2212 .fetch_then_checkout(gix::progress::Discard, &interrupt)
2213 .with_context(|| "clone fetch")?,
2214 };
2215
2216 let (_repo, _outcome) = match &clone_progress {
2217 Some(cp) => checkout
2218 .main_worktree(cp.item(), &interrupt)
2219 .with_context(|| "checkout")?,
2220 None => checkout
2221 .main_worktree(gix::progress::Discard, &interrupt)
2222 .with_context(|| "checkout")?,
2223 };
2224
2225 // Clone + checkout done — stop the poll thread, join it, clear the
2226 // bar. On any error path above, `clone_progress` is dropped
2227 // instead, and its `Drop` performs the same shutdown (leak-proof).
2228 if let Some(cp) = clone_progress {
2229 cp.finish();
2230 }
2231
2232 let repo = gix::open(&clone_dir).with_context(|| "open cloned repo")?;
2233 let head = repo.head_id().with_context(|| "read HEAD")?;
2234 // FULL commit hash keys the cache (see `git_cache_key` — a 7-hex
2235 // prefix risks a moved-tip collision serving a stale build); the
2236 // 7-hex `short_hash` is kept only for the human-facing source record.
2237 let commit_hash = format!("{head}");
2238 let short_hash = commit_hash.chars().take(7).collect::<String>();
2239
2240 let cache_key = git_cache_key(git_ref, &commit_hash);
2241
2242 // Record the kernel version from the checked-out source Makefile, as
2243 // local_source does — the worktree is fully checked out here, so a
2244 // git-clone-acquired honoring kernel also earns the 90% tmpfs reclaim
2245 // via the metadata.json sidecar. None on an unreadable/unparsable
2246 // Makefile, which keeps the conservative 50% default.
2247 let version = read_makefile_version(&clone_dir);
2248
2249 Ok(AcquiredSource {
2250 source_dir: clone_dir,
2251 cache_key,
2252 version,
2253 kernel_source: crate::cache::KernelSource::git(short_hash, git_ref),
2254 is_temp: true,
2255 is_dirty: false,
2256 is_git: true,
2257 })
2258}
2259
2260/// Use a local kernel source tree.
2261///
2262/// Dirty detection uses gix `tree_index_status` (HEAD-vs-index) and
2263/// `status().into_index_worktree_iter()` (index-vs-worktree) to check
2264/// for modifications to tracked files. Submodule checks are skipped
2265/// entirely. Untracked files do not affect the dirty flag.
2266///
2267/// When the tree is dirty, the HEAD commit does not describe the
2268/// source actually being built, so `git_hash` is dropped — no
2269/// commit identifies a dirty worktree. `is_dirty=true` carries that
2270/// fact forward; callers (see [`crate::cli`]) use it to bypass the
2271/// kernel cache entirely.
2272///
2273/// No diagnostic output: all operator-visible messaging for a
2274/// local source is routed through `kernel_build_pipeline`'s
2275/// cache-skip hint (`DIRTY_TREE_CACHE_SKIP_HINT` /
2276/// `NON_GIT_TREE_CACHE_SKIP_HINT`), which has the full context
2277/// to emit a single informational line rather than two redundant
2278/// warnings. Sibling entries (`download_tarball`, `git_clone`)
2279/// still take a `cli_label` because they genuinely print
2280/// progress lines — `local_source` does not.
2281pub fn local_source(source_path: &Path) -> Result<AcquiredSource> {
2282 let (arch, _) = arch_info();
2283
2284 if !source_path.is_dir() {
2285 anyhow::bail!("{}: not a directory", source_path.display());
2286 }
2287
2288 let canonical = source_path
2289 .canonicalize()
2290 .with_context(|| format!("canonicalize {}", source_path.display()))?;
2291
2292 let LocalSourceState {
2293 short_hash,
2294 is_dirty,
2295 is_git,
2296 } = inspect_local_source_state(&canonical)?;
2297
2298 // User .config is folded into the cache key so two builds of the
2299 // same HEAD with different `.config` files do NOT collide on the
2300 // same key — see [`config_hash_for_key`] for the encoding.
2301 // Read at `local_source` time (rather than at the post-build
2302 // store site) so cache LOOKUP and cache STORE see the same key.
2303 let user_config_hash = config_hash_for_key(&canonical);
2304
2305 let cache_key =
2306 compose_local_cache_key(arch, &short_hash, &canonical, user_config_hash.as_deref());
2307
2308 // Record the kernel version from the source-tree Makefile so the
2309 // tmpfs-fraction gate (TmpfsFraction::for_kernel_version, via the
2310 // cache metadata.json sidecar) recognizes a locally-built honoring
2311 // kernel — symmetric with the tarball path. None when the Makefile
2312 // is unreadable/unparsable, which keeps the conservative 50% default.
2313 let version = read_makefile_version(&canonical);
2314
2315 Ok(AcquiredSource {
2316 source_dir: canonical.clone(),
2317 cache_key,
2318 version,
2319 kernel_source: crate::cache::KernelSource::Local {
2320 source_tree_path: Some(canonical),
2321 git_hash: short_hash,
2322 },
2323 is_temp: false,
2324 is_dirty,
2325 is_git,
2326 })
2327}
2328
/// Parse the kernel `MAJOR.MINOR.PATCH` version from a source tree's
/// top-level `Makefile` (`VERSION` / `PATCHLEVEL` / `SUBLEVEL`) — the
/// authoritative version of a locally-built kernel, mirroring the
/// version a tarball acquisition records.
///
/// For each field the FIRST line of the form `NAME = ...` (surrounding
/// whitespace ignored) is the one that counts, and its value must be a
/// bare integer that fits a `u16`. A value carrying a trailing comment
/// or anything else non-numeric makes that field — and therefore the
/// whole result — `None`; later assignments to the same name are never
/// consulted, so a version is not pieced together from lines that were
/// not the primary assignment.
///
/// Returns `None` if the `Makefile` is unreadable or any of the three
/// fields is absent or non-numeric, so the caller records no version and
/// the rootfs tmpfs fraction conservatively defaults to 50% (the
/// honoring gate `TmpfsFraction::for_kernel_version` keys on a
/// positively-known version). `EXTRAVERSION` (e.g. `-rc7`) is
/// intentionally ignored: the gate keys on `MAJOR.MINOR.PATCH` only.
fn read_makefile_version(source_dir: &Path) -> Option<String> {
    let text = std::fs::read_to_string(source_dir.join("Makefile")).ok()?;
    let field = |name: &str| -> Option<u16> {
        // Locate the first `NAME =` assignment. Lines where the name is
        // merely a prefix of a longer identifier, or where the operator
        // is not a plain `=`, are not assignments to NAME and are
        // skipped.
        let raw_value = text.lines().find_map(|line| {
            line.trim()
                .strip_prefix(name)?
                .trim_start()
                .strip_prefix('=')
        })?;
        // Parsing happens OUTSIDE the search: a non-numeric first
        // assignment yields None instead of falling through to a later
        // line.
        raw_value.trim().parse().ok()
    };
    let major = field("VERSION")?;
    let minor = field("PATCHLEVEL")?;
    let patch = field("SUBLEVEL")?;
    Some(format!("{major}.{minor}.{patch}"))
}
2362
/// What [`inspect_local_source_state`] learned about a canonical
/// source-tree path: its git hash plus the dirty/git classification.
///
/// Kept as a standalone type, separate from [`local_source`], so the
/// post-build dirty re-check (a second call from
/// [`crate::cli::kernel_build_pipeline`]) goes through the exact same
/// gix path as the acquire-time probe.
#[derive(Debug, Clone)]
pub struct LocalSourceState {
    /// 7-character HEAD short hash. `None` for a dirty tree (HEAD does
    /// not describe the actual source) and for a non-git tree (there is
    /// no HEAD at all). Mirrors the `git_hash` field on
    /// [`AcquiredSource::kernel_source`] for
    /// [`crate::cache::KernelSource::Local`].
    pub short_hash: Option<String>,
    /// Whether tracked files are dirty: HEAD-vs-index disagreement OR
    /// index-vs-worktree disagreement. Always `true` for non-git trees —
    /// dirt cannot be detected without git, so the pessimistic answer is
    /// "dirty".
    pub is_dirty: bool,
    /// `true` when `gix::discover` succeeded (the tree is a git repo),
    /// `false` otherwise. Lets the cache-skip hint branch on whether
    /// `commit` / `stash` is actionable advice.
    pub is_git: bool,
}
2385
2386/// Inspect a canonical source-tree path for git hash + dirty state.
2387///
2388/// Submodule checks are skipped (false positives on kernel trees
2389/// with uninitialized submodules). The non-git arm returns
2390/// `(None, true, false)` so the caller's cache-skip hint can
2391/// distinguish "dirty git repo" from "not a git repo at all".
2392///
2393/// Called twice per build by [`crate::cli::kernel_build_pipeline`]:
2394/// once at acquire time (via [`local_source`]) and again after
2395/// `make` returns to detect mid-build worktree edits, branch flips,
2396/// or commits that would otherwise let a racing-write build land in
2397/// the cache under a stale identity. Both calls share the same gix
2398/// path so the post-build comparison is apples-to-apples.
2399///
2400/// Non-atomic against concurrent git operations: the probe runs
2401/// six sequential gix calls (`discover` → `head_id` → `head_tree`
2402/// → `index_or_empty` → `tree_index_status` → `status`), each a
2403/// separate filesystem read with no transactional bracket. A
2404/// concurrent `git commit`, `git add`, or worktree write between
2405/// any two calls can produce internally-inconsistent results —
2406/// e.g. `head_id` reads commit C0, a peer commit lands C1, then
2407/// `head_tree` reads C1's root tree and the diff against the
2408/// post-add index reports unexpected dirt. Git itself serializes
2409/// its own writes via per-resource lockfiles under `.git/`
2410/// (`index.lock` for staging operations, `HEAD.lock` and
2411/// `refs/heads/<branch>.lock` for ref updates), so peer `git`
2412/// processes wait on whichever lockfile their operation touches;
2413/// the genuinely-unsynchronized class is worktree-only writes
2414/// (autoformatter, IDE-on-save) which the index-worktree status
2415/// step catches regardless of timing.
2416///
2417/// The disposition is intentionally pessimistic so inconsistency is
2418/// safe: any `Err` propagates to the caller, which treats it as a
2419/// rebuild signal (`MidWaitState::ProbeFailed` in the mid-wait
2420/// caller); any spurious dirty signal falls into DirtyEdit /
2421/// HashAdvanced, both forcing a rebuild. The cost of a false-
2422/// positive rebuild is one extra `make`; the cost of a false-
2423/// negative would be a cache slot keyed on a HEAD that no longer
2424/// describes the source — the asymmetry is the reason for the
2425/// pessimistic disposition. Callers should treat the returned
2426/// state as a best-effort approximation of probe-time, not an
2427/// instantaneous snapshot.
2428pub fn inspect_local_source_state(canonical: &Path) -> Result<LocalSourceState> {
2429 let (short_hash, is_dirty, is_git) = match gix::discover(canonical) {
2430 Ok(repo) => {
2431 let head = repo.head_id().with_context(|| "read HEAD")?;
2432 let short_hash = format!("{}", head).chars().take(7).collect::<String>();
2433
2434 // tree_index_status compares a TREE id against the index;
2435 // the HEAD commit id is not itself a tree, so peel HEAD
2436 // to its root tree before diffing or the diff silently
2437 // returns an error and index dirt goes undetected.
2438 let head_tree = repo.head_tree().with_context(|| "read HEAD tree")?;
2439 let head_tree_id = head_tree.id;
2440
2441 // Check HEAD-vs-index for tracked file changes.
2442 let mut index_dirty = false;
2443 let index = repo.index_or_empty().with_context(|| "open index")?;
2444 let _ = repo.tree_index_status(
2445 &head_tree_id,
2446 &index,
2447 None,
2448 gix::status::tree_index::TrackRenames::Disabled,
2449 |_, _, _| {
2450 index_dirty = true;
2451 Ok::<_, std::convert::Infallible>(std::ops::ControlFlow::Break(()))
2452 },
2453 );
2454
2455 // Check index-vs-worktree for modified tracked files,
2456 // skipping submodules entirely (Ignore::All).
2457 let worktree_dirty = if !index_dirty {
2458 repo.status(gix::progress::Discard)
2459 .with_context(|| "status")?
2460 .index_worktree_rewrites(None)
2461 .index_worktree_submodules(gix::status::Submodule::Given {
2462 ignore: gix::submodule::config::Ignore::All,
2463 check_dirty: false,
2464 })
2465 .index_worktree_options_mut(|opts| {
2466 opts.dirwalk_options = None;
2467 })
2468 .into_index_worktree_iter(Vec::new())
2469 .map(|mut iter| iter.next().is_some())
2470 .unwrap_or(false)
2471 } else {
2472 false
2473 };
2474
2475 let is_dirty = index_dirty || worktree_dirty;
2476 // Drop the HEAD hash when dirty — the commit does not
2477 // describe the actual source being built, so publishing
2478 // it via git_hash / cache_key would misidentify the
2479 // build input.
2480 let hash = if is_dirty { None } else { Some(short_hash) };
2481 (hash, is_dirty, true)
2482 }
2483 Err(_) => {
2484 // The downstream kernel_build_pipeline (cli::kernel_build_pipeline)
2485 // emits `NON_GIT_TREE_CACHE_SKIP_HINT` — a single
2486 // informational line that names both the cause and the
2487 // remediation paths — once the is_dirty=true branch
2488 // decides to skip the cache. Emitting a second
2489 // "not a git repository" warning here duplicated that
2490 // content for every non-git `--kernel <path>` run. The
2491 // `(None, true, false)` tuple silently communicates
2492 // the non-git state to the cache-skip decision site;
2493 // no separate stderr line is needed on this path.
2494 (None, true, false)
2495 }
2496 };
2497 Ok(LocalSourceState {
2498 short_hash,
2499 is_dirty,
2500 is_git,
2501 })
2502}
2503
2504/// Compose the cache key for a local source given its arch, optional
2505/// HEAD short hash, canonical source path, and optional user
2506/// `.config` hash.
2507///
2508/// Three shapes:
2509/// - `local-{hash7}-{arch}-kc{suffix}` — clean git tree, no user
2510/// `.config` (plain `make defconfig` path or no config file yet)
2511/// - `local-{hash7}-{arch}-cfg{user_config}-kc{suffix}` — clean git
2512/// tree with a user `.config` whose hash differs from `defconfig`
2513/// - `local-unknown-{path_hash}-{arch}-kc{suffix}` — dirty / non-git
2514/// tree (HEAD does not describe the source; the path-derived
2515/// crc32 salt keeps two distinct dirty trees from colliding on the
2516/// same `local-unknown-...` slot)
2517///
2518/// `path_hash` is the full 8-char (32-bit) lowercase-hex CRC32 of
2519/// the canonical source-path bytes. CRC32 keeps the per-path
2520/// disambiguator stable across runs without pulling in a
2521/// crypto-grade hash for what is fundamentally a slot disambiguator.
2522///
2523/// `user_config_hash` is `None` whenever the source tree has no
2524/// `.config` file yet (the build will run `make defconfig` and
2525/// produce one). This collapses the user-config branch back into the
2526/// hash-only key so a fresh checkout's first build still hits a
2527/// later cache lookup keyed without the cfg segment.
2528pub fn compose_local_cache_key(
2529 arch: &str,
2530 short_hash: &Option<String>,
2531 canonical: &Path,
2532 user_config_hash: Option<&str>,
2533) -> String {
2534 let suffix = crate::cache_key_suffix();
2535 match short_hash {
2536 Some(hash) => match user_config_hash {
2537 Some(cfg) => format!("local-{hash}-{arch}-cfg{cfg}-kc{suffix}"),
2538 None => format!("local-{hash}-{arch}-kc{suffix}"),
2539 },
2540 None => {
2541 let path_hash = canonical_path_hash(canonical);
2542 format!("local-unknown-{path_hash}-{arch}-kc{suffix}")
2543 }
2544 }
2545}
2546
2547/// CRC32 of the canonical source-path bytes, lowercase hex
2548/// (full 8-char width — the entire 32-bit value). Disambiguates
2549/// `local-unknown-...` cache keys and per-source-tree lockfile
2550/// names across distinct dirty / non-git source trees so two
2551/// parallel `cargo ktstr test --kernel ./linux-a` and
2552/// `--kernel ./linux-b` runs can't write each other's vmlinux into
2553/// the same cache slot or share a single source-tree flock.
2554///
2555/// Full 32 bits (8 hex chars) of CRC32 keep collision risk
2556/// negligible against the practical population (handful of source
2557/// trees per host) while staying human-readable. The earlier
2558/// 6-char (24-bit) form left ~6× the collision surface for the
2559/// same key shape; truncation served no purpose other than visual
2560/// brevity. Path bytes are taken via `OsStr::as_encoded_bytes` so
2561/// a non-UTF-8 component (rare on Linux but possible) doesn't lose
2562/// entropy through a UTF-8 lossy conversion.
2563pub(crate) fn canonical_path_hash(canonical: &Path) -> String {
2564 let bytes = canonical.as_os_str().as_encoded_bytes();
2565 format!("{:08x}", crc32fast::hash(bytes))
2566}
2567
2568/// Read `<canonical>/.config` and return its CRC32 as a lowercase
2569/// hex string suitable for embedding in the cache key. Returns
2570/// `None` when no `.config` exists (a fresh tree before the build
2571/// runs `make defconfig`).
2572///
2573/// Distinct from the `config_hash` written into [`crate::cache::KernelMetadata`]
2574/// at store time — that records the FINAL `.config` after
2575/// configuration runs, for diagnostic display in `kernel list`.
2576/// This helper records the PRE-BUILD `.config` so the cache key
2577/// reflects what the operator's tree currently has on disk; the
2578/// same `.config` content always maps to the same key, even if the
2579/// downstream `make olddefconfig` step elaborates additional
2580/// defaults.
2581fn config_hash_for_key(canonical: &Path) -> Option<String> {
2582 let config_path = canonical.join(".config");
2583 let data = std::fs::read(&config_path).ok()?;
2584 Some(format!("{:08x}", crc32fast::hash(&data)))
2585}
2586
2587#[cfg(test)]
2588#[path = "fetch_tests.rs"]
2589mod tests;