ktstr/cli/kernel_build/
make.rs

1//! `make` subprocess invocation.
2//!
3//! Three `make` invocation paths:
4//! - [`run_make`] inherits the parent's stdout/stderr under a
5//!   wall-clock timeout — used for `mrproper` and other calls made
6//!   outside an active progress group, where raw pass-through is fine.
7//! - [`run_make_captured`] captures merged stdout+stderr under a
8//!   wall-clock timeout and replays it only on failure/timeout — used
9//!   for the `defconfig` / `olddefconfig` configure step, which runs
10//!   under a live "Configuring kernel..." spinner that raw
11//!   pass-through would clobber (and whose output — e.g. `override:
12//!   reassigning to symbol …` warnings from the fragment — is routine
13//!   noise on the success path).
14//! - [`run_make_with_output`] streams merged stdout+stderr through the
15//!   progress group live with no timeout — used for the full build,
16//!   whose minutes-long (legitimately unbounded) output the operator
17//!   watches.
18//!
19//! The two timed paths share [`poll_deadline`] (wrapped by
20//! [`poll_child_with_timeout`] for the inherit path); the two
21//! capturing paths share [`drain_lines_lossy`]. Extracting both lets
22//! the timeout and line-drain mechanics be exercised against synthetic
23//! [`std::process::Child`] fixtures and in-memory readers without
24//! spawning real `make` or emitting non-UTF-8 compiler output.
25
26use std::io::BufRead;
27use std::path::Path;
28use std::time::Duration;
29
30use anyhow::{Context, Result, bail};
31
32/// Wall-clock ceiling for the timed `make` paths ([`run_make`],
33/// [`run_make_captured`]).
34///
35/// The timeout protects against a wedged make holding the calling
36/// pipeline forever. Without it, a stuck `olddefconfig` (e.g. an
37/// interactive `conf` prompt that the `configure_kernel` pre-step
38/// failed to bypass, or a kernel-tree inconsistency that wedges
39/// `make`) would block the parent process indefinitely. The ceiling
40/// is intentionally generous — a single `make defconfig` completes in
41/// seconds on any hardware, but large WIP kernel trees with many
42/// out-of-tree patches can stretch `mrproper` / `olddefconfig` past
43/// the typical seconds-scale; 30 minutes covers every legitimate
44/// caller while still bounding a genuine wedge.
45pub(super) const MAKE_TIMEOUT: Duration = Duration::from_secs(30 * 60);
46
47/// Production `try_wait` poll cadence for the timed `make` paths —
48/// small enough that a completed make is reaped within one tick, large
49/// enough that the polling itself is not measurable load. Tests pass a
50/// sub-millisecond override directly to [`poll_child_with_timeout`] /
51/// [`run_make_captured`] so timeout-fires-and-reaps assertions
52/// complete quickly.
53pub(super) const MAKE_POLL_INTERVAL: Duration = Duration::from_millis(100);
54
55/// Run make in a kernel directory under a wall-clock timeout, with the
56/// parent's stdout/stderr inherited.
57///
58/// Used for `mrproper` and other `make` calls made outside an active
59/// progress group, where raw pass-through is fine. The two capturing
60/// siblings — `run_make_captured` (configure step, replay-on-failure)
61/// and [`run_make_with_output`] (full build, live stream) — pipe-drain
62/// the output instead so it does not clobber a live spinner or bar.
63///
64/// On timeout the child is killed (SIGKILL) and reaped before bailing
65/// so no zombie outlives the function; see `poll_child_with_timeout`
66/// for the shared poll+reap mechanics.
67pub fn run_make(kernel_dir: &Path, args: &[&str]) -> Result<()> {
68    let child = std::process::Command::new("make")
69        .args(args)
70        .current_dir(kernel_dir)
71        .spawn()
72        .with_context(|| format!("spawn make {}", args.join(" ")))?;
73
74    poll_child_with_timeout(
75        child,
76        MAKE_TIMEOUT,
77        MAKE_POLL_INTERVAL,
78        &format!("make {}", args.join(" ")),
79    )
80}
81
/// Outcome of [`poll_deadline`] — reported without collapsing to a
/// single error so callers can attach their own bail wording and, on
/// the capturing path, replay captured output and sweep the process
/// group before failing.
enum PollOutcome {
    /// Child exited on its own; `try_wait` already reaped it. The
    /// carried status may be success or failure.
    Exited(std::process::ExitStatus),
    /// Deadline elapsed before the child exited; it was killed and
    /// reaped inside [`poll_deadline`].
    TimedOut,
    /// `try_wait` itself errored; the child was killed and reaped
    /// inside [`poll_deadline`] before returning.
    WaitErr(std::io::Error),
}

/// Poll `child` until it exits or `timeout` elapses, then return the
/// outcome without bailing.
///
/// `timeout` is the wall-clock budget AFTER `child` has already
/// spawned (the deadline is computed relative to the call instant).
/// A `timeout` so large that the deadline cannot be represented as an
/// `Instant` (e.g. `Duration::MAX`) is treated as "no deadline" rather
/// than panicking.
///
/// `poll_interval` controls the `try_wait` cadence — small enough that
/// a completed child is reaped within one tick, large enough that
/// polling itself is not measurable load. Production passes
/// [`MAKE_POLL_INTERVAL`] (100ms); tests pass 1ms so a sub-second
/// timeout assertion completes quickly. Each sleep is clamped to the
/// time remaining until the deadline, so a timeout shorter than
/// `poll_interval` fires on time instead of one full interval late.
///
/// On a clean exit the returned status is already reaped (`try_wait`
/// consumed it). On timeout or a `try_wait` error the child is killed
/// AND reaped before returning, so no zombie outlives the call in any
/// path. Extracted from [`run_make`] so the timeout mechanics can be
/// exercised against synthetic [`std::process::Child`] fixtures with
/// sub-second deadlines (real `make` invocations would burn the full
/// production timeout) — see [`poll_child_with_timeout`]'s tests.
fn poll_deadline(
    child: &mut std::process::Child,
    timeout: Duration,
    poll_interval: Duration,
) -> PollOutcome {
    // `Instant + Duration` panics when the sum is unrepresentable;
    // `checked_add` yields `None` instead, which is read below as
    // "poll forever".
    let deadline = std::time::Instant::now().checked_add(timeout);
    loop {
        match child.try_wait() {
            Ok(Some(status)) => return PollOutcome::Exited(status),
            Ok(None) => {
                let nap = match deadline {
                    Some(deadline) => {
                        let remaining =
                            deadline.saturating_duration_since(std::time::Instant::now());
                        if remaining.is_zero() {
                            // Wedged — kill + reap so no zombie persists.
                            let _ = child.kill();
                            let _ = child.wait();
                            return PollOutcome::TimedOut;
                        }
                        // Never sleep past the deadline.
                        poll_interval.min(remaining)
                    }
                    None => poll_interval,
                };
                std::thread::sleep(nap);
            }
            Err(e) => {
                // Reap before returning so a transient try_wait
                // failure doesn't leak the child.
                let _ = child.kill();
                let _ = child.wait();
                return PollOutcome::WaitErr(e);
            }
        }
    }
}
144
145/// Poll an already-spawned `child` under a wall-clock timeout, mapping
146/// the [`poll_deadline`] outcome to a labeled `Result` for the
147/// stdout/stderr-inherited [`run_make`] path.
148///
149/// `label` is the human-facing name embedded in error messages (e.g.
150/// `"make defconfig"`) — pinning a synthetic label in the test surface
151/// lets the assertion match the bail wording without depending on
152/// `make` being installed on the runner.
153///
154/// On timeout the bail carries `timed out after`; on a non-zero exit
155/// it carries `failed`; on a `try_wait` error it wraps the io error
156/// with `wait on {label}`. The child is always reaped before this
157/// returns (see [`poll_deadline`]).
158pub(super) fn poll_child_with_timeout(
159    mut child: std::process::Child,
160    timeout: Duration,
161    poll_interval: Duration,
162    label: &str,
163) -> Result<()> {
164    match poll_deadline(&mut child, timeout, poll_interval) {
165        PollOutcome::Exited(status) => {
166            anyhow::ensure!(status.success(), "{label} failed");
167            Ok(())
168        }
169        PollOutcome::TimedOut => {
170            bail!("{label} timed out after {timeout:?}; child killed")
171        }
172        PollOutcome::WaitErr(e) => Err(e).with_context(|| format!("wait on {label}")),
173    }
174}
175
176/// Drain a reader into a `Vec<String>`, one entry per newline-delimited
177/// chunk, with a final partial chunk (no trailing newline) emitted
178/// with the same lossy-UTF-8 conversion. Byte-oriented so non-UTF-8
179/// input survives via `from_utf8_lossy` (U+FFFD replacement) instead
180/// of being dropped at the line boundary. Strips the trailing `\n`
181/// and an optional preceding `\r` so CRLF input matches LF semantics.
182/// Calls `on_line` for each line before appending to the returned
183/// `Vec`.
184///
185/// Returned entries and the `on_line` argument never carry their
186/// terminating `\n` (or `\r\n`) — the strip runs before emission, so
187/// callers that re-emit with `println!` get clean single-newline
188/// formatting and callers that persist the strings do not double-
189/// count line terminators. Interior `\r` bytes (lone CR not paired
190/// with a trailing LF) pass through verbatim, matching the unit
191/// coverage in `drain_lines_lossy_lone_cr_at_eof_is_preserved` and
192/// `drain_lines_lossy_interior_cr_is_preserved`.
193///
194/// Extracted from [`run_make_with_output`] so the read logic is
195/// testable with in-memory readers (the caller still owns child
196/// kill+wait).
197pub(super) fn drain_lines_lossy(
198    mut reader: impl BufRead,
199    mut on_line: impl FnMut(&str),
200) -> std::io::Result<Vec<String>> {
201    let mut captured = Vec::new();
202    let mut buf = Vec::new();
203    loop {
204        buf.clear();
205        let n = reader.read_until(b'\n', &mut buf)?;
206        if n == 0 {
207            break;
208        }
209        let mut slice: &[u8] = &buf;
210        if let Some(rest) = slice.strip_suffix(b"\n") {
211            slice = rest;
212            if let Some(rest) = slice.strip_suffix(b"\r") {
213                slice = rest;
214            }
215        }
216        let line = String::from_utf8_lossy(slice).into_owned();
217        on_line(&line);
218        captured.push(line);
219    }
220    Ok(captured)
221}
222
223/// Run make with merged stdout+stderr piped through the progress group.
224///
225/// Creates a single pipe via `nix::unistd::pipe2(O_CLOEXEC)`, hands
226/// the write end to the child's stdout AND stderr (a clone), and
227/// reads from the read end. `O_CLOEXEC` prevents the raw pipe fds
228/// from leaking into any concurrently-spawned children on other
229/// threads — without the flag, a race between `pipe()` and the
230/// `Stdio::from()` consumption could let an unrelated `fork+exec`
231/// inherit the write end and hold the reader open indefinitely.
232/// One pipe, one reader — no threads, no channel, no chance of a
233/// deadlock where reading stdout blocks while stderr fills its
234/// buffer. Same merged-stream semantics that `sh -c "make … 2>&1"`
235/// gives, without the shell-out.
236///
237/// When a progress group is supplied, each line is printed via
238/// `crate::cli::FetchProgress::println` (which lands above the live
239/// bars, or on stderr when the group is hidden). When `None`, output
240/// is captured and shown only on failure.
241///
242/// Pipe-read I/O errors propagate via `Err` rather than silently
243/// ending the read loop. The prior line-iterator formulation
244/// (`.lines()` + `Result::ok`) dropped every error-tagged item —
245/// a mid-stream read failure just looked like EOF and the child's
246/// tail output disappeared without a diagnostic. The byte-oriented
247/// `drain_lines_lossy` now surfaces such failures with `anyhow`
248/// context naming the merged-stream read, so a broken-pipe or EIO
249/// during make's output is caught at the call site.
250///
251/// Lines observed by the progress group's `println` and retained in the
252/// on-failure replay buffer are LF-normalized: `drain_lines_lossy`
253/// strips the trailing `\n`, and a preceding `\r` (the CRLF form
254/// Make emits on some toolchain + terminal combinations) is
255/// stripped too, so every line the caller sees is LF-only and
256/// terminator-less. Interior lone `\r` bytes — e.g. a progress
257/// bar using carriage-return redraw — pass through verbatim (see
258/// `drain_lines_lossy_interior_cr_is_preserved`), which keeps
259/// the on-failure replay readable without mangling tools that
260/// legitimately use `\r` mid-line.
261pub fn run_make_with_output(
262    kernel_dir: &Path,
263    args: &[&str],
264    progress: Option<&crate::cli::FetchProgress>,
265) -> Result<()> {
266    let (read_fd, write_fd) = nix::unistd::pipe2(nix::fcntl::OFlag::O_CLOEXEC)
267        .context("create pipe for merged make stdout+stderr")?;
268    let write_fd_err = write_fd
269        .try_clone()
270        .context("clone pipe write end for stderr")?;
271
272    // NOTE: do NOT arm PR_SET_PDEATHSIG via CommandExt::pre_exec to reap
273    // make on parent death — pre_exec forces the fork+exec path, and this
274    // orchestrator is multithreaded (jemalloc background threads, at least),
275    // so the forked child can deadlock BEFORE exec on a lock another thread
276    // held at fork time (empirically the high-volume drain tests below hang
277    // with no make ever exec'd). Parent death is already handled without it:
278    // the parent holds the only pipe READ end (below), so when it dies make
279    // and its gcc workers get SIGPIPE on their next write (std resets SIGPIPE
280    // to SIG_DFL in spawned children) and terminate; the normal / pipe-error
281    // paths reap via the explicit wait / kill below.
282    let mut child = std::process::Command::new("make")
283        .args(args)
284        .current_dir(kernel_dir)
285        .stdout(std::process::Stdio::from(write_fd))
286        .stderr(std::process::Stdio::from(write_fd_err))
287        .spawn()
288        .with_context(|| format!("spawn make {}", args.join(" ")))?;
289
290    // Parent has no remaining writer handles. `Stdio::from(OwnedFd)`
291    // consumed `write_fd` and `write_fd_err` into the Command
292    // builder; during `.spawn()` the builder installs them as the
293    // child's stdout/stderr via `dup2`, then drops its own OwnedFd
294    // copies. The child therefore holds the only live write ends
295    // (its dup2'd stdout/stderr, fd 1/2). When `make` exits, those
296    // fds are closed and the reader here sees EOF naturally.
297    //
298    // Read as bytes and convert each line via `from_utf8_lossy` at
299    // the boundary. Compiler output can include non-UTF-8 bytes —
300    // source paths on exotic filesystems, embedded binary fragments
301    // from diagnostic tools, locale-encoded text — and a pure-String
302    // reader would drop those lines via the `Result::ok` filter,
303    // hiding real compiler errors in CI logs. Lossy conversion keeps
304    // every line visible with U+FFFD where the bytes were not valid
305    // UTF-8.
306    let reader = std::io::BufReader::new(std::fs::File::from(read_fd));
307    let captured = match drain_lines_lossy(reader, |line| {
308        if let Some(p) = progress {
309            p.println(line);
310        }
311    }) {
312        Ok(v) => v,
313        Err(e) => {
314            // On pipe-read I/O failure, kill and reap the child
315            // before propagating so `make` doesn't linger as a
316            // zombie — stdlib's Child does not auto-wait on drop.
317            // Both ops use `.ok()` because the read-side error is
318            // the actionable diagnostic; a secondary wait/kill
319            // failure should not mask it.
320            child.kill().ok();
321            child.wait().ok();
322            return Err(e).context("read merged make stdout+stderr");
323        }
324    };
325
326    let status = child.wait()?;
327    if !status.success() {
328        // Always show captured output on failure so CI logs contain
329        // the actual compiler errors, not just "make failed".
330        for line in &captured {
331            eprintln!("{line}");
332        }
333        bail!("make {} failed", args.join(" "));
334    }
335    Ok(())
336}
337
338/// Run make capturing merged stdout+stderr, under a wall-clock
339/// timeout, replaying the captured output ONLY on failure or timeout.
340///
341/// Used for the `defconfig` / `olddefconfig` configure step, which
342/// runs under a live "Configuring kernel..." spinner. Unlike
343/// [`run_make_with_output`] (which streams every line live for the
344/// minutes-long build), this path stays SILENT on success: the
345/// configure step is fast and its output is routine noise (defconfig
346/// echoes, `override: reassigning to symbol …` warnings from ktstr's
347/// baked-in fragment intentionally overriding defconfig values), so
348/// streaming it would clutter the spinner without informing the
349/// operator. On failure or timeout the full captured output is
350/// replayed via [`replay_captured`] — above the progress bars through
351/// [`crate::cli::FetchProgress::println`] when a group is supplied,
352/// else on stderr — so a genuine configure error stays fully
353/// diagnosable.
354///
355/// Drains the merged pipe on a scoped worker thread while the calling
356/// thread runs [`poll_deadline`]: a single blocking read loop cannot
357/// also poll the child, so the read runs on its own thread. Continuous
358/// draining means the child never blocks on a full pipe regardless of
359/// output volume — the same single-pipe/single-reader no-deadlock
360/// property [`run_make_with_output`] documents. stdin is inherited,
361/// matching [`run_make`]: `olddefconfig` resolves new symbols
362/// non-interactively after the fragment append, and the timeout
363/// backstops any prompt that slips through.
364pub(super) fn run_make_captured(
365    kernel_dir: &Path,
366    args: &[&str],
367    progress: Option<&crate::cli::FetchProgress>,
368    timeout: Duration,
369    poll_interval: Duration,
370) -> Result<()> {
371    use std::os::unix::process::CommandExt as _;
372
373    let (read_fd, write_fd) = nix::unistd::pipe2(nix::fcntl::OFlag::O_CLOEXEC)
374        .context("create pipe for merged make stdout+stderr")?;
375    let write_fd_err = write_fd
376        .try_clone()
377        .context("clone pipe write end for stderr")?;
378
379    // `process_group(0)` makes the child a process-group leader (pgid
380    // == its pid). The configure step's `make` spawns a short-lived
381    // `scripts/kconfig/conf`; if that wedges (e.g. an interactive
382    // prompt blocked on stdin) it inherits — and holds open — the
383    // pipe's write end. Killing `make` alone would leave `conf` alive,
384    // the write end open, and the drain thread blocked on read
385    // forever. The group kill (below) sweeps `make` AND its
386    // descendants so every write end closes and the reader hits EOF.
387    //
388    // Trade-off of the fresh group: `make` is no longer in the
389    // terminal's foreground group, so a Ctrl-C (SIGINT) during the
390    // seconds-scale configure step reaches only the parent CLI, not
391    // `make` directly — the parent exiting then breaks the pipe and
392    // `make` dies of EPIPE. The streaming build path
393    // (`run_make_with_output`) keeps the parent's group and stays
394    // directly Ctrl-C-interruptible.
395    let mut child = std::process::Command::new("make")
396        .args(args)
397        .current_dir(kernel_dir)
398        .stdout(std::process::Stdio::from(write_fd))
399        .stderr(std::process::Stdio::from(write_fd_err))
400        .process_group(0)
401        .spawn()
402        .with_context(|| format!("spawn make {}", args.join(" ")))?;
403    // The child leads its own group, so its pid IS the pgid. Guard the
404    // cast the way `scenario::payload_run::kill_payload_process_group`
405    // does: `killpg` with a non-positive pgid broadcasts to the
406    // caller's group / every permitted process, so a pid that is
407    // non-positive or outside `pid_t` must never reach `killpg`. Linux
408    // pid_max <= 2^22 makes this unreachable in practice; the guard
409    // keeps the destructive syscall safe regardless.
410    let child_pgid: Option<nix::unistd::Pid> = libc::pid_t::try_from(child.id())
411        .ok()
412        .filter(|&p| p > 0)
413        .map(nix::unistd::Pid::from_raw);
414    debug_assert!(
415        child_pgid.is_some(),
416        "make child pid {} is not a valid pgid",
417        child.id()
418    );
419
420    // Parent holds no write ends after spawn (Stdio::from consumed
421    // both OwnedFds and the builder dropped its dup2'd copies), so the
422    // reader sees EOF once every write-end holder closes — on the
423    // child's natural exit, or on the timeout group-kill below. Drain
424    // on a scoped thread so the calling thread can enforce the
425    // deadline concurrently.
426    let reader = std::io::BufReader::new(std::fs::File::from(read_fd));
427    let (outcome, drained) = std::thread::scope(|scope| {
428        let drain = scope.spawn(move || drain_lines_lossy(reader, |_line| {}));
429        let outcome = poll_deadline(&mut child, timeout, poll_interval);
430        // Sweep the process group on EVERY path before joining the
431        // drain thread. poll_deadline already reaped `make` itself (via
432        // try_wait on a clean exit, or kill+wait on timeout), but any
433        // descendant that inherited the pipe's write end — a wedged
434        // `conf` on the timeout path, or a backgrounded recipe process
435        // on a clean exit — would keep the reader from ever seeing EOF
436        // and hang drain.join() with no remaining backstop. killpg
437        // closes those write ends. An already-empty (all-reaped) group
438        // ESRCHs harmlessly. Unconditional-sweep-after-wait matches
439        // `scenario::payload_run::PayloadHandle::wait`.
440        if let Some(pgid) = child_pgid {
441            let _ = nix::sys::signal::killpg(pgid, nix::sys::signal::Signal::SIGKILL);
442        }
443        let drained = drain.join().expect("make output drain thread panicked");
444        (outcome, drained)
445    });
446
447    // A drain-thread read error wins over the make outcome here (the
448    // `?` propagates it before the outcome match), matching
449    // `run_make_with_output`: the pipe-read failure is itself the
450    // actionable diagnostic.
451    let captured = drained.context("read merged make stdout+stderr")?;
452    let label = format!("make {}", args.join(" "));
453    // Every failure arm replays the captured output before surfacing so
454    // the contract in this function's doc ("replayed on failure or
455    // timeout") holds symmetrically — including the rare WaitErr path.
456    match outcome {
457        PollOutcome::Exited(status) if status.success() => Ok(()),
458        PollOutcome::Exited(_) => {
459            replay_captured(&captured, progress);
460            bail!("{label} failed");
461        }
462        PollOutcome::TimedOut => {
463            replay_captured(&captured, progress);
464            bail!("{label} timed out after {timeout:?}; child killed");
465        }
466        PollOutcome::WaitErr(e) => {
467            replay_captured(&captured, progress);
468            Err(e).with_context(|| format!("wait on {label}"))
469        }
470    }
471}
472
473/// Replay captured make output on the failure path — above the
474/// progress bars via [`crate::cli::FetchProgress::println`] when a
475/// group is live (so it does not interleave with the bars), else on
476/// stderr. Lines are already LF-normalized and terminator-less
477/// ([`drain_lines_lossy`]).
478fn replay_captured(captured: &[String], progress: Option<&crate::cli::FetchProgress>) {
479    for line in captured {
480        match progress {
481            Some(p) => p.println(line),
482            None => eprintln!("{line}"),
483        }
484    }
485}
486
487/// Build the kernel with output piped through the progress group.
488///
489/// `jobs_override` supplies the `-jN` count when set (used by
490/// `kernel_build_pipeline` under `--cpu-cap` to keep gcc's
491/// parallelism aligned with the reserved CPU count). `None`
492/// falls back to `std::thread::available_parallelism`.
493pub fn make_kernel_with_output(
494    kernel_dir: &Path,
495    progress: Option<&crate::cli::FetchProgress>,
496    jobs_override: Option<usize>,
497) -> Result<()> {
498    let nproc = jobs_override.unwrap_or_else(|| {
499        std::thread::available_parallelism()
500            .map(|n| n.get())
501            .unwrap_or(1)
502    });
503    let args = build_make_args(nproc);
504    let arg_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
505    run_make_with_output(kernel_dir, &arg_refs, progress)
506}
507
508/// Build the make arguments for a kernel build.
509///
510/// Returns the argument list that would be passed to `make` for a
511/// parallel kernel build: `["-jN", "KCFLAGS=-Wno-error"]`.
512pub(super) fn build_make_args(nproc: usize) -> Vec<String> {
513    vec![format!("-j{nproc}"), "KCFLAGS=-Wno-error".into()]
514}
515
516#[cfg(test)]
517mod tests {
518    use super::*;
519    use std::time::Duration;
520
521    /// Whether `name` resolves to a binary on `PATH`. Inlined here
522    /// (rather than reaching across to `super::super::resolve::resolve_in_path`)
523    /// so the test module is self-contained and cannot regress on
524    /// a path change in the resolver helper.
525    fn make_in_path() -> bool {
526        let Ok(path) = std::env::var("PATH") else {
527            return false;
528        };
529        std::env::split_paths(&path).any(|p| p.join("make").is_file())
530    }
531
532    // -- drain_lines_lossy --
533
534    #[test]
535    fn drain_lines_lossy_eof_terminated_happy_path() {
536        let input: &[u8] = b"alpha\nbeta\ngamma\n";
537        let mut seen = Vec::new();
538        let captured = drain_lines_lossy(std::io::Cursor::new(input), |line| {
539            seen.push(line.to_string())
540        })
541        .unwrap();
542        assert_eq!(captured, vec!["alpha", "beta", "gamma"]);
543        assert_eq!(seen, captured);
544    }
545
546    #[test]
547    fn drain_lines_lossy_strips_crlf() {
548        let input: &[u8] = b"one\r\ntwo\r\nthree\r\n";
549        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
550        assert_eq!(captured, vec!["one", "two", "three"]);
551    }
552
553    #[test]
554    fn drain_lines_lossy_non_utf8_bytes_survive_via_replacement() {
555        let input: &[u8] = b"valid\n\xffbroken\ntail\n";
556        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
557        assert_eq!(captured, vec!["valid", "\u{FFFD}broken", "tail"]);
558    }
559
560    #[test]
561    fn drain_lines_lossy_empty_stream_yields_empty_vec() {
562        let input: &[u8] = b"";
563        let mut calls = 0usize;
564        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| calls += 1).unwrap();
565        assert!(captured.is_empty());
566        assert_eq!(calls, 0);
567    }
568
569    #[test]
570    fn drain_lines_lossy_single_line_without_trailing_newline() {
571        let input: &[u8] = b"no-newline";
572        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
573        assert_eq!(captured, vec!["no-newline"]);
574    }
575
576    #[test]
577    fn drain_lines_lossy_lone_cr_at_eof_is_preserved() {
578        let input: &[u8] = b"foo\r";
579        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
580        assert_eq!(captured, vec!["foo\r"]);
581    }
582
583    #[test]
584    fn drain_lines_lossy_interior_cr_is_preserved() {
585        let input: &[u8] = b"ab\rcd\n";
586        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
587        assert_eq!(captured, vec!["ab\rcd"]);
588    }
589
590    #[test]
591    fn drain_lines_lossy_propagates_io_error_after_first_read() {
592        use std::io::{BufReader, ErrorKind, Read};
593
594        struct FlakyReader {
595            calls: usize,
596        }
597        impl Read for FlakyReader {
598            fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
599                self.calls += 1;
600                match self.calls {
601                    1 => {
602                        let data = b"line1\n";
603                        let n = data.len().min(buf.len());
604                        buf[..n].copy_from_slice(&data[..n]);
605                        Ok(n)
606                    }
607                    _ => Err(std::io::Error::new(ErrorKind::BrokenPipe, "pipe closed")),
608                }
609            }
610        }
611
612        let err = drain_lines_lossy(BufReader::new(FlakyReader { calls: 0 }), |_| {})
613            .expect_err("flaky reader must surface Err");
614        assert_eq!(err.kind(), ErrorKind::BrokenPipe);
615    }
616
617    #[test]
618    fn drain_lines_lossy_mixed_lf_and_crlf() {
619        let input: &[u8] = b"lf-line\ncrlf-line\r\nlf-again\n";
620        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
621        assert_eq!(captured, vec!["lf-line", "crlf-line", "lf-again"]);
622    }
623
624    #[test]
625    fn drain_lines_lossy_empty_lines_lf() {
626        let input: &[u8] = b"a\n\nb\n";
627        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
628        assert_eq!(captured, vec!["a", "", "b"]);
629    }
630
631    #[test]
632    fn drain_lines_lossy_empty_lines_crlf() {
633        let input: &[u8] = b"\r\n\r\n";
634        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_| {}).unwrap();
635        assert_eq!(captured, vec!["", ""]);
636    }
637
638    #[test]
639    fn drain_lines_lossy_callback_fires_once_per_line_in_order() {
640        let input: &[u8] = b"a\nb\nc\n";
641        let lens = std::cell::RefCell::new(Vec::<usize>::new());
642        let captured = drain_lines_lossy(std::io::Cursor::new(input), |_line| {
643            let mut v = lens.borrow_mut();
644            let current = v.len();
645            v.push(current);
646        })
647        .unwrap();
648        assert_eq!(captured, vec!["a", "b", "c"]);
649        assert_eq!(lens.into_inner(), vec![0, 1, 2]);
650    }
651
652    // -- run_make_with_output --
653
/// Points `run_make_with_output` at a directory that does not exist
/// (a never-created child of a fresh temp dir) and checks the shape of
/// the resulting error:
///
/// * the rendered chain contains the `spawn make foo` context layer, and
/// * somewhere in the anyhow chain sits an `io::Error` whose kind is
///   `ErrorKind::NotFound`.
///
/// The second check inspects the error kind rather than the rendered
/// OS message, so it does not depend on the locale the test runs under.
#[test]
fn run_make_with_output_surfaces_actionable_error_when_kernel_dir_missing() {
    let scratch = tempfile::TempDir::new().unwrap();
    let absent_dir = scratch.path().join("nonexistent_child");

    let failure = run_make_with_output(&absent_dir, &["foo"], None)
        .expect_err("nonexistent kernel_dir must surface a spawn failure");
    let rendered = format!("{failure:#}");

    assert!(
        rendered.contains("spawn make foo"),
        "expected `spawn make foo` context layer, got: {rendered}"
    );

    // Walk every cause in the chain and keep only the io::Error layers.
    let not_found_in_chain = failure
        .chain()
        .filter_map(|cause| cause.downcast_ref::<std::io::Error>())
        .any(|io| io.kind() == std::io::ErrorKind::NotFound);
    assert!(
        not_found_in_chain,
        "expected underlying io::Error with ErrorKind::NotFound in anyhow chain, \
         got: {rendered}"
    );
}
684
/// Drives `run_make_with_output` with a real `make` whose recipe prints
/// 100 stdout lines and 100 stderr lines of 1 KiB each (~200 KiB in
/// total, well past a 64 KiB pipe buffer) and then runs `false`.
///
/// The call returning at all shows the output was drained without
/// deadlocking; the assertion pins the `make default failed` wording of
/// the error returned for the non-zero exit.
#[test]
fn run_make_with_output_drains_high_volume_failing_make_without_deadlock() {
    if !make_in_path() {
        skip!("make not in PATH");
    }
    let workdir = tempfile::TempDir::new().unwrap();
    let out_payload = "S".repeat(1024);
    let err_payload = "E".repeat(1024);
    // One stdout line followed by one stderr line, repeated 100 times.
    let line_pair = format!(
        "\t@printf '%s\\n' '{out_payload}'\n\t@printf '%s\\n' '{err_payload}' >&2\n"
    );
    let recipe = line_pair.repeat(100);
    std::fs::write(
        workdir.path().join("Makefile"),
        format!("default:\n{recipe}\t@false\n"),
    )
    .unwrap();

    let failure = run_make_with_output(workdir.path(), &["default"], None)
        .expect_err("non-zero exit must surface as Err");
    let rendered = format!("{failure:#}");
    assert!(
        rendered.contains("make default failed"),
        "expected `make default failed` wording from bail!, got: {rendered}"
    );
}
714
/// Same shape as the mixed high-volume test, but the recipe writes only
/// to stderr: 128 lines of 1 KiB (~128 KiB, twice a 64 KiB pipe buffer)
/// followed by `false`. Nothing is written to stdout, so the test can
/// only finish if stderr itself is being read; it would hang if stderr
/// were routed somewhere nobody drains.
#[test]
fn run_make_with_output_drains_stderr_only_high_volume_without_deadlock() {
    if !make_in_path() {
        skip!("make not in PATH");
    }
    let workdir = tempfile::TempDir::new().unwrap();
    let payload = "X".repeat(1024);
    // A single stderr-only recipe line, repeated 128 times.
    let stderr_line = format!("\t@printf '%s\\n' '{payload}' >&2\n");
    let recipe = stderr_line.repeat(128);
    std::fs::write(
        workdir.path().join("Makefile"),
        format!("default:\n{recipe}\t@false\n"),
    )
    .unwrap();

    let failure = run_make_with_output(workdir.path(), &["default"], None)
        .expect_err("non-zero exit must surface as Err");
    let rendered = format!("{failure:#}");
    assert!(
        rendered.contains("make default failed"),
        "expected `make default failed` wording, got: {rendered}"
    );
}
740
/// Checks that a failed spawn does not leak file descriptors.
///
/// The test counts the entries of `/proc/self/fd`, calls
/// `run_make_with_output` 128 times against a directory that does not
/// exist (so every call fails), and counts again. The count must not
/// have grown. One un-measured warm-up call runs first so that any
/// one-time, process-wide descriptors are already open when the
/// baseline is taken. Skipped where `/proc/self/fd` is unavailable.
#[test]
fn run_make_with_output_releases_fds_on_spawn_failure() {
    const FD_LEAK_ITERATIONS: u32 = 128;

    let fd_dir = std::path::Path::new("/proc/self/fd");
    if !fd_dir.is_dir() {
        skip!("/proc/self/fd not available");
    }
    // Number of readable entries currently listed for this process.
    let open_fd_count = || -> usize {
        std::fs::read_dir(fd_dir)
            .expect("read /proc/self/fd")
            .flatten()
            .count()
    };

    let scratch = tempfile::TempDir::new().unwrap();
    let absent_dir = scratch.path().join("nonexistent_child");

    // Warm-up pass: ignore first-call process-wide allocations.
    let _ = run_make_with_output(&absent_dir, &["foo"], None);

    let before = open_fd_count();
    (0..FD_LEAK_ITERATIONS).for_each(|_| {
        let _ = run_make_with_output(&absent_dir, &["foo"], None);
    });
    let after = open_fd_count();

    assert!(
        after <= before,
        "fd leak on spawn failure: {before} -> {after} \
         ({FD_LEAK_ITERATIONS} calls, expected no growth)"
    );
}
775
776    // -- poll_child_with_timeout --
777
/// Spawns a `sleep <seconds>` child process and returns it together
/// with its PID.
///
/// `sleep` is executed directly rather than through `sh -c`. A shell
/// that forks instead of exec-ing its final command would leave a
/// `sleep` grandchild running, still holding the inherited stdio, after
/// the caller kills the returned child; the returned PID would then be
/// the shell's, not the sleeper's. With a direct spawn the returned PID
/// is the sleeping process itself, so killing and reaping it cleans up
/// everything this fixture started.
///
/// # Panics
///
/// Panics if `sleep` cannot be spawned.
fn spawn_sleeping_child(seconds: u64) -> (std::process::Child, u32) {
    let child = std::process::Command::new("sleep")
        .arg(seconds.to_string())
        .spawn()
        .expect("spawn sleep N");
    let pid = child.id();
    (child, pid)
}
787
788    fn pid_is_alive(pid: u32) -> bool {
789        use nix::sys::signal::kill;
790        use nix::unistd::Pid;
791        kill(Pid::from_raw(pid as i32), None).is_ok()
792    }
793
/// A child that outlives the deadline must make the helper return an
/// error, promptly, with the child gone afterwards.
///
/// A 60 s sleeper is handed to `poll_child_with_timeout` with a 100 ms
/// timeout and a 1 ms poll interval. The test then checks that:
///
/// 1. the error text carries the label and the phrase `timed out after`;
/// 2. the call returned in under 5 s, i.e. the deadline was honoured
///    instead of waiting for the sleeper;
/// 3. within 1 s of the helper returning, the child's PID is no longer
///    signalable, i.e. it was not left behind as a zombie.
#[test]
fn poll_child_with_timeout_bails_and_reaps_on_timeout() {
    use std::time::Instant;

    let (child, pid) = spawn_sleeping_child(60);
    assert!(
        pid_is_alive(pid),
        "fixture precondition: spawned child pid {pid} must be \
         alive before the helper runs",
    );

    let started = Instant::now();
    let outcome = poll_child_with_timeout(
        child,
        Duration::from_millis(100),
        Duration::from_millis(1),
        "make wedged-target",
    );
    let elapsed = started.elapsed();

    let failure = outcome.expect_err("timed-out child must surface as Err");
    let rendered = format!("{failure:#}");
    assert!(
        rendered.contains("make wedged-target"),
        "timeout bail must include the label parameter; got: {rendered}",
    );
    assert!(
        rendered.contains("timed out after"),
        "timeout bail must include the literal `timed out after` \
         phrase so CI log scrapers can pattern-match wedged builds; \
         got: {rendered}",
    );

    assert!(
        elapsed < Duration::from_secs(5),
        "helper must return within a small multiple of the \
         configured timeout (100ms); took {elapsed:?} which \
         suggests the deadline check is broken",
    );

    // Give the PID up to one second to disappear, polling every 10 ms.
    let reap_deadline = Instant::now() + Duration::from_secs(1);
    while pid_is_alive(pid) {
        if Instant::now() >= reap_deadline {
            panic!(
                "child pid {pid} still alive 1s after helper returned — \
                 timeout path leaked a zombie (missing child.wait() \
                 after child.kill()?)",
            );
        }
        std::thread::sleep(Duration::from_millis(10));
    }
}
854
/// A child that exits 0 well before the deadline must yield `Ok`.
///
/// `true` is spawned and polled with a 5 s timeout and a 1 ms interval.
/// Besides the `Ok` result, the test checks that the child's PID stops
/// being signalable within 1 s of the helper returning, so the
/// successful path does not leave a zombie behind either.
#[test]
fn poll_child_with_timeout_succeeds_when_child_exits_clean() {
    use std::process::Command;
    use std::time::Instant;

    let child = Command::new("true").spawn().expect("spawn true");
    let pid = child.id();

    let outcome = poll_child_with_timeout(
        child,
        Duration::from_secs(5),
        Duration::from_millis(1),
        "make happy-target",
    );
    assert!(
        outcome.is_ok(),
        "child that exits 0 must surface as Ok; got: {outcome:?}",
    );

    // Give the PID up to one second to disappear, polling every 10 ms.
    let reap_deadline = Instant::now() + Duration::from_secs(1);
    while pid_is_alive(pid) {
        if Instant::now() >= reap_deadline {
            panic!(
                "child pid {pid} still alive 1s after Ok return — \
                 successful-exit path leaked a zombie",
            );
        }
        std::thread::sleep(Duration::from_millis(10));
    }
}
890
/// A child that exits non-zero before the deadline must yield an error
/// that reads as a failure, not as a timeout.
///
/// `false` is spawned and polled with a 5 s timeout. The rendered error
/// must contain the label and the word `failed`, and must not contain
/// `timed out`, so that log consumers can tell a failed build apart
/// from a wedged one.
#[test]
fn poll_child_with_timeout_surfaces_nonzero_exit_as_err() {
    use std::process::Command;

    let child = Command::new("false").spawn().expect("spawn false");
    let outcome = poll_child_with_timeout(
        child,
        Duration::from_secs(5),
        Duration::from_millis(1),
        "make broken-target",
    );

    let failure = outcome.expect_err("child that exits non-zero must surface as Err");
    let rendered = format!("{failure:#}");

    let has_label = rendered.contains("make broken-target");
    assert!(
        has_label,
        "non-zero-exit bail must include the label; got: {rendered}",
    );

    let says_failed = rendered.contains("failed");
    assert!(
        says_failed,
        "non-zero-exit bail must use the `failed` wording so it is \
         distinguishable from the timeout-path's `timed out after`; \
         got: {rendered}",
    );

    let says_timed_out = rendered.contains("timed out");
    assert!(
        !says_timed_out,
        "non-zero-exit bail must NOT contain `timed out` — that \
         phrase belongs to the deadline-fired path only; got: {rendered}",
    );
}
925
926    // -- build_make_args --
927
/// With a job count of 1, `build_make_args` yields `-j1` followed by
/// the `KCFLAGS=-Wno-error` override.
#[test]
fn cli_build_make_args_single_core() {
    let expected = vec!["-j1", "KCFLAGS=-Wno-error"];
    assert_eq!(build_make_args(1), expected);
}
933
/// With a job count of 16, `build_make_args` yields `-j16` followed by
/// the `KCFLAGS=-Wno-error` override.
#[test]
fn cli_build_make_args_multi_core() {
    let expected = vec!["-j16", "KCFLAGS=-Wno-error"];
    assert_eq!(build_make_args(16), expected);
}
939
940    // -- run_make_captured --
941
/// Points `run_make_captured` at a directory that does not exist and
/// checks that the error has the same shape the `run_make_with_output`
/// spawn-failure test expects: a `spawn make foo` context layer plus an
/// `io::Error` of kind `ErrorKind::NotFound` somewhere in the chain.
///
/// There is no `make`-in-PATH guard because the call is expected to
/// fail before anything is executed.
#[test]
fn run_make_captured_surfaces_error_when_kernel_dir_missing() {
    let scratch = tempfile::TempDir::new().unwrap();
    let absent_dir = scratch.path().join("nonexistent_child");
    let timeout = Duration::from_secs(5);
    let poll_interval = Duration::from_millis(1);

    let failure = run_make_captured(&absent_dir, &["foo"], None, timeout, poll_interval)
        .expect_err("nonexistent kernel_dir must surface a spawn failure");
    let rendered = format!("{failure:#}");

    assert!(
        rendered.contains("spawn make foo"),
        "expected `spawn make foo` context layer, got: {rendered}"
    );

    // Walk every cause in the chain and keep only the io::Error layers.
    let not_found_in_chain = failure
        .chain()
        .filter_map(|cause| cause.downcast_ref::<std::io::Error>())
        .any(|io| io.kind() == std::io::ErrorKind::NotFound);
    assert!(
        not_found_in_chain,
        "expected underlying io::Error with ErrorKind::NotFound, got: {rendered}"
    );
}
974
/// A Makefile whose only recipe line is `@true` must make
/// `run_make_captured` return `Ok`. This is the success path, with a
/// generous 30 s timeout that is not expected to fire.
#[test]
fn run_make_captured_succeeds_on_clean_exit() {
    if !make_in_path() {
        skip!("make not in PATH");
    }
    let workdir = tempfile::TempDir::new().unwrap();
    let makefile_path = workdir.path().join("Makefile");
    std::fs::write(makefile_path, "default:\n\t@true\n").unwrap();

    let timeout = Duration::from_secs(30);
    let poll_interval = Duration::from_millis(1);
    let outcome = run_make_captured(workdir.path(), &["default"], None, timeout, poll_interval);
    outcome.expect("clean exit must surface as Ok");
}
994
/// A recipe that prints one line and then runs `false` must make
/// `run_make_captured` return an error reading `make default failed`.
/// The error must not use the `timed out` wording, which is reserved
/// for the deadline path, so the two outcomes stay distinguishable.
#[test]
fn run_make_captured_bails_on_failing_make() {
    if !make_in_path() {
        skip!("make not in PATH");
    }
    let workdir = tempfile::TempDir::new().unwrap();
    let makefile_path = workdir.path().join("Makefile");
    std::fs::write(makefile_path, "default:\n\t@printf 'boom\\n'\n\t@false\n").unwrap();

    let timeout = Duration::from_secs(30);
    let poll_interval = Duration::from_millis(1);
    let failure = run_make_captured(workdir.path(), &["default"], None, timeout, poll_interval)
        .expect_err("non-zero exit must surface as Err");
    let rendered = format!("{failure:#}");

    assert!(
        rendered.contains("make default failed"),
        "expected `make default failed`, got: {rendered}"
    );
    assert!(
        !rendered.contains("timed out"),
        "non-zero exit must not use the timeout wording; got: {rendered}"
    );
}
1027
/// The timeout path must return promptly even though the recipe's
/// `sleep 30` is still running when the 100 ms deadline fires.
///
/// The `sleep` is a descendant of `make` and shares its output pipe, so
/// if only `make` itself were stopped the call would be expected to
/// stay blocked on that pipe until the `sleep` finished. The test
/// therefore requires the call to return in under 5 s, and also pins
/// the label and the `timed out after` phrase in the error text.
#[test]
fn run_make_captured_times_out_and_sweeps_group_without_hang() {
    if !make_in_path() {
        skip!("make not in PATH");
    }
    let workdir = tempfile::TempDir::new().unwrap();
    let makefile_path = workdir.path().join("Makefile");
    std::fs::write(makefile_path, "default:\n\t@sleep 30\n").unwrap();

    let timeout = Duration::from_millis(100);
    let poll_interval = Duration::from_millis(1);

    let started = std::time::Instant::now();
    let outcome = run_make_captured(workdir.path(), &["default"], None, timeout, poll_interval);
    let failure = outcome.expect_err("wedged make must surface as Err");
    let elapsed = started.elapsed();

    let rendered = format!("{failure:#}");
    assert!(
        rendered.contains("make default"),
        "timeout bail must include the label; got: {rendered}"
    );
    assert!(
        rendered.contains("timed out after"),
        "timeout bail must include `timed out after`; got: {rendered}"
    );
    assert!(
        elapsed < Duration::from_secs(5),
        "run must return promptly after the 100ms timeout — took {elapsed:?}, \
         which means the group sweep did not unblock the output-drain thread \
         (the recipe's `sleep` still held the pipe open)"
    );
}
1069
/// Runs the timed capturing path against a recipe that prints 100
/// stdout lines and 100 stderr lines of 1 KiB each (~200 KiB, past a
/// 64 KiB pipe buffer) and then runs `false`.
///
/// The call can only finish if output is consumed while the child is
/// still running; reading only after exit would leave the child blocked
/// on a full pipe. The 30 s timeout is not expected to fire, and the
/// resulting error must read `make default failed`.
#[test]
fn run_make_captured_drains_high_volume_without_deadlock() {
    if !make_in_path() {
        skip!("make not in PATH");
    }
    let workdir = tempfile::TempDir::new().unwrap();
    let out_payload = "S".repeat(1024);
    let err_payload = "E".repeat(1024);
    // One stdout line followed by one stderr line, repeated 100 times.
    let line_pair = format!(
        "\t@printf '%s\\n' '{out_payload}'\n\t@printf '%s\\n' '{err_payload}' >&2\n"
    );
    let recipe = line_pair.repeat(100);
    std::fs::write(
        workdir.path().join("Makefile"),
        format!("default:\n{recipe}\t@false\n"),
    )
    .unwrap();

    let timeout = Duration::from_secs(30);
    let poll_interval = Duration::from_millis(1);
    let failure = run_make_captured(workdir.path(), &["default"], None, timeout, poll_interval)
        .expect_err("non-zero exit must surface as Err");
    let rendered = format!("{failure:#}");
    assert!(
        rendered.contains("make default failed"),
        "expected `make default failed`, got: {rendered}"
    );
}
1106
/// Runs the failing-make scenario with `Some(&progress)` instead of
/// `None`. The other `run_make_captured` tests in this section all
/// pass `None`, so this is the one that drives the code path taken when
/// a `FetchProgress` is supplied.
///
/// The test does not inspect where the captured output ends up; it only
/// requires that the call completes without panicking and still reports
/// `make default failed`.
#[test]
fn run_make_captured_replays_through_progress_group_on_failure() {
    if !make_in_path() {
        skip!("make not in PATH");
    }
    let workdir = tempfile::TempDir::new().unwrap();
    let makefile_path = workdir.path().join("Makefile");
    std::fs::write(
        makefile_path,
        "default:\n\t@printf 'boom-via-progress\\n'\n\t@false\n",
    )
    .unwrap();

    let progress = crate::cli::FetchProgress::new();
    let timeout = Duration::from_secs(30);
    let poll_interval = Duration::from_millis(1);
    let failure = run_make_captured(
        workdir.path(),
        &["default"],
        Some(&progress),
        timeout,
        poll_interval,
    )
    .expect_err("non-zero exit must surface as Err");

    let rendered = format!("{failure:#}");
    assert!(
        rendered.contains("make default failed"),
        "expected `make default failed`, got: {rendered}"
    );
}
1140}