ktstr/test_support/
profraw.rs

1//! Guest-side LLVM coverage profraw flush + host-side write-out.
2//!
3//! Under `-C instrument-coverage`, the compiler inserts profile counters
4//! and registers an atexit handler via `.init_array` that writes
5//! `.profraw` at process exit. Inside a ktstr guest VM, `std::process::exit`
6//! bypasses the atexit handler when the ktstr `#[ctor]` runs first
7//! (the ordering between `.init_array` entries is unspecified). To keep
8//! coverage data from being dropped, [`try_flush_profraw`] calls the
9//! compiler-rt buffer API (`__llvm_profile_get_size_for_buffer` +
10//! `__llvm_profile_write_buffer`) directly under `cfg(coverage)`,
11//! serializes profraw into a heap buffer, and publishes it through the
12//! guest-to-host bulk channel under `MSG_TYPE_PROFRAW`.
13//!
14//! VP data scope: the buffer flush covers coverage counters and
15//! bitmaps only; PGO value-profile data is not preserved.
16//! `__llvm_profile_write_buffer` passes a NULL `VPDataReader` to
17//! `lprofWriteData` (defined in
18//! `compiler-rt/lib/profile/InstrProfilingBuffer.c`),
19//! whereas the file-based `__llvm_profile_write_file` path passes
20//! `lprofGetVPDataReader()` (`InstrProfilingFile.c`) and DOES
21//! capture VP records. This matches the current `-C instrument-coverage`
22//! use case, which does not emit VP data. Combining coverage with PGO
23//! (`-C profile-generate`) in the same binary would silently lose VP
24//! records on this path; switch back to the file-based serializer if
25//! that combination becomes a requirement.
26//!
27//! On the host, [`write_profraw`] receives those bytes via the SHM ring
28//! and writes them into `LLVM_COV_TARGET_DIR` (or a fallback sibling
29//! directory next to the test binary) as
30//! `ktstr-test-{pid}-{counter}.profraw`.
31//!
32//! # Host atexit profraw redirect
33//!
34//! The host-side atexit path (the OS-managed dump that fires on
35//! `std::process::exit` for non-VM-dispatch test runs — including
36//! every test run via `cargo nextest run` directly, without a
37//! `cargo ktstr` wrapper) reads `LLVM_PROFILE_FILE` once during the
38//! LLVM runtime's `.init_array` initializer; if unset, the compiler-rt
39//! default is `default.profraw` in the process cwd. When the operator
40//! launched the test from a kernel source tree, that cwd points at the
41//! source tree and the dump leaks into someone else's directory.
42//!
43//! [`redirect_default_profraw_path`] is a `priority = 0` ctor that
44//! runs BEFORE the LLVM runtime's `.init_array` entry (which has no
45//! priority and lands at the default `.init_array` slot per glibc
46//! ordering rules) and points `LLVM_PROFILE_FILE` at the same
47//! workspace-local target directory the cargo-ktstr wrapper already
48//! injects, so a directly-invoked `cargo nextest run` no longer drops
49//! `default.profraw` in cwd. The redirect is a no-op when:
50//!   - getpid() == 1 (in-VM init; the SHM-ring flush above owns
51//!     guest-side coverage and the env is irrelevant inside the VM
52//!     because `std::process::exit` bypasses atexit anyway).
53//!   - `LLVM_PROFILE_FILE` is already set (operator override or
54//!     wrapper injection takes precedence — same `existing_env.is_some()`
55//!     short-circuit `cargo-ktstr.rs::profraw_inject_for` applies).
56//!   - The target binary is NOT coverage-instrumented. Detection is a
57//!     symtab probe for the `__llvm_profile_write_buffer` /
58//!     `__llvm_profile_get_size_for_buffer` function symbols (the bare
59//!     `__llvm_profile_runtime` marker can be dead-stripped entirely
60//!     under `--gc-sections`, leaving no `.symtab` entry; see
61//!     `is_coverage_instrumented_binary`); the
62//!     guest-side flush [`try_flush_profraw`] calls those same compiler-rt
63//!     entry points directly. Non-instrumented binaries that link the
64//!     ktstr lib (e.g. `cargo-ktstr` itself in a non-coverage build)
65//!     must NOT set the env, otherwise the env propagates to spawned
66//!     child test binaries, which then short-circuit their own
67//!     redirect on the inherited value and write profraw into the
68//!     PARENT's target dir rather than their own per-binary one
69//!     (cargo-ktstr's exe lives in `target/{profile}/` while test
70//!     binaries live in `target/{profile}/deps/`, so the two
71//!     `current_exe`-relative target dirs differ).
72//!
73//! Supporting helper:
74//! - [`find_symbol_vaddrs`] walks `.symtab` in one pass for multiple
75//!   symbols at once, used by the coverage-instrumentation detection
76//!   probes (in-process and on the host-side `/init` payload).
77//!
78//! Those probes read the binary via `memmap2::Mmap` rather than
79//! `std::fs::read` so the kernel page cache backs the bytes goblin
80//! parses; for coverage-instrumented binaries (hundreds of MiB up to
81//! ~1 GiB) this avoids the heap allocation + copy of the entire binary
82//! just to read its symbol table. [`try_flush_profraw`] itself no
83//! longer parses the ELF — under `cfg(coverage)` it calls the
84//! buffer-API entry points directly.
85
86use anyhow::{Context, Result};
87use std::fs::File;
88use std::path::{Path, PathBuf};
89
90#[cfg(coverage)]
91use crate::vmm;
92
/// Serialize the guest's LLVM coverage profile and hand it to the host
/// over the bulk channel.
///
/// With `-C instrument-coverage` (cargo-llvm-cov sets `cfg(coverage)`)
/// the compiler-rt profile runtime is linked in, which defines the
/// buffer-API entry points `__llvm_profile_get_size_for_buffer` and
/// `__llvm_profile_write_buffer`. This function asks the runtime how
/// large the serialized profile is, allocates a buffer of that size,
/// has the runtime write the live counters into it, and publishes the
/// result via `vmm::guest_comms::send_profraw` for host-side
/// extraction.
///
/// The direct call to `__llvm_profile_write_buffer` doubles as the
/// link-time reference that keeps the symbol alive under
/// `--gc-sections`. An earlier approach looked the symbol up by name in
/// the ELF `.symtab` at runtime; that is not a link reference, so the
/// linker dead-stripped `write_buffer` (nothing else retained called
/// it) and the flush silently did nothing, leaving guest coverage at 0%.
///
/// The function is a no-op when:
/// - the build is not coverage-instrumented (the `cfg(coverage)` body
///   is compiled out, so the runtime symbols are never referenced), or
/// - it is called outside the guest, or
/// - a flush was already attempted in this process.
pub(crate) fn try_flush_profraw() {
    #[cfg(coverage)]
    {
        use std::os::raw::{c_char, c_int};
        use std::sync::atomic::{AtomicBool, Ordering};

        // compiler-rt buffer API: `get_size` is `uint64_t (void)`;
        // `write_buffer` is `int (char *)` and returns 0 on success
        // after serializing the live counters into the caller's buffer.
        unsafe extern "C" {
            fn __llvm_profile_get_size_for_buffer() -> u64;
            fn __llvm_profile_write_buffer(buf: *mut c_char) -> c_int;
        }

        // One flush attempt per process. The guest `/init` (pid 1) can
        // reach this function from several paths in the same process —
        // the post-dispatch site (rust_init Phase 5), the probe
        // result-publish path, and the ctor / nextest `--exact`
        // dispatch paths (which flush, then `process::exit`). A second
        // `Profraw` frame would make `llvm-profdata merge` double-count
        // the counters, so the first caller wins.
        static FLUSHED: AtomicBool = AtomicBool::new(false);

        if !vmm::guest_comms::is_guest() {
            return;
        }
        if FLUSHED.swap(true, Ordering::SeqCst) {
            return;
        }

        // SAFETY: the entry point is defined whenever
        // `-C instrument-coverage` linked the profile runtime, which
        // `cfg(coverage)` guarantees; it takes no arguments. The
        // dispatch context is single-threaded (guest `/init`,
        // post-dispatch).
        let profile_len = unsafe { __llvm_profile_get_size_for_buffer() } as usize;
        if profile_len == 0 {
            // Report through a reliable Dmesg frame rather than
            // `eprintln!` — the Phase-2 stdio->bulk redirect is lossy
            // near reboot — so a zero-coverage run never passes
            // silently (frames sent, no profraw, no error).
            vmm::guest_comms::send_dmesg(
                b"ktstr coverage: __llvm_profile_get_size_for_buffer returned 0; no guest profile to flush\n",
            );
            return;
        }

        let mut profile: Vec<u8> = vec![0u8; profile_len];
        // SAFETY: `profile` is a live, writable allocation of exactly
        // the size the runtime reported above, and nothing else runs
        // concurrently in this single-threaded dispatch context.
        let status = unsafe { __llvm_profile_write_buffer(profile.as_mut_ptr().cast::<c_char>()) };
        if status != 0 {
            vmm::guest_comms::send_dmesg(
                b"ktstr coverage: __llvm_profile_write_buffer failed; guest coverage lost for this run\n",
            );
            return;
        }

        vmm::guest_comms::send_profraw(&profile);
    }
}
173
174/// Resolve multiple symbol virtual addresses in a single pass through
175/// the ELF `.symtab`. Returns addresses in the same order as `names`.
176///
177/// Matches purely by name: a symbol is resolved regardless of its
178/// `st_size`, so zero-size symbols — e.g. gc-sections'd data markers
179/// like `__llvm_profile_runtime`, whose `st_size` is dropped on some
180/// `--gc-sections` link paths — still resolve as long as the name
181/// survives in `.symtab`. (Callers match exact, specific names, so
182/// admitting zero-size symbols cannot introduce a false positive.)
183pub(crate) fn find_symbol_vaddrs(elf: &goblin::elf::Elf<'_>, names: &[&str]) -> Vec<Option<u64>> {
184    let mut results = vec![None; names.len()];
185    let mut remaining = names.len();
186
187    for sym in elf.syms.iter() {
188        if remaining == 0 {
189            break;
190        }
191        let sym_name = match elf.strtab.get_at(sym.st_name) {
192            Some(n) => n,
193            None => continue,
194        };
195        for (i, name) in names.iter().enumerate() {
196            if results[i].is_none() && sym_name == *name {
197                results[i] = Some(sym.st_value);
198                remaining -= 1;
199                break;
200            }
201        }
202    }
203    results
204}
205
/// Per-process sequence number that [`write_profraw`] embeds in each
/// output file name, so successive writes from the same process get
/// distinct `ktstr-test-{pid}-{counter}.profraw` names.
static PROFRAW_COUNTER: std::sync::atomic::AtomicU32 = std::sync::atomic::AtomicU32::new(0);
207
208/// Persist every coverage-profraw frame in a post-run guest bulk drain
209/// to the llvm-cov-target directory.
210///
211/// Walks the [`crate::vmm::host_comms::BulkDrainResult`] the host
212/// bucketed into [`crate::vmm::result::VmResult::guest_messages`] and,
213/// for each [`MsgType::Profraw`](crate::vmm::wire::MsgType::Profraw)
214/// frame that passed its per-frame CRC and carries a non-empty payload,
215/// calls [`write_profraw`]. Mirrors the CRC + non-empty gate the
216/// per-frame eval/probe dispatch applied so a corrupted or empty frame
217/// is never written.
218///
219/// Called from [`crate::vmm::KtstrVm::run`] so the direct
220/// `KtstrVm::run()` path persists guest coverage like the
221/// eval (`run_ktstr_test_inner`) and auto-repro (`probe`) paths do —
222/// previously the direct path silently dropped the profraw the guest
223/// `/init` flushed. The eval and probe paths funnel through
224/// `KtstrVm::run`, so they no longer extract `Profraw` frames
225/// themselves; doing so here AND there would write the same payload
226/// twice and `llvm-profdata merge` would double-count the counters.
227pub(crate) fn persist_guest_profraw(messages: &crate::vmm::host_comms::BulkDrainResult) {
228    use crate::vmm::wire::MsgType;
229    for entry in &messages.entries {
230        if MsgType::from_wire(entry.msg_type) == Some(MsgType::Profraw)
231            && entry.crc_ok
232            && !entry.payload.is_empty()
233            && let Err(e) = write_profraw(&entry.payload)
234        {
235            eprintln!("ktstr_test: persist guest profraw: {e}");
236        }
237    }
238}
239
240/// Write profraw data to the llvm-cov-target directory.
241pub(crate) fn write_profraw(data: &[u8]) -> Result<()> {
242    let target_dir = target_dir();
243    std::fs::create_dir_all(&target_dir)
244        .with_context(|| format!("create profraw dir: {}", target_dir.display()))?;
245    let id = PROFRAW_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
246    let path = target_dir.join(format!("ktstr-test-{}-{}.profraw", std::process::id(), id));
247    std::fs::write(&path, data).with_context(|| format!("write profraw: {}", path.display()))?;
248    Ok(())
249}
250
251/// Resolve the llvm-cov-target directory for profraw output.
252///
253/// Cascade:
254/// 1. `LLVM_COV_TARGET_DIR` — explicit operator override.
255/// 2. `LLVM_PROFILE_FILE`'s parent directory — when an outer harness
256///    (cargo-llvm-cov, or the cargo-ktstr `LLVM_PROFILE_FILE` injection
257///    that prevents host-side `default.profraw` leakage from the
258///    `cargo ktstr test` path) has already pinned the output location.
259/// 3. `<current_exe parent>/llvm-cov-target/` — workspace-local
260///    fallback so an instrumented binary invoked without any
261///    coordination still drops profraw next to the build output
262///    rather than in cwd.
263///
264/// `pub` rather than `pub(crate)` so the cargo-ktstr binary can
265/// resolve the same directory before exec-ing `cargo nextest run`,
266/// keeping host-side and guest-side profraw output co-located in
267/// one tree without cargo-ktstr re-implementing the cascade.
268pub fn target_dir() -> PathBuf {
269    if let Ok(d) = std::env::var("LLVM_COV_TARGET_DIR") {
270        return PathBuf::from(d);
271    }
272    // `LLVM_PROFILE_FILE` may be a bare filename (e.g. `default.profraw`)
273    // — `Path::parent` returns `Some("")` in that shape, which would
274    // otherwise propagate a structurally-empty `PathBuf` through the
275    // cascade and surface as an unusable target dir downstream
276    // (`std::fs::create_dir_all("")` errors with EINVAL on Linux).
277    // The empty-os-str filter forces those bare-filename cases to fall
278    // through to the `current_exe`-relative fallback below.
279    if let Some(parent) = std::env::var("LLVM_PROFILE_FILE")
280        .ok()
281        .as_ref()
282        .and_then(|p| Path::new(p).parent())
283        .filter(|p| !p.as_os_str().is_empty())
284    {
285        return parent.to_path_buf();
286    }
287    let mut p = crate::resolve_current_exe().unwrap_or_else(|_| std::env::temp_dir());
288    p.pop(); // remove binary name
289    p.push("llvm-cov-target");
290    p
291}
292
293/// Pure decision logic for [`redirect_default_profraw_path`]: given the
294/// current `LLVM_PROFILE_FILE` value, the current pid, the coverage
295/// instrumentation marker, and the workspace-local target dir
296/// resolved from a callable, return the pattern to set on
297/// `LLVM_PROFILE_FILE` or `None` to leave the env untouched.
298/// Mirrors the `cargo-ktstr.rs::profraw_inject_for` predicate so
299/// the directly-invoked `cargo nextest run` and `cargo ktstr test`
300/// paths agree on when to inject.
301///
302/// Returns `Some(pattern)` only when:
303///   - `pid != 1` (a test binary running as `/init` inside the guest VM
304///     is owned by the SHM-ring flush; setting host-side env in that
305///     context would still be a no-op because `std::process::exit`
306///     bypasses atexit, but the early return keeps the in-VM startup
307///     trace clean of an irrelevant env mutation).
308///   - `existing` is `None` (operator-supplied
309///     `LLVM_PROFILE_FILE` or wrapper-injected value takes precedence —
310///     identical short-circuit to the cargo-ktstr wrapper).
311///   - `is_coverage_instrumented` is `true` (the calling binary has
312///     the LLVM compiler-rt profile runtime linked in). Without this
313///     guard, the redirect would fire in `cargo-ktstr` and other
314///     non-instrumented binaries that link the ktstr lib, polluting
315///     the env passed to child test binaries — those child binaries
316///     would then see a pre-set `LLVM_PROFILE_FILE` and short-circuit
317///     their own redirect, writing profraw into the parent's target
318///     dir rather than their own per-binary one. Detecting
319///     instrumentation via the runtime marker symbol scopes the
320///     redirect to the binaries that actually emit profraw.
321///
322/// `target_dir` is taken as a callable so the test suite can drive
323/// the predicate against a synthetic target without building the
324/// real `<current_exe parent>/llvm-cov-target/` path. `%p` (process
325/// id) and `%m` (binary hash) are LLVM runtime expansions that keep
326/// parallel-test output files distinct — the same pattern shape
327/// `cargo-ktstr.rs::profraw_inject_for` emits.
328fn redirect_pattern_for(
329    pid: libc::pid_t,
330    existing: Option<std::ffi::OsString>,
331    is_coverage_instrumented: bool,
332    target_dir: impl FnOnce() -> PathBuf,
333) -> Option<PathBuf> {
334    if pid == 1 || existing.is_some() || !is_coverage_instrumented {
335        return None;
336    }
337    Some(target_dir().join("default-%p-%m.profraw"))
338}
339
340/// Detect whether the running binary has LLVM compiler-rt's profile
341/// runtime linked in by probing `.symtab` for the buffer-API entry
342/// points `__llvm_profile_write_buffer` /
343/// `__llvm_profile_get_size_for_buffer` (see the inline comment below
344/// for why these function symbols are used rather than the bare
345/// `__llvm_profile_runtime` marker, which `--gc-sections` can strip).
346/// Coverage-instrumented binaries link these symbols; non-instrumented
347/// binaries (e.g. `cargo-ktstr` in a normal release build) do not.
348///
349/// Probes via the symtab walk that profraw flushing already uses
350/// (the symbol has hidden visibility, so dlsym would not find it).
351/// Returns `false` on any failure path (binary mmap, ELF parse,
352/// symbol absent) — false negatives leave the redirect off, which
353/// is the conservative outcome: the binary writes
354/// `default.profraw` in cwd just as before, no regression.
355fn is_coverage_instrumented_binary() -> bool {
356    let exe_file = match File::open("/proc/self/exe") {
357        Ok(f) => f,
358        Err(_) => return false,
359    };
360    // SAFETY: same invariants as `try_flush_profraw`'s mmap — see
361    // the SAFETY block there. The `/proc/self/exe` mapping pins
362    // the binary inode for the mmap's lifetime, and no part of
363    // ktstr writes to its own binary during process startup.
364    let mmap = match unsafe { memmap2::Mmap::map(&exe_file) } {
365        Ok(m) => m,
366        Err(_) => return false,
367    };
368    let elf = match goblin::elf::Elf::parse(&mmap) {
369        Ok(e) => e,
370        Err(_) => return false,
371    };
372    // Probe for the profile-write-buffer entry point rather than
373    // the bare `__llvm_profile_runtime` marker. The marker is
374    // declared `int __llvm_profile_runtime;` in compiler-rt and
375    // can be dead-stripped entirely by `--gc-sections` /
376    // `-Wl,--strip-debug` paths some toolchains apply to coverage
377    // builds, leaving no `.symtab` entry to resolve. The
378    // function-shaped symbols [`try_flush_profraw`] already
379    // resolves (`__llvm_profile_get_size_for_buffer` and
380    // `__llvm_profile_write_buffer`) are kept alive by that flush
381    // call's link reference, so they are the reliable presence
382    // signal for instrumented binaries, proved empirically by the
383    // fact that coverage profraw collection in CI succeeds via the
384    // same symtab probe.
385    let vaddrs = find_symbol_vaddrs(
386        &elf,
387        &[
388            "__llvm_profile_write_buffer",
389            "__llvm_profile_get_size_for_buffer",
390        ],
391    );
392    vaddrs.iter().any(|v| matches!(v, Some(va) if *va != 0))
393}
394
395/// Process-wide cached version of [`is_coverage_instrumented_binary`]:
396/// whether the HOST process (`/proc/self/exe`) is built with
397/// `-C instrument-coverage`. The symbol-table walk runs once per
398/// process and is memoised in a `OnceLock<bool>` so repeated probes
399/// only pay the ELF parse once.
400///
401/// History: VM-booting tests once used this to skip themselves under
402/// coverage, because the instrumented `current_exe` used as the guest
403/// `/init` OOMed early in boot (the budget in
404/// `crate::vmm::memory_budget` was payload-agnostic, sizing the
405/// non-instrumented case). That skip list is gone:
406/// [`crate::vmm::memory_budget::initramfs_min_memory_mib`] now
407/// detects an instrumented `/init` payload and reserves the extra
408/// resident memory (`__llvm_prf_cnts` + `__llvm_prf_data`), so the
409/// instrumented `/init` boots and its coverage is captured via
410/// [`persist_guest_profraw`].
411///
412/// This probes the HOST process, not the `/init` payload — the budget
413/// path probes the payload bytes directly (see
414/// `KtstrVm::init_payload_coverage_reserve`). Retained as a
415/// `#[doc(hidden)]` `pub` capability for out-of-tree consumers that
416/// want to branch on host-process instrumentation.
417///
418/// `pub` (not `pub(crate)`) so integration tests in `tests/*.rs`
419/// can reach the helper. `#[doc(hidden)]` keeps it out of the
420/// crate's rendered docs — the helper is intentionally internal
421/// to the test surface and the docs surface should not expose it.
422#[doc(hidden)]
423pub fn current_binary_is_coverage_instrumented() -> bool {
424    use std::sync::OnceLock;
425    static CACHE: OnceLock<bool> = OnceLock::new();
426    *CACHE.get_or_init(is_coverage_instrumented_binary)
427}
428
429ctor::declarative::ctor! {
430/// Set `LLVM_PROFILE_FILE` to the workspace-local target directory
431/// before the LLVM compiler-rt runtime reads it.
432///
433/// `priority = 0` lands this ctor in `.init_array.0`, which the
434/// glibc startup loop walks BEFORE the unprioritized `.init_array`
435/// slot that compiler-rt's `INSTR_PROF_PROFILE_RUNTIME_VAR` static
436/// initializer (`InstrProfilingRuntime.cpp`) lives in. By the time
437/// `__llvm_profile_initialize_file` runs and calls
438/// `getenv("LLVM_PROFILE_FILE")`, our `set_var` has already landed.
439///
440/// See the module-level "Host atexit profraw redirect" section for
441/// the full motivation. This ctor is intentionally separate from
442/// [`crate::test_support::dispatch::ktstr_test_early_dispatch`] (the
443/// unprioritized ctor that handles VM dispatch and SHM-ring flushes)
444/// because that ctor must NOT acquire the priority slot — its
445/// gauntlet-expansion and dispatch logic is order-insensitive
446/// relative to compiler-rt, but pinning a low priority on it would
447/// risk surprising interactions with future `.init_array.NN` entries.
448/// Keeping the redirect in its own minimal ctor scopes the priority
449/// promise to one well-understood operation.
450///
451/// The set_var call is sound in this ctor context: glibc invokes
452/// `.init_array` entries on the main thread before any user code
453/// has spawned an additional thread, so the env-block mutation is
454/// race-free.
455///
456/// ctor 1.0's `priority` documentation flags the 0..100 range as
457/// platform-reserved for the C runtime's own startup, so accessing
458/// libc/std services from a constructor with such a priority "may
459/// not be safe" in portable terms. On Linux/glibc the dynamic
460/// linker finishes libc initialization before walking
461/// `.init_array.0`, so `std::env::set_var` (which lowers to glibc's
462/// `setenv`) is safe here. The priority retains the .init_array.0
463/// placement that the compiler-rt ordering above depends on; other
464/// platforms would need re-validation.
465///
466/// This site uses ctor's declarative `ctor::declarative::ctor! { ... }`
467/// form; ctor 1.0 also ships `#[ctor::ctor(...)]` (proc-macro attribute)
468/// re-exported under `crate::__private::ctor::ctor` for downstream
469/// consumers. The declarative form is the in-tree convention because
470/// it avoids the TT-muncher recursion-limit cost on the ktstr_test
471/// expansion path.
472#[ctor(unsafe, priority = 0)]
473fn redirect_default_profraw_path() {
474    // Cheap precondition checks first — pid (one syscall) and env
475    // (one var_os call) — so the ELF parse only runs in the
476    // direct-`cargo nextest run`-with-no-env case where the ctor
477    // actually has a decision to make. cargo-ktstr-wrapped runs and
478    // cargo-llvm-cov runs both pre-set `LLVM_PROFILE_FILE`, so
479    // `existing.is_some()` short-circuits before
480    // `current_binary_is_coverage_instrumented` mmaps `/proc/self/exe`
481    // and walks the symtab (first call only; memoised). pid=1 (in-VM
482    // init) similarly avoids the
483    // probe — the SHM-ring flush owns guest-side coverage.
484    let pid = unsafe { libc::getpid() };
485    let existing = std::env::var_os("LLVM_PROFILE_FILE");
486    if pid == 1 || existing.is_some() {
487        return;
488    }
489    let instrumented = current_binary_is_coverage_instrumented();
490    if let Some(pattern) = redirect_pattern_for(pid, existing, instrumented, target_dir) {
491        // SAFETY: this ctor runs from `.init_array.0`, before any
492        // user thread has spawned. The env block is single-writer,
493        // single-reader at this moment, so `set_var` is sound. The
494        // `set_var` API was deprecated in Rust 2024 for thread
495        // unsafety in non-startup contexts, but ctor-time mutation
496        // is exactly the protected case the deprecation guidance
497        // carves out via `unsafe`.
498        unsafe {
499            std::env::set_var("LLVM_PROFILE_FILE", &pattern);
500        }
501    }
502}
503}
504
505#[cfg(test)]
506mod tests {
507    use super::super::test_helpers::{EnvVarGuard, lock_env};
508    use super::*;
509
510    // -- target_dir --
511
512    #[test]
513    fn target_dir_with_env_var() {
514        let _lock = lock_env();
515        let _env = EnvVarGuard::set("LLVM_COV_TARGET_DIR", "/tmp/my-cov-dir");
516        let dir = target_dir();
517        assert_eq!(dir, PathBuf::from("/tmp/my-cov-dir"));
518    }
519
520    #[test]
521    fn target_dir_from_llvm_profile_file() {
522        let _lock = lock_env();
523        let _env_cov = EnvVarGuard::remove("LLVM_COV_TARGET_DIR");
524        let _env_prof =
525            EnvVarGuard::set("LLVM_PROFILE_FILE", "/tmp/cov-target/ktstr-%p-%m.profraw");
526        let dir = target_dir();
527        assert_eq!(dir, PathBuf::from("/tmp/cov-target"));
528    }
529
530    #[test]
531    fn target_dir_without_env_var() {
532        let _lock = lock_env();
533        let _env_cov = EnvVarGuard::remove("LLVM_COV_TARGET_DIR");
534        let _env_prof = EnvVarGuard::remove("LLVM_PROFILE_FILE");
535        let dir = target_dir();
536        // Falls back to current_exe parent + "llvm-cov-target".
537        assert!(
538            dir.ends_with("llvm-cov-target"),
539            "expected path ending in llvm-cov-target, got: {}",
540            dir.display()
541        );
542    }
543
544    /// `LLVM_PROFILE_FILE` set to a bare filename (no parent
545    /// directory component, e.g. `default.profraw`) must fall
546    /// through to the `current_exe`-relative fallback rather than
547    /// surfacing a structurally-empty `PathBuf` through the
548    /// cascade. `Path::new("default.profraw").parent()` returns
549    /// `Some("")`; without the empty-os-str filter,
550    /// `target_dir` would return `PathBuf::from("")` and downstream
551    /// `create_dir_all` calls fail with EINVAL.
552    #[test]
553    fn target_dir_bare_filename_llvm_profile_file_falls_through() {
554        let _lock = lock_env();
555        let _g_cov = EnvVarGuard::remove("LLVM_COV_TARGET_DIR");
556        let _g_prof = EnvVarGuard::set("LLVM_PROFILE_FILE", "default.profraw");
557        let dir = target_dir();
558        assert!(
559            !dir.as_os_str().is_empty(),
560            "bare-filename LLVM_PROFILE_FILE must fall through to the \
561             current_exe fallback, not return an empty PathBuf",
562        );
563        assert!(
564            dir.ends_with("llvm-cov-target"),
565            "fallback must land at the current_exe-relative llvm-cov-target \
566             dir, got: {}",
567            dir.display(),
568        );
569    }
570
571    // -- redirect_pattern_for (host-side LLVM_PROFILE_FILE redirect predicate) --
572
573    /// Pid 1 (in-VM init) must short-circuit. Even when no
574    /// `LLVM_PROFILE_FILE` is set and the binary is instrumented,
575    /// the SHM-ring flush owns guest-side coverage and the
576    /// host-side env redirect is irrelevant.
577    #[test]
578    fn redirect_pattern_for_pid_1_returns_none() {
579        let pattern =
580            redirect_pattern_for(1, None, true, || PathBuf::from("/should/not/be/called"));
581        assert!(
582            pattern.is_none(),
583            "pid=1 (guest init) must skip env redirect"
584        );
585    }
586
587    /// An already-set `LLVM_PROFILE_FILE` (operator override or
588    /// cargo-ktstr/cargo-llvm-cov wrapper injection) takes
589    /// precedence — the redirect must be a no-op so it does not
590    /// stomp on the outer harness's profile location.
591    #[test]
592    fn redirect_pattern_for_existing_env_returns_none() {
593        let pattern = redirect_pattern_for(
594            42,
595            Some(std::ffi::OsString::from("/operator/picked/path.profraw")),
596            true,
597            || PathBuf::from("/should/not/be/called"),
598        );
599        assert!(
600            pattern.is_none(),
601            "existing LLVM_PROFILE_FILE must take precedence"
602        );
603    }
604
605    /// Empty `LLVM_PROFILE_FILE` (`Some("")` from a shell that did
606    /// `export LLVM_PROFILE_FILE=`) is a degenerate but possible
607    /// shape. `var_os` returns `Some` for an empty value, so the
608    /// existing-env short-circuit fires and we leave it alone — the
609    /// LLVM runtime treats empty as "fall through to default" so
610    /// `default.profraw` lands in cwd, but that is the operator's
611    /// choice once they explicitly assigned the variable. Pinned
612    /// here so a future "treat empty as unset" change is a deliberate
613    /// decision rather than a silent drift.
614    #[test]
615    fn redirect_pattern_for_empty_env_short_circuits() {
616        let pattern = redirect_pattern_for(42, Some(std::ffi::OsString::new()), true, || {
617            PathBuf::from("/should/not/be/called")
618        });
619        assert!(
620            pattern.is_none(),
621            "Some(\"\") in LLVM_PROFILE_FILE counts as set; redirect must defer"
622        );
623    }
624
625    /// Non-instrumented binaries (cargo-ktstr in normal builds, the
626    /// `ktstr` standalone CLI) must not set the env. Otherwise the
627    /// inherited env in spawned child test binaries pre-empts their
628    /// own redirect and they write profraw into the parent's target
629    /// dir instead of their own per-binary one.
630    #[test]
631    fn redirect_pattern_for_non_instrumented_binary_returns_none() {
632        let pattern =
633            redirect_pattern_for(42, None, false, || PathBuf::from("/should/not/be/called"));
634        assert!(
635            pattern.is_none(),
636            "non-coverage-instrumented binary must not pollute the env passed \
637             to children"
638        );
639    }
640
641    /// Host-pid + unset env + instrumented binary produces a
642    /// redirect to the workspace-local target dir with the LLVM
643    /// `%p`/`%m` expansions baked into the filename.
644    #[test]
645    fn redirect_pattern_for_host_unset_returns_target_pattern() {
646        let target = PathBuf::from("/synthetic/llvm-cov-target");
647        let pattern = redirect_pattern_for(42, None, true, || target.clone())
648            .expect("host pid + unset env + instrumented must produce a redirect pattern");
649        assert_eq!(
650            pattern,
651            PathBuf::from("/synthetic/llvm-cov-target/default-%p-%m.profraw"),
652        );
653    }
654
655    /// The pattern shape matches what
656    /// `cargo-ktstr.rs::profraw_inject_for` emits — both paths
657    /// inject `default-%p-%m.profraw` so coverage merge tools
658    /// (cargo-llvm-cov) see a uniform filename suffix regardless of
659    /// which entry point launched the test binary.
660    #[test]
661    fn redirect_pattern_for_filename_matches_cargo_ktstr_wrapper() {
662        let target = PathBuf::from("/x");
663        let pattern = redirect_pattern_for(42, None, true, || target.clone()).unwrap();
664        assert_eq!(
665            pattern.file_name().and_then(|n| n.to_str()),
666            Some("default-%p-%m.profraw"),
667            "filename suffix must match cargo-ktstr's profraw_inject_for",
668        );
669    }
670
671    // -- find_symbol_vaddrs --
672
673    #[test]
674    fn find_symbol_vaddrs_resolves_known_symbol() {
675        let exe = crate::resolve_current_exe().unwrap();
676        let data = std::fs::read(&exe).unwrap();
677        let elf = goblin::elf::Elf::parse(&data).unwrap();
678        // "main" is present in the symtab of any Rust test binary.
679        let results = find_symbol_vaddrs(&elf, &["main"]);
680        assert_eq!(results.len(), 1);
681        assert!(
682            results[0].is_some(),
683            "main symbol should be resolved in test binary"
684        );
685        assert_ne!(results[0].unwrap(), 0, "main address should be nonzero");
686    }
687
688    #[test]
689    fn find_symbol_vaddrs_missing_symbol_returns_none() {
690        let exe = crate::resolve_current_exe().unwrap();
691        let data = std::fs::read(&exe).unwrap();
692        let elf = goblin::elf::Elf::parse(&data).unwrap();
693        let results = find_symbol_vaddrs(&elf, &["__nonexistent_symbol_xyz__"]);
694        assert_eq!(results.len(), 1);
695        assert!(results[0].is_none());
696    }
697
698    #[test]
699    fn find_symbol_vaddrs_mixed_results() {
700        let exe = crate::resolve_current_exe().unwrap();
701        let data = std::fs::read(&exe).unwrap();
702        let elf = goblin::elf::Elf::parse(&data).unwrap();
703        let results = find_symbol_vaddrs(&elf, &["main", "__nonexistent_symbol_xyz__"]);
704        assert_eq!(results.len(), 2);
705        assert!(results[0].is_some(), "main should resolve");
706        assert!(results[1].is_none(), "nonexistent should not resolve");
707    }
708
709    // -- profile buffer-API retention (regression) --
710
711    /// `--gc-sections` dead-strips `__llvm_profile_write_buffer` unless a
712    /// link-time reference keeps it — [`try_flush_profraw`]'s direct call
713    /// under `cfg(coverage)` is that reference. This test references the
714    /// symbol by NAME only (a `.symtab` lookup, not a link reference of
715    /// its own), so it fails if that call is ever removed and the linker
716    /// strips the function — the exact regression that left guest
717    /// coverage at 0% before the direct-call fix. Coverage-only: the
718    /// symbol does not exist in non-instrumented builds.
719    #[cfg(coverage)]
720    #[test]
721    fn write_buffer_symbol_retained_under_coverage() {
722        let exe = crate::resolve_current_exe().unwrap();
723        let data = std::fs::read(&exe).unwrap();
724        let elf = goblin::elf::Elf::parse(&data).unwrap();
725        let v = find_symbol_vaddrs(&elf, &["__llvm_profile_write_buffer"]);
726        assert!(
727            v[0].is_some(),
728            "__llvm_profile_write_buffer must survive --gc-sections under \
729             coverage; without it the guest flush silently no-ops",
730        );
731    }
732
733    /// Regression: `find_symbol_vaddrs` must resolve a symbol by name
734    /// even when its `st_size` is 0. A prior `st_size == 0` skip
735    /// silently dropped gc-sections'd zero-size markers (e.g.
736    /// `__llvm_profile_runtime`), hiding instrumented binaries from
737    /// the coverage probe. Pick a real zero-size named symbol from
738    /// this binary's own `.symtab` (linker markers like `_edata` /
739    /// `_end` are `st_size == 0`) and assert the helper resolves it.
740    #[test]
741    fn find_symbol_vaddrs_resolves_zero_size_symbol() {
742        let exe = crate::resolve_current_exe().unwrap();
743        let data = std::fs::read(&exe).unwrap();
744        let elf = goblin::elf::Elf::parse(&data).unwrap();
745        let zero_size_name = elf
746            .syms
747            .iter()
748            .filter(|s| s.st_size == 0)
749            .filter_map(|s| elf.strtab.get_at(s.st_name))
750            .find(|n| !n.is_empty())
751            .map(str::to_string)
752            .expect(
753                "test binary's .symtab should carry at least one named \
754                 zero-size symbol (e.g. a linker marker like _edata / _end)",
755            );
756        let v = find_symbol_vaddrs(&elf, &[zero_size_name.as_str()]);
757        assert!(
758            v[0].is_some(),
759            "find_symbol_vaddrs must resolve zero-size symbol \
760             {zero_size_name:?}; the removed st_size==0 filter previously \
761             dropped such symbols, losing gc-sections'd coverage markers",
762        );
763    }
764}