ktstr/test_support/profraw.rs
1//! Guest-side LLVM coverage profraw flush + host-side write-out.
2//!
3//! Under `-C instrument-coverage`, the compiler inserts profile counters
4//! and registers an atexit handler via `.init_array` that writes
5//! `.profraw` at process exit. Inside a ktstr guest VM, `std::process::exit`
6//! bypasses the atexit handler when the ktstr `#[ctor]` runs first
7//! (the ordering between `.init_array` entries is unspecified). To keep
8//! coverage data from being dropped, [`try_flush_profraw`] calls the
9//! compiler-rt buffer API (`__llvm_profile_get_size_for_buffer` +
10//! `__llvm_profile_write_buffer`) directly under `cfg(coverage)`,
11//! serializes profraw into a heap buffer, and publishes it through the
12//! guest-to-host bulk channel under `MSG_TYPE_PROFRAW`.
13//!
14//! VP data scope: the buffer flush covers coverage counters and
15//! bitmaps only; PGO value-profile data is not preserved.
16//! `__llvm_profile_write_buffer` passes a NULL `VPDataReader` to
17//! `lprofWriteData` (defined in
18//! `compiler-rt/lib/profile/InstrProfilingBuffer.c`),
19//! whereas the file-based `__llvm_profile_write_file` path passes
20//! `lprofGetVPDataReader()` (`InstrProfilingFile.c`) and DOES
21//! capture VP records. This matches the current `-C instrument-coverage`
22//! use case, which does not emit VP data. Combining coverage with PGO
23//! (`-C profile-generate`) in the same binary would silently lose VP
24//! records on this path; switch back to the file-based serializer if
25//! that combination becomes a requirement.
26//!
27//! On the host, [`write_profraw`] receives those bytes via the SHM ring
28//! and writes them into `LLVM_COV_TARGET_DIR` (or a fallback sibling
29//! directory next to the test binary) as
30//! `ktstr-test-{pid}-{counter}.profraw`.
31//!
32//! # Host atexit profraw redirect
33//!
34//! The host-side atexit path (the OS-managed dump that fires on
35//! `std::process::exit` for non-VM-dispatch test runs — including
36//! every test run via `cargo nextest run` directly, without a
37//! `cargo ktstr` wrapper) reads `LLVM_PROFILE_FILE` once during the
38//! LLVM runtime's `.init_array` initializer; if unset, the compiler-rt
39//! default is `default.profraw` in the process cwd. When the operator
40//! launched the test from a kernel source tree, that cwd points at the
41//! source tree and the dump leaks into someone else's directory.
42//!
43//! [`redirect_default_profraw_path`] is a `priority = 0` ctor that
44//! runs BEFORE the LLVM runtime's `.init_array` entry (which has no
45//! priority and lands at the default `.init_array` slot per glibc
46//! ordering rules) and points `LLVM_PROFILE_FILE` at the same
47//! workspace-local target directory the cargo-ktstr wrapper already
48//! injects, so a directly-invoked `cargo nextest run` no longer drops
49//! `default.profraw` in cwd. The redirect is a no-op when:
50//! - getpid() == 1 (in-VM init; the SHM-ring flush above owns
51//! guest-side coverage and the env is irrelevant inside the VM
52//! because `std::process::exit` bypasses atexit anyway).
53//! - `LLVM_PROFILE_FILE` is already set (operator override or
54//! wrapper injection takes precedence — same `existing_env.is_some()`
55//! short-circuit `cargo-ktstr.rs::profraw_inject_for` applies).
56//! - The target binary is NOT coverage-instrumented. Detection is a
57//! symtab probe for the `__llvm_profile_write_buffer` /
58//! `__llvm_profile_get_size_for_buffer` function symbols (the bare
59//! `__llvm_profile_runtime` marker can be dead-stripped entirely
60//! under `--gc-sections`, leaving no `.symtab` entry; see
61//! `is_coverage_instrumented_binary`); the
62//! guest-side flush [`try_flush_profraw`] calls those same compiler-rt
63//! entry points directly. Non-instrumented binaries that link the
64//! ktstr lib (e.g. `cargo-ktstr` itself in a non-coverage build)
65//! must NOT set the env, otherwise the env propagates to spawned
66//! child test binaries, which then short-circuit their own
67//! redirect on the inherited value and write profraw into the
68//! PARENT's target dir rather than their own per-binary one
69//! (cargo-ktstr's exe lives in `target/{profile}/` while test
70//! binaries live in `target/{profile}/deps/`, so the two
71//! `current_exe`-relative target dirs differ).
72//!
73//! Supporting helper:
74//! - [`find_symbol_vaddrs`] walks `.symtab` in one pass for multiple
75//! symbols at once, used by the coverage-instrumentation detection
76//! probes (in-process and on the host-side `/init` payload).
77//!
78//! Those probes read the binary via `memmap2::Mmap` rather than
79//! `std::fs::read` so the kernel page cache backs the bytes goblin
80//! parses; for coverage-instrumented binaries (hundreds of MiB up to
81//! ~1 GiB) this avoids the heap allocation + copy of the entire binary
82//! just to read its symbol table. [`try_flush_profraw`] itself no
83//! longer parses the ELF — under `cfg(coverage)` it calls the
84//! buffer-API entry points directly.
85
86use anyhow::{Context, Result};
87use std::fs::File;
88use std::path::{Path, PathBuf};
89
90#[cfg(coverage)]
91use crate::vmm;
92
/// Flush LLVM coverage profraw to the host through the bulk channel.
///
/// Under `-C instrument-coverage` (cargo-llvm-cov sets `cfg(coverage)`)
/// the compiler-rt profile runtime is linked, so the buffer-API entry
/// points `__llvm_profile_get_size_for_buffer` and
/// `__llvm_profile_write_buffer` are defined. This calls them directly:
/// it allocates a buffer of the reported size, serializes the live
/// profile counters into it, and publishes the buffer through the
/// virtio-console bulk port for host-side extraction.
///
/// Calling `__llvm_profile_write_buffer` directly is also what keeps it
/// alive under `--gc-sections`: the call site is a link-time reference.
/// Resolving it by name through the ELF `.symtab` at runtime instead
/// (an earlier approach) was NOT a link reference, so the linker
/// dead-stripped `write_buffer` — nothing else in the retained graph
/// called it (the runtime's own `write_file` path is stripped too) — and
/// the flush silently no-op'd, leaving guest coverage at 0%.
///
/// No-op when not coverage-instrumented (the `cfg(coverage)` body is
/// absent, so the symbols are never referenced and the build links
/// without the profile runtime) or when called from host context.
///
/// Every failure path (runtime reports size 0, size not representable
/// as `usize`, serializer returns non-zero) emits a Dmesg frame and
/// returns without publishing a `Profraw` frame.
pub(crate) fn try_flush_profraw() {
    #[cfg(coverage)]
    {
        use std::os::raw::{c_char, c_int};
        use std::sync::atomic::{AtomicBool, Ordering};

        // Stable compiler-rt buffer-API entry points, defined whenever
        // `-C instrument-coverage` linked the profile runtime
        // (guaranteed under `cfg(coverage)`). `get_size` is
        // `uint64_t (void)`; `write_buffer` is `int (char *)`, returning
        // 0 on success.
        unsafe extern "C" {
            fn __llvm_profile_get_size_for_buffer() -> u64;
            fn __llvm_profile_write_buffer(buf: *mut c_char) -> c_int;
        }

        // Set by the first flush attempt in this process.
        static FLUSHED: AtomicBool = AtomicBool::new(false);

        if !vmm::guest_comms::is_guest() {
            return;
        }

        // Flush at most once per process. The guest `/init` (pid 1) can
        // reach `try_flush_profraw` from several paths in the same
        // process — the post-dispatch site (rust_init Phase 5), the
        // probe result-publish path, and the ctor / nextest `--exact`
        // dispatch paths (which flush then `process::exit`). A second
        // flush emits a second `Profraw` frame and `llvm-profdata merge`
        // would double-count the counters. First flush per process wins.
        if FLUSHED.swap(true, Ordering::SeqCst) {
            return;
        }

        // SAFETY: the symbol is defined by the linked profile runtime
        // (see the declaration above) and takes no arguments.
        let reported = unsafe { __llvm_profile_get_size_for_buffer() };
        if reported == 0 {
            // Reliable Dmesg frame (NOT eprintln — the Phase-2 stdio->bulk
            // redirect is lossy near reboot) so a zero-coverage run is never
            // silent (frames sent, no profraw, no error).
            vmm::guest_comms::send_dmesg(
                b"ktstr coverage: __llvm_profile_get_size_for_buffer returned 0; no guest profile to flush\n",
            );
            return;
        }

        // Checked conversion: a plain `as usize` would truncate when
        // `usize` is narrower than `u64`, and the serializer below would
        // then write past the end of the undersized buffer.
        let Ok(needed) = usize::try_from(reported) else {
            vmm::guest_comms::send_dmesg(
                b"ktstr coverage: profile size does not fit in usize; guest coverage lost for this run\n",
            );
            return;
        };

        let mut buf: Vec<u8> = vec![0u8; needed];
        // SAFETY: `buf` is a live, writable allocation of exactly the
        // size the runtime just reported, which is the contract of
        // `__llvm_profile_write_buffer`. The dispatch context is
        // single-threaded (guest `/init`, post-dispatch).
        let rc = unsafe { __llvm_profile_write_buffer(buf.as_mut_ptr().cast::<c_char>()) };
        // `__llvm_profile_write_buffer` returns 0 on success.
        if rc != 0 {
            vmm::guest_comms::send_dmesg(
                b"ktstr coverage: __llvm_profile_write_buffer failed; guest coverage lost for this run\n",
            );
            return;
        }

        vmm::guest_comms::send_profraw(&buf);
    }
}
173
174/// Resolve multiple symbol virtual addresses in a single pass through
175/// the ELF `.symtab`. Returns addresses in the same order as `names`.
176///
177/// Matches purely by name: a symbol is resolved regardless of its
178/// `st_size`, so zero-size symbols — e.g. gc-sections'd data markers
179/// like `__llvm_profile_runtime`, whose `st_size` is dropped on some
180/// `--gc-sections` link paths — still resolve as long as the name
181/// survives in `.symtab`. (Callers match exact, specific names, so
182/// admitting zero-size symbols cannot introduce a false positive.)
183pub(crate) fn find_symbol_vaddrs(elf: &goblin::elf::Elf<'_>, names: &[&str]) -> Vec<Option<u64>> {
184 let mut results = vec![None; names.len()];
185 let mut remaining = names.len();
186
187 for sym in elf.syms.iter() {
188 if remaining == 0 {
189 break;
190 }
191 let sym_name = match elf.strtab.get_at(sym.st_name) {
192 Some(n) => n,
193 None => continue,
194 };
195 for (i, name) in names.iter().enumerate() {
196 if results[i].is_none() && sym_name == *name {
197 results[i] = Some(sym.st_value);
198 remaining -= 1;
199 break;
200 }
201 }
202 }
203 results
204}
205
/// Per-process sequence number consumed by [`write_profraw`]: each
/// write takes the next value and embeds it (together with the process
/// id) in the output file name, so successive writes from one process
/// land in distinct files.
static PROFRAW_COUNTER: std::sync::atomic::AtomicU32 = std::sync::atomic::AtomicU32::new(0);
207
208/// Persist every coverage-profraw frame in a post-run guest bulk drain
209/// to the llvm-cov-target directory.
210///
211/// Walks the [`crate::vmm::host_comms::BulkDrainResult`] the host
212/// bucketed into [`crate::vmm::result::VmResult::guest_messages`] and,
213/// for each [`MsgType::Profraw`](crate::vmm::wire::MsgType::Profraw)
214/// frame that passed its per-frame CRC and carries a non-empty payload,
215/// calls [`write_profraw`]. Mirrors the CRC + non-empty gate the
216/// per-frame eval/probe dispatch applied so a corrupted or empty frame
217/// is never written.
218///
219/// Called from [`crate::vmm::KtstrVm::run`] so the direct
220/// `KtstrVm::run()` path persists guest coverage like the
221/// eval (`run_ktstr_test_inner`) and auto-repro (`probe`) paths do —
222/// previously the direct path silently dropped the profraw the guest
223/// `/init` flushed. The eval and probe paths funnel through
224/// `KtstrVm::run`, so they no longer extract `Profraw` frames
225/// themselves; doing so here AND there would write the same payload
226/// twice and `llvm-profdata merge` would double-count the counters.
227pub(crate) fn persist_guest_profraw(messages: &crate::vmm::host_comms::BulkDrainResult) {
228 use crate::vmm::wire::MsgType;
229 for entry in &messages.entries {
230 if MsgType::from_wire(entry.msg_type) == Some(MsgType::Profraw)
231 && entry.crc_ok
232 && !entry.payload.is_empty()
233 && let Err(e) = write_profraw(&entry.payload)
234 {
235 eprintln!("ktstr_test: persist guest profraw: {e}");
236 }
237 }
238}
239
240/// Write profraw data to the llvm-cov-target directory.
241pub(crate) fn write_profraw(data: &[u8]) -> Result<()> {
242 let target_dir = target_dir();
243 std::fs::create_dir_all(&target_dir)
244 .with_context(|| format!("create profraw dir: {}", target_dir.display()))?;
245 let id = PROFRAW_COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
246 let path = target_dir.join(format!("ktstr-test-{}-{}.profraw", std::process::id(), id));
247 std::fs::write(&path, data).with_context(|| format!("write profraw: {}", path.display()))?;
248 Ok(())
249}
250
251/// Resolve the llvm-cov-target directory for profraw output.
252///
253/// Cascade:
254/// 1. `LLVM_COV_TARGET_DIR` — explicit operator override.
255/// 2. `LLVM_PROFILE_FILE`'s parent directory — when an outer harness
256/// (cargo-llvm-cov, or the cargo-ktstr `LLVM_PROFILE_FILE` injection
257/// that prevents host-side `default.profraw` leakage from the
258/// `cargo ktstr test` path) has already pinned the output location.
259/// 3. `<current_exe parent>/llvm-cov-target/` — workspace-local
260/// fallback so an instrumented binary invoked without any
261/// coordination still drops profraw next to the build output
262/// rather than in cwd.
263///
264/// `pub` rather than `pub(crate)` so the cargo-ktstr binary can
265/// resolve the same directory before exec-ing `cargo nextest run`,
266/// keeping host-side and guest-side profraw output co-located in
267/// one tree without cargo-ktstr re-implementing the cascade.
268pub fn target_dir() -> PathBuf {
269 if let Ok(d) = std::env::var("LLVM_COV_TARGET_DIR") {
270 return PathBuf::from(d);
271 }
272 // `LLVM_PROFILE_FILE` may be a bare filename (e.g. `default.profraw`)
273 // — `Path::parent` returns `Some("")` in that shape, which would
274 // otherwise propagate a structurally-empty `PathBuf` through the
275 // cascade and surface as an unusable target dir downstream
276 // (`std::fs::create_dir_all("")` errors with EINVAL on Linux).
277 // The empty-os-str filter forces those bare-filename cases to fall
278 // through to the `current_exe`-relative fallback below.
279 if let Some(parent) = std::env::var("LLVM_PROFILE_FILE")
280 .ok()
281 .as_ref()
282 .and_then(|p| Path::new(p).parent())
283 .filter(|p| !p.as_os_str().is_empty())
284 {
285 return parent.to_path_buf();
286 }
287 let mut p = crate::resolve_current_exe().unwrap_or_else(|_| std::env::temp_dir());
288 p.pop(); // remove binary name
289 p.push("llvm-cov-target");
290 p
291}
292
293/// Pure decision logic for [`redirect_default_profraw_path`]: given the
294/// current `LLVM_PROFILE_FILE` value, the current pid, the coverage
295/// instrumentation marker, and the workspace-local target dir
296/// resolved from a callable, return the pattern to set on
297/// `LLVM_PROFILE_FILE` or `None` to leave the env untouched.
298/// Mirrors the `cargo-ktstr.rs::profraw_inject_for` predicate so
299/// the directly-invoked `cargo nextest run` and `cargo ktstr test`
300/// paths agree on when to inject.
301///
302/// Returns `Some(pattern)` only when:
303/// - `pid != 1` (a test binary running as `/init` inside the guest VM
304/// is owned by the SHM-ring flush; setting host-side env in that
305/// context would still be a no-op because `std::process::exit`
306/// bypasses atexit, but the early return keeps the in-VM startup
307/// trace clean of an irrelevant env mutation).
308/// - `existing` is `None` (operator-supplied
309/// `LLVM_PROFILE_FILE` or wrapper-injected value takes precedence —
310/// identical short-circuit to the cargo-ktstr wrapper).
311/// - `is_coverage_instrumented` is `true` (the calling binary has
312/// the LLVM compiler-rt profile runtime linked in). Without this
313/// guard, the redirect would fire in `cargo-ktstr` and other
314/// non-instrumented binaries that link the ktstr lib, polluting
315/// the env passed to child test binaries — those child binaries
316/// would then see a pre-set `LLVM_PROFILE_FILE` and short-circuit
317/// their own redirect, writing profraw into the parent's target
318/// dir rather than their own per-binary one. Detecting
319/// instrumentation via the runtime marker symbol scopes the
320/// redirect to the binaries that actually emit profraw.
321///
322/// `target_dir` is taken as a callable so the test suite can drive
323/// the predicate against a synthetic target without building the
324/// real `<current_exe parent>/llvm-cov-target/` path. `%p` (process
325/// id) and `%m` (binary hash) are LLVM runtime expansions that keep
326/// parallel-test output files distinct — the same pattern shape
327/// `cargo-ktstr.rs::profraw_inject_for` emits.
328fn redirect_pattern_for(
329 pid: libc::pid_t,
330 existing: Option<std::ffi::OsString>,
331 is_coverage_instrumented: bool,
332 target_dir: impl FnOnce() -> PathBuf,
333) -> Option<PathBuf> {
334 if pid == 1 || existing.is_some() || !is_coverage_instrumented {
335 return None;
336 }
337 Some(target_dir().join("default-%p-%m.profraw"))
338}
339
340/// Detect whether the running binary has LLVM compiler-rt's profile
341/// runtime linked in by probing `.symtab` for the buffer-API entry
342/// points `__llvm_profile_write_buffer` /
343/// `__llvm_profile_get_size_for_buffer` (see the inline comment below
344/// for why these function symbols are used rather than the bare
345/// `__llvm_profile_runtime` marker, which `--gc-sections` can strip).
346/// Coverage-instrumented binaries link these symbols; non-instrumented
347/// binaries (e.g. `cargo-ktstr` in a normal release build) do not.
348///
349/// Probes via the symtab walk that profraw flushing already uses
350/// (the symbol has hidden visibility, so dlsym would not find it).
351/// Returns `false` on any failure path (binary mmap, ELF parse,
352/// symbol absent) — false negatives leave the redirect off, which
353/// is the conservative outcome: the binary writes
354/// `default.profraw` in cwd just as before, no regression.
355fn is_coverage_instrumented_binary() -> bool {
356 let exe_file = match File::open("/proc/self/exe") {
357 Ok(f) => f,
358 Err(_) => return false,
359 };
360 // SAFETY: same invariants as `try_flush_profraw`'s mmap — see
361 // the SAFETY block there. The `/proc/self/exe` mapping pins
362 // the binary inode for the mmap's lifetime, and no part of
363 // ktstr writes to its own binary during process startup.
364 let mmap = match unsafe { memmap2::Mmap::map(&exe_file) } {
365 Ok(m) => m,
366 Err(_) => return false,
367 };
368 let elf = match goblin::elf::Elf::parse(&mmap) {
369 Ok(e) => e,
370 Err(_) => return false,
371 };
372 // Probe for the profile-write-buffer entry point rather than
373 // the bare `__llvm_profile_runtime` marker. The marker is
374 // declared `int __llvm_profile_runtime;` in compiler-rt and
375 // can be dead-stripped entirely by `--gc-sections` /
376 // `-Wl,--strip-debug` paths some toolchains apply to coverage
377 // builds, leaving no `.symtab` entry to resolve. The
378 // function-shaped symbols [`try_flush_profraw`] already
379 // resolves (`__llvm_profile_get_size_for_buffer` and
380 // `__llvm_profile_write_buffer`) are kept alive by that flush
381 // call's link reference, so they are the reliable presence
382 // signal for instrumented binaries, proved empirically by the
383 // fact that coverage profraw collection in CI succeeds via the
384 // same symtab probe.
385 let vaddrs = find_symbol_vaddrs(
386 &elf,
387 &[
388 "__llvm_profile_write_buffer",
389 "__llvm_profile_get_size_for_buffer",
390 ],
391 );
392 vaddrs.iter().any(|v| matches!(v, Some(va) if *va != 0))
393}
394
395/// Process-wide cached version of [`is_coverage_instrumented_binary`]:
396/// whether the HOST process (`/proc/self/exe`) is built with
397/// `-C instrument-coverage`. The symbol-table walk runs once per
398/// process and is memoised in a `OnceLock<bool>` so repeated probes
399/// only pay the ELF parse once.
400///
401/// History: VM-booting tests once used this to skip themselves under
402/// coverage, because the instrumented `current_exe` used as the guest
403/// `/init` OOMed early in boot (the budget in
404/// `crate::vmm::memory_budget` was payload-agnostic, sizing the
405/// non-instrumented case). That skip list is gone:
406/// [`crate::vmm::memory_budget::initramfs_min_memory_mib`] now
407/// detects an instrumented `/init` payload and reserves the extra
408/// resident memory (`__llvm_prf_cnts` + `__llvm_prf_data`), so the
409/// instrumented `/init` boots and its coverage is captured via
410/// [`persist_guest_profraw`].
411///
412/// This probes the HOST process, not the `/init` payload — the budget
413/// path probes the payload bytes directly (see
414/// `KtstrVm::init_payload_coverage_reserve`). Retained as a
415/// `#[doc(hidden)]` `pub` capability for out-of-tree consumers that
416/// want to branch on host-process instrumentation.
417///
418/// `pub` (not `pub(crate)`) so integration tests in `tests/*.rs`
419/// can reach the helper. `#[doc(hidden)]` keeps it out of the
420/// crate's rendered docs — the helper is intentionally internal
421/// to the test surface and the docs surface should not expose it.
422#[doc(hidden)]
423pub fn current_binary_is_coverage_instrumented() -> bool {
424 use std::sync::OnceLock;
425 static CACHE: OnceLock<bool> = OnceLock::new();
426 *CACHE.get_or_init(is_coverage_instrumented_binary)
427}
428
ctor::declarative::ctor! {
/// Set `LLVM_PROFILE_FILE` to the workspace-local target directory
/// before the LLVM compiler-rt runtime reads it.
///
/// `priority = 0` lands this ctor in `.init_array.0`, which the
/// glibc startup loop walks BEFORE the unprioritized `.init_array`
/// slot that compiler-rt's `INSTR_PROF_PROFILE_RUNTIME_VAR` static
/// initializer (`InstrProfilingRuntime.cpp`) lives in. By the time
/// `__llvm_profile_initialize_file` runs and calls
/// `getenv("LLVM_PROFILE_FILE")`, our `set_var` has already landed.
///
/// The variable is only written when the process is not pid 1, the
/// variable is not already present, and the running binary is
/// coverage-instrumented; in every other case the environment is left
/// untouched.
///
/// See the module-level "Host atexit profraw redirect" section for
/// the full motivation. This ctor is intentionally separate from
/// [`crate::test_support::dispatch::ktstr_test_early_dispatch`] (the
/// unprioritized ctor that handles VM dispatch and SHM-ring flushes)
/// because that ctor must NOT acquire the priority slot — its
/// gauntlet-expansion and dispatch logic is order-insensitive
/// relative to compiler-rt, but pinning a low priority on it would
/// risk surprising interactions with future `.init_array.NN` entries.
/// Keeping the redirect in its own minimal ctor scopes the priority
/// promise to one well-understood operation.
///
/// The set_var call is sound in this ctor context: glibc invokes
/// `.init_array` entries on the main thread before any user code
/// has spawned an additional thread, so the env-block mutation is
/// race-free.
///
/// ctor 1.0's `priority` documentation flags the 0..100 range as
/// platform-reserved for the C runtime's own startup, so accessing
/// libc/std services from a constructor with such a priority "may
/// not be safe" in portable terms. On Linux/glibc the dynamic
/// linker finishes libc initialization before walking
/// `.init_array.0`, so `std::env::set_var` (which lowers to glibc's
/// `setenv`) is safe here. The priority retains the .init_array.0
/// placement that the compiler-rt ordering above depends on; other
/// platforms would need re-validation.
///
/// This site uses ctor's declarative `ctor::declarative::ctor! { ... }`
/// form; ctor 1.0 also ships `#[ctor::ctor(...)]` (proc-macro attribute)
/// re-exported under `crate::__private::ctor::ctor` for downstream
/// consumers. The declarative form is the in-tree convention because
/// it avoids the TT-muncher recursion-limit cost on the ktstr_test
/// expansion path.
#[ctor(unsafe, priority = 0)]
fn redirect_default_profraw_path() {
    // Cheap precondition checks first — pid (one syscall) and env
    // (one var_os call) — so the ELF parse only runs in the
    // direct-`cargo nextest run`-with-no-env case where the ctor
    // actually has a decision to make. cargo-ktstr-wrapped runs and
    // cargo-llvm-cov runs both pre-set `LLVM_PROFILE_FILE`, so
    // `existing.is_some()` short-circuits before
    // `current_binary_is_coverage_instrumented` mmaps `/proc/self/exe`
    // and walks the symtab (first call only; memoised). pid=1 (in-VM
    // init) similarly avoids the probe — the SHM-ring flush owns
    // guest-side coverage.
    //
    // SAFETY: `getpid` takes no arguments and has no preconditions.
    let pid = unsafe { libc::getpid() };
    let existing = std::env::var_os("LLVM_PROFILE_FILE");
    if pid == 1 || existing.is_some() {
        return;
    }
    let instrumented = current_binary_is_coverage_instrumented();
    // `redirect_pattern_for` re-applies the pid / existing-env checks;
    // after the early return above only the instrumentation flag can
    // still turn the result into `None`.
    if let Some(pattern) = redirect_pattern_for(pid, existing, instrumented, target_dir) {
        // SAFETY: this ctor runs from `.init_array.0`, before any
        // user thread has spawned. The env block is single-writer,
        // single-reader at this moment, so `set_var` is sound.
        // `set_var` became an `unsafe fn` in the Rust 2024 edition
        // because mutating the environment while other threads may
        // read it is unsound; a single-threaded startup context like
        // this one is the situation the `unsafe` contract allows.
        unsafe {
            std::env::set_var("LLVM_PROFILE_FILE", &pattern);
        }
    }
}
}
504
#[cfg(test)]
mod tests {
    use super::super::test_helpers::{EnvVarGuard, lock_env};
    use super::*;

    /// Read this test binary from disk, parse it as ELF and run `check`
    /// against the parsed image. Panics if the binary cannot be
    /// located, read or parsed.
    fn with_own_elf<R>(check: impl FnOnce(&goblin::elf::Elf<'_>) -> R) -> R {
        let exe_path = crate::resolve_current_exe().unwrap();
        let bytes = std::fs::read(&exe_path).unwrap();
        let elf = goblin::elf::Elf::parse(&bytes).unwrap();
        check(&elf)
    }

    // -- target_dir --

    /// `LLVM_COV_TARGET_DIR` is returned verbatim when set.
    #[test]
    fn target_dir_with_env_var() {
        let _lock = lock_env();
        let _env = EnvVarGuard::set("LLVM_COV_TARGET_DIR", "/tmp/my-cov-dir");
        assert_eq!(target_dir(), PathBuf::from("/tmp/my-cov-dir"));
    }

    /// Without `LLVM_COV_TARGET_DIR`, the directory part of
    /// `LLVM_PROFILE_FILE` is used.
    #[test]
    fn target_dir_from_llvm_profile_file() {
        let _lock = lock_env();
        let _env_cov = EnvVarGuard::remove("LLVM_COV_TARGET_DIR");
        let _env_prof =
            EnvVarGuard::set("LLVM_PROFILE_FILE", "/tmp/cov-target/ktstr-%p-%m.profraw");
        assert_eq!(target_dir(), PathBuf::from("/tmp/cov-target"));
    }

    /// With neither variable set, the result is the
    /// `current_exe`-relative `llvm-cov-target` directory.
    #[test]
    fn target_dir_without_env_var() {
        let _lock = lock_env();
        let _env_cov = EnvVarGuard::remove("LLVM_COV_TARGET_DIR");
        let _env_prof = EnvVarGuard::remove("LLVM_PROFILE_FILE");
        let resolved = target_dir();
        assert!(
            resolved.ends_with("llvm-cov-target"),
            "expected path ending in llvm-cov-target, got: {}",
            resolved.display()
        );
    }

    /// A bare-filename `LLVM_PROFILE_FILE` (no directory component,
    /// e.g. `default.profraw`) must not be turned into a target dir.
    /// `Path::new("default.profraw").parent()` is `Some("")`; the
    /// empty-os-str filter in `target_dir` has to reject that and fall
    /// through to the `current_exe`-relative fallback instead of
    /// returning a structurally-empty `PathBuf`.
    #[test]
    fn target_dir_bare_filename_llvm_profile_file_falls_through() {
        let _lock = lock_env();
        let _g_cov = EnvVarGuard::remove("LLVM_COV_TARGET_DIR");
        let _g_prof = EnvVarGuard::set("LLVM_PROFILE_FILE", "default.profraw");
        let resolved = target_dir();
        assert!(
            !resolved.as_os_str().is_empty(),
            "bare-filename LLVM_PROFILE_FILE must fall through to the \
             current_exe fallback, not return an empty PathBuf",
        );
        assert!(
            resolved.ends_with("llvm-cov-target"),
            "fallback must land at the current_exe-relative llvm-cov-target \
             dir, got: {}",
            resolved.display(),
        );
    }

    // -- redirect_pattern_for (host-side LLVM_PROFILE_FILE redirect predicate) --

    /// Pid 1 (in-VM init) short-circuits even with the env unset and
    /// an instrumented binary: the SHM-ring flush owns guest-side
    /// coverage, so the host-side redirect does not apply.
    #[test]
    fn redirect_pattern_for_pid_1_returns_none() {
        let outcome =
            redirect_pattern_for(1, None, true, || PathBuf::from("/should/not/be/called"));
        assert!(
            outcome.is_none(),
            "pid=1 (guest init) must skip env redirect"
        );
    }

    /// A pre-set `LLVM_PROFILE_FILE` (operator override or
    /// cargo-ktstr/cargo-llvm-cov wrapper injection) wins; the redirect
    /// must not overwrite the outer harness's profile location.
    #[test]
    fn redirect_pattern_for_existing_env_returns_none() {
        let operator_value = std::ffi::OsString::from("/operator/picked/path.profraw");
        let outcome = redirect_pattern_for(42, Some(operator_value), true, || {
            PathBuf::from("/should/not/be/called")
        });
        assert!(
            outcome.is_none(),
            "existing LLVM_PROFILE_FILE must take precedence"
        );
    }

    /// An empty `LLVM_PROFILE_FILE` (`Some("")`, e.g. after a shell ran
    /// `export LLVM_PROFILE_FILE=`) is degenerate but possible.
    /// `var_os` reports `Some` for it, so the existing-env
    /// short-circuit applies and the value is left alone — the LLVM
    /// runtime treats empty as "fall through to default", so
    /// `default.profraw` lands in cwd, but that is the operator's
    /// choice once they explicitly assigned the variable. Pinned here
    /// so that a future "treat empty as unset" change is a deliberate
    /// decision rather than silent drift.
    #[test]
    fn redirect_pattern_for_empty_env_short_circuits() {
        let outcome = redirect_pattern_for(42, Some(std::ffi::OsString::new()), true, || {
            PathBuf::from("/should/not/be/called")
        });
        assert!(
            outcome.is_none(),
            "Some(\"\") in LLVM_PROFILE_FILE counts as set; redirect must defer"
        );
    }

    /// Non-instrumented binaries (cargo-ktstr in normal builds, the
    /// `ktstr` standalone CLI) must leave the env alone. Otherwise
    /// spawned child test binaries inherit the value, skip their own
    /// redirect, and write profraw into the parent's target dir
    /// instead of their own per-binary one.
    #[test]
    fn redirect_pattern_for_non_instrumented_binary_returns_none() {
        let outcome =
            redirect_pattern_for(42, None, false, || PathBuf::from("/should/not/be/called"));
        assert!(
            outcome.is_none(),
            "non-coverage-instrumented binary must not pollute the env passed \
             to children"
        );
    }

    /// Host pid + unset env + instrumented binary yields a redirect
    /// into the supplied target dir, with the LLVM `%p`/`%m`
    /// expansions in the filename.
    #[test]
    fn redirect_pattern_for_host_unset_returns_target_pattern() {
        let synthetic_dir = PathBuf::from("/synthetic/llvm-cov-target");
        let produced = redirect_pattern_for(42, None, true, || synthetic_dir.clone())
            .expect("host pid + unset env + instrumented must produce a redirect pattern");
        assert_eq!(
            produced,
            PathBuf::from("/synthetic/llvm-cov-target/default-%p-%m.profraw"),
        );
    }

    /// The filename matches what `cargo-ktstr.rs::profraw_inject_for`
    /// emits — both paths inject `default-%p-%m.profraw`, so coverage
    /// merge tools (cargo-llvm-cov) see a uniform filename suffix no
    /// matter which entry point launched the test binary.
    #[test]
    fn redirect_pattern_for_filename_matches_cargo_ktstr_wrapper() {
        let synthetic_dir = PathBuf::from("/x");
        let produced = redirect_pattern_for(42, None, true, || synthetic_dir.clone()).unwrap();
        assert_eq!(
            produced.file_name().and_then(|n| n.to_str()),
            Some("default-%p-%m.profraw"),
            "filename suffix must match cargo-ktstr's profraw_inject_for",
        );
    }

    // -- find_symbol_vaddrs --

    /// A symbol known to exist resolves to a non-zero address.
    #[test]
    fn find_symbol_vaddrs_resolves_known_symbol() {
        with_own_elf(|elf| {
            // "main" is present in the symtab of any Rust test binary.
            let found = find_symbol_vaddrs(elf, &["main"]);
            assert_eq!(found.len(), 1);
            assert!(
                found[0].is_some(),
                "main symbol should be resolved in test binary"
            );
            assert_ne!(found[0].unwrap(), 0, "main address should be nonzero");
        });
    }

    /// A name that is not in `.symtab` yields `None` in its slot.
    #[test]
    fn find_symbol_vaddrs_missing_symbol_returns_none() {
        with_own_elf(|elf| {
            let found = find_symbol_vaddrs(elf, &["__nonexistent_symbol_xyz__"]);
            assert_eq!(found.len(), 1);
            assert!(found[0].is_none());
        });
    }

    /// Present and absent names can be mixed; slots follow the order
    /// of the request.
    #[test]
    fn find_symbol_vaddrs_mixed_results() {
        with_own_elf(|elf| {
            let found = find_symbol_vaddrs(elf, &["main", "__nonexistent_symbol_xyz__"]);
            assert_eq!(found.len(), 2);
            assert!(found[0].is_some(), "main should resolve");
            assert!(found[1].is_none(), "nonexistent should not resolve");
        });
    }

    // -- profile buffer-API retention (regression) --

    /// `--gc-sections` dead-strips `__llvm_profile_write_buffer` unless
    /// a link-time reference keeps it — [`try_flush_profraw`]'s direct
    /// call under `cfg(coverage)` is that reference. This test only
    /// names the symbol (a `.symtab` lookup, not a link reference of
    /// its own), so it fails if that call is ever removed and the
    /// linker strips the function — the exact regression that left
    /// guest coverage at 0% before the direct-call fix. Coverage-only:
    /// the symbol does not exist in non-instrumented builds.
    #[cfg(coverage)]
    #[test]
    fn write_buffer_symbol_retained_under_coverage() {
        with_own_elf(|elf| {
            let found = find_symbol_vaddrs(elf, &["__llvm_profile_write_buffer"]);
            assert!(
                found[0].is_some(),
                "__llvm_profile_write_buffer must survive --gc-sections under \
                 coverage; without it the guest flush silently no-ops",
            );
        });
    }

    /// Regression: `find_symbol_vaddrs` must resolve a symbol by name
    /// even when its `st_size` is 0. A prior `st_size == 0` skip
    /// silently dropped gc-sections'd zero-size markers (e.g.
    /// `__llvm_profile_runtime`), hiding instrumented binaries from
    /// the coverage probe. The test picks a real zero-size named
    /// symbol from this binary's own `.symtab` (linker markers like
    /// `_edata` / `_end` are `st_size == 0`) and asserts the helper
    /// resolves it.
    #[test]
    fn find_symbol_vaddrs_resolves_zero_size_symbol() {
        with_own_elf(|elf| {
            let zero_size_name = elf
                .syms
                .iter()
                .filter(|sym| sym.st_size == 0)
                .filter_map(|sym| elf.strtab.get_at(sym.st_name))
                .find(|name| !name.is_empty())
                .map(str::to_string)
                .expect(
                    "test binary's .symtab should carry at least one named \
                     zero-size symbol (e.g. a linker marker like _edata / _end)",
                );
            let found = find_symbol_vaddrs(elf, &[zero_size_name.as_str()]);
            assert!(
                found[0].is_some(),
                "find_symbol_vaddrs must resolve zero-size symbol \
                 {zero_size_name:?}; the removed st_size==0 filter previously \
                 dropped such symbols, losing gc-sections'd coverage markers",
            );
        });
    }
}