ktstr/host_heap.rs
1//! Heap-state snapshot for the running ktstr binary.
2//!
3//! [`HostHeapState`] is a thin snapshot of the process's jemalloc
4//! allocator state at capture time: active / allocated / resident /
5//! mapped bytes plus the arena count. It rides along inside
6//! [`HostContext::heap_state`](crate::host_context::HostContext::heap_state)
7//! so a sidecar reader can correlate scheduler-test outcomes with
8//! the ktstr tool's own memory footprint — e.g. distinguish a
9//! legitimate regression from one where the runner itself OOM-pressured
10//! the host.
11//!
12//! # jemalloc is always linked
13//!
14//! `tikv-jemalloc-ctl` declares a non-optional dependency on
15//! `tikv-jemalloc-sys`, which builds and links libjemalloc
16//! unconditionally. So even consumers that do NOT install
17//! `tikv_jemallocator::Jemalloc` as `#[global_allocator]` carry
18//! libjemalloc in their binary, and every `mallctl` call from this
19//! module resolves to libjemalloc's implementation rather than a
20//! libc stub. `mallctl` reads succeed regardless of which allocator
21//! `#[global_allocator]` resolves to.
22//!
23//! What differs is the *meaning* of the numbers:
24//!
25//! - When jemalloc IS `#[global_allocator]` (every shipped binary in
26//! this workspace, via the central `#[global_allocator]` in
27//! `src/lib.rs` gated on the `cli-bins` feature), every heap
28//! allocation flows through jemalloc and `stats.allocated` /
29//! `stats.active` report
30//! real application usage in the tens-to-hundreds of MiB range.
31//! - When jemalloc is linked but is NOT `#[global_allocator]`
32//! (downstream consumers using ktstr as a library without opting
33//! into jemallocator), jemalloc still initializes its arenas but
34//! the application never allocates through it. `stats.allocated`
35//! and `stats.active` return `Some(0)` in that case.
36//! `arenas.narenas` is still populated (jemalloc computes it as
37//! `4 * ncpus` at init time) and `stats.resident` / `stats.mapped`
38//! reflect jemalloc's own metadata footprint — small but non-zero.
39//!
40//! [`collect`] collapses the "jemalloc linked but unused" shape
41//! (`allocated_bytes == Some(0) && active_bytes == Some(0)`) to
42//! `None` at the [`HostContext::heap_state`](crate::host_context::HostContext::heap_state)
43//! call site, so sidecars from non-jemallocator consumers do not
44//! carry misleading mostly-zero rows. The `jemalloc-used` signal
45//! (non-zero allocated AND active) is what warrants sidecar space.
46//!
47//! # `stats` feature is required for stats reads
48//!
49//! libjemalloc only tracks `stats.*` counters when the C library is
50//! built with `--enable-stats`; without it the mallctl reads still
51//! succeed but return zero. The `stats` feature on both
52//! `tikv-jemalloc-ctl` and `tikv-jemallocator` in `Cargo.toml` forces
53//! the C build flag — `host_heap::collect` depends on this.
54//! `arenas.narenas` is independent of `--enable-stats`; it reports
55//! correctly either way.
56//!
57//! # Epoch discipline
58//!
59//! `stats.*` reads return cached values; the cache refreshes when
60//! the `epoch` mallctl is advanced. [`collect`] advances the epoch
61//! exactly once before issuing reads so each snapshot reflects
62//! post-advance state. Callers that invoke [`collect`] back-to-back
63//! see fresh reads every time because each call advances the epoch
64//! again.
65//!
66//! Side effect of `epoch::advance()`: libjemalloc flushes per-thread
67//! stat caches into the shared counters under a mallctl-internal
68//! lock. The operation is thread-safe per jemalloc's mallctl
69//! contract — concurrent `collect` calls from multiple ktstr
70//! threads are defined-behavior (though pointless; each caller sees
71//! its own refreshed snapshot and the last writer wins in the
72//! caches).
73
/// Heap-state snapshot for the running process's jemalloc allocator.
///
/// Every field is `Option<u64>` (or `Option<usize>` for the arena
/// count) so a partial read lands what succeeded and consumers can
/// distinguish "jemalloc reported X" from "jemalloc did not report
/// this field". The `Default` impl lands every field as `None`,
/// matching the non-jemalloc fallback path and serving as the
/// fixture for test call sites that want the empty shape.
///
/// # Serialization
///
/// Each field carries
/// `#[serde(default, skip_serializing_if = "Option::is_none")]`:
/// a `None` field is omitted from the serialized output, and a
/// field missing from the input deserializes to `None`. An
/// all-`None` snapshot therefore round-trips through an empty
/// object.
///
/// # Constructing instances in tests
///
/// `HostHeapState` is `#[non_exhaustive]` — see
/// [`crate::non_exhaustive`] for the cross-crate construction and
/// pattern-match rules. Start from [`HostHeapState::test_fixture`]
/// (populated baseline) or [`HostHeapState::default`] (all-`None`)
/// and mutate fields.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct HostHeapState {
    /// `stats.active` — bytes in active pages allocated by the
    /// application. A multiple of the page size and `>=`
    /// [`Self::allocated_bytes`]. Populated whenever libjemalloc
    /// was built with `--enable-stats` (the `stats` feature on
    /// `tikv-jemalloc-ctl` forces this). `Some(0)` when jemalloc
    /// is linked but is not `#[global_allocator]` — the whole
    /// [`HostHeapState`] collapses to `None` at the HostContext
    /// call site in that case (see module doc).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub active_bytes: Option<u64>,
    /// `stats.allocated` — total bytes allocated by the
    /// application (sum of live allocations, excluding allocator
    /// metadata and padding). `Some(0)` when jemalloc is linked
    /// but not installed as `#[global_allocator]`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub allocated_bytes: Option<u64>,
    /// `stats.resident` — bytes in physically resident data pages
    /// mapped by the allocator. Overestimates by including
    /// demand-zeroed pages that have not been touched; jemalloc
    /// documents this. A multiple of the page size and `>=`
    /// [`Self::active_bytes`]. Reflects jemalloc's own metadata
    /// footprint even when jemalloc is not `#[global_allocator]`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resident_bytes: Option<u64>,
    /// `stats.mapped` — bytes in active extents mapped by the
    /// allocator. Excludes inactive extents, even those with
    /// unused dirty pages, so there is no strict ordering between
    /// this and [`Self::resident_bytes`]. A multiple of the page
    /// size and `>=` [`Self::active_bytes`]. Reflects jemalloc's
    /// own metadata footprint even when jemalloc is not
    /// `#[global_allocator]`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mapped_bytes: Option<u64>,
    /// `arenas.narenas` — current limit on the number of arenas.
    /// Initialized at jemalloc startup (typically `4 * ncpus` on a
    /// multi-core Linux host) and updated as the allocator grows
    /// new arenas. Populated whenever libjemalloc is linked into
    /// the binary, including on consumers that use ktstr as a
    /// library without opting into jemallocator as
    /// `#[global_allocator]` (see the module doc). `None` only on
    /// the rare mallctl-error path.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub narenas: Option<usize>,
}
137
138impl HostHeapState {
139 /// Populated fixture for unit tests. Values are plausible for a
140 /// small ktstr run on a 16-CPU host: a few hundred KiB
141 /// allocated, rounded up to page-size multiples for active /
142 /// resident / mapped, and `narenas = 64` (jemalloc's
143 /// `4 * ncpus` default on a 16-CPU box).
144 ///
145 /// Call sites mutate the fields they care about:
146 ///
147 /// ```
148 /// use ktstr::prelude::HostHeapState;
149 /// let mut h = HostHeapState::test_fixture();
150 /// h.allocated_bytes = Some(0);
151 /// ```
152 pub fn test_fixture() -> HostHeapState {
153 HostHeapState {
154 active_bytes: Some(1 << 20),
155 allocated_bytes: Some(512 * 1024),
156 resident_bytes: Some(2 << 20),
157 mapped_bytes: Some(4 << 20),
158 narenas: Some(64),
159 }
160 }
161
162 /// Render as a human-readable multi-line block. Each field is
163 /// one `key: value` line; absent fields render `(unknown)` so
164 /// operators see which reads failed. The block ends with a
165 /// newline. Matches [`HostContext::format_human`](crate::host_context::HostContext::format_human)'s
166 /// shape — pair the two in `cargo ktstr show-host` for a
167 /// single-block host summary.
168 pub fn format_human(&self) -> String {
169 use std::fmt::Write;
170 // Destructuring bind forces every field of HostHeapState to
171 // appear by name here; adding a field will break the build
172 // until it's rendered.
173 let HostHeapState {
174 active_bytes,
175 allocated_bytes,
176 resident_bytes,
177 mapped_bytes,
178 narenas,
179 } = self;
180 fn row<T: std::fmt::Display>(out: &mut String, key: &str, value: Option<&T>) {
181 match value {
182 Some(v) => {
183 let _ = writeln!(out, "{key}: {v}");
184 }
185 None => {
186 let _ = writeln!(out, "{key}: (unknown)");
187 }
188 }
189 }
190 let mut out = String::new();
191 row(&mut out, "allocated_bytes", allocated_bytes.as_ref());
192 row(&mut out, "active_bytes", active_bytes.as_ref());
193 row(&mut out, "resident_bytes", resident_bytes.as_ref());
194 row(&mut out, "mapped_bytes", mapped_bytes.as_ref());
195 row(&mut out, "narenas", narenas.as_ref());
196 out
197 }
198
199 /// Render a field-by-field diff as `key: before → after` lines.
200 /// Omits unchanged fields; an empty return means the two
201 /// snapshots are identical. `None` renders as `(unknown)` so a
202 /// `None → Some(..)` transition is visible.
203 pub fn diff(&self, other: &HostHeapState) -> String {
204 use std::fmt::Write;
205 let HostHeapState {
206 active_bytes: a_active,
207 allocated_bytes: a_allocated,
208 resident_bytes: a_resident,
209 mapped_bytes: a_mapped,
210 narenas: a_narenas,
211 } = self;
212 let HostHeapState {
213 active_bytes: b_active,
214 allocated_bytes: b_allocated,
215 resident_bytes: b_resident,
216 mapped_bytes: b_mapped,
217 narenas: b_narenas,
218 } = other;
219 let mut out = String::new();
220 fn row_opt<T: std::fmt::Display + PartialEq>(
221 out: &mut String,
222 key: &str,
223 a: Option<&T>,
224 b: Option<&T>,
225 ) {
226 if a == b {
227 return;
228 }
229 let render = |v: Option<&T>| match v {
230 Some(x) => format!("{x}"),
231 None => "(unknown)".to_string(),
232 };
233 let _ = writeln!(out, "{key}: {} → {}", render(a), render(b));
234 }
235 row_opt(
236 &mut out,
237 "allocated_bytes",
238 a_allocated.as_ref(),
239 b_allocated.as_ref(),
240 );
241 row_opt(
242 &mut out,
243 "active_bytes",
244 a_active.as_ref(),
245 b_active.as_ref(),
246 );
247 row_opt(
248 &mut out,
249 "resident_bytes",
250 a_resident.as_ref(),
251 b_resident.as_ref(),
252 );
253 row_opt(
254 &mut out,
255 "mapped_bytes",
256 a_mapped.as_ref(),
257 b_mapped.as_ref(),
258 );
259 row_opt(&mut out, "narenas", a_narenas.as_ref(), b_narenas.as_ref());
260 out
261 }
262}
263
264/// Capture the running process's jemalloc heap state.
265///
266/// Advances the jemalloc `epoch` exactly once so cached `stats.*`
267/// values refresh (this is a jemalloc-internal operation —
268/// libjemalloc flushes per-thread stat caches into the shared
269/// counters under its mallctl lock, and the operation is thread-safe
270/// per jemalloc's mallctl contract), then reads five mallctl values.
271/// Any individual read error lands that field as `None`; an
272/// `epoch::advance()` error short-circuits the whole function to
273/// [`HostHeapState::default`] because without a refreshed epoch the
274/// stats reads would return values from an arbitrary prior snapshot.
275///
276/// Since libjemalloc is linked unconditionally via
277/// `tikv-jemalloc-sys` (see module doc), `epoch::advance()` and the
278/// subsequent mallctl reads always succeed on a well-formed build.
279/// The `is_err()` branch below is a defensive guard against future
280/// jemalloc versions changing the error surface, not an expected
281/// fallback.
282///
283/// When jemalloc is linked but is not `#[global_allocator]`, the
284/// reads succeed and return small-or-zero values —
285/// [`HostContext::heap_state`](crate::host_context::HostContext)
286/// detects that shape and stores `None` so the sidecar does not
287/// carry an empty row. When jemalloc IS `#[global_allocator]`
288/// (every binary target in this workspace), every field reflects
289/// real runner memory usage.
290///
291/// # Cost
292///
293/// One `mallctl("epoch", ...)` call plus five
294/// `mallctl("stats.*"/"arenas.narenas", ...)` reads. Each is a
295/// `memcpy` from a cached value after a short tree walk inside
296/// jemalloc — microseconds total. Safe to call on every sidecar
297/// write.
298pub fn collect() -> HostHeapState {
299 // epoch advance refreshes jemalloc's stat cache. libjemalloc is
300 // always linked (tikv-jemalloc-sys is a hard dep of
301 // tikv-jemalloc-ctl), so this only fails on an unexpected
302 // jemalloc-internal error path. Defensive fall-through to the
303 // all-None default keeps `collect` infallible.
304 if tikv_jemalloc_ctl::epoch::advance().is_err() {
305 return HostHeapState::default();
306 }
307 // Each read is independent — a single error on `stats.allocated`
308 // does not poison `arenas.narenas`. `arenas.narenas` is
309 // initialized at jemalloc startup (typically `4 * ncpus`) and
310 // always readable on a libjemalloc-linked build regardless of
311 // `--enable-stats`. `.ok().map(|v| v as _)` lands Err as None.
312 let active_bytes = tikv_jemalloc_ctl::stats::active::read()
313 .ok()
314 .map(|v| v as u64);
315 let allocated_bytes = tikv_jemalloc_ctl::stats::allocated::read()
316 .ok()
317 .map(|v| v as u64);
318 let resident_bytes = tikv_jemalloc_ctl::stats::resident::read()
319 .ok()
320 .map(|v| v as u64);
321 let mapped_bytes = tikv_jemalloc_ctl::stats::mapped::read()
322 .ok()
323 .map(|v| v as u64);
324 let narenas = tikv_jemalloc_ctl::arenas::narenas::read()
325 .ok()
326 .map(|v| v as usize);
327 HostHeapState {
328 active_bytes,
329 allocated_bytes,
330 resident_bytes,
331 mapped_bytes,
332 narenas,
333 }
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    /// Field labels in the top-to-bottom order `format_human` emits
    /// them (and in which `diff` reports them).
    const LABELS: [&str; 5] = [
        "allocated_bytes",
        "active_bytes",
        "resident_bytes",
        "mapped_bytes",
        "narenas",
    ];

    #[test]
    fn default_lands_all_none() {
        let HostHeapState {
            active_bytes,
            allocated_bytes,
            resident_bytes,
            mapped_bytes,
            narenas,
        } = HostHeapState::default();
        assert_eq!(active_bytes, None);
        assert_eq!(allocated_bytes, None);
        assert_eq!(resident_bytes, None);
        assert_eq!(mapped_bytes, None);
        assert_eq!(narenas, None);
    }

    #[test]
    fn test_fixture_populates_every_field() {
        let HostHeapState {
            active_bytes,
            allocated_bytes,
            resident_bytes,
            mapped_bytes,
            narenas,
        } = HostHeapState::test_fixture();
        assert!(active_bytes.is_some());
        assert!(allocated_bytes.is_some());
        assert!(resident_bytes.is_some());
        assert!(mapped_bytes.is_some());
        assert!(narenas.is_some());
    }

    #[test]
    fn format_human_lists_every_field() {
        let rendered = HostHeapState::test_fixture().format_human();
        for label in LABELS {
            assert!(rendered.contains(&format!("{label}:")));
        }
        assert!(rendered.ends_with('\n'));
    }

    /// Snapshot-style pin of the label sequence `format_human`
    /// emits. Mirrors
    /// `host_context::tests::format_human_field_order_is_stable` —
    /// `HostContext::format_human` embeds this block indented under
    /// the `heap_state:` parent label, and downstream diff tools +
    /// operator-eye scanning depend on a stable
    /// `allocated → active → resident → mapped → narenas`
    /// top-to-bottom ordering. The sibling tests only use
    /// order-blind `.contains(...)` checks, so a reshuffle of the
    /// rows inside `format_human` would slip past them; this test
    /// fails the moment the sequence drifts, forcing the author to
    /// acknowledge the reorder and double-check that the HostContext
    /// sub-block still reads coherently.
    #[test]
    fn format_human_field_order_is_stable() {
        let rendered = HostHeapState::default().format_human();
        // Label = text before the first ':' (or the whole line if
        // there is none); indented lines are not top-level labels.
        let seen: Vec<&str> = rendered
            .lines()
            .map(|line| line.split_once(':').map_or(line, |(label, _)| label))
            .filter(|label| !label.starts_with(' '))
            .collect();
        assert_eq!(
            seen,
            LABELS.to_vec(),
            "format_human field order drifted — if intentional, update \
             the expected vector and verify the HostContext heap_state \
             sub-block still reads in the expected top-to-bottom order",
        );
    }

    #[test]
    fn format_human_renders_none_as_unknown() {
        let rendered = HostHeapState::default().format_human();
        // With every field `None`, no line may end in anything but
        // the `(unknown)` placeholder.
        let offender = rendered
            .lines()
            .find(|line| !line.ends_with(": (unknown)"));
        if let Some(line) = offender {
            panic!("expected unknown, got {:?}", line);
        }
    }

    #[test]
    fn diff_is_empty_on_equal_snapshots() {
        let left = HostHeapState::test_fixture();
        let right = HostHeapState::test_fixture();
        assert!(left.diff(&right).is_empty());
    }

    #[test]
    fn diff_reports_only_changed_fields() {
        let before = HostHeapState::test_fixture();
        let after = HostHeapState {
            allocated_bytes: Some(9 * 1024 * 1024),
            ..before.clone()
        };
        let delta = before.diff(&after);
        assert!(delta.contains("allocated_bytes:"));
        // Everything after the first label was left untouched.
        for untouched in &LABELS[1..] {
            assert!(!delta.contains(&format!("{untouched}:")));
        }
        assert!(delta.contains('→'));
    }

    #[test]
    fn diff_renders_none_transitions() {
        let delta = HostHeapState::default().diff(&HostHeapState::test_fixture());
        // All fields go from absent to present, so the lines carry
        // the `(unknown) → x` form.
        assert!(delta.contains("allocated_bytes: (unknown) →"));
        assert!(delta.contains("narenas: (unknown) →"));
    }

    #[test]
    fn diff_renders_some_to_none_transitions() {
        // Mirror image of `diff_renders_none_transitions`: diffing a
        // full fixture against `default()` must surface each field
        // as `x → (unknown)` rather than swallowing it. Testing only
        // the `(unknown) → x` direction could hide a formatting bug
        // in the reverse direction (e.g. the renderer treating
        // `Some → None` as unchanged).
        let delta = HostHeapState::test_fixture().diff(&HostHeapState::default());
        assert!(
            delta.contains("allocated_bytes:") && delta.contains("→ (unknown)"),
            "expected allocated_bytes → (unknown), got:\n{}",
            delta,
        );
        for label in &LABELS[1..] {
            assert!(delta.contains(&format!("{label}:")));
        }
    }

    #[test]
    fn serde_round_trip_preserves_fields() {
        let original = HostHeapState::test_fixture();
        let json = serde_json::to_string(&original).expect("serialize");
        let restored: HostHeapState = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(restored, original);
    }

    #[test]
    fn serde_skips_none_fields() {
        // All fields are `None` and each one is dropped by
        // `skip_serializing_if`, leaving an empty JSON object.
        let json = serde_json::to_string(&HostHeapState::default()).expect("serialize");
        assert_eq!(json, "{}");
    }

    #[test]
    fn serde_accepts_missing_fields_via_defaults() {
        // An older sidecar with no heap_state fields must still
        // deserialize to a valid (all-`None`) snapshot.
        let restored: HostHeapState = serde_json::from_str("{}").expect("deserialize");
        assert_eq!(restored, HostHeapState::default());
    }

    /// Under the library-crate test harness, `tikv-jemallocator` is
    /// NOT installed as `#[global_allocator]` — the ktstr library
    /// itself declares no allocator so downstream consumers can
    /// pick their own. libjemalloc is still linked (hard dep of
    /// `tikv-jemalloc-ctl`), so `collect()` returns a populated
    /// struct with real mallctl values, but `stats.allocated` and
    /// `stats.active` are both zero because the application (the
    /// test binary running under libc's malloc) never allocates
    /// through libjemalloc.
    ///
    /// Under this shape, the jemalloc invariants `active >=
    /// allocated`, `resident >= active`, `mapped >= active` all
    /// hold trivially (`0 >= 0`, small >= 0, small >= 0). They are
    /// tautologies here and do NOT validate jemalloc behavior, so
    /// this test does not assert them. Real invariant coverage
    /// lives in the shipped ktstr binaries that link the ktstr
    /// library and so inherit `tikv_jemallocator::Jemalloc` as the
    /// `#[global_allocator]` from `src/lib.rs`; a live production
    /// run of any of those binaries exercises the non-trivial
    /// invariants. This is documented rather than feature-gated
    /// because a lib-crate integration test with its own
    /// `#[global_allocator]` would add its own binary target and is
    /// heavier than the coverage warrants.
    ///
    /// What this test DOES pin, non-trivially:
    /// - libjemalloc-linked-build contract: `narenas` is always
    ///   populated after `epoch::advance()` because arena count is
    ///   a jemalloc-init-time constant, independent of whether
    ///   jemalloc served any allocations.
    /// - `collect()` infallibility on a libjemalloc build — the
    ///   epoch::advance defensive guard does not fire.
    #[test]
    fn collect_returns_populated_snapshot_under_jemallocator() {
        let snapshot = collect();
        // libjemalloc is linked unconditionally, so every field
        // populates. `narenas` in particular is a jemalloc-init
        // constant (4*ncpus by default) and is expected to be
        // non-zero.
        let arena_limit = snapshot
            .narenas
            .expect("narenas must populate on a libjemalloc build");
        assert!(
            arena_limit > 0,
            "narenas must be > 0; jemalloc computes 4*ncpus at init",
        );
        // The stats reads populate too (not `None`) because
        // `--enable-stats` is forced by the `stats` feature in
        // Cargo.toml. Their VALUES depend on whether jemalloc is
        // `#[global_allocator]` — see the doc comment on this test.
        assert!(snapshot.allocated_bytes.is_some());
        assert!(snapshot.active_bytes.is_some());
        assert!(snapshot.resident_bytes.is_some());
        assert!(snapshot.mapped_bytes.is_some());
    }
}
552}