ktstr/test_support/metrics.rs
1//! Metric extraction pipeline for payload outputs.
2//!
3//! Payloads declared with [`OutputFormat::Json`] emit JSON to either
4//! stdout or stderr — `PayloadRun` applies a stdout-primary /
5//! stderr-fallback contract and hands whichever stream produced a
6//! non-empty metric set to this module. Benchmark tools split
7//! across the two conventions (schbench writes structured summaries
8//! to stderr, fio / stress-ng to stdout); the fallback lets either
9//! discipline round-trip through the same extractor. This module
10//! locates the JSON document region inside mixed text output
11//! (many tools emit a banner line before their structured body) and
12//! walks numeric leaves into [`Metric`]s keyed by dotted paths.
13//!
14//! [`OutputFormat::ExitCode`] returns an empty metric set; exit-code
15//! pass/fail is handled by the `MetricCheck::ExitCodeEq` pre-pass
16//! elsewhere.
17
18use crate::test_support::{Metric, MetricStream, OutputFormat, Polarity};
19
20/// Extract metrics from a payload's captured output per its declared
21/// [`OutputFormat`].
22///
23/// `output` carries whichever stream `PayloadRun` decided to extract
24/// from — stdout on the happy path, stderr under the stdout-primary
25/// stderr-fallback contract when stdout produced an empty result.
26/// The extractor itself is stream-agnostic; it parses whatever byte
27/// blob it is handed.
28///
/// Returns `Ok` with an empty `Vec` for [`OutputFormat::ExitCode`] and
/// for [`OutputFormat::Json`] when no JSON document is located or the
/// document carries no numeric leaves. JSON-parse errors are
/// non-fatal: the extraction returns `Ok(Vec::new())` — the current
/// implementation never produces `Err` — so downstream
/// [`MetricCheck`](crate::test_support::MetricCheck) evaluation reports each
/// referenced metric as missing rather than failing the whole run.
35///
36/// # Known truncation point: depth cap
37///
38/// The `Json` arm routes through [`walk_json_leaves`], which enforces
39/// a hard recursion cap of [`MAX_WALK_DEPTH`] (currently 64).
40/// Subtrees past that depth are silently dropped from the metric
41/// list — a `tracing::warn!` fires and a sentinel metric named
42/// [`WALK_TRUNCATION_SENTINEL_NAME`]
43/// (`__walk_json_leaves_truncated`) is appended to the return
44/// value, with `value` set to the depth at which truncation
45/// occurred. Callers that want to distinguish "no deep metrics"
46/// from "deep metrics dropped by the cap" scan the returned `Vec`
47/// for a metric with that name. Practical upper bound: 64 is well
48/// below serde_json's default parse recursion limit (128) and
49/// covers every realistic payload schema observed in the crate
50/// (fio maxes out around depth 8, schbench around depth 3).
51pub fn extract_metrics(
52 output: &str,
53 format: &OutputFormat,
54 stream: MetricStream,
55) -> Result<Vec<Metric>, String> {
56 match format {
57 OutputFormat::ExitCode => Ok(Vec::new()),
58 OutputFormat::Json => Ok(find_and_parse_json(output)
59 .map(|v| walk_json_leaves(&v, stream))
60 .unwrap_or_default()),
61 }
62}
63
64/// Locate a JSON document within mixed text output and parse it.
65///
66/// Many benchmark tools emit a banner line (fio, stress-ng)
67/// before the structured JSON body. A strict
68/// `serde_json::from_str(output)` fails for those. This helper
69/// first tries the whole input; on failure, scans for the first
70/// balanced `{...}` (or `[...]`) region and parses that.
71///
72/// Returns `None` when no JSON document is locatable or parsing
73/// both candidates fails. Does NOT heuristically repair malformed
74/// JSON — only brace-balancing for region extraction; serde_json
75/// does the actual parse strictly.
76///
77/// # Multiple JSON objects in one output
78///
79/// When `output` contains more than one balanced top-level region
80/// (e.g. `{"first": 1} noise {"second": 2}`), only the FIRST is
81/// returned. The region finder scans left-to-right for the first
82/// `{` or `[`, walks to its matching closer, and stops — it does
83/// not merge or concatenate subsequent balanced regions. Payloads
84/// that emit multiple JSON documents per run therefore lose all
85/// but the first; authors needing full capture should switch the
86/// payload to a wrapper that emits a single aggregate document.
87pub(crate) fn find_and_parse_json(output: &str) -> Option<serde_json::Value> {
88 // Fast path: whole input is a single JSON document.
89 if let Ok(v) = serde_json::from_str::<serde_json::Value>(output.trim()) {
90 return Some(v);
91 }
92 // Slow path: find the first balanced `{...}` or `[...]` region.
93 let region = extract_json_region(output)?;
94 serde_json::from_str::<serde_json::Value>(region).ok()
95}
96
97/// Find the first balanced `{...}` or `[...]` region in `s`.
98///
99/// Scans left-to-right for the first `{` or `[` and returns a slice
100/// spanning to its matching closer, tracking nesting + escaped
101/// quotes. Returns `None` if no opener found or no balanced match
102/// within the input.
103///
104/// This is NOT a JSON parser — it's a region locator. The returned
105/// slice is handed to `serde_json::from_str` for strict parsing.
106/// Mismatched structures (e.g. `{...}]`) are detected there, not
107/// here.
fn extract_json_region(s: &str) -> Option<&str> {
    let raw = s.as_bytes();
    // The first `{` or `[` anywhere in the input decides both where the
    // region starts and which bracket kind is counted.
    let start = raw.iter().position(|&b| matches!(b, b'{' | b'['))?;
    let (open, close) = match raw[start] {
        b'{' => (b'{', b'}'),
        _ => (b'[', b']'),
    };

    let mut nesting = 0i32;
    let mut inside_string = false;
    let mut skip_next = false;

    for (offset, &byte) in raw[start..].iter().enumerate() {
        // The byte right after a backslash inside a string is consumed
        // verbatim so an escaped quote cannot terminate the string.
        if skip_next {
            skip_next = false;
            continue;
        }
        if inside_string {
            if byte == b'\\' {
                skip_next = true;
            } else if byte == b'"' {
                inside_string = false;
            }
            continue;
        }
        if byte == b'"' {
            inside_string = true;
        } else if byte == open {
            nesting += 1;
        } else if byte == close {
            nesting -= 1;
            if nesting == 0 {
                // Opener and closer are ASCII, so both slice bounds fall
                // on UTF-8 character boundaries.
                let end = start + offset;
                return Some(&s[start..=end]);
            }
        }
    }
    // Ran out of input before the opener was balanced.
    None
}
143
144/// Walk numeric leaves of a JSON value, emitting [`Metric`]s keyed
145/// by dotted paths.
146///
147/// Objects contribute `"key.subkey"` paths; arrays contribute
148/// `"key.0", "key.1"`. Numeric leaves where `as_f64()` yields a
149/// finite value are emitted; String, Bool, and Null leaves are
150/// skipped. NaN/infinite f64s are rejected by serde_json at parse
151/// time, so natural inputs never reach this walker with non-finite
152/// numbers; the defensive `is_finite()` guard catches hand-built
153/// `Value` constructions.
154///
155/// Each [`Metric`] is emitted with [`Polarity::Unknown`] and empty
156/// unit; the caller resolves these against the payload's declared
157/// [`MetricHint`](crate::test_support::MetricHint)s to upgrade
158/// polarity.
159///
160/// # Stability contract (pre-1.0)
161///
162/// This function, [`MAX_WALK_DEPTH`], [`WALK_TRUNCATION_SENTINEL_NAME`],
163/// and [`is_truncation_sentinel_name`] together form the public
164/// numeric-JSON-extraction surface ktstr offers to in-tree sibling
165/// binaries (`ktstr-jemalloc-probe` is the one current external
166/// consumer — see `src/bin/jemalloc_probe.rs`). Their visibility
167/// is aligned at `pub` so an external consumer that wants to
168/// distinguish "no deep metrics present" from "deep metrics
169/// dropped by the depth cap" can reach every piece of the
170/// contract from outside the crate.
171///
172/// ktstr is pre-1.0: the four items above are free to change in
173/// signature or behaviour without a compat shim. A caller depending
174/// on them must vendor the ktstr version at a known commit, not
175/// track `main`. Concretely:
176///
177/// - Path format (`key.subkey` / `key.0`): may grow a shape
178/// option to prefer arrays-by-key over positional index.
179/// - Depth cap ([`MAX_WALK_DEPTH`]): may raise or lower as
180/// pathological inputs are observed; consumers must not hard-code
181/// the literal value.
182/// - Sentinel shape: may migrate to a typed return
183/// (`WalkResult { metrics, truncated: Option<u64> }`) per the
184/// note on [`WALK_TRUNCATION_SENTINEL_NAME`]. Consumers that
185/// need zero-collision certainty should gate on
186/// [`is_truncation_sentinel_name`] (the predicate, not the
187/// literal string) so a sentinel-name rewording lands in one
188/// place.
189pub fn walk_json_leaves(value: &serde_json::Value, stream: MetricStream) -> Vec<Metric> {
190 let mut out = Vec::new();
191 // Single reusable path buffer: children push their segment,
192 // recurse, then truncate back. O(total_path_chars) work across
193 // the whole walk instead of O(depth × path_chars) per leaf.
194 let mut path = String::new();
195 walk(value, &mut path, 0, stream, &mut out);
196 out
197}
198
/// Hard cap on recursion depth in `walk`. Any value nested deeper
/// than this is skipped; each skipped value fires one
/// [`tracing::warn!`] and appends one sentinel metric named
/// [`WALK_TRUNCATION_SENTINEL_NAME`]. Serde_json's default parser
/// recursion limit is 128, so this caps us well below that; a hand-built
/// `serde_json::Value` that bypasses the parser can still reach
/// arbitrary depth, so an explicit walker guard is the last line of
/// defence against a stack overflow.
206///
207/// See [`walk_json_leaves`]'s stability contract — the concrete
208/// value may change across ktstr pre-1.0 versions.
209pub const MAX_WALK_DEPTH: usize = 64;
210
211/// Sentinel metric name emitted when `walk` hits
212/// [`MAX_WALK_DEPTH`] and skips a subtree. Callers of
213/// [`walk_json_leaves`] / [`extract_metrics`] that want to
214/// distinguish "no deep metrics present" from "deep metrics
215/// dropped by the depth cap" scan the returned `Vec<Metric>` for
216/// a metric whose `name` equals this constant — its `value` is
217/// the depth at which truncation occurred, so nested failures at
218/// different subtrees produce one sentinel per trigger.
219///
220/// # Accepted collision risk
221///
222/// The double-underscore prefix makes collision extremely unlikely
223/// in practice, but not impossible: a benchmark whose JSON has
224/// this exact literal string as a **top-level** key produces a
225/// `Metric.name` indistinguishable from the cap-hit sentinel
226/// (nested leaves get at least one `.` injected by `walk`, so only
227/// the top-level depth-0 push can produce a name without a `.`).
228/// Consumers treat the sentinel as advisory, not authoritative —
229/// a caller that depends on zero-collision guarantees must reject
230/// sentinel-named paths from its input schema.
231///
232/// A future refactor could eliminate the risk structurally by
233/// widening the return type to `WalkResult { metrics: Vec<Metric>,
234/// truncated: Option<u64> }` — separating the truncation signal
235/// from the metric stream. Held off pending a consumer that
236/// materially benefits from zero-collision certainty; the current
237/// advisory contract is sufficient for every in-crate consumer.
238///
239/// Exported `pub` so sibling binaries that embed ktstr as a
240/// library (e.g. `ktstr-jemalloc-probe`) can gate on the
241/// sentinel from their own consumer code. See
242/// [`walk_json_leaves`]'s stability contract — consumers
243/// comparing against the sentinel should prefer
244/// [`is_truncation_sentinel_name`] over the literal string so a
245/// future rewording lands in one place.
pub const WALK_TRUNCATION_SENTINEL_NAME: &str = "__walk_json_leaves_truncated";

/// Returns `true` when `name` is exactly the walk-truncation sentinel
/// ([`WALK_TRUNCATION_SENTINEL_NAME`]).
///
/// Consumers should call this predicate rather than comparing against
/// the literal string, so that a renamed sentinel (or an additional
/// sentinel variant, e.g. a parser-rejection sentinel) only has to be
/// handled here.
///
/// Visibility matches [`walk_json_leaves`] and
/// [`WALK_TRUNCATION_SENTINEL_NAME`] so external consumers have a
/// complete sentinel-discrimination API.
pub fn is_truncation_sentinel_name(name: &str) -> bool {
    WALK_TRUNCATION_SENTINEL_NAME == name
}
261
262fn walk(
263 value: &serde_json::Value,
264 path: &mut String,
265 depth: usize,
266 stream: MetricStream,
267 out: &mut Vec<Metric>,
268) {
269 if depth > MAX_WALK_DEPTH {
270 tracing::warn!(
271 depth,
272 max = MAX_WALK_DEPTH,
273 path = %path,
274 "walk_json_leaves: depth cap hit, subtree skipped",
275 );
276 // Emit a sentinel metric so callers inspecting only the
277 // returned `Vec<Metric>` see the truncation — the
278 // `tracing::warn!` above only reaches a subscriber, which
279 // the default test dispatch path does not install. See
280 // [`WALK_TRUNCATION_SENTINEL_NAME`] for the discrimination
281 // contract.
282 out.push(Metric {
283 name: WALK_TRUNCATION_SENTINEL_NAME.to_string(),
284 value: depth as f64,
285 polarity: Polarity::Unknown,
286 unit: String::new(),
287 stream,
288 });
289 return;
290 }
291 match value {
292 serde_json::Value::Object(map) => {
293 for (k, v) in map {
294 let saved_len = path.len();
295 if !path.is_empty() {
296 path.push('.');
297 }
298 path.push_str(k);
299 walk(v, path, depth + 1, stream, out);
300 path.truncate(saved_len);
301 }
302 }
303 serde_json::Value::Array(items) => {
304 for (i, v) in items.iter().enumerate() {
305 let saved_len = path.len();
306 if !path.is_empty() {
307 path.push('.');
308 }
309 // Avoid an extra String allocation for the index
310 // segment by writing directly into `path` via the
311 // fmt::Write impl (infallible for String).
312 use std::fmt::Write;
313 let _ = write!(path, "{i}");
314 walk(v, path, depth + 1, stream, out);
315 path.truncate(saved_len);
316 }
317 }
318 serde_json::Value::Number(n) => {
319 if let Some(f) = n.as_f64()
320 && f.is_finite()
321 {
322 // Leaf emission is the one unavoidable allocation —
323 // `Metric.name` is owned. `clone()` copies exactly
324 // the current path bytes, not every intermediate
325 // ancestor path that `format!` used to materialize.
326 out.push(Metric {
327 name: path.clone(),
328 value: f,
329 polarity: Polarity::Unknown,
330 unit: String::new(),
331 stream,
332 });
333 }
334 }
335 // Strings/bools/null: skipped. MetricCheck::Exists can gate on
336 // presence via the PayloadMetrics lookup — a missing
337 // string-valued key is treated the same as a missing numeric.
338 _ => {}
339 }
340}
341
342#[cfg(test)]
343mod tests {
344 use super::*;
345
#[test]
fn is_truncation_sentinel_name_matches_the_constant() {
    // The predicate must accept the exported sentinel constant itself.
    let accepted = is_truncation_sentinel_name(WALK_TRUNCATION_SENTINEL_NAME);
    assert!(accepted);
}
350
#[test]
fn is_truncation_sentinel_name_rejects_other_names() {
    // An unrelated name, the empty string, and a near-miss that shares
    // the sentinel's prefix must all be rejected.
    for candidate in ["foo", "", "__walk_json_leaves"] {
        assert!(!is_truncation_sentinel_name(candidate));
    }
}
358
#[test]
fn exit_code_returns_empty() {
    // The exit-code format never yields metrics, whatever the output is.
    let metrics =
        extract_metrics("whatever", &OutputFormat::ExitCode, MetricStream::Stdout).unwrap();
    assert!(metrics.is_empty());
}
364
#[test]
fn json_full_document_extracts_numeric_leaves() {
    let document = r#"{"iops": 10000, "lat_ns": 500}"#;
    let metrics = extract_metrics(document, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 2);
    // Both top-level numeric keys must be present, in any order.
    for expected in ["iops", "lat_ns"] {
        assert!(metrics.iter().any(|metric| metric.name == expected));
    }
    // Freshly extracted metrics carry no polarity and no unit yet.
    for metric in metrics.iter() {
        assert_eq!(metric.polarity, Polarity::Unknown);
        assert_eq!(metric.unit, "");
    }
}
378
#[test]
fn json_with_banner_prefix_extracts_region() {
    // A fio-like banner line precedes the JSON body, so whole-input
    // parsing fails and the region finder has to locate the object.
    let banner_then_body = "fio-3.36 starting up\n{\"iops\": 500}";
    let metrics =
        extract_metrics(banner_then_body, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 1);
    let only = &metrics[0];
    assert_eq!(only.name, "iops");
    assert_eq!(only.value, 500.0);
}
388
#[test]
fn json_nested_objects_use_dotted_paths() {
    // Each object level contributes one dot-separated path segment.
    let document = r#"{"jobs": {"0": {"read": {"iops": 123}}}}"#;
    let metrics = extract_metrics(document, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 1);
    let leaf = &metrics[0];
    assert_eq!(leaf.name, "jobs.0.read.iops");
    assert_eq!(leaf.value, 123.0);
}
397
#[test]
fn json_arrays_use_numeric_index_paths() {
    let document = r#"{"samples": [100, 200, 300]}"#;
    let metrics = extract_metrics(document, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 3);
    // Sort by name so the comparison does not depend on emission order.
    let mut observed: Vec<(&str, f64)> = Vec::with_capacity(metrics.len());
    for metric in &metrics {
        observed.push((metric.name.as_str(), metric.value));
    }
    observed.sort_by(|left, right| left.0.cmp(right.0));
    let expected = vec![
        ("samples.0", 100.0),
        ("samples.1", 200.0),
        ("samples.2", 300.0),
    ];
    assert_eq!(observed, expected);
}
414
#[test]
fn json_malformed_returns_empty() {
    // Input with no JSON in it at all is not an error: it yields `Ok`
    // with an empty metric list.
    let not_json = "garbage not json";
    let metrics = extract_metrics(not_json, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert!(metrics.is_empty());
}
425
#[test]
fn json_empty_stdout_returns_empty() {
    // An empty capture behaves like any other document-free input.
    let metrics = extract_metrics("", &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert!(metrics.is_empty());
}
431
#[test]
fn json_skips_string_and_bool_leaves() {
    // `name` is a string and `ok` a bool; only the numeric `iops`
    // leaf may survive extraction.
    let document = r#"{"name": "fio", "ok": true, "iops": 42}"#;
    let metrics = extract_metrics(document, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 1);
    assert_eq!(metrics[0].name, "iops");
}
440
#[test]
fn json_top_level_array_extracts_entries() {
    // A bare top-level array is a valid document; every numeric
    // element becomes one metric.
    let metrics = extract_metrics("[1, 2, 3]", &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 3);
}
447
#[test]
fn extract_json_region_finds_braced_region() {
    // Text on either side of the object is not part of the region.
    let region = extract_json_region("prefix {\"a\": 1} suffix").unwrap();
    assert_eq!(region, "{\"a\": 1}");
}
453
#[test]
fn extract_json_region_handles_nested_braces() {
    // The region must extend to the closer matching the OUTER opener.
    let region = extract_json_region("log: {\"a\": {\"b\": 1}} done").unwrap();
    assert_eq!(region, "{\"a\": {\"b\": 1}}");
}
459
#[test]
fn extract_json_region_skips_braces_in_strings() {
    // A `}` inside a string literal must not close the region early.
    let input = r#"{"text": "not a }"}"#;
    let region = extract_json_region(input).unwrap();
    assert_eq!(region, input);
}
465
#[test]
fn extract_json_region_handles_escaped_quotes() {
    // Backslash-escaped quotes stay inside the string they appear in.
    let input = r#"{"text": "has \"escaped\" quotes"}"#;
    let region = extract_json_region(input).unwrap();
    assert_eq!(region, input);
}
471
#[test]
fn extract_json_region_returns_none_for_no_brace() {
    // Without any `{` or `[` there is no region to return.
    let region = extract_json_region("no braces here");
    assert!(region.is_none());
}
476
#[test]
fn extract_json_region_returns_none_for_unbalanced() {
    // An opener that is never closed yields no region.
    let region = extract_json_region("incomplete {");
    assert!(region.is_none());
}
481
482 // -- extract_json_region stderr-bracket-noise scenarios --
483 //
484 // Interaction coverage for the realistic payload scenario:
485 // the captured stream carries stderr-style bracket noise
486 // (dmesg-like `[ TIME] message` lines, stress-ng status,
487 // kernel stacktraces printed with `[...]` prefixes, etc.)
488 // BEFORE the actual stdout JSON document. The region finder
489 // scans left-to-right and returns the FIRST balanced
490 // `{...}` or `[...]` region regardless of what the outer
491 // content looks like, so a leading `[ 0.001234]`-shaped
492 // token can in fact parse as a valid one-element JSON array
493 // and masquerade as the payload's data.
494 //
495 // Each of the three scenarios below is isolated in its own
496 // `#[test]` so an individual failure points directly at the
497 // limitation that regressed, not a shared composite that
498 // requires scanning the whole test body to localize. Each
499 // scenario documents EXPECTED behaviour, not desired behaviour.
500 //
// If a future change adds a "skip unparseable regions and
// keep scanning" rule, the stderr-balanced-noise scenario
// will flip — the `[stderr bracket message]` region fails to
// parse as JSON, so a richer extractor would advance past
// it to the stdout `{..}`. The dmesg-timestamp scenario
// would NOT flip under that rule alone: `[ 0.001234]` IS
// valid JSON (a single-element array), so an
// "unparseable-skip" heuristic never triggers on it.
// Correcting the dmesg scenario requires a stronger rule —
// e.g. prefer regions whose shape matches the payload's
// expected metric keys, or reject numeric-leaf-only regions
// that lack object wrappers. Its flip is therefore the
// correct signal that `extract_metrics` gained shape-aware
// selection, not just unparseable-skip.
515
/// Scenario 1: dmesg-style timestamp prefix
/// `[ 0.001234] kernel boot banner {"iops": 100}`.
/// The leading bracket parses as a JSON array
/// (`[0.001234]`). `extract_metrics` on this input returns
/// a metric for the array's single element keyed by its
/// positional path, NOT the stdout `{"iops": 100}`.
/// Documents the known-limitation behaviour: callers that
/// need robust stderr-noise rejection must pre-strip
/// timestamped brackets before handing the blob to
/// `extract_metrics`.
#[test]
fn extract_json_region_dmesg_timestamp_prefix_wins_over_stdout_json() {
    let input = "[ 0.001234] kernel boot banner\n{\"iops\": 100}";
    let first_region = extract_json_region(input)
        .expect("dmesg-style prefix starts with `[` — finder must return SOME region");
    assert_eq!(
        first_region, "[ 0.001234]",
        "left-to-right scan picks the FIRST balanced region; \
         the dmesg prefix is self-balancing and wins over the \
         stdout JSON that follows",
    );
    // End-to-end: `extract_metrics` on the same input should
    // emit a metric derived from the leading array, not from
    // the stdout `{"iops": 100}`. This pins that payloads
    // needing stderr-noise tolerance must pre-strip
    // timestamps rather than relying on the extractor.
    let m = extract_metrics(input, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    let names: Vec<&str> = m.iter().map(|x| x.name.as_str()).collect();
    assert!(
        !names.contains(&"iops"),
        "regression check: the finder picking up the dmesg \
         timestamp array means the real stdout `iops` metric \
         is NOT extracted; if this ever starts containing \
         `iops`, the finder must have gained smarter \
         noise-skipping (update this test and the \
         documented contract); got: {names:?}",
    );
}
554
/// Scenario 2: stderr noise that is NOT self-balancing (an
/// open bracket without its closer: `[FAIL stress-ng ...`).
/// The finder commits to the first opener and returns
/// `None` rather than retrying past its failure point.
/// Stdout JSON that follows the unbalanced prefix is LOST,
/// not recovered. Documents a known limitation: the finder
/// has no fallback-search after a failed region.
#[test]
fn extract_json_region_unbalanced_stderr_prefix_returns_none() {
    let input = "[FAIL stress-ng: worker timed out\n{\"iops\": 200}";
    let r = extract_json_region(input);
    assert!(
        r.is_none(),
        "unbalanced leading `[` makes the finder return \
         None even though valid stdout JSON follows — the \
         finder commits to the first opener and does not \
         retry past its failure point. Known limitation; \
         callers pre-strip stderr noise. Got: {r:?}",
    );
}
575
/// Scenario 3: stderr noise with BALANCED brackets containing
/// non-numeric content (`[stderr bracket message]`). The
/// region is balanced at the byte level so the finder
/// returns it, but `serde_json` rejects the unquoted content
/// as invalid JSON. `extract_metrics` yields no metrics
/// because the extractor only tries the FIRST region — the
/// following valid stdout `{..}` is never inspected.
///
/// Also acts as a positive control: the same stdout JSON
/// with NO preceding noise extracts cleanly, so the failure
/// above is specifically due to the preceding bracket noise,
/// not the JSON itself.
#[test]
fn extract_json_region_balanced_unparseable_stderr_wins_first_region() {
    let input = "[stderr bracket message]\n{\"iops\": 300}";
    let first_region = extract_json_region(input).expect(
        "leading `[stderr bracket message]` is a balanced region \
         at the byte level (ignoring JSON validity); finder must \
         return it",
    );
    assert_eq!(
        first_region, "[stderr bracket message]",
        "balanced-but-invalid-JSON regions still win the \
         first-region scan; the following valid `{{..}}` is \
         never inspected by the finder",
    );
    let m = extract_metrics(input, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert!(
        m.is_empty(),
        "region parses unsuccessfully as JSON — fallback \
         yields no metrics. The stdout `iops` metric is lost. \
         Documents the known limitation: mixed-stream \
         captures lose valid JSON when a preceding balanced \
         region fails to parse; got: {m:?}",
    );

    // Positive control: the same stdout JSON with NO
    // preceding noise extracts cleanly, so the failure
    // above is specifically due to the preceding bracket
    // noise, not the JSON itself.
    let m_clean = extract_metrics(
        r#"{"iops": 400}"#,
        &OutputFormat::Json,
        MetricStream::Stdout,
    )
    .unwrap();
    let clean_names: Vec<&str> = m_clean.iter().map(|x| x.name.as_str()).collect();
    assert!(
        clean_names.contains(&"iops"),
        "control: stdout JSON in isolation must extract the \
         `iops` metric so the preceding assertion is \
         isolating the noise-interaction behaviour, not \
         hiding a broken extractor; got: {clean_names:?}",
    );
}
631
#[test]
fn walk_json_leaves_polarity_is_unknown_before_hint_resolution() {
    // The walker never assigns a polarity itself; every leaf starts
    // out as `Polarity::Unknown`.
    let document: serde_json::Value = serde_json::from_str(r#"{"a": 1}"#).unwrap();
    let metrics = walk_json_leaves(&document, MetricStream::Stdout);
    let first = &metrics[0];
    assert_eq!(first.polarity, Polarity::Unknown);
}
638
/// Every metric the walker emits must carry the `stream` value it
/// was called with, for both `Stdout` and `Stderr`.
///
/// Two fixtures are walked: an object (`{"a": 1}`, one leaf) and an
/// array of objects (`[{"a": 1}, {"b": 2}]`, two leaves), so both the
/// object-recurse and the array-recurse branches are covered — a
/// regression that stamped the stream on only one branch would pass
/// one fixture and fail the other.
#[test]
fn walk_json_leaves_tags_stream() {
    let object_fixture: serde_json::Value = serde_json::from_str(r#"{"a": 1}"#).unwrap();
    let array_fixture: serde_json::Value =
        serde_json::from_str(r#"[{"a": 1}, {"b": 2}]"#).unwrap();
    let fixtures = [
        ("object", &object_fixture, 1_usize),
        ("array", &array_fixture, 2_usize),
    ];
    for (fixture_label, fixture, expected_len) in fixtures {
        for stream in [MetricStream::Stdout, MetricStream::Stderr] {
            let leaves = walk_json_leaves(fixture, stream);
            assert_eq!(
                leaves.len(),
                expected_len,
                "walker on {fixture_label} fixture must produce \
                 exactly {expected_len} leaf(s); got {}",
                leaves.len(),
            );
            for (i, leaf) in leaves.iter().enumerate() {
                assert_eq!(
                    leaf.stream, stream,
                    "{fixture_label} leaf {i}: stream tag must \
                     match the argument ({stream:?}); got {:?}. \
                     A regression that stamped the stream only \
                     on the object-recurse branch would pass \
                     the object fixture but fail here.",
                    leaf.stream,
                );
            }
        }
    }
}
680
/// `extract_metrics` with the `Json` format must pass its `stream`
/// argument through unchanged to every emitted `Metric.stream`.
///
/// This guards the caller-facing entry point: dropping the
/// parameter, or hardcoding `Stdout` inside the `Json` arm, would
/// silently lose stderr attribution and break the `PayloadRun`
/// stderr-fallback labelling. It is the one-layer-up counterpart of
/// `walk_json_leaves_tags_stream`.
#[test]
fn extract_metrics_threads_stream_from_argument_to_emitted_metric() {
    let document = r#"{"iops": 100}"#;
    for stream in [MetricStream::Stdout, MetricStream::Stderr] {
        let emitted = extract_metrics(document, &OutputFormat::Json, stream).unwrap();
        assert_eq!(emitted.len(), 1, "one leaf expected from {{\"iops\": 100}}",);
        let leaf = &emitted[0];
        assert_eq!(
            leaf.stream, stream,
            "extract_metrics must thread stream={stream:?} to the \
             emitted Metric; got {:?}",
            leaf.stream,
        );
    }
}
704
705 // Additional edge-case coverage for walk_json_leaves paths.
706
#[test]
fn json_deeply_nested_array_of_objects() {
    // An array of objects: each element's field is addressed as
    // `samples.<index>.<field>`.
    let document = r#"{"samples": [{"iops": 100}, {"iops": 200}, {"iops": 300}]}"#;
    let metrics = extract_metrics(document, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 3);
    for expected in ["samples.0.iops", "samples.1.iops", "samples.2.iops"] {
        assert!(metrics.iter().any(|metric| metric.name == expected));
    }
}
719
#[test]
fn json_large_integer_round_trip_via_f64() {
    // 10^12 is large but far below 2^53, the bound up to which every
    // integer is exactly representable as an f64, so the conversion
    // through `Number::as_f64` must not change the value.
    let document = r#"{"big_iops": 1000000000000}"#;
    let metrics = extract_metrics(document, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 1);
    let emitted = metrics[0].value;
    assert_eq!(emitted, 1_000_000_000_000.0);
}
730
#[test]
fn json_fio_style_full_output_with_multiline_banner() {
    // fio-like capture: several banner lines and a blank line come
    // before the JSON body, so the region finder has to skip the whole
    // non-JSON prefix.
    let captured = "fio-3.36 starting up\n\
                    Running fio with 4 jobs\n\
                    test: (g=0): rw=randread, bs=4k, ioengine=libaio\n\
                    \n\
                    {\"jobs\": [{\"jobname\": \"test\", \"read\": {\"iops\": 12345, \"bw_bytes\": 50593792}}], \
                    \"disk_util\": [{\"util\": 99.5}]}";
    let metrics = extract_metrics(captured, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    // Three numeric leaves are expected; `jobname` is a string and is
    // therefore skipped.
    assert_eq!(metrics.len(), 3);
    let by_name: std::collections::BTreeMap<&str, f64> = metrics
        .iter()
        .map(|metric| (metric.name.as_str(), metric.value))
        .collect();
    let expectations = [
        ("jobs.0.read.iops", 12345.0),
        ("jobs.0.read.bw_bytes", 50593792.0),
        ("disk_util.0.util", 99.5),
    ];
    for (name, value) in expectations {
        assert_eq!(by_name.get(name), Some(&value));
    }
}
752
#[test]
fn walk_json_leaves_skips_nonfinite_defensively() {
    // `serde_json::Number::from_f64` returns `None` for NaN and both
    // infinities, so a `Value::Number` holding a non-finite f64 cannot
    // be constructed through it; this test pins that precondition of
    // the walker's defensive `is_finite()` filter.
    for non_finite in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
        assert!(serde_json::Number::from_f64(non_finite).is_none());
    }
    // A finite value, by contrast, is accepted.
    assert!(serde_json::Number::from_f64(2.78).is_some());
}
766
/// JSON integers above 2^53 lose precision once coerced to f64 via
/// `serde_json::Number::as_f64`: an f64 has a 53-bit significand (52
/// stored bits plus the implicit leading one), so beyond
/// 9007199254740992 not every integer is representable. This pins the
/// observed behaviour that `9007199254740993` (2^53 + 1) is emitted as
/// `9007199254740992.0` (2^53). Payloads emitting integer metrics
/// larger than 2^53 must scale down (µs → s) or encode as strings —
/// the Json walker cannot preserve integer identity past that
/// boundary.
#[test]
fn json_large_integer_above_2_pow_53_loses_precision() {
    // 2^53 + 1 sits exactly halfway between the representable
    // neighbours 2^53 and 2^53 + 2; round-half-to-even selects 2^53.
    // The input is written as an integer in the JSON text (and as a
    // u64 below) so the source value stays distinct from the f64 that
    // comes out.
    let document = r#"{"huge": 9007199254740993}"#;
    let metrics = extract_metrics(document, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(metrics.len(), 1);
    let emitted = metrics[0].value;
    // The emitted value is the nearest representable f64, i.e. 2^53.
    assert_eq!(emitted, 9_007_199_254_740_992.0_f64);
    // A plain u64 -> f64 cast of the source integer rounds the same
    // way; that equality is the lossy conversion being documented.
    let cast_from_source: f64 = 9_007_199_254_740_993_u64 as f64;
    assert_eq!(emitted, cast_from_source);
    // Converting back to u64 yields 2^53, not the original 2^53 + 1,
    // so the round trip is not identity-preserving.
    let round_tripped = emitted as u64;
    assert_eq!(round_tripped, 9_007_199_254_740_992_u64);
    assert_ne!(round_tripped, 9_007_199_254_740_993_u64);
}
804
/// Companion to `json_large_integer_above_2_pow_53_loses_precision`:
/// 2^53 itself is exactly representable as an f64, so the value
/// must come back unchanged. Together the two tests pin both sides
/// of the precision cliff.
#[test]
fn json_integer_at_2_pow_53_is_exact() {
    let extracted = extract_metrics(
        r#"{"exact": 9007199254740992}"#,
        &OutputFormat::Json,
        MetricStream::Stdout,
    )
    .unwrap();
    assert_eq!(extracted.len(), 1);
    let observed = extracted[0].value;
    assert_eq!(observed, 9_007_199_254_740_992.0_f64);
}
816
/// An object followed by non-JSON prose must still parse. The whole
/// input is not a valid document here, so `find_and_parse_json` has
/// to recover the leading `{...}` region and parse only that. Pins
/// that trailing garbage after a balanced object is stripped.
#[test]
fn find_and_parse_json_recovers_object_with_trailing_garbage() {
    let input = r#"{"a": 1, "b": 2} --- trailing prose from banner"#;
    let parsed = find_and_parse_json(input).expect("trailing garbage must not block parse");
    // Both members of the leading object must survive the recovery.
    for (key, want) in [("a", 1), ("b", 2)] {
        assert_eq!(parsed[key], serde_json::json!(want));
    }
}
831
/// Array counterpart of the trailing-garbage recovery: a leading
/// `[...]` followed by extra text is recovered the same way a
/// leading `{...}` is.
#[test]
fn find_and_parse_json_recovers_array_with_trailing_garbage() {
    let input = "[1, 2, 3]\nextra: banner line\n";
    let want = serde_json::json!([1, 2, 3]);
    let parsed = find_and_parse_json(input).expect("array with trailing garbage must parse");
    assert_eq!(parsed, want);
}
841
/// fio-style output: a banner line, the JSON body, and then a
/// trailing "done" marker. The trailer here contains a stray,
/// unbalanced `}`; the body must still be the document that is
/// parsed.
#[test]
fn find_and_parse_json_with_banner_and_trailer() {
    // One fragment per line of the captured output.
    let input = concat!(
        "fio-3.36 starting up\n",
        "{\"iops\": 100}\n",
        "fio done }",
    );
    let parsed = find_and_parse_json(input).expect("banner + trailer must resolve to body");
    assert_eq!(parsed["iops"], serde_json::json!(100));
}
852
/// When the text after the first document contains a second,
/// fully balanced object, only the first one is returned — the two
/// regions are never merged into a single document.
#[test]
fn find_and_parse_json_returns_first_region_when_trailer_also_balanced() {
    let input = r#"{"first": 1} unrelated {"second": 2}"#;
    let parsed = find_and_parse_json(input).expect("first balanced region parses");
    assert_eq!(parsed["first"], serde_json::json!(1));
    // Nothing from the later object may leak into the result.
    let leaked = parsed.get("second");
    assert!(leaked.is_none(), "second region must not merge in");
}
864
/// `{` and `}` that appear inside a JSON string literal are data,
/// not structure, and must not confuse the search for the document
/// region. The fixture wraps such a string in a banner line and a
/// trailing line so the whole input is not itself valid JSON, and
/// checks that the string value comes back untouched.
#[test]
fn find_and_parse_json_ignores_braces_inside_string_literals() {
    let input = concat!(
        "fio-3.36 starting up\n",
        r#"{"msg": "look at {nested} in text", "ok": 1}"#,
        "\ntrailing banner",
    );
    let parsed =
        find_and_parse_json(input).expect("embedded braces in string must not break scan");
    assert_eq!(parsed["msg"], serde_json::json!("look at {nested} in text"));
    assert_eq!(parsed["ok"], serde_json::json!(1));
}
882
/// Negative leaves are extracted with their sign intact — no
/// absolute value is taken and nothing is filtered out. This
/// matters for metrics such as scheduler_delta_ns that can
/// legitimately be negative (an improvement over the baseline).
#[test]
fn json_negative_numbers_extract_preserving_sign() {
    use std::collections::BTreeMap;

    let doc = r#"{"delta_ns": -500.5, "underflow": -1000000}"#;
    let metrics = extract_metrics(doc, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    let lookup: BTreeMap<&str, f64> = metrics
        .iter()
        .map(|metric| (metric.name.as_str(), metric.value))
        .collect();
    for (name, want) in [("delta_ns", -500.5), ("underflow", -1_000_000.0)] {
        assert_eq!(lookup.get(name), Some(&want));
    }
}
896
/// A zero is a real measurement and must be emitted like any other
/// value. A payload that genuinely measured zero (idle CPU, no
/// errors) has to yield a zero metric; otherwise a downstream check
/// such as `MetricCheck::exit_code_eq(0)` against an `exit_code`
/// metric of 0.0 would report the metric as "missing" instead of
/// passing.
#[test]
fn json_zero_values_are_emitted_not_filtered() {
    use std::collections::BTreeMap;

    let doc = r#"{"errors": 0, "cpu_idle_pct": 0.0, "count": -0.0}"#;
    let m = extract_metrics(doc, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    let by_name: BTreeMap<&str, f64> = m
        .iter()
        .map(|metric| (metric.name.as_str(), metric.value))
        .collect();
    assert_eq!(by_name.len(), 3, "all three zeros must extract: {m:?}");
    // Integer zero, float zero and negative zero all compare equal
    // to 0.0 numerically, so one expectation covers the three keys.
    for key in ["errors", "cpu_idle_pct", "count"] {
        assert_eq!(by_name.get(key), Some(&0.0));
    }
}
915
/// Positive, negative and zero leaves in the same document must
/// all be extracted, each under its own key and with its own value.
/// Checking only the count would let a walker that mangled signs or
/// names slip through, so every name/value pair is asserted.
#[test]
fn json_mixed_signs_and_zero_all_extract() {
    let s = r#"{"pos": 10.0, "neg": -10.0, "zero": 0.0}"#;
    let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(m.len(), 3);
    let by_name: std::collections::BTreeMap<&str, f64> =
        m.iter().map(|x| (x.name.as_str(), x.value)).collect();
    // The three keys are distinct, so the map must keep all three
    // and each must carry the value (sign included) it was given.
    assert_eq!(by_name.get("pos"), Some(&10.0));
    assert_eq!(by_name.get("neg"), Some(&-10.0));
    assert_eq!(by_name.get("zero"), Some(&0.0));
}
924
/// A bare top-level `{}` is a valid document with nothing to
/// report: extraction succeeds (no error, no panic) and the
/// resulting metric list is empty.
#[test]
fn json_empty_object_yields_no_metrics() {
    let format = OutputFormat::Json;
    let m = extract_metrics("{}", &format, MetricStream::Stdout).unwrap();
    assert!(m.is_empty(), "empty object has no leaves: {m:?}");
}
934
/// A bare top-level `[]` behaves like the empty object: extraction
/// succeeds and produces no metrics.
#[test]
fn json_empty_array_yields_no_metrics() {
    let format = OutputFormat::Json;
    let m = extract_metrics("[]", &format, MetricStream::Stdout).unwrap();
    assert!(m.is_empty(), "empty array has no leaves: {m:?}");
}
942
/// Empty containers nested inside other containers contribute
/// nothing either: with no numeric leaf anywhere in the tree the
/// result is empty. Pins the "no ghost metrics from empty
/// containers" invariant.
#[test]
fn json_nested_empty_containers_yield_no_metrics() {
    let doc = r#"{"outer": {"inner": {}, "also": []}}"#;
    let format = OutputFormat::Json;
    let m = extract_metrics(doc, &format, MetricStream::Stdout).unwrap();
    assert!(m.is_empty(), "nested empties emit nothing: {m:?}");
}
953
/// Empty containers sitting next to a real numeric leaf stay
/// silent while the real leaf is still emitted.
#[test]
fn json_empty_container_mixed_with_real_metrics() {
    let doc = r#"{"iops": 100.0, "meta": {}, "samples": []}"#;
    let extracted = extract_metrics(doc, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(extracted.len(), 1);
    // The single surviving metric is the numeric leaf.
    let only = &extracted[0];
    assert_eq!(only.name, "iops");
    assert_eq!(only.value, 100.0);
}
964
/// Pins the dotted path produced for a deeply nested leaf. The
/// assertion is on observable output only, so any refactor of how
/// the path is assembled internally is caught if it drops a
/// segment or doubles a separator.
#[test]
fn walk_json_leaves_deep_nesting_paths_are_correct() {
    // Six levels of objects with a single leaf at a.b.c.d.e.f.
    let doc = r#"{"a":{"b":{"c":{"d":{"e":{"f": 42.0}}}}}}"#;
    let extracted = extract_metrics(doc, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(extracted.len(), 1);
    let leaf = &extracted[0];
    assert_eq!(leaf.name, "a.b.c.d.e.f");
    assert_eq!(leaf.value, 42.0);
}
979
/// Children of the same parent must each get `parent.child` as
/// their path. The failure this guards against is a path that
/// keeps growing across siblings — `root.a`, `root.a.b`,
/// `root.a.b.c` — instead of `root.a`, `root.b`, `root.c`.
#[test]
fn walk_json_leaves_siblings_do_not_accumulate_path() {
    use std::collections::BTreeSet;

    let doc = r#"{"root":{"a": 1, "b": 2, "c": 3}}"#;
    let extracted = extract_metrics(doc, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    assert_eq!(extracted.len(), 3);
    let expected: BTreeSet<&str> = ["root.a", "root.b", "root.c"].into_iter().collect();
    // Compare as sets: emission order is not what this test pins.
    let names: BTreeSet<&str> = extracted
        .iter()
        .map(|metric| metric.name.as_str())
        .collect();
    assert_eq!(names, expected, "path must truncate between siblings");
}
995
/// Array elements are addressed by index in the dotted path
/// (`arr.0`, `arr.1`). This fixture alternates objects and arrays
/// so that index segments and key segments are mixed in one path.
#[test]
fn walk_json_leaves_deep_array_object_interleaving() {
    use std::collections::BTreeMap;

    let doc = r#"{"data":[{"vals":[10.0, 20.0]},{"vals":[30.0]}]}"#;
    let extracted = extract_metrics(doc, &OutputFormat::Json, MetricStream::Stdout).unwrap();
    let lookup: BTreeMap<&str, f64> = extracted
        .iter()
        .map(|metric| (metric.name.as_str(), metric.value))
        .collect();
    let expected = [
        ("data.0.vals.0", 10.0),
        ("data.0.vals.1", 20.0),
        ("data.1.vals.0", 30.0),
    ];
    for (path, want) in expected {
        assert_eq!(lookup.get(path), Some(&want));
    }
    // No other paths may be present.
    assert_eq!(lookup.len(), 3);
}
1010
/// Builds a `serde_json::Value` nested deeper than
/// [`MAX_WALK_DEPTH`] and checks that `walk_json_leaves` returns
/// normally — no stack overflow — without emitting any metric from
/// below the cap. serde_json's own parser depth limit (128 by
/// default) rejects such documents before the walker would ever
/// see them, so constructing the `Value` directly, bypassing the
/// parser, is the only way to reach this depth; that is the path
/// exercised here.
#[test]
fn walk_json_leaves_depth_cap_skips_deeply_nested_subtree() {
    // Wrap a numeric leaf in 100 single-key objects. That puts the
    // leaf below MAX_WALK_DEPTH (64), so it must be skipped. In its
    // place a metric named `WALK_TRUNCATION_SENTINEL_NAME` MUST be
    // returned, so callers without a tracing subscriber can still
    // observe that truncation happened.
    let value = (0..100).fold(serde_json::json!({"leaf": 42.0}), |inner, _| {
        let mut wrapper = serde_json::Map::new();
        wrapper.insert("x".to_string(), inner);
        serde_json::Value::Object(wrapper)
    });
    let metrics = walk_json_leaves(&value, MetricStream::Stdout);

    // Everything except the sentinel counts as a real leaf.
    let real_leaves: Vec<_> = metrics
        .iter()
        .filter(|metric| metric.name != WALK_TRUNCATION_SENTINEL_NAME)
        .collect();
    assert!(
        real_leaves.is_empty(),
        "leaf beyond MAX_WALK_DEPTH cap must not be emitted, got {real_leaves:?}"
    );

    let sentinel = metrics
        .iter()
        .find(|metric| metric.name == WALK_TRUNCATION_SENTINEL_NAME)
        .expect("truncation sentinel must be present on cap hit");
    assert!(
        sentinel.value > MAX_WALK_DEPTH as f64,
        "sentinel value must carry the depth at which truncation fired, got {}",
        sentinel.value,
    );
}
1052
/// A leaf sitting exactly at [`MAX_WALK_DEPTH`] must still be
/// emitted; only nesting beyond the cap is cut off. This is the
/// boundary partner of the depth-cap skip test, so an off-by-one
/// in the guard (e.g. `>=` where `>` is meant) shows up as a
/// failure here.
#[test]
fn walk_json_leaves_depth_cap_boundary_leaf_preserved() {
    // Wrap a numeric leaf in exactly MAX_WALK_DEPTH single-key
    // objects. The leaf's path then has MAX_WALK_DEPTH segments and
    // the leaf itself is reached at depth MAX_WALK_DEPTH, which the
    // cap must still allow.
    let leaf = serde_json::Number::from_f64(42.0).unwrap();
    let value = (0..MAX_WALK_DEPTH).fold(serde_json::Value::Number(leaf), |inner, _| {
        let mut wrapper = serde_json::Map::new();
        wrapper.insert("x".to_string(), inner);
        serde_json::Value::Object(wrapper)
    });
    let metrics = walk_json_leaves(&value, MetricStream::Stdout);
    assert_eq!(metrics.len(), 1, "boundary leaf must be preserved");
    let only = &metrics[0];
    assert_eq!(only.value, 42.0);
}
1076
/// One walk must emit every finite numeric leaf whatever depth it
/// sits at, as long as that depth is within MAX_WALK_DEPTH. Real
/// payload schemas mix depths freely (fio's
/// `jobs[0].read.lat_ns.mean` is at depth 5 while
/// `jobs[0].jobname` is at depth 2). A regression that bails out
/// early inside a subtree — say a premature `return` while
/// handling an object — would lose the shallower siblings that
/// follow a deep subtree.
#[test]
fn walk_json_leaves_mixed_depth_leaves_all_emitted() {
    use std::collections::BTreeMap;

    let value = serde_json::json!({
        "shallow": 1.0,
        "mid": {
            "leaf": 2.0,
            "deeper": {
                "still": {
                    "further": 3.0
                }
            }
        },
        "also_shallow": 4.0,
        "deeper_sibling": {
            "only_child": 5.0
        }
    });
    let metrics = walk_json_leaves(&value, MetricStream::Stdout);
    let lookup: BTreeMap<&str, f64> = metrics
        .iter()
        .map(|metric| (metric.name.as_str(), metric.value))
        .collect();
    // Every leaf, shallow or deep, with the path it must appear under.
    let expected = [
        ("shallow", 1.0),
        ("mid.leaf", 2.0),
        ("mid.deeper.still.further", 3.0),
        ("also_shallow", 4.0),
        ("deeper_sibling.only_child", 5.0),
    ];
    for (path, want) in expected {
        assert_eq!(lookup.get(path), Some(&want));
    }
    assert_eq!(metrics.len(), 5, "exactly five numeric leaves expected");
}
1111
/// Arrays nested directly inside arrays must yield dotted-index
/// paths with no stray separators. A mistake in how the array
/// handling inserts the separator or restores the path afterwards
/// would show up as a leading dot, a doubled dot, or an index
/// fused onto the previous segment.
#[test]
fn walk_json_leaves_array_chain_paths_correct() {
    // `a` is a 2x2x2 array of numeric leaves, so the expected paths
    // run from `a.0.0.0` through `a.1.1.1`.
    let value = serde_json::json!({
        "a": [
            [[1.0, 2.0], [3.0, 4.0]],
            [[5.0, 6.0], [7.0, 8.0]]
        ]
    });
    let metrics = walk_json_leaves(&value, MetricStream::Stdout);
    let names: Vec<&str> = metrics.iter().map(|metric| metric.name.as_str()).collect();

    // Eight leaves, expected in lexicographic index order.
    let expected = [
        "a.0.0.0", "a.0.0.1", "a.0.1.0", "a.0.1.1",
        "a.1.0.0", "a.1.0.1", "a.1.1.0", "a.1.1.1",
    ];
    assert_eq!(names.len(), 8);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(names[idx], *want);
    }

    // Spot-check both ends of the value sequence to confirm values
    // stay aligned with their paths.
    assert_eq!(metrics[0].value, 1.0);
    assert_eq!(metrics[7].value, 8.0);
}
1145
/// A `serde_json::Value::Null` leaf contributes nothing — no
/// metric and no truncation sentinel. The fixture places the null
/// exactly at MAX_WALK_DEPTH, the deepest position still inside
/// the cap, so the result must be completely empty. A regression
/// that treated Null like a Number would surface here as a
/// spurious metric (or a panic).
#[test]
fn walk_json_leaves_null_at_boundary_produces_no_metric() {
    // Build `{a: {a: {a: ... {a: null}}}}` with exactly
    // MAX_WALK_DEPTH wrapping objects, which puts the Null at depth
    // MAX_WALK_DEPTH.
    let value = (0..MAX_WALK_DEPTH).fold(serde_json::Value::Null, |inner, _| {
        let mut wrapper = serde_json::Map::new();
        wrapper.insert("a".to_string(), inner);
        serde_json::Value::Object(wrapper)
    });
    let metrics = walk_json_leaves(&value, MetricStream::Stdout);
    assert!(
        metrics.is_empty(),
        "Null leaves must produce no metrics (and no truncation sentinel), \
         got {metrics:?}",
    );
}
1176
#[test]
fn module_level_example_usage() {
    // End-to-end shape of a typical call: a Payload declares
    // OutputFormat::Json, its captured stdout is handed to
    // extract_metrics, and a Vec<Metric> comes back.
    const DEMO: crate::test_support::Payload = crate::test_support::Payload {
        name: "example",
        kind: crate::test_support::PayloadKind::Binary("example"),
        output: OutputFormat::Json,
        default_args: &[],
        default_checks: &[],
        metrics: &[],
        include_files: &[],
        uses_parent_pgrp: false,
        known_flags: None,
    };
    let captured = r#"{"throughput": 42.5}"#;
    let extracted = extract_metrics(captured, &DEMO.output, MetricStream::Stdout).unwrap();
    assert_eq!(extracted.len(), 1);
    // The one numeric leaf becomes the one metric.
    let only = &extracted[0];
    assert_eq!(only.name, "throughput");
    assert_eq!(only.value, 42.5);
}
1198}