ktstr/test_support/
metrics.rs

1//! Metric extraction pipeline for payload outputs.
2//!
3//! Payloads declared with [`OutputFormat::Json`] emit JSON to either
4//! stdout or stderr — `PayloadRun` applies a stdout-primary /
5//! stderr-fallback contract and hands whichever stream produced a
6//! non-empty metric set to this module. Benchmark tools split
7//! across the two conventions (schbench writes structured summaries
8//! to stderr, fio / stress-ng to stdout); the fallback lets either
9//! discipline round-trip through the same extractor. This module
10//! locates the JSON document region inside mixed text output
11//! (many tools emit a banner line before their structured body) and
12//! walks numeric leaves into [`Metric`]s keyed by dotted paths.
13//!
14//! [`OutputFormat::ExitCode`] returns an empty metric set; exit-code
15//! pass/fail is handled by the `MetricCheck::ExitCodeEq` pre-pass
16//! elsewhere.
17
18use crate::test_support::{Metric, MetricStream, OutputFormat, Polarity};
19
20/// Extract metrics from a payload's captured output per its declared
21/// [`OutputFormat`].
22///
23/// `output` carries whichever stream `PayloadRun` decided to extract
24/// from — stdout on the happy path, stderr under the stdout-primary
25/// stderr-fallback contract when stdout produced an empty result.
26/// The extractor itself is stream-agnostic; it parses whatever byte
27/// blob it is handed.
28///
29/// Returns an empty `Vec` for [`OutputFormat::ExitCode`] and for
30/// [`OutputFormat::Json`] when no JSON document is located or the
31/// document carries no numeric leaves. JSON-parse errors are
32/// non-fatal: the extraction returns `Vec::new()` so downstream
33/// [`MetricCheck`](crate::test_support::MetricCheck) evaluation reports each
34/// referenced metric as missing rather than failing the whole run.
35///
36/// # Known truncation point: depth cap
37///
38/// The `Json` arm routes through [`walk_json_leaves`], which enforces
39/// a hard recursion cap of [`MAX_WALK_DEPTH`] (currently 64).
40/// Subtrees past that depth are silently dropped from the metric
41/// list — a `tracing::warn!` fires and a sentinel metric named
42/// [`WALK_TRUNCATION_SENTINEL_NAME`]
43/// (`__walk_json_leaves_truncated`) is appended to the return
44/// value, with `value` set to the depth at which truncation
45/// occurred. Callers that want to distinguish "no deep metrics"
46/// from "deep metrics dropped by the cap" scan the returned `Vec`
47/// for a metric with that name. Practical upper bound: 64 is well
48/// below serde_json's default parse recursion limit (128) and
49/// covers every realistic payload schema observed in the crate
50/// (fio maxes out around depth 8, schbench around depth 3).
51pub fn extract_metrics(
52    output: &str,
53    format: &OutputFormat,
54    stream: MetricStream,
55) -> Result<Vec<Metric>, String> {
56    match format {
57        OutputFormat::ExitCode => Ok(Vec::new()),
58        OutputFormat::Json => Ok(find_and_parse_json(output)
59            .map(|v| walk_json_leaves(&v, stream))
60            .unwrap_or_default()),
61    }
62}
63
64/// Locate a JSON document within mixed text output and parse it.
65///
66/// Many benchmark tools emit a banner line (fio, stress-ng)
67/// before the structured JSON body. A strict
68/// `serde_json::from_str(output)` fails for those. This helper
69/// first tries the whole input; on failure, scans for the first
70/// balanced `{...}` (or `[...]`) region and parses that.
71///
72/// Returns `None` when no JSON document is locatable or parsing
73/// both candidates fails. Does NOT heuristically repair malformed
74/// JSON — only brace-balancing for region extraction; serde_json
75/// does the actual parse strictly.
76///
77/// # Multiple JSON objects in one output
78///
79/// When `output` contains more than one balanced top-level region
80/// (e.g. `{"first": 1} noise {"second": 2}`), only the FIRST is
81/// returned. The region finder scans left-to-right for the first
82/// `{` or `[`, walks to its matching closer, and stops — it does
83/// not merge or concatenate subsequent balanced regions. Payloads
84/// that emit multiple JSON documents per run therefore lose all
85/// but the first; authors needing full capture should switch the
86/// payload to a wrapper that emits a single aggregate document.
87pub(crate) fn find_and_parse_json(output: &str) -> Option<serde_json::Value> {
88    // Fast path: whole input is a single JSON document.
89    if let Ok(v) = serde_json::from_str::<serde_json::Value>(output.trim()) {
90        return Some(v);
91    }
92    // Slow path: find the first balanced `{...}` or `[...]` region.
93    let region = extract_json_region(output)?;
94    serde_json::from_str::<serde_json::Value>(region).ok()
95}
96
/// Return the first balanced `{...}` or `[...]` slice of `s`.
///
/// The scan starts at the first `{` or `[` in the input. From there
/// it counts only brackets of that same kind, ignores everything
/// inside double-quoted strings, and honours backslash escapes within
/// those strings. The slice ends at the closer that brings the
/// nesting count back to zero.
///
/// Returns `None` when the input has no opener, or when the first
/// opener is never balanced. The scan commits to that first opener
/// and does not retry from a later one.
///
/// This is a region locator, NOT a JSON parser: the result is meant
/// to be handed to `serde_json::from_str`, which is where mismatched
/// structures (e.g. `{...}]`) and invalid content get rejected.
fn extract_json_region(s: &str) -> Option<&str> {
    // Both openers are ASCII, so the index is a valid char boundary.
    let start = s.find(|ch: char| ch == '{' || ch == '[')?;
    let tail = &s.as_bytes()[start..];
    let (open, close) = if tail[0] == b'{' {
        (b'{', b'}')
    } else {
        (b'[', b']')
    };

    let mut nesting = 0i32;
    let mut inside_string = false;
    let mut skip_next = false;

    for (offset, &byte) in tail.iter().enumerate() {
        if skip_next {
            // Byte following a backslash inside a string: consumed
            // verbatim, whatever it is.
            skip_next = false;
            continue;
        }
        if inside_string {
            if byte == b'\\' {
                skip_next = true;
            } else if byte == b'"' {
                inside_string = false;
            }
            continue;
        }
        if byte == b'"' {
            inside_string = true;
        } else if byte == open {
            nesting += 1;
        } else if byte == close {
            nesting -= 1;
            if nesting == 0 {
                // The closer is ASCII, so the inclusive end is a
                // char boundary as well.
                return Some(&s[start..=start + offset]);
            }
        }
    }
    None
}
143
144/// Walk numeric leaves of a JSON value, emitting [`Metric`]s keyed
145/// by dotted paths.
146///
147/// Objects contribute `"key.subkey"` paths; arrays contribute
148/// `"key.0", "key.1"`. Numeric leaves where `as_f64()` yields a
149/// finite value are emitted; String, Bool, and Null leaves are
150/// skipped. NaN/infinite f64s are rejected by serde_json at parse
151/// time, so natural inputs never reach this walker with non-finite
152/// numbers; the defensive `is_finite()` guard catches hand-built
153/// `Value` constructions.
154///
155/// Each [`Metric`] is emitted with [`Polarity::Unknown`] and empty
156/// unit; the caller resolves these against the payload's declared
157/// [`MetricHint`](crate::test_support::MetricHint)s to upgrade
158/// polarity.
159///
160/// # Stability contract (pre-1.0)
161///
162/// This function, [`MAX_WALK_DEPTH`], [`WALK_TRUNCATION_SENTINEL_NAME`],
163/// and [`is_truncation_sentinel_name`] together form the public
164/// numeric-JSON-extraction surface ktstr offers to in-tree sibling
165/// binaries (`ktstr-jemalloc-probe` is the one current external
166/// consumer — see `src/bin/jemalloc_probe.rs`). Their visibility
167/// is aligned at `pub` so an external consumer that wants to
168/// distinguish "no deep metrics present" from "deep metrics
169/// dropped by the depth cap" can reach every piece of the
170/// contract from outside the crate.
171///
172/// ktstr is pre-1.0: the four items above are free to change in
173/// signature or behaviour without a compat shim. A caller depending
174/// on them must vendor the ktstr version at a known commit, not
175/// track `main`. Concretely:
176///
177/// - Path format (`key.subkey` / `key.0`): may grow a shape
178///   option to prefer arrays-by-key over positional index.
179/// - Depth cap ([`MAX_WALK_DEPTH`]): may raise or lower as
180///   pathological inputs are observed; consumers must not hard-code
181///   the literal value.
182/// - Sentinel shape: may migrate to a typed return
183///   (`WalkResult { metrics, truncated: Option<u64> }`) per the
184///   note on [`WALK_TRUNCATION_SENTINEL_NAME`]. Consumers that
185///   need zero-collision certainty should gate on
186///   [`is_truncation_sentinel_name`] (the predicate, not the
187///   literal string) so a sentinel-name rewording lands in one
188///   place.
189pub fn walk_json_leaves(value: &serde_json::Value, stream: MetricStream) -> Vec<Metric> {
190    let mut out = Vec::new();
191    // Single reusable path buffer: children push their segment,
192    // recurse, then truncate back. O(total_path_chars) work across
193    // the whole walk instead of O(depth × path_chars) per leaf.
194    let mut path = String::new();
195    walk(value, &mut path, 0, stream, &mut out);
196    out
197}
198
/// Hard cap on recursion depth in `walk`.
///
/// The root value is visited at depth 0 and every object/array level
/// adds one. `walk` bails out when `depth > MAX_WALK_DEPTH`, so
/// values at depths `0..=MAX_WALK_DEPTH` are visited and anything
/// deeper is skipped. Each skipped subtree fires one
/// [`tracing::warn!`] and appends one sentinel metric.
///
/// Serde_json's default parser recursion limit is 128, so this caps
/// us well below that; a hand-built `serde_json::Value` that bypasses
/// the parser can still reach arbitrary depth, so an explicit walker
/// guard is the last line of defence against a stack overflow.
///
/// See [`walk_json_leaves`]'s stability contract — the concrete
/// value may change across ktstr pre-1.0 versions.
pub const MAX_WALK_DEPTH: usize = 64;
210
/// Sentinel metric name emitted when `walk` goes past
/// [`MAX_WALK_DEPTH`] and skips a subtree. Callers of
/// [`walk_json_leaves`] / [`extract_metrics`] that want to
/// distinguish "no deep metrics present" from "deep metrics
/// dropped by the depth cap" scan the returned `Vec<Metric>` for
/// a metric whose `name` equals this constant. Its `value` is the
/// depth at which truncation occurred (with the current walker that
/// is always `MAX_WALK_DEPTH + 1`), and every skipped subtree
/// contributes its own sentinel, so several may appear in one
/// result. The sentinel's name is always this bare constant — it is
/// never prefixed with the path of the skipped subtree.
///
/// # Accepted collision risk
///
/// The double-underscore prefix makes collision extremely unlikely
/// in practice, but not impossible: a benchmark whose JSON has
/// this exact literal string as a **top-level** key with a numeric
/// value produces a `Metric.name` indistinguishable from the cap-hit
/// sentinel (nested leaves get at least one `.` injected by `walk`,
/// so only a first path segment can produce a name without a `.`).
/// Consumers treat the sentinel as advisory, not authoritative —
/// a caller that depends on zero-collision guarantees must reject
/// sentinel-named paths from its input schema.
///
/// A future refactor could eliminate the risk structurally by
/// widening the return type to `WalkResult { metrics: Vec<Metric>,
/// truncated: Option<u64> }` — separating the truncation signal
/// from the metric stream. Held off pending a consumer that
/// materially benefits from zero-collision certainty; the current
/// advisory contract is sufficient for every in-crate consumer.
///
/// Exported `pub` so sibling binaries that embed ktstr as a
/// library (e.g. `ktstr-jemalloc-probe`) can gate on the
/// sentinel from their own consumer code. See
/// [`walk_json_leaves`]'s stability contract — consumers
/// comparing against the sentinel should prefer
/// [`is_truncation_sentinel_name`] over the literal string so a
/// future rewording lands in one place.
pub const WALK_TRUNCATION_SENTINEL_NAME: &str = "__walk_json_leaves_truncated";
247
/// Returns `true` when `name` (a metric name or map key) is exactly
/// the walk-truncation sentinel, [`WALK_TRUNCATION_SENTINEL_NAME`].
///
/// Keeping the comparison in one predicate means consumers stay in
/// sync if the sentinel is renamed, and gives future sentinel
/// variants (e.g. a parser-rejection sentinel) a single place to be
/// threaded through instead of scattered string literals.
///
/// Visibility matches [`walk_json_leaves`] and
/// [`WALK_TRUNCATION_SENTINEL_NAME`] so external consumers get a
/// complete sentinel-discrimination API.
pub fn is_truncation_sentinel_name(name: &str) -> bool {
    matches!(name, WALK_TRUNCATION_SENTINEL_NAME)
}
261
262fn walk(
263    value: &serde_json::Value,
264    path: &mut String,
265    depth: usize,
266    stream: MetricStream,
267    out: &mut Vec<Metric>,
268) {
269    if depth > MAX_WALK_DEPTH {
270        tracing::warn!(
271            depth,
272            max = MAX_WALK_DEPTH,
273            path = %path,
274            "walk_json_leaves: depth cap hit, subtree skipped",
275        );
276        // Emit a sentinel metric so callers inspecting only the
277        // returned `Vec<Metric>` see the truncation — the
278        // `tracing::warn!` above only reaches a subscriber, which
279        // the default test dispatch path does not install. See
280        // [`WALK_TRUNCATION_SENTINEL_NAME`] for the discrimination
281        // contract.
282        out.push(Metric {
283            name: WALK_TRUNCATION_SENTINEL_NAME.to_string(),
284            value: depth as f64,
285            polarity: Polarity::Unknown,
286            unit: String::new(),
287            stream,
288        });
289        return;
290    }
291    match value {
292        serde_json::Value::Object(map) => {
293            for (k, v) in map {
294                let saved_len = path.len();
295                if !path.is_empty() {
296                    path.push('.');
297                }
298                path.push_str(k);
299                walk(v, path, depth + 1, stream, out);
300                path.truncate(saved_len);
301            }
302        }
303        serde_json::Value::Array(items) => {
304            for (i, v) in items.iter().enumerate() {
305                let saved_len = path.len();
306                if !path.is_empty() {
307                    path.push('.');
308                }
309                // Avoid an extra String allocation for the index
310                // segment by writing directly into `path` via the
311                // fmt::Write impl (infallible for String).
312                use std::fmt::Write;
313                let _ = write!(path, "{i}");
314                walk(v, path, depth + 1, stream, out);
315                path.truncate(saved_len);
316            }
317        }
318        serde_json::Value::Number(n) => {
319            if let Some(f) = n.as_f64()
320                && f.is_finite()
321            {
322                // Leaf emission is the one unavoidable allocation —
323                // `Metric.name` is owned. `clone()` copies exactly
324                // the current path bytes, not every intermediate
325                // ancestor path that `format!` used to materialize.
326                out.push(Metric {
327                    name: path.clone(),
328                    value: f,
329                    polarity: Polarity::Unknown,
330                    unit: String::new(),
331                    stream,
332                });
333            }
334        }
335        // Strings/bools/null: skipped. MetricCheck::Exists can gate on
336        // presence via the PayloadMetrics lookup — a missing
337        // string-valued key is treated the same as a missing numeric.
338        _ => {}
339    }
340}
341
342#[cfg(test)]
343mod tests {
344    use super::*;
345
346    #[test]
347    fn is_truncation_sentinel_name_matches_the_constant() {
348        assert!(is_truncation_sentinel_name(WALK_TRUNCATION_SENTINEL_NAME));
349    }
350
351    #[test]
352    fn is_truncation_sentinel_name_rejects_other_names() {
353        assert!(!is_truncation_sentinel_name("foo"));
354        assert!(!is_truncation_sentinel_name(""));
355        // Near-miss: same prefix but not the full sentinel name.
356        assert!(!is_truncation_sentinel_name("__walk_json_leaves"));
357    }
358
359    #[test]
360    fn exit_code_returns_empty() {
361        let m = extract_metrics("whatever", &OutputFormat::ExitCode, MetricStream::Stdout).unwrap();
362        assert!(m.is_empty());
363    }
364
365    #[test]
366    fn json_full_document_extracts_numeric_leaves() {
367        let s = r#"{"iops": 10000, "lat_ns": 500}"#;
368        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
369        assert_eq!(m.len(), 2);
370        let names: Vec<_> = m.iter().map(|x| x.name.as_str()).collect();
371        assert!(names.contains(&"iops"));
372        assert!(names.contains(&"lat_ns"));
373        for metric in &m {
374            assert_eq!(metric.polarity, Polarity::Unknown);
375            assert_eq!(metric.unit, "");
376        }
377    }
378
379    #[test]
380    fn json_with_banner_prefix_extracts_region() {
381        // Fio-style: banner line then JSON body.
382        let s = "fio-3.36 starting up\n{\"iops\": 500}";
383        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
384        assert_eq!(m.len(), 1);
385        assert_eq!(m[0].name, "iops");
386        assert_eq!(m[0].value, 500.0);
387    }
388
389    #[test]
390    fn json_nested_objects_use_dotted_paths() {
391        let s = r#"{"jobs": {"0": {"read": {"iops": 123}}}}"#;
392        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
393        assert_eq!(m.len(), 1);
394        assert_eq!(m[0].name, "jobs.0.read.iops");
395        assert_eq!(m[0].value, 123.0);
396    }
397
398    #[test]
399    fn json_arrays_use_numeric_index_paths() {
400        let s = r#"{"samples": [100, 200, 300]}"#;
401        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
402        assert_eq!(m.len(), 3);
403        let mut actual: Vec<(&str, f64)> = m.iter().map(|x| (x.name.as_str(), x.value)).collect();
404        actual.sort_by_key(|(n, _)| n.to_string());
405        assert_eq!(
406            actual,
407            vec![
408                ("samples.0", 100.0),
409                ("samples.1", 200.0),
410                ("samples.2", 300.0),
411            ]
412        );
413    }
414
415    #[test]
416    fn json_malformed_returns_empty() {
417        let m = extract_metrics(
418            "garbage not json",
419            &OutputFormat::Json,
420            MetricStream::Stdout,
421        )
422        .unwrap();
423        assert!(m.is_empty());
424    }
425
426    #[test]
427    fn json_empty_stdout_returns_empty() {
428        let m = extract_metrics("", &OutputFormat::Json, MetricStream::Stdout).unwrap();
429        assert!(m.is_empty());
430    }
431
432    #[test]
433    fn json_skips_string_and_bool_leaves() {
434        let s = r#"{"name": "fio", "ok": true, "iops": 42}"#;
435        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
436        // Only iops is numeric.
437        assert_eq!(m.len(), 1);
438        assert_eq!(m[0].name, "iops");
439    }
440
441    #[test]
442    fn json_top_level_array_extracts_entries() {
443        let s = "[1, 2, 3]";
444        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
445        assert_eq!(m.len(), 3);
446    }
447
448    #[test]
449    fn extract_json_region_finds_braced_region() {
450        let r = extract_json_region("prefix {\"a\": 1} suffix").unwrap();
451        assert_eq!(r, "{\"a\": 1}");
452    }
453
454    #[test]
455    fn extract_json_region_handles_nested_braces() {
456        let r = extract_json_region("log: {\"a\": {\"b\": 1}} done").unwrap();
457        assert_eq!(r, "{\"a\": {\"b\": 1}}");
458    }
459
460    #[test]
461    fn extract_json_region_skips_braces_in_strings() {
462        let r = extract_json_region(r#"{"text": "not a }"}"#).unwrap();
463        assert_eq!(r, r#"{"text": "not a }"}"#);
464    }
465
466    #[test]
467    fn extract_json_region_handles_escaped_quotes() {
468        let r = extract_json_region(r#"{"text": "has \"escaped\" quotes"}"#).unwrap();
469        assert_eq!(r, r#"{"text": "has \"escaped\" quotes"}"#);
470    }
471
472    #[test]
473    fn extract_json_region_returns_none_for_no_brace() {
474        assert!(extract_json_region("no braces here").is_none());
475    }
476
477    #[test]
478    fn extract_json_region_returns_none_for_unbalanced() {
479        assert!(extract_json_region("incomplete {").is_none());
480    }
481
482    // -- extract_json_region stderr-bracket-noise scenarios --
483    //
484    // Interaction coverage for the realistic payload scenario:
485    // the captured stream carries stderr-style bracket noise
486    // (dmesg-like `[ TIME] message` lines, stress-ng status,
487    // kernel stacktraces printed with `[...]` prefixes, etc.)
488    // BEFORE the actual stdout JSON document. The region finder
489    // scans left-to-right and returns the FIRST balanced
490    // `{...}` or `[...]` region regardless of what the outer
491    // content looks like, so a leading `[   0.001234]`-shaped
492    // token can in fact parse as a valid one-element JSON array
493    // and masquerade as the payload's data.
494    //
495    // Each of the three scenarios below is isolated in its own
496    // `#[test]` so an individual failure points directly at the
497    // limitation that regressed, not a shared composite that
498    // requires scanning the whole test body to localize. Each
499    // scenario documents EXPECTED behaviour, not desired behaviour.
500    //
501    // If a future change adds a "skip unparseable regions and
502    // keep scanning" rule, the stderr-balanced-noise scenario
503    // will flip — the `[stderr bracket message]` region fails to
504    // parse as JSON, so a richer extractor would advance past
505    // it to the stdout `{..}`. The dmesg-timestamp scenario
506    // would NOT flip under that rule alone: `[   0.001234]` IS
507    // valid JSON (a single-element array), so an
508    // "unparseable-skip" heuristic never triggers on it.
509    // Correcting the dmesg scenario requires a stronger rule —
510    // e.g. prefer regions whose shape matches the payload's
511    // expected metric keys, or reject numeric-leaf-only regions
512    // that lack object wrappers. Its flip is therefore the
513    // correct signal that `extract_metrics` gained shape-aware
514    // selection, not just unparseable-skip.
515
516    /// Scenario 1: dmesg-style timestamp prefix
517    /// `[   0.001234] kernel boot banner {"iops": 100}`.
518    /// The leading bracket parses as a JSON array
519    /// (`[0.001234]`). `extract_metrics` on this input returns
520    /// a metric for the array's single element keyed by its
521    /// positional path, NOT the stdout `{"iops": 100}`.
522    /// Documents the known-limitation behaviour: callers that
523    /// need robust stderr-noise rejection must pre-strip
524    /// timestamped brackets before handing the blob to
525    /// `extract_metrics`.
526    #[test]
527    fn extract_json_region_dmesg_timestamp_prefix_wins_over_stdout_json() {
528        let input = "[   0.001234] kernel boot banner\n{\"iops\": 100}";
529        let first_region = extract_json_region(input)
530            .expect("dmesg-style prefix starts with `[` — finder must return SOME region");
531        assert_eq!(
532            first_region, "[   0.001234]",
533            "left-to-right scan picks the FIRST balanced region; \
534             the dmesg prefix is self-balancing and wins over the \
535             stdout JSON that follows",
536        );
537        // End-to-end: `extract_metrics` on the same input should
538        // emit a metric derived from the leading array, not from
539        // the stdout `{"iops": 100}`. This pins that payloads
540        // needing stderr-noise tolerance must pre-strip
541        // timestamps rather than relying on the extractor.
542        let m = extract_metrics(input, &OutputFormat::Json, MetricStream::Stdout).unwrap();
543        let names: Vec<&str> = m.iter().map(|x| x.name.as_str()).collect();
544        assert!(
545            !names.contains(&"iops"),
546            "regression check: the finder picking up the dmesg \
547             timestamp array means the real stdout `iops` metric \
548             is NOT extracted; if this ever starts containing \
549             `iops`, the finder must have gained smarter \
550             noise-skipping (update this test and the \
551             documented contract); got: {names:?}",
552        );
553    }
554
555    /// Scenario 2: stderr noise that is NOT self-balancing (an
556    /// open bracket without its closer: `[FAIL stress-ng ...`).
557    /// The finder commits to the first opener and returns
558    /// `None` rather than retrying past its failure point.
559    /// Stdout JSON that follows the unbalanced prefix is LOST,
560    /// not recovered. Documents a known limitation: the finder
561    /// has no fallback-search after a failed region.
562    #[test]
563    fn extract_json_region_unbalanced_stderr_prefix_returns_none() {
564        let input = "[FAIL stress-ng: worker timed out\n{\"iops\": 200}";
565        let r = extract_json_region(input);
566        assert!(
567            r.is_none(),
568            "unbalanced leading `[` makes the finder return \
569             None even though valid stdout JSON follows — the \
570             finder commits to the first opener and does not \
571             retry past its failure point. Known limitation; \
572             callers pre-strip stderr noise. Got: {r:?}",
573        );
574    }
575
576    /// Scenario 3: stderr noise with BALANCED brackets containing
577    /// non-numeric content (`[stderr bracket message]`). The
578    /// region is balanced at the byte level so the finder
579    /// returns it, but `serde_json` rejects the unquoted content
580    /// as invalid JSON. `extract_metrics` yields no metrics
581    /// because the extractor only tries the FIRST region — the
582    /// following valid stdout `{..}` is never inspected.
583    ///
584    /// Also acts as a positive control: the same stdout JSON
585    /// with NO preceding noise extracts cleanly, so the failure
586    /// above is specifically due to the preceding bracket noise,
587    /// not the JSON itself.
588    #[test]
589    fn extract_json_region_balanced_unparseable_stderr_wins_first_region() {
590        let input = "[stderr bracket message]\n{\"iops\": 300}";
591        let first_region = extract_json_region(input).expect(
592            "leading `[stderr bracket message]` is a balanced region \
593             at the byte level (ignoring JSON validity); finder must \
594             return it",
595        );
596        assert_eq!(
597            first_region, "[stderr bracket message]",
598            "balanced-but-invalid-JSON regions still win the \
599             first-region scan; the following valid `{{..}}` is \
600             never inspected by the finder",
601        );
602        let m = extract_metrics(input, &OutputFormat::Json, MetricStream::Stdout).unwrap();
603        assert!(
604            m.is_empty(),
605            "region parses unsuccessfully as JSON → fallback \
606             yields no metrics. The stdout `iops` metric is lost. \
607             Documents the known limitation: mixed-stream \
608             captures lose valid JSON when a preceding balanced \
609             region fails to parse; got: {m:?}",
610        );
611
612        // Positive control: the same stdout JSON with NO
613        // preceding noise extracts cleanly, so the failure
614        // above is specifically due to the preceding bracket
615        // noise, not the JSON itself.
616        let m_clean = extract_metrics(
617            r#"{"iops": 400}"#,
618            &OutputFormat::Json,
619            MetricStream::Stdout,
620        )
621        .unwrap();
622        let clean_names: Vec<&str> = m_clean.iter().map(|x| x.name.as_str()).collect();
623        assert!(
624            clean_names.contains(&"iops"),
625            "control: stdout JSON in isolation must extract the \
626             `iops` metric so the preceding assertion is \
627             isolating the noise-interaction behaviour, not \
628             hiding a broken extractor; got: {clean_names:?}",
629        );
630    }
631
632    #[test]
633    fn walk_json_leaves_polarity_is_unknown_before_hint_resolution() {
634        let v: serde_json::Value = serde_json::from_str(r#"{"a": 1}"#).unwrap();
635        let m = walk_json_leaves(&v, MetricStream::Stdout);
636        assert_eq!(m[0].polarity, Polarity::Unknown);
637    }
638
639    /// `stream` attribution must round-trip through the walker for
640    /// both `Stdout` and `Stderr`, and the stream field on the
641    /// emitted `Metric` must match the argument. Pins the stream
642    /// contract asserted in the [`MetricStream`] docstring.
643    ///
644    /// Exercises BOTH walker branches: the object-recurse branch
645    /// (via `{"a": 1}`) AND the array-recurse branch (via
646    /// `[{"a": 1}, {"b": 2}]`) — a regression that stamped the
647    /// stream only on one branch's emitted metrics would pass the
648    /// object fixture but fail the array leaves. The array
649    /// fixture also doubles coverage (two leaves per call).
650    #[test]
651    fn walk_json_leaves_tags_stream() {
652        let obj: serde_json::Value = serde_json::from_str(r#"{"a": 1}"#).unwrap();
653        let arr: serde_json::Value = serde_json::from_str(r#"[{"a": 1}, {"b": 2}]"#).unwrap();
654        for (fixture_label, value, expected_len) in
655            [("object", &obj, 1_usize), ("array", &arr, 2_usize)]
656        {
657            for stream in [MetricStream::Stdout, MetricStream::Stderr] {
658                let tagged = walk_json_leaves(value, stream);
659                assert_eq!(
660                    tagged.len(),
661                    expected_len,
662                    "walker on {fixture_label} fixture must produce \
663                     exactly {expected_len} leaf(s); got {}",
664                    tagged.len(),
665                );
666                for (i, m) in tagged.iter().enumerate() {
667                    assert_eq!(
668                        m.stream, stream,
669                        "{fixture_label} leaf {i}: stream tag must \
670                         match the argument ({stream:?}); got {:?}. \
671                         A regression that stamped the stream only \
672                         on the object-recurse branch would pass \
673                         the object fixture but fail here.",
674                        m.stream,
675                    );
676                }
677            }
678        }
679    }
680
681    /// `extract_metrics` on the `Json` format threads the `stream`
682    /// argument through to every emitted `Metric.stream`. Pins the
683    /// wire between `extract_metrics` (caller-facing API) and the
684    /// walker that actually stamps the field — a regression that
685    /// dropped the `stream` parameter on the way in, or hardcoded
686    /// `Stdout` inside the `Json` arm, silently drops the stderr
687    /// attribution and breaks the `PayloadRun` stderr-fallback
688    /// labelling. Mirrors the `walk_json_leaves_tags_stream_...`
689    /// test one layer up.
690    #[test]
691    fn extract_metrics_threads_stream_from_argument_to_emitted_metric() {
692        let json = r#"{"iops": 100}"#;
693        for stream in [MetricStream::Stdout, MetricStream::Stderr] {
694            let metrics = extract_metrics(json, &OutputFormat::Json, stream).unwrap();
695            assert_eq!(metrics.len(), 1, "one leaf expected from {{\"iops\": 100}}",);
696            assert_eq!(
697                metrics[0].stream, stream,
698                "extract_metrics must thread stream={stream:?} to the \
699                 emitted Metric; got {:?}",
700                metrics[0].stream,
701            );
702        }
703    }
704
705    // Additional edge-case coverage for walk_json_leaves paths.
706
707    #[test]
708    fn json_deeply_nested_array_of_objects() {
709        // Edge case: array of objects. Each object's field should
710        // emit `samples.N.field` paths.
711        let s = r#"{"samples": [{"iops": 100}, {"iops": 200}, {"iops": 300}]}"#;
712        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
713        assert_eq!(m.len(), 3);
714        let names: Vec<&str> = m.iter().map(|x| x.name.as_str()).collect();
715        assert!(names.contains(&"samples.0.iops"));
716        assert!(names.contains(&"samples.1.iops"));
717        assert!(names.contains(&"samples.2.iops"));
718    }
719
720    #[test]
721    fn json_large_integer_round_trip_via_f64() {
722        // Large but f64-safe integer (below 2^53). serde_json's
723        // Number::as_f64 lossily converts any JSON number to f64;
724        // values below 2^53 are exact.
725        let s = r#"{"big_iops": 1000000000000}"#; // 1e12 = 2^40
726        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
727        assert_eq!(m.len(), 1);
728        assert_eq!(m[0].value, 1_000_000_000_000.0);
729    }
730
731    #[test]
732    fn json_fio_style_full_output_with_multiline_banner() {
733        // Real-world fio output has multiple banner lines + a large
734        // JSON object. Region-finder must skip all non-JSON prefix
735        // and parse the JSON body.
736        let s = "fio-3.36 starting up\n\
737                 Running fio with 4 jobs\n\
738                 test: (g=0): rw=randread, bs=4k, ioengine=libaio\n\
739                 \n\
740                 {\"jobs\": [{\"jobname\": \"test\", \"read\": {\"iops\": 12345, \"bw_bytes\": 50593792}}], \
741                 \"disk_util\": [{\"util\": 99.5}]}";
742        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
743        // Extracted: jobs.0.read.iops, jobs.0.read.bw_bytes, disk_util.0.util.
744        // jobname is a string, skipped.
745        assert_eq!(m.len(), 3);
746        let by_name: std::collections::BTreeMap<&str, f64> =
747            m.iter().map(|x| (x.name.as_str(), x.value)).collect();
748        assert_eq!(by_name.get("jobs.0.read.iops"), Some(&12345.0));
749        assert_eq!(by_name.get("jobs.0.read.bw_bytes"), Some(&50593792.0));
750        assert_eq!(by_name.get("disk_util.0.util"), Some(&99.5));
751    }
752
753    #[test]
754    fn walk_json_leaves_skips_nonfinite_defensively() {
755        // serde_json rejects NaN/Infinity at parse time (strict JSON),
756        // so naturally-occurring inputs never reach walk_json_leaves
757        // with non-finite numbers. The defensive filter is still
758        // verified by constructing a Value directly with
759        // Number::from_f64 which returns None for non-finite.
760        assert!(serde_json::Number::from_f64(f64::NAN).is_none());
761        assert!(serde_json::Number::from_f64(f64::INFINITY).is_none());
762        assert!(serde_json::Number::from_f64(f64::NEG_INFINITY).is_none());
763        // Finite values ARE accepted:
764        assert!(serde_json::Number::from_f64(2.78).is_some());
765    }
766
767    /// JSON integers above 2^53 lose precision when coerced to
768    /// f64 via `serde_json::Number::as_f64` — the f64 mantissa is 52
769    /// bits, so consecutive integers beyond 9007199254740992 round
770    /// to the nearest representable f64. Pin the observed behavior:
771    /// `9007199254740993` (2^53 + 1) round-trips as `9007199254740992.0`
772    /// (2^53). Payloads emitting integer metrics larger than 2^53
773    /// must scale down (µs → s) or encode as strings — the Json
774    /// walker cannot preserve integer identity past that boundary.
775    #[test]
776    fn json_large_integer_above_2_pow_53_loses_precision() {
777        // 2^53 = 9_007_199_254_740_992 is the last exactly-representable
778        // consecutive integer in f64. 2^53 + 1 rounds down to 2^53
779        // (banker's rounding lands on the even representable
780        // neighbor). Test via u64 → f64 to pin the u64 input value
781        // distinct from the emitted f64 — a direct f64 literal of
782        // 2^53+1 would itself round at parse time, obscuring what
783        // the walker did.
784        let s = r#"{"huge": 9007199254740993}"#;
785        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
786        assert_eq!(m.len(), 1);
787        // The emitted f64 IS the nearest representable value —
788        // which is 2^53, not 2^53+1. Both literals happen to print
789        // as "9007199254740992.0" because f64 can't distinguish
790        // them; compare against the exact f64 produced by the
791        // next-representable-below path.
792        assert_eq!(m[0].value, 9_007_199_254_740_992.0_f64);
793        // Cast the u64 source input to f64 to reproduce the same
794        // rounding serde_json performed. Both sides land at 2^53;
795        // that equality IS the lossy cast being documented.
796        let rounded: f64 = 9_007_199_254_740_993_u64 as f64;
797        assert_eq!(m[0].value, rounded);
798        // Confirm bit-level that the u64 input and the resulting
799        // f64 are NOT identity-preserving: casting the f64 back to
800        // u64 yields 2^53, not 2^53+1.
801        assert_eq!(m[0].value as u64, 9_007_199_254_740_992_u64);
802        assert_ne!(m[0].value as u64, 9_007_199_254_740_993_u64);
803    }
804
805    /// At exactly 2^53 the f64 IS exact — the precision loss is
806    /// strictly one-above-the-boundary. Pair with
807    /// `json_large_integer_above_2_pow_53_loses_precision` so both
808    /// sides of the precision cliff are pinned.
809    #[test]
810    fn json_integer_at_2_pow_53_is_exact() {
811        let s = r#"{"exact": 9007199254740992}"#;
812        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
813        assert_eq!(m.len(), 1);
814        assert_eq!(m[0].value, 9_007_199_254_740_992.0_f64);
815    }
816
817    /// `find_and_parse_json` tries the whole trimmed input as
818    /// a single document on the fast path. If the input has a
819    /// balanced object followed by trailing non-JSON text, the
820    /// whole-input parse fails (strict serde) and the region-
821    /// finder slow path extracts the leading `{...}` region and
822    /// parses that. Pins the "trailing garbage is stripped by the
823    /// region finder" behavior.
824    #[test]
825    fn find_and_parse_json_recovers_object_with_trailing_garbage() {
826        let s = r#"{"a": 1, "b": 2} --- trailing prose from banner"#;
827        let v = find_and_parse_json(s).expect("trailing garbage must not block parse");
828        assert_eq!(v["a"], serde_json::json!(1));
829        assert_eq!(v["b"], serde_json::json!(2));
830    }
831
832    /// A leading array followed by trailing garbage recovers
833    /// symmetrically — the region finder handles `[...]` the same
834    /// way it handles `{...}`.
835    #[test]
836    fn find_and_parse_json_recovers_array_with_trailing_garbage() {
837        let s = "[1, 2, 3]\nextra: banner line\n";
838        let v = find_and_parse_json(s).expect("array with trailing garbage must parse");
839        assert_eq!(v, serde_json::json!([1, 2, 3]));
840    }
841
842    /// Real-world fio pattern — banner line, JSON body,
843    /// *and* trailing "done" marker. The region finder locks to
844    /// the first balanced opener/closer, so the trailing content
845    /// is ignored even if it contains unbalanced braces.
846    #[test]
847    fn find_and_parse_json_with_banner_and_trailer() {
848        let s = "fio-3.36 starting up\n{\"iops\": 100}\nfio done }";
849        let v = find_and_parse_json(s).expect("banner + trailer must resolve to body");
850        assert_eq!(v["iops"], serde_json::json!(100));
851    }
852
853    /// When the trailing garbage itself contains a
854    /// BALANCED brace pair, the region finder still returns the
855    /// first one — downstream parsing uses the first match, not
856    /// a merged document.
857    #[test]
858    fn find_and_parse_json_returns_first_region_when_trailer_also_balanced() {
859        let s = r#"{"first": 1} unrelated {"second": 2}"#;
860        let v = find_and_parse_json(s).expect("first balanced region parses");
861        assert_eq!(v["first"], serde_json::json!(1));
862        assert!(v.get("second").is_none(), "second region must not merge in");
863    }
864
865    /// Embedded `{` / `}` characters inside a JSON string literal
866    /// must NOT be counted as structural openers/closers by the
867    /// region finder. The in-string tracker flips on `"` and
868    /// suppresses nesting accounting until the matching closing
869    /// `"`, so the only braces that affect `depth` are the
870    /// structural outer ones. Pins that a log message which happens
871    /// to contain `{` / `}` inside a quoted string still round-trips
872    /// through the slow path.
873    #[test]
874    fn find_and_parse_json_ignores_braces_inside_string_literals() {
875        let s = "fio-3.36 starting up\n\
876                 {\"msg\": \"look at {nested} in text\", \"ok\": 1}\n\
877                 trailing banner";
878        let v = find_and_parse_json(s).expect("embedded braces in string must not break scan");
879        assert_eq!(v["msg"], serde_json::json!("look at {nested} in text"));
880        assert_eq!(v["ok"], serde_json::json!(1));
881    }
882
883    /// Negative numeric leaves extract at their declared value
884    /// without any sign-absoluting or filtering. Canonical for
885    /// metrics like scheduler_delta_ns that can legitimately be
886    /// negative (improvement from baseline).
887    #[test]
888    fn json_negative_numbers_extract_preserving_sign() {
889        let s = r#"{"delta_ns": -500.5, "underflow": -1000000}"#;
890        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
891        let by_name: std::collections::BTreeMap<&str, f64> =
892            m.iter().map(|x| (x.name.as_str(), x.value)).collect();
893        assert_eq!(by_name.get("delta_ns"), Some(&-500.5));
894        assert_eq!(by_name.get("underflow"), Some(&-1_000_000.0));
895    }
896
897    /// Zero is emitted as a real metric value, not filtered
898    /// out. A payload that genuinely measured zero (idle CPU, no
899    /// errors) must produce a zero metric — otherwise downstream
900    /// checks like `MetricCheck::exit_code_eq(0)` against an `exit_code`
901    /// metric of 0.0 would spuriously report "missing" instead of
902    /// passing.
903    #[test]
904    fn json_zero_values_are_emitted_not_filtered() {
905        let s = r#"{"errors": 0, "cpu_idle_pct": 0.0, "count": -0.0}"#;
906        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
907        let by_name: std::collections::BTreeMap<&str, f64> =
908            m.iter().map(|x| (x.name.as_str(), x.value)).collect();
909        assert_eq!(by_name.len(), 3, "all three zeros must extract: {m:?}");
910        assert_eq!(by_name.get("errors"), Some(&0.0));
911        assert_eq!(by_name.get("cpu_idle_pct"), Some(&0.0));
912        // -0.0 round-trips via f64; assert the numeric equality.
913        assert_eq!(by_name.get("count"), Some(&0.0));
914    }
915
916    /// Mixed positive + negative + zero in one document
917    /// exercises the walker's sign-agnostic branch.
918    #[test]
919    fn json_mixed_signs_and_zero_all_extract() {
920        let s = r#"{"pos": 10.0, "neg": -10.0, "zero": 0.0}"#;
921        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
922        assert_eq!(m.len(), 3);
923    }
924
925    /// An empty JSON object `{}` at the top level parses
926    /// successfully but yields no metric leaves — the walker
927    /// traverses zero children and falls through to produce an
928    /// empty Vec. No `None` return, no panic.
929    #[test]
930    fn json_empty_object_yields_no_metrics() {
931        let m = extract_metrics("{}", &OutputFormat::Json, MetricStream::Stdout).unwrap();
932        assert!(m.is_empty(), "empty object has no leaves: {m:?}");
933    }
934
935    /// An empty array at the top level likewise yields zero
936    /// metrics.
937    #[test]
938    fn json_empty_array_yields_no_metrics() {
939        let m = extract_metrics("[]", &OutputFormat::Json, MetricStream::Stdout).unwrap();
940        assert!(m.is_empty(), "empty array has no leaves: {m:?}");
941    }
942
943    /// Nested empty containers also produce no leaves — the
944    /// walker still recurses but finds nothing numeric at the
945    /// bottom. Pins the "no ghost metrics from empty containers"
946    /// invariant.
947    #[test]
948    fn json_nested_empty_containers_yield_no_metrics() {
949        let s = r#"{"outer": {"inner": {}, "also": []}}"#;
950        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
951        assert!(m.is_empty(), "nested empties emit nothing: {m:?}");
952    }
953
954    /// Empty container alongside real metrics — empties are
955    /// silent, real leaves still emit.
956    #[test]
957    fn json_empty_container_mixed_with_real_metrics() {
958        let s = r#"{"iops": 100.0, "meta": {}, "samples": []}"#;
959        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
960        assert_eq!(m.len(), 1);
961        assert_eq!(m[0].name, "iops");
962        assert_eq!(m[0].value, 100.0);
963    }
964
965    /// walk_json_leaves uses push/pop on a single
966    /// path buffer instead of per-level format!(). This test pins
967    /// the *behavior* (path output unchanged across deep nesting)
968    /// so a future refactor of the path plumbing can't silently
969    /// drop a segment or duplicate a dot.
970    #[test]
971    fn walk_json_leaves_deep_nesting_paths_are_correct() {
972        // 6 levels deep → one leaf at a.b.c.d.e.f.
973        let s = r#"{"a":{"b":{"c":{"d":{"e":{"f": 42.0}}}}}}"#;
974        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
975        assert_eq!(m.len(), 1);
976        assert_eq!(m[0].name, "a.b.c.d.e.f");
977        assert_eq!(m[0].value, 42.0);
978    }
979
980    /// Sibling keys under the same parent must see the parent
981    /// segment truncated between each child — the bug that the
982    /// push/pop refactor would hit is "path accumulates across
983    /// siblings" producing `root.a.b`, `root.a.b.c` etc. instead
984    /// of `root.a.b`, `root.a.c`.
985    #[test]
986    fn walk_json_leaves_siblings_do_not_accumulate_path() {
987        let s = r#"{"root":{"a": 1, "b": 2, "c": 3}}"#;
988        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
989        assert_eq!(m.len(), 3);
990        let names: std::collections::BTreeSet<&str> = m.iter().map(|x| x.name.as_str()).collect();
991        let expected: std::collections::BTreeSet<&str> =
992            ["root.a", "root.b", "root.c"].into_iter().collect();
993        assert_eq!(names, expected, "path must truncate between siblings");
994    }
995
996    /// Array indices use the same push/pop path: `arr.0`, `arr.1`.
997    /// Deep array-of-array-of-object combinations exercise every
998    /// code path in the walker.
999    #[test]
1000    fn walk_json_leaves_deep_array_object_interleaving() {
1001        let s = r#"{"data":[{"vals":[10.0, 20.0]},{"vals":[30.0]}]}"#;
1002        let m = extract_metrics(s, &OutputFormat::Json, MetricStream::Stdout).unwrap();
1003        let by_name: std::collections::BTreeMap<&str, f64> =
1004            m.iter().map(|x| (x.name.as_str(), x.value)).collect();
1005        assert_eq!(by_name.get("data.0.vals.0"), Some(&10.0));
1006        assert_eq!(by_name.get("data.0.vals.1"), Some(&20.0));
1007        assert_eq!(by_name.get("data.1.vals.0"), Some(&30.0));
1008        assert_eq!(by_name.len(), 3);
1009    }
1010
1011    /// Programmatically build a `serde_json::Value` nested deeper than
1012    /// [`MAX_WALK_DEPTH`] and confirm that `walk_json_leaves` returns
1013    /// without a stack overflow and without emitting metrics from
1014    /// beyond the cap. Serde_json's own parser depth limit (128 by
1015    /// default) blocks malicious JSON strings before the walker sees
1016    /// them, so a parser-bypass (direct `Value::Object` construction)
1017    /// is the only way to reach this depth — the test exercises
1018    /// exactly that path.
1019    #[test]
1020    fn walk_json_leaves_depth_cap_skips_deeply_nested_subtree() {
1021        // Build an Object nested 100 deep with a numeric leaf at the
1022        // bottom. The leaf at depth > MAX_WALK_DEPTH (64) must be
1023        // skipped by the guard. A sentinel metric with
1024        // `WALK_TRUNCATION_SENTINEL_NAME` MUST appear in the return
1025        // so callers without a tracing subscriber still observe the
1026        // truncation.
1027        let mut value = serde_json::json!({"leaf": 42.0});
1028        for _ in 0..100 {
1029            let mut m = serde_json::Map::new();
1030            m.insert("x".to_string(), value);
1031            value = serde_json::Value::Object(m);
1032        }
1033        let metrics = walk_json_leaves(&value, MetricStream::Stdout);
1034        let real_leaves: Vec<_> = metrics
1035            .iter()
1036            .filter(|m| m.name != WALK_TRUNCATION_SENTINEL_NAME)
1037            .collect();
1038        assert!(
1039            real_leaves.is_empty(),
1040            "leaf beyond MAX_WALK_DEPTH cap must not be emitted, got {real_leaves:?}"
1041        );
1042        let sentinel = metrics
1043            .iter()
1044            .find(|m| m.name == WALK_TRUNCATION_SENTINEL_NAME)
1045            .expect("truncation sentinel must be present on cap hit");
1046        assert!(
1047            sentinel.value > MAX_WALK_DEPTH as f64,
1048            "sentinel value must carry the depth at which truncation fired, got {}",
1049            sentinel.value,
1050        );
1051    }
1052
1053    /// A leaf exactly at [`MAX_WALK_DEPTH`] is still emitted — the
1054    /// cap bails BEFORE recursing past `depth > MAX_WALK_DEPTH`, so a
1055    /// leaf reached at `depth == MAX_WALK_DEPTH` is preserved.
1056    /// Boundary pair with the depth_cap_skips test above so an
1057    /// off-by-one in the guard (e.g. `>=` instead of `>`) surfaces.
1058    #[test]
1059    fn walk_json_leaves_depth_cap_boundary_leaf_preserved() {
1060        // Build Object of exactly MAX_WALK_DEPTH nesting: top-level
1061        // holds an Object, which holds an Object, ... for
1062        // MAX_WALK_DEPTH levels, with the numeric leaf at the bottom.
1063        // The leaf's path has MAX_WALK_DEPTH segments and walk() is
1064        // called at depths 0..=MAX_WALK_DEPTH — the leaf call at
1065        // depth MAX_WALK_DEPTH must pass the guard.
1066        let mut value = serde_json::Value::Number(serde_json::Number::from_f64(42.0).unwrap());
1067        for _ in 0..MAX_WALK_DEPTH {
1068            let mut m = serde_json::Map::new();
1069            m.insert("x".to_string(), value);
1070            value = serde_json::Value::Object(m);
1071        }
1072        let metrics = walk_json_leaves(&value, MetricStream::Stdout);
1073        assert_eq!(metrics.len(), 1, "boundary leaf must be preserved");
1074        assert_eq!(metrics[0].value, 42.0);
1075    }
1076
1077    /// Mixed-depth invariant: a single walk must emit every finite
1078    /// numeric leaf regardless of the depth at which it appears, so
1079    /// long as the depth is ≤ MAX_WALK_DEPTH. Mirrors real payload
1080    /// schemas (fio's `jobs[0].read.lat_ns.mean` sits at depth 5
1081    /// while `jobs[0].jobname` sits at depth 2). A single-depth
1082    /// regression — e.g. a premature `return` inside the Object arm
1083    /// — would skip the shallower siblings of a deep subtree.
1084    #[test]
1085    fn walk_json_leaves_mixed_depth_leaves_all_emitted() {
1086        let value = serde_json::json!({
1087            "shallow": 1.0,
1088            "mid": {
1089                "leaf": 2.0,
1090                "deeper": {
1091                    "still": {
1092                        "further": 3.0
1093                    }
1094                }
1095            },
1096            "also_shallow": 4.0,
1097            "deeper_sibling": {
1098                "only_child": 5.0
1099            }
1100        });
1101        let metrics = walk_json_leaves(&value, MetricStream::Stdout);
1102        let by_name: std::collections::BTreeMap<&str, f64> =
1103            metrics.iter().map(|m| (m.name.as_str(), m.value)).collect();
1104        assert_eq!(by_name.get("shallow"), Some(&1.0));
1105        assert_eq!(by_name.get("mid.leaf"), Some(&2.0));
1106        assert_eq!(by_name.get("mid.deeper.still.further"), Some(&3.0));
1107        assert_eq!(by_name.get("also_shallow"), Some(&4.0));
1108        assert_eq!(by_name.get("deeper_sibling.only_child"), Some(&5.0));
1109        assert_eq!(metrics.len(), 5, "exactly five numeric leaves expected");
1110    }
1111
1112    /// Array-chain invariant: nested arrays produce dotted-index
1113    /// paths with no stray separators. An off-by-one in the
1114    /// separator injection at :203-205 (array arm) or a swapped
1115    /// push-path/truncate order would surface as either a leading
1116    /// dot, a doubled separator, or an index segment merged into
1117    /// the previous one.
1118    #[test]
1119    fn walk_json_leaves_array_chain_paths_correct() {
1120        // `a` is a 2x2x2 array of numeric leaves; the walker must
1121        // produce paths `a.0.0.0`, `a.0.0.1`, `a.0.1.0`, …, `a.1.1.1`.
1122        let value = serde_json::json!({
1123            "a": [
1124                [[1.0, 2.0], [3.0, 4.0]],
1125                [[5.0, 6.0], [7.0, 8.0]]
1126            ]
1127        });
1128        let metrics = walk_json_leaves(&value, MetricStream::Stdout);
1129        let names: Vec<&str> = metrics.iter().map(|m| m.name.as_str()).collect();
1130        // 8 leaves in lexicographic index order.
1131        assert_eq!(names.len(), 8);
1132        assert_eq!(names[0], "a.0.0.0");
1133        assert_eq!(names[1], "a.0.0.1");
1134        assert_eq!(names[2], "a.0.1.0");
1135        assert_eq!(names[3], "a.0.1.1");
1136        assert_eq!(names[4], "a.1.0.0");
1137        assert_eq!(names[5], "a.1.0.1");
1138        assert_eq!(names[6], "a.1.1.0");
1139        assert_eq!(names[7], "a.1.1.1");
1140        // Values map 1:1 against path order — confirm no segment
1141        // got dropped or reordered.
1142        assert_eq!(metrics[0].value, 1.0);
1143        assert_eq!(metrics[7].value, 8.0);
1144    }
1145
1146    /// Null-at-boundary invariant: a `serde_json::Value::Null` leaf
1147    /// is skipped by the `_ => {}` arm and contributes nothing — no
1148    /// metric, no sentinel, no side effect — regardless of the
1149    /// depth at which it sits. Specifically pins the case where the
1150    /// null is the direct child of a depth-MAX_WALK_DEPTH container,
1151    /// ensuring the cap check fires first when the container would
1152    /// itself be above the cap rather than the null stopping
1153    /// recursion harmlessly short. A regression that treats Null
1154    /// the same as a Number would surface as a spurious leaf with
1155    /// `value = 0.0` (or a panic) on this fixture.
1156    #[test]
1157    fn walk_json_leaves_null_at_boundary_produces_no_metric() {
1158        // Build `{a: {a: {a: ... {a: null}}}}` at exactly
1159        // MAX_WALK_DEPTH nesting — the Null sits at depth
1160        // MAX_WALK_DEPTH; the walker recurses into the outer Objects
1161        // at depths 0..=MAX_WALK_DEPTH-1, sees Null at the
1162        // boundary, and falls through the `_ => {}` arm.
1163        let mut value = serde_json::Value::Null;
1164        for _ in 0..MAX_WALK_DEPTH {
1165            let mut m = serde_json::Map::new();
1166            m.insert("a".to_string(), value);
1167            value = serde_json::Value::Object(m);
1168        }
1169        let metrics = walk_json_leaves(&value, MetricStream::Stdout);
1170        assert!(
1171            metrics.is_empty(),
1172            "Null leaves must produce no metrics (and no truncation sentinel), \
1173             got {metrics:?}",
1174        );
1175    }
1176
1177    #[test]
1178    fn module_level_example_usage() {
1179        // Canonical invocation: declare a Payload with
1180        // OutputFormat::Json, feed stdout, get Vec<Metric>.
1181        const EXAMPLE_PAYLOAD: crate::test_support::Payload = crate::test_support::Payload {
1182            name: "example",
1183            kind: crate::test_support::PayloadKind::Binary("example"),
1184            output: OutputFormat::Json,
1185            default_args: &[],
1186            default_checks: &[],
1187            metrics: &[],
1188            include_files: &[],
1189            uses_parent_pgrp: false,
1190            known_flags: None,
1191        };
1192        let stdout = r#"{"throughput": 42.5}"#;
1193        let m = extract_metrics(stdout, &EXAMPLE_PAYLOAD.output, MetricStream::Stdout).unwrap();
1194        assert_eq!(m.len(), 1);
1195        assert_eq!(m[0].name, "throughput");
1196        assert_eq!(m[0].value, 42.5);
1197    }
1198}