ktstr/vmm/virtio_blk/
counters.rs

1//! Per-device host-side observability counters for virtio-block.
2//!
3//! Pure atomic counters + their `record_*` mutator helpers and `pub fn`
4//! readers. No MMIO, no FSM, no IO — split out from `device.rs` for
5//! module locality so the counter taxonomy doc and the per-helper
6//! invariants (per-event vs per-request vs gauge) sit together.
7//!
8//! See `super::drain_bracket_impl` and the per-handler `handle_*_impl`
9//! functions for the writer sites; see `VirtioBlk::counters()` for the
10//! external Arc handle the host monitor uses to read without locking
11//! the device struct.
12
13use std::sync::atomic::{AtomicU64, Ordering};
14
15// ----------------------------------------------------------------------------
16// Counters (host-side observability)
17// ----------------------------------------------------------------------------
18
19/// Per-device counters surfaced to the host monitor. All atomic so
20/// the monitor can read them without locking the device struct.
21///
22/// Mutation goes through the `record_*` helper methods, NOT direct
23/// `field.fetch_add(...)` calls. The helpers enforce the
24/// "completion + bytes" pairing for reads and writes — every
25/// `record_read(bytes)` increments both `reads_completed` AND
26/// `bytes_read` in one call. A bare `reads_completed.fetch_add(1)`
27/// without a paired `bytes_read.fetch_add(n)` would let the
28/// failure-dump renderer compute a misleading bytes-per-op
29/// average. The helpers also keep the call site one line each,
30/// matching the SPSC-style accounting common in network/block
31/// device fast paths.
32///
/// Fields are `pub(crate)`, so code outside this crate cannot touch
/// the atomics directly; inside the crate the helper-mutation rule
/// is a convention (visibility alone does not stop an in-crate
/// `field.fetch_add(...)`). External consumers reach in via the
/// per-field `pub fn` accessors below — each performs a `Relaxed`
/// load and returns the current value as `u64`.
37///
38/// # Counter taxonomy: events vs requests vs gauges
39///
40/// Counters fall into three semantic categories. Operators
41/// reading the failure-dump must understand which is which to
42/// avoid drawing wrong conclusions:
43///
44/// - **Per-event cumulative counters** (`io_errors`,
45///   `throttled_count`): bumped each time the underlying event
46///   fires, with no per-request deduplication. A single hostile
47///   request can produce multiple `io_errors` bumps if it trips
48///   several gates in sequence (see `io_errors` doc below for the
49///   double-bump scenarios). Use these to compare event rates
50///   over time, not to count requests.
51/// - **Per-request cumulative counters** (`reads_completed`,
52///   `writes_completed`, `flushes_completed`, `bytes_read`,
53///   `bytes_written`): bumped exactly once per successfully
54///   serviced request. Each surfaces a one-to-one mapping with
55///   guest-observable completions. Use these to compute
56///   throughput, average request size, and per-direction IO
57///   share.
58/// - **Per-request live gauges** (`currently_throttled_gauge`):
59///   "how many requests are RIGHT NOW in this state." Increments
60///   when a request enters the state, decrements when it exits.
61///   The cumulative event counter for the same condition lives
62///   in `throttled_count` (events, not requests). Reading
63///   `currently_throttled_gauge == 5` means 5 chains are pinned
64///   in the avail ring at this instant; `throttled_count == 100`
65///   over the same period means 100 stall events have occurred.
66///   The two answer different questions and operators MUST NOT
67///   compare or sum them.
68///
69/// # Lifetime semantics
70///
71/// Counters are **cumulative for the device's lifetime** —
72/// `VirtioBlk::reset()` does NOT zero them. A guest issuing
73/// STATUS=0 (driver re-bind) re-uses the existing counter Arc; an
74/// operator monitoring `reads_completed` etc. observes a
75/// monotonically non-decreasing series across resets. Only
76/// destruction of the device (`Drop`) reclaims the counters Arc.
77/// This matches operator expectation that failure-dump counters
78/// reflect the device's full IO history, not just the post-reset
79/// fragment.
80///
81/// Per-request live gauges (`currently_throttled_gauge`) decrement
82/// across the device's lifetime as requests exit the gauged
83/// state, but the gauge value itself is "right now," not
84/// cumulative. A reset that strands a chain in the
85/// "currently throttled" state would leak the gauge increment;
86/// the production reset path joins the worker thread before
87/// rebuilding the queue, and the worker decrements the gauge on
88/// any subsequent successful drain — but a worker that never
89/// observes a successful drain (e.g. the device is destroyed
90/// while the chain is still rolled back) leaves the increment
91/// pinned for the device's lifetime. This is acceptable because
92/// the gauge is informational and the device is going away
93/// anyway; downstream consumers must not depend on a strictly
94/// zero-on-shutdown property.
95///
96/// We diverge from virtio-v1.2 §2.1 ("device returned to its
97/// initial state") for counters because operator-side
98/// failure-dump observability requires cumulative IO history
99/// spanning the device's full lifetime, not just the post-reset
100/// fragment.
101#[derive(Debug, Default)]
102pub struct VirtioBlkCounters {
103    pub(crate) reads_completed: AtomicU64,
104    pub(crate) writes_completed: AtomicU64,
105    pub(crate) flushes_completed: AtomicU64,
106    pub(crate) bytes_read: AtomicU64,
107    pub(crate) bytes_written: AtomicU64,
108    /// Cumulative throttle-stall **events** for the device's
109    /// lifetime. Bumped each time `drain_bracket_impl` returns
110    /// `DrainOutcome::ThrottleStalled`. A single chain that
111    /// stalls, refills, stalls again, and finally completes
112    /// produces TWO `throttled_count` bumps but ONE
113    /// `reads_completed` (or `writes_completed`/etc.) bump on
114    /// final success.
115    ///
116    /// To answer "how many requests are stuck right now," read
117    /// `currently_throttled_gauge` instead — the per-event
118    /// cumulative counter and the per-request live gauge are
119    /// distinct semantics and answer different questions.
120    pub(crate) throttled_count: AtomicU64,
121    pub(crate) io_errors: AtomicU64,
122    /// Live "how many requests are currently waiting for tokens"
123    /// gauge. Incremented when a chain transitions into the
124    /// stalled state; decremented when the next successful drain
125    /// confirms the chain has been serviced.
126    ///
127    /// On a single-queue virtio-blk device the gauge is bounded
128    /// at 0 or 1 in practice — only the head-of-queue chain can
129    /// be stalled at a time, because the FIFO drain rolls back
130    /// the popped chain on stall and the next successful drain
131    /// always processes that same chain first before any newer
132    /// arrivals. A multi-queue extension would lift the bound to
133    /// "1 per queue currently stalled."
134    ///
135    /// Distinct from `throttled_count` (cumulative events): the
136    /// gauge tracks the live state, the counter tracks the
137    /// historical event rate. See the type-level "Counter
138    /// taxonomy" doc for why operators must not conflate the
139    /// two.
140    pub(crate) currently_throttled_gauge: AtomicU64,
141    /// Cumulative count of `Error::InvalidAvailRingIndex` events
142    /// observed by `drain_bracket_impl`. Bumped each time the
143    /// virtio-queue iter() rejects an avail.idx whose distance
144    /// from `next_avail` exceeds the queue size — a hostile or
145    /// buggy guest condition that, if not detected, would loop
146    /// the worker forever (the swallowed-error livelock fixed by
147    /// the queue_poisoned gate).
148    ///
149    /// Per-event counter (NOT per-request): a single drain pass
150    /// produces at most one bump (the poison flag short-circuits
151    /// further attempts on the same queue). Successive
152    /// QUEUE_NOTIFY kicks against an unresetted poisoned queue
153    /// take the early-return path and produce zero additional
154    /// bumps until the guest performs a virtio reset.
155    pub(crate) invalid_avail_idx_count: AtomicU64,
156}
157
158impl VirtioBlkCounters {
159    /// Record one completed read: bumps `reads_completed` and adds
160    /// `bytes` to `bytes_read`. The pairing is enforced — bare
161    /// reads_completed bumps without the paired bytes_read add are
162    /// caught at refactor time.
163    ///
164    /// `bytes` MUST be the count actually returned by `read_at`
165    /// summed across the request's data segments — NOT the
166    /// descriptor length. On a short read the zero-padded tail is
167    /// delivered to the guest but does not count here; see
168    /// [`Self::bytes_read`] for the rationale.
169    pub(crate) fn record_read(&self, bytes: u64) {
170        self.reads_completed.fetch_add(1, Ordering::Relaxed);
171        self.bytes_read.fetch_add(bytes, Ordering::Relaxed);
172    }
173
174    /// Record one completed write: bumps `writes_completed` and
175    /// adds `bytes` to `bytes_written`.
176    pub(crate) fn record_write(&self, bytes: u64) {
177        self.writes_completed.fetch_add(1, Ordering::Relaxed);
178        self.bytes_written.fetch_add(bytes, Ordering::Relaxed);
179    }
180
181    /// Record one completed flush.
182    pub(crate) fn record_flush(&self) {
183        self.flushes_completed.fetch_add(1, Ordering::Relaxed);
184    }
185
186    /// Bumped on every host-observed IO failure **event**, whether
187    /// the guest saw S_IOERR or not (e.g. unmapped status-byte
188    /// address that prevented the status write). Covers spec
189    /// violations, backend IO errors, malformed chains, add_used
190    /// failures, and status-write failures where the chain stays
191    /// in the avail ring (no S_IOERR ever reaches the guest, but
192    /// the host still counts the silent-stall event).
193    ///
194    /// # Events, not requests
195    ///
196    /// `io_errors` is an **events** counter, not a per-request
197    /// counter. A single hostile request can produce multiple
198    /// `io_errors` bumps if it trips several gates in sequence.
199    /// Concretely:
200    ///
201    /// - **Pre-publish gates that bump io_errors then call
202    ///   `publish_completion`**: bad header,
203    ///   header-read failure, SIZE_MAX reject, zero-data,
204    ///   sub-sector data_len, direction violation. Each of these
205    ///   records one io_errors event for the validation
206    ///   rejection. If the subsequent `publish_completion`'s
207    ///   status-byte write or `add_used` then fails (e.g. the
208    ///   guest also placed the status descriptor at unmapped
209    ///   GPA), `publish_completion` records a SECOND io_errors
210    ///   event for the silent-stall failure mode. A pathological
211    ///   chain with a malformed header AND an unmapped status
212    ///   descriptor surfaces as `io_errors += 2` for one chain.
213    /// - **Handler error paths**: `handle_read_impl` /
214    ///   `handle_write_impl` / `handle_get_id_impl` /
215    ///   `handle_flush_impl` each record io_errors on backing-file
216    ///   error or guest-memory access failure. The handler
217    ///   produces an S_IOERR status which `process_requests`
218    ///   passes to `publish_completion`. If the status-write or
219    ///   add_used then fails, `publish_completion` records a
220    ///   SECOND io_errors event for that request.
221    /// - **publish_completion's own failure modes**: status-write
222    ///   failure or add_used failure each record one io_errors
223    ///   event independently of any prior caller bump.
224    ///
225    /// The double-bump under hostile-guest scenarios is
226    /// **intentional**. Hoisting all error bumps to a single
227    /// outermost catch site would lose the "silent-stall failure
228    /// distinct from validation rejection" signal: an operator
229    /// reading io_errors needs to see a separate event each time
230    /// the device hits a failure mode, even if multiple events
231    /// happen on the same request.
232    ///
233    /// Operators who want a per-request error count must not
234    /// derive it from io_errors — they need a separate counter
235    /// (deliberately not provided here; the per-request semantic
236    /// is reachable via `reads_completed + writes_completed +
237    /// flushes_completed` for the success side, with the failure
238    /// side inferable from `total_chains_observed - success_count`
239    /// once a `total_chains_observed` counter is added).
240    ///
241    /// See also `currently_throttled_gauge` (per-request live
242    /// gauge) and `throttled_count` (per-event cumulative
243    /// counter) for the throttle-side distinction; the same
244    /// events-vs-requests split applies there.
245    pub(crate) fn record_io_error(&self) {
246        self.io_errors.fetch_add(1, Ordering::Relaxed);
247    }
248
249    /// Record one throttle-stall **event**. virtio-spec doesn't
250    /// reserve a "throttled" status code; on stall the device
251    /// rolls back the pop and arms a retry timer (see
252    /// `drain_bracket_impl` and `worker_thread_main`) — the chain
253    /// stays invisible to the guest until enough tokens refill.
254    /// Retry fires within `RETRY_TIMER_MAX_NANOS` (1 s);
255    /// pathological refill rates re-stall at the cap. The
256    /// counter is separate from `io_errors` so operators can
257    /// distinguish "real IO problem" from "throttle bucket
258    /// drained, request deferred."
259    ///
260    /// # Events, not requests
261    ///
262    /// `throttled_count` is the cumulative event rate, not the
263    /// number of stuck requests. A single chain that stalls
264    /// twice (initial stall + premature retry that re-stalls)
265    /// bumps `throttled_count` twice but represents one stuck
266    /// request. To answer "how many requests are stuck right
267    /// now," read `currently_throttled_gauge` instead.
268    pub(crate) fn record_throttled(&self) {
269        self.throttled_count.fetch_add(1, Ordering::Relaxed);
270    }
271
272    /// Increment the live "currently waiting for tokens" gauge.
273    /// Called by `drain_bracket_impl` when a chain transitions
274    /// from "running" to "stalled" — i.e. the per-worker
275    /// `currently_stalled` flag was false before this stall.
276    /// Idempotent stall observations (same chain, multiple
277    /// retries that all re-stall) MUST NOT double-increment; the
278    /// caller gates this on the per-worker flag transition.
279    pub(crate) fn record_throttle_pending_inc(&self) {
280        self.currently_throttled_gauge
281            .fetch_add(1, Ordering::Relaxed);
282    }
283
284    /// Decrement the live "currently waiting for tokens" gauge,
285    /// saturating at 0. Called by `drain_bracket_impl` when the
286    /// worker observes a successful drain after a prior stall, by
287    /// `reset_engine_inline` / `respawn_worker` on a reset that
288    /// strands a stalled chain,
289    /// and by `Drop` on device destruction while the
290    /// rollback-stalled flag is still set. The per-worker
291    /// `currently_stalled` flag gates the transition so a paired
292    /// inc precedes every dec under correct operation; the
293    /// saturating CAS exists as a defence-in-depth against any
294    /// future caller that decrements an already-zero gauge —
295    /// vanilla `fetch_sub(1)` would wrap to `u64::MAX` and the
296    /// failure-dump renderer would then surface a 17-exabyte
297    /// "currently stalled" reading.
298    pub(crate) fn record_throttle_pending_dec(&self) {
299        let _ = self.currently_throttled_gauge.fetch_update(
300            Ordering::Relaxed,
301            Ordering::Relaxed,
302            |v| v.checked_sub(1),
303        );
304    }
305
306    /// Record one observed `Error::InvalidAvailRingIndex` event
307    /// from `Queue::iter`. Called by `drain_bracket_impl` when the
308    /// avail ring's `idx` is more than `queue.size` ahead of
309    /// `next_avail` — a virtio-spec violation by the guest. The
310    /// caller also sets `BlkWorkerState::queue_poisoned` so a
311    /// single hostile-guest event produces exactly one bump,
312    /// regardless of how many subsequent kicks land before the
313    /// next reset (subsequent drains short-circuit on the poison
314    /// flag and never re-call `iter`).
315    pub(crate) fn record_invalid_avail_idx(&self) {
316        self.invalid_avail_idx_count.fetch_add(1, Ordering::Relaxed);
317    }
318
319    /// Read the cumulative count of successfully completed read
320    /// requests for this device's lifetime. Per-request counter:
321    /// bumped exactly once per successful read via
322    /// [`Self::record_read`] (paired with a `bytes_read` add).
323    /// `Relaxed` ordering matches the writer side — counters are
324    /// publish-only observability and do not establish
325    /// happens-before with other operations.
326    pub fn reads_completed(&self) -> u64 {
327        self.reads_completed.load(Ordering::Relaxed)
328    }
329
330    /// Read the cumulative count of successfully completed write
331    /// requests for this device's lifetime. Per-request counter:
332    /// bumped exactly once per successful write via
333    /// [`Self::record_write`] (paired with a `bytes_written` add).
334    pub fn writes_completed(&self) -> u64 {
335        self.writes_completed.load(Ordering::Relaxed)
336    }
337
338    /// Read the cumulative count of successfully completed flush
339    /// requests for this device's lifetime. Per-request counter:
340    /// bumped once per successful flush via
341    /// [`Self::record_flush`].
342    pub fn flushes_completed(&self) -> u64 {
343        self.flushes_completed.load(Ordering::Relaxed)
344    }
345
346    /// Read the cumulative number of bytes the device's backing
347    /// file actually returned for read requests. Per-request
348    /// counter: incremented in lockstep with `reads_completed`.
349    ///
350    /// This counts the `n` returned by each `read_at` call (i.e.
351    /// the bytes actually sourced from the backing file), NOT the
352    /// full descriptor length delivered to the guest. On a short
353    /// read at backing-file EOF, the device zero-pads the
354    /// remaining bytes of the descriptor (sparse-file semantics)
355    /// and delivers them to the guest, but those zero-pad bytes
356    /// do not count here — they were not "read" from any source.
357    /// The virtio-spec used.elem.len reported via `add_used`
358    /// includes the zero-pad (per virtio-v1.2 §2.7.7.2 it counts
359    /// bytes written to device-writable buffers); operators
360    /// comparing `bytes_read` to guest-side accounting must
361    /// account for the zero-pad gap in sparse-file scenarios.
362    pub fn bytes_read(&self) -> u64 {
363        self.bytes_read.load(Ordering::Relaxed)
364    }
365
366    /// Read the cumulative number of bytes successfully written
367    /// from guest memory to the backing file. Per-request counter:
368    /// incremented in lockstep with `writes_completed`.
369    pub fn bytes_written(&self) -> u64 {
370        self.bytes_written.load(Ordering::Relaxed)
371    }
372
373    /// Read the cumulative count of throttle-stall **events** for
374    /// this device's lifetime. Per-event counter (NOT per-request):
375    /// a single chain that stalls multiple times produces multiple
376    /// bumps. To answer "how many requests are stuck right now,"
377    /// read [`Self::currently_throttled_gauge`] instead.
378    pub fn throttled_count(&self) -> u64 {
379        self.throttled_count.load(Ordering::Relaxed)
380    }
381
382    /// Read the cumulative count of host-observed IO failure
383    /// **events**. Per-event counter (NOT per-request): a single
384    /// hostile chain can produce multiple bumps if it trips
385    /// several gates in sequence. See [`Self::record_io_error`]
386    /// for the double-bump scenarios.
387    pub fn io_errors(&self) -> u64 {
388        self.io_errors.load(Ordering::Relaxed)
389    }
390
391    /// Read the live "how many requests are currently waiting for
392    /// throttle tokens" gauge. NOT cumulative — increments when a
393    /// chain enters the stalled state, decrements when it exits.
394    /// On a single-queue device the value is bounded at 0 or 1 in
395    /// practice.
396    pub fn currently_throttled_gauge(&self) -> u64 {
397        self.currently_throttled_gauge.load(Ordering::Relaxed)
398    }
399
400    /// Read the cumulative count of `Error::InvalidAvailRingIndex`
401    /// events the device has observed. Per-event counter (NOT
402    /// per-request): the queue-poison flag short-circuits
403    /// subsequent kicks against the same hostile state, so one
404    /// guest fault produces exactly one bump regardless of how
405    /// many notifications follow before reset. A non-zero value
406    /// means the guest violated virtio-v1.2 §2.7.13.3 — the
407    /// device is in the "structurally broken queue" state and
408    /// will not service IO until the guest issues a virtio reset.
409    pub fn invalid_avail_idx_count(&self) -> u64 {
410        self.invalid_avail_idx_count.load(Ordering::Relaxed)
411    }
412
413    /// Freeze every atomic into a plain-u64 snapshot. Intended for
414    /// the host-side post-mortem path in
415    /// [`crate::vmm::VmResult`]: by the time `collect_results`
416    /// reaches the snapshot site the only sources of QUEUE_NOTIFY
417    /// kicks (vCPU threads) have joined, so the worker thread that
418    /// bumps these counters can receive no new work and the
419    /// post-AP-join cleanup phases (monitor join, bulk drain) give
420    /// it ample time to park. The relaxed loads therefore observe
421    /// the worker's final cumulative state.
422    pub fn snapshot(&self) -> VirtioBlkCountersSnapshot {
423        VirtioBlkCountersSnapshot {
424            reads_completed: self.reads_completed(),
425            writes_completed: self.writes_completed(),
426            flushes_completed: self.flushes_completed(),
427            bytes_read: self.bytes_read(),
428            bytes_written: self.bytes_written(),
429            throttled_count: self.throttled_count(),
430            io_errors: self.io_errors(),
431            currently_throttled_gauge: self.currently_throttled_gauge(),
432            invalid_avail_idx_count: self.invalid_avail_idx_count(),
433        }
434    }
435}
436
437/// Plain-u64 snapshot of `VirtioBlkCounters` taken at VM-result
438/// construction time. Mirrors every atomic field by name.
439///
440/// Decouples the public-facing [`crate::vmm::VmResult`] from the
441/// internal atomic-shared writer state — consumers see immutable
442/// owned data they can `Clone`, compare, and round-trip through
443/// serde without the `Arc<AtomicU64>` ceremony. The worker thread
444/// continues to bump the atomics via the `VirtioBlkCounters`
445/// `record_*` mutators; only the result-construction path moves
446/// to the snapshot.
447///
448/// Field semantics match the atomic source one-for-one — see
449/// `VirtioBlkCounters` for the cumulative-vs-gauge taxonomy
450/// distinguishing each field. The live-gauge field
451/// `currently_throttled_gauge` reflects the gauge state at
452/// snapshot time — typically `0` after a clean shutdown, but
453/// per the `VirtioBlkCounters` doc a worker-strand stall
454/// during teardown can leave a non-zero residual.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct VirtioBlkCountersSnapshot {
    /// Successfully completed read requests (per-request, cumulative).
    pub reads_completed: u64,
    /// Successfully completed write requests (per-request, cumulative).
    pub writes_completed: u64,
    /// Successfully completed flush requests (per-request, cumulative).
    pub flushes_completed: u64,
    /// Bytes the backing file actually returned for reads; moves in
    /// lockstep with `reads_completed`.
    pub bytes_read: u64,
    /// Bytes written from guest memory to the backing file; moves in
    /// lockstep with `writes_completed`.
    pub bytes_written: u64,
    /// Throttle-stall events (per-event, cumulative — not a count of
    /// stuck requests).
    pub throttled_count: u64,
    /// Host-observed IO failure events (per-event, cumulative — one
    /// chain may contribute more than one).
    pub io_errors: u64,
    /// Live throttle gauge as it stood when the snapshot was taken;
    /// not cumulative.
    pub currently_throttled_gauge: u64,
    /// `Error::InvalidAvailRingIndex` events (per-event, cumulative).
    pub invalid_avail_idx_count: u64,
}
467
468#[cfg(test)]
469mod tests {
470    //! Helper-level unit tests for the `record_*` mutators. These
471    //! pin per-helper invariants (paired-counter lockstep,
472    //! single-counter bumps, gauge inc/dec idempotence + saturating
473    //! decrement) directly on `VirtioBlkCounters` without crossing
474    //! the chain-parsing or worker-thread boundary. Cross-thread
475    //! atomicity and end-to-end production-path coverage live in
476    //! `tests_atomics.rs`; these helper-level tests catch regressions
477    //! to the helpers themselves before the chain-level tests would.
478    //!
479    //! Each test starts from a fresh `VirtioBlkCounters::default()`
480    //! so the pre-conditions are pinned at zero by the type
481    //! contract — no shared state leaks across tests.
482    use super::*;
483    /// Fresh counters initialise every field to zero. Pinned
484    /// here as a pre-condition for the rest of the helper tests
485    /// — they all rely on `default()` producing an
486    /// all-zero starting state. A regression that gave
487    /// `AtomicU64::new(non_zero)` to any field would surface
488    /// here before downstream tests' "increments by N" math
489    /// silently reads a stale base.
490    #[test]
491    fn default_counters_are_all_zero() {
492        let c = VirtioBlkCounters::default();
493        assert_eq!(c.reads_completed(), 0, "reads_completed must default to 0");
494        assert_eq!(
495            c.writes_completed(),
496            0,
497            "writes_completed must default to 0"
498        );
499        assert_eq!(
500            c.flushes_completed(),
501            0,
502            "flushes_completed must default to 0"
503        );
504        assert_eq!(c.bytes_read(), 0, "bytes_read must default to 0");
505        assert_eq!(c.bytes_written(), 0, "bytes_written must default to 0");
506        assert_eq!(c.throttled_count(), 0, "throttled_count must default to 0");
507        assert_eq!(c.io_errors(), 0, "io_errors must default to 0");
508        assert_eq!(
509            c.currently_throttled_gauge(),
510            0,
511            "currently_throttled_gauge must default to 0",
512        );
513        assert_eq!(
514            c.invalid_avail_idx_count(),
515            0,
516            "invalid_avail_idx_count must default to 0",
517        );
518    }
519
520    /// `record_read(bytes)` bumps BOTH `reads_completed` AND
521    /// `bytes_read` in one call. The pairing is the helper's
522    /// reason to exist — a regression that dropped the
523    /// `bytes_read.fetch_add(bytes)` line (e.g. a refactor that
524    /// inlined just the completion bump) would let the
525    /// failure-dump renderer compute a misleading bytes-per-op
526    /// average. Pin both increments side-by-side so a half-fix
527    /// can't pass.
528    ///
529    /// Also pins that `record_read` does NOT touch any other
530    /// counter — write-side counters, flushes, throttle counters,
531    /// io_errors, and the gauge must stay at zero. A regression
532    /// that copy-pasted `record_read` from `record_write` and
533    /// left the wrong field name would be caught by the
534    /// "everything else stays zero" check.
535    #[test]
536    fn record_read_bumps_completion_and_bytes_in_lockstep() {
537        let c = VirtioBlkCounters::default();
538        c.record_read(512);
539        assert_eq!(
540            c.reads_completed(),
541            1,
542            "first record_read must bump reads_completed to 1",
543        );
544        assert_eq!(
545            c.bytes_read(),
546            512,
547            "first record_read must add bytes to bytes_read",
548        );
549        // Second call: counters increment in lockstep.
550        c.record_read(1024);
551        assert_eq!(
552            c.reads_completed(),
553            2,
554            "second record_read must bump reads_completed to 2",
555        );
556        assert_eq!(
557            c.bytes_read(),
558            512 + 1024,
559            "second record_read must accumulate bytes",
560        );
561        // Untouched counters stay at zero.
562        assert_eq!(
563            c.writes_completed(),
564            0,
565            "record_read must NOT bump writes_completed",
566        );
567        assert_eq!(
568            c.bytes_written(),
569            0,
570            "record_read must NOT bump bytes_written",
571        );
572        assert_eq!(
573            c.flushes_completed(),
574            0,
575            "record_read must NOT bump flushes_completed",
576        );
577        assert_eq!(
578            c.throttled_count(),
579            0,
580            "record_read must NOT bump throttled_count",
581        );
582        assert_eq!(c.io_errors(), 0, "record_read must NOT bump io_errors");
583        assert_eq!(
584            c.currently_throttled_gauge(),
585            0,
586            "record_read must NOT touch the throttle gauge",
587        );
588        assert_eq!(
589            c.invalid_avail_idx_count(),
590            0,
591            "record_read must NOT bump invalid_avail_idx_count",
592        );
593    }
594
595    /// Zero-byte reads are valid: the helper bumps
596    /// `reads_completed` even when `bytes == 0`. The contract is
597    /// "one completion, n bytes," not "one completion conditional
598    /// on n > 0." A regression that gated the completion bump on
599    /// `bytes > 0` would mis-count completions in scenarios where
600    /// the chain returned zero data (e.g. an EOF-truncated read).
601    #[test]
602    fn record_read_zero_bytes_still_bumps_completion() {
603        let c = VirtioBlkCounters::default();
604        c.record_read(0);
605        assert_eq!(
606            c.reads_completed(),
607            1,
608            "zero-byte read must still increment reads_completed",
609        );
610        assert_eq!(
611            c.bytes_read(),
612            0,
613            "zero-byte read must leave bytes_read at 0",
614        );
615    }
616
617    /// `record_write(bytes)` bumps BOTH `writes_completed` AND
618    /// `bytes_written`, mirroring `record_read`. Same paired-
619    /// counter rationale: the failure-dump renderer's
620    /// bytes-per-write average becomes misleading if either
621    /// half is missing.
622    #[test]
623    fn record_write_bumps_completion_and_bytes_in_lockstep() {
624        let c = VirtioBlkCounters::default();
625        c.record_write(4096);
626        assert_eq!(
627            c.writes_completed(),
628            1,
629            "first record_write must bump writes_completed to 1",
630        );
631        assert_eq!(
632            c.bytes_written(),
633            4096,
634            "first record_write must add bytes to bytes_written",
635        );
636        c.record_write(8192);
637        assert_eq!(
638            c.writes_completed(),
639            2,
640            "second record_write must bump writes_completed to 2",
641        );
642        assert_eq!(
643            c.bytes_written(),
644            4096 + 8192,
645            "second record_write must accumulate bytes",
646        );
647        // Untouched counters stay at zero — pins that
648        // record_write doesn't accidentally bump read-side
649        // counters via a copy-paste regression.
650        assert_eq!(
651            c.reads_completed(),
652            0,
653            "record_write must NOT bump reads_completed",
654        );
655        assert_eq!(c.bytes_read(), 0, "record_write must NOT bump bytes_read");
656        assert_eq!(
657            c.flushes_completed(),
658            0,
659            "record_write must NOT bump flushes_completed",
660        );
661        assert_eq!(
662            c.throttled_count(),
663            0,
664            "record_write must NOT bump throttled_count",
665        );
666        assert_eq!(c.io_errors(), 0, "record_write must NOT bump io_errors");
667        assert_eq!(
668            c.currently_throttled_gauge(),
669            0,
670            "record_write must NOT touch the throttle gauge",
671        );
672        assert_eq!(
673            c.invalid_avail_idx_count(),
674            0,
675            "record_write must NOT bump invalid_avail_idx_count",
676        );
677    }
678
679    /// Zero-byte writes parallel zero-byte reads: the completion
680    /// counter advances regardless. A guest issuing a zero-data
681    /// write (chain with header + status only and no data
682    /// segments) is rejected upstream by the
683    /// classify_pre_throttle gate, but the helper itself does
684    /// not enforce a non-zero invariant — pinned here so a
685    /// future refactor that adds defensive checks at the helper
686    /// layer is a deliberate decision, not an accidental
687    /// regression of the "one completion, n bytes" contract.
688    #[test]
689    fn record_write_zero_bytes_still_bumps_completion() {
690        let c = VirtioBlkCounters::default();
691        c.record_write(0);
692        assert_eq!(
693            c.writes_completed(),
694            1,
695            "zero-byte write must still increment writes_completed",
696        );
697        assert_eq!(
698            c.bytes_written(),
699            0,
700            "zero-byte write must leave bytes_written at 0",
701        );
702    }
703
704    /// `record_flush()` bumps `flushes_completed` and ONLY
705    /// `flushes_completed`. Distinct from read/write because
706    /// flush has no associated byte count — there's no paired
707    /// counter to keep in lockstep, only a single completion.
708    /// A regression that conflated flush with write (e.g. a
709    /// refactor that routed flush through `record_write(0)`)
710    /// would surface here as `writes_completed == 1` instead of
711    /// `flushes_completed == 1`.
712    #[test]
713    fn record_flush_bumps_only_flushes_completed() {
714        let c = VirtioBlkCounters::default();
715        c.record_flush();
716        assert_eq!(
717            c.flushes_completed(),
718            1,
719            "record_flush must bump flushes_completed to 1",
720        );
721        c.record_flush();
722        c.record_flush();
723        assert_eq!(
724            c.flushes_completed(),
725            3,
726            "three record_flush calls must accumulate to 3",
727        );
728        // Every other counter stays at zero — flush has no
729        // paired bytes counter and must not splash onto any
730        // other field.
731        assert_eq!(
732            c.reads_completed(),
733            0,
734            "record_flush must NOT bump reads_completed",
735        );
736        assert_eq!(c.bytes_read(), 0, "record_flush must NOT bump bytes_read");
737        assert_eq!(
738            c.writes_completed(),
739            0,
740            "record_flush must NOT bump writes_completed",
741        );
742        assert_eq!(
743            c.bytes_written(),
744            0,
745            "record_flush must NOT bump bytes_written",
746        );
747        assert_eq!(
748            c.throttled_count(),
749            0,
750            "record_flush must NOT bump throttled_count",
751        );
752        assert_eq!(c.io_errors(), 0, "record_flush must NOT bump io_errors");
753        assert_eq!(
754            c.currently_throttled_gauge(),
755            0,
756            "record_flush must NOT touch the throttle gauge",
757        );
758        assert_eq!(
759            c.invalid_avail_idx_count(),
760            0,
761            "record_flush must NOT bump invalid_avail_idx_count",
762        );
763    }
764
765    /// `record_throttle_pending_inc()` bumps the live gauge by
766    /// exactly one per call. The helper itself is NOT idempotent
767    /// — back-to-back calls increment twice (gauge 0→1→2). The
768    /// production caller (`drain_bracket_impl`) gates each
769    /// invocation on the per-worker `currently_stalled` flag's
770    /// false→true transition; the helper relies on the caller
771    /// to enforce idempotence and faithfully bumps every time
772    /// it's invoked. Pinning this distinction matters: a
773    /// regression that pushed the flag-gate INTO the helper
774    /// would break the helper's contract with cross-cutting
775    /// callers (e.g. a future test seam that simulates back-
776    /// to-back stalls without going through the production
777    /// gate). The events-vs-requests semantic is a property of
778    /// the CALLER (which only invokes `record_throttle_pending_inc`
779    /// on transitions), NOT of the helper.
780    ///
781    /// The complementary "no double-inc on re-stall via the
782    /// production gate" invariant is pinned by
783    /// `currently_throttled_gauge_no_double_inc_on_re_stall` in
784    /// tests_atomics.rs which exercises the full
785    /// drain_bracket_impl path.
786    #[test]
787    fn record_throttle_pending_inc_increments_each_call() {
788        let c = VirtioBlkCounters::default();
789        c.record_throttle_pending_inc();
790        assert_eq!(
791            c.currently_throttled_gauge(),
792            1,
793            "first inc must bump gauge from 0 to 1",
794        );
795        // Helper is not idempotent — the production caller's
796        // currently_stalled flag prevents the second call from
797        // happening, but the helper itself does increment again
798        // when invoked.
799        c.record_throttle_pending_inc();
800        assert_eq!(
801            c.currently_throttled_gauge(),
802            2,
803            "second inc must bump gauge from 1 to 2 (helper itself \
804                 is not idempotent — caller must gate)",
805        );
806        c.record_throttle_pending_inc();
807        assert_eq!(
808            c.currently_throttled_gauge(),
809            3,
810            "third inc must bump gauge from 2 to 3",
811        );
812        // The other counters stay at zero — gauge ops must not
813        // splash onto throttled_count (events) or any other
814        // field. throttled_count is bumped by `record_throttled`,
815        // a SEPARATE helper.
816        assert_eq!(
817            c.throttled_count(),
818            0,
819            "record_throttle_pending_inc must NOT bump throttled_count \
820                 (events vs gauge are separate counters with separate helpers)",
821        );
822        assert_eq!(
823            c.reads_completed(),
824            0,
825            "record_throttle_pending_inc must NOT bump reads_completed",
826        );
827        assert_eq!(
828            c.io_errors(),
829            0,
830            "record_throttle_pending_inc must NOT bump io_errors",
831        );
832    }
833
834    /// `record_throttle_pending_dec()` decrements the gauge by
835    /// one when it is non-zero, mirror of inc.
836    #[test]
837    fn record_throttle_pending_dec_decrements_when_positive() {
838        let c = VirtioBlkCounters::default();
839        c.record_throttle_pending_inc();
840        c.record_throttle_pending_inc();
841        c.record_throttle_pending_inc();
842        assert_eq!(c.currently_throttled_gauge(), 3, "pre-cond: gauge at 3");
843        c.record_throttle_pending_dec();
844        assert_eq!(
845            c.currently_throttled_gauge(),
846            2,
847            "first dec must drop gauge from 3 to 2",
848        );
849        c.record_throttle_pending_dec();
850        assert_eq!(
851            c.currently_throttled_gauge(),
852            1,
853            "second dec must drop gauge from 2 to 1",
854        );
855        c.record_throttle_pending_dec();
856        assert_eq!(
857            c.currently_throttled_gauge(),
858            0,
859            "third dec must drop gauge from 1 to 0",
860        );
861    }
862
863    /// `record_throttle_pending_dec()` SATURATES at zero. The
864    /// implementation uses `fetch_update(|v| v.checked_sub(1))`
865    /// — if the gauge is already 0, the update returns `Err`
866    /// and the helper drops the result via `let _`. A regression
867    /// that swapped `checked_sub` for plain `fetch_sub(1)` would
868    /// wrap to `u64::MAX` and the failure-dump renderer would
869    /// surface a 17-exabyte "currently stalled" reading.
870    ///
871    /// Pin the saturating contract: dec on an already-zero gauge
872    /// MUST leave the gauge at 0, not wrap to u64::MAX.
873    #[test]
874    fn record_throttle_pending_dec_saturates_at_zero() {
875        let c = VirtioBlkCounters::default();
876        // Gauge starts at 0; multiple decs must NOT wrap.
877        c.record_throttle_pending_dec();
878        assert_eq!(
879            c.currently_throttled_gauge(),
880            0,
881            "dec on a zero gauge MUST saturate at 0, not wrap to u64::MAX \
882                 (regression: fetch_sub instead of fetch_update + checked_sub)",
883        );
884        // Repeated dec stays at 0 — the failure mode is "wraps
885        // to u64::MAX on the first underflowing dec," so multiple
886        // decs each pin that the saturate-at-zero contract holds
887        // across consecutive calls.
888        for i in 0..5 {
889            c.record_throttle_pending_dec();
890            assert_eq!(
891                c.currently_throttled_gauge(),
892                0,
893                "dec on a zero gauge must stay 0 across {} repeated calls",
894                i + 1,
895            );
896        }
897    }
898
899    /// Inc-then-dec pair returns the gauge to zero. Pins the
900    /// matching-pair invariant the production caller depends on:
901    /// every chain that stalls (inc) and later succeeds (dec)
902    /// must net to a delta of zero on the gauge. A regression
903    /// to the inc/dec arithmetic that failed to undo the inc
904    /// would surface as a non-zero residual gauge after the
905    /// pair.
906    #[test]
907    fn record_throttle_pending_inc_then_dec_nets_to_zero() {
908        let c = VirtioBlkCounters::default();
909        c.record_throttle_pending_inc();
910        c.record_throttle_pending_dec();
911        assert_eq!(
912            c.currently_throttled_gauge(),
913            0,
914            "inc-then-dec must net to 0 on the gauge",
915        );
916        // Also check N inc / N dec for N > 1 — pins that the
917        // counter-style accounting holds regardless of pair
918        // count.
919        for _ in 0..10 {
920            c.record_throttle_pending_inc();
921        }
922        assert_eq!(c.currently_throttled_gauge(), 10, "10 incs → gauge=10");
923        for _ in 0..10 {
924            c.record_throttle_pending_dec();
925        }
926        assert_eq!(
927            c.currently_throttled_gauge(),
928            0,
929            "10 incs + 10 decs must net to 0",
930        );
931    }
932
933    /// `record_io_error()` bumps `io_errors` and ONLY `io_errors`.
934    /// The events-counter contract is at the call sites (a single
935    /// hostile chain can produce multiple bumps if it trips
936    /// several gates in sequence — pinned by the doc comment on
937    /// `record_io_error`); the helper itself faithfully bumps
938    /// per call. Pin that the bump lands on the right field and
939    /// no other counter is touched: a regression that copy-pasted
940    /// the helper from `record_throttled` and left the wrong
941    /// field name would surface as `throttled_count == 1` in
942    /// place of the expected `io_errors == 1`.
943    #[test]
944    fn record_io_error_increments_only_io_errors() {
945        let c = VirtioBlkCounters::default();
946        c.record_io_error();
947        assert_eq!(
948            c.io_errors(),
949            1,
950            "first record_io_error must bump io_errors to 1",
951        );
952        c.record_io_error();
953        c.record_io_error();
954        assert_eq!(
955            c.io_errors(),
956            3,
957            "three record_io_error calls must accumulate to 3 \
958                 (events counter, no per-request dedup)",
959        );
960        // Every other counter stays at zero — io_errors must not
961        // splash onto throttled_count, gauges, or completion
962        // counters.
963        assert_eq!(
964            c.reads_completed(),
965            0,
966            "record_io_error must NOT bump reads_completed",
967        );
968        assert_eq!(
969            c.writes_completed(),
970            0,
971            "record_io_error must NOT bump writes_completed",
972        );
973        assert_eq!(
974            c.flushes_completed(),
975            0,
976            "record_io_error must NOT bump flushes_completed",
977        );
978        assert_eq!(
979            c.bytes_read(),
980            0,
981            "record_io_error must NOT bump bytes_read"
982        );
983        assert_eq!(
984            c.bytes_written(),
985            0,
986            "record_io_error must NOT bump bytes_written",
987        );
988        assert_eq!(
989            c.throttled_count(),
990            0,
991            "record_io_error must NOT bump throttled_count \
992                 (events-vs-events distinction — IO errors and \
993                 throttle stalls are separately classified)",
994        );
995        assert_eq!(
996            c.currently_throttled_gauge(),
997            0,
998            "record_io_error must NOT touch the throttle gauge",
999        );
1000        assert_eq!(
1001            c.invalid_avail_idx_count(),
1002            0,
1003            "record_io_error must NOT bump invalid_avail_idx_count",
1004        );
1005    }
1006
1007    /// `record_throttled()` bumps `throttled_count` and ONLY
1008    /// `throttled_count`. Per-event counter, not per-request:
1009    /// a single chain that stalls multiple times produces
1010    /// multiple bumps. The events-vs-requests distinction lives
1011    /// at the CALLER (drain_bracket_impl); the helper itself is
1012    /// just an unconditional bump. Pin parity with the other
1013    /// "single-counter" helpers — io_errors, flushes — so a
1014    /// copy-paste regression that wrote to the wrong field
1015    /// surfaces here.
1016    #[test]
1017    fn record_throttled_increments_only_throttled_count() {
1018        let c = VirtioBlkCounters::default();
1019        c.record_throttled();
1020        assert_eq!(
1021            c.throttled_count(),
1022            1,
1023            "first record_throttled must bump throttled_count to 1",
1024        );
1025        c.record_throttled();
1026        assert_eq!(
1027            c.throttled_count(),
1028            2,
1029            "second record_throttled must bump throttled_count to 2 \
1030                 (events counter — same chain re-stalling produces \
1031                 multiple bumps in production)",
1032        );
1033        // Crucially, the gauge is NOT touched — gauge has its own
1034        // helper (record_throttle_pending_inc/dec). A regression
1035        // that conflated the two would surface as gauge != 0.
1036        assert_eq!(
1037            c.currently_throttled_gauge(),
1038            0,
1039            "record_throttled (events counter) must NOT touch \
1040                 currently_throttled_gauge (live gauge — separate helper)",
1041        );
1042        // Other counters stay at zero.
1043        assert_eq!(c.io_errors(), 0, "record_throttled must NOT bump io_errors");
1044        assert_eq!(
1045            c.reads_completed(),
1046            0,
1047            "record_throttled must NOT bump reads_completed",
1048        );
1049        assert_eq!(
1050            c.writes_completed(),
1051            0,
1052            "record_throttled must NOT bump writes_completed",
1053        );
1054        assert_eq!(
1055            c.flushes_completed(),
1056            0,
1057            "record_throttled must NOT bump flushes_completed",
1058        );
1059        assert_eq!(
1060            c.invalid_avail_idx_count(),
1061            0,
1062            "record_throttled must NOT bump invalid_avail_idx_count",
1063        );
1064    }
1065
1066    /// `record_invalid_avail_idx()` bumps `invalid_avail_idx_count`
1067    /// and ONLY that field. Per-event counter; the production
1068    /// caller's queue-poison flag short-circuits subsequent kicks
1069    /// so one guest fault produces exactly one bump regardless of
1070    /// notification count. The helper itself is just an
1071    /// unconditional bump; the no-double-bump invariant is a
1072    /// property of the CALLER (gated on queue_poisoned), pinned
1073    /// by `inflated_avail_idx_poisons_queue_no_livelock` and
1074    /// `poisoned_queue_clears_on_reset` in tests_atomics.rs.
1075    #[test]
1076    fn record_invalid_avail_idx_increments_only_that_field() {
1077        let c = VirtioBlkCounters::default();
1078        c.record_invalid_avail_idx();
1079        assert_eq!(
1080            c.invalid_avail_idx_count(),
1081            1,
1082            "first record_invalid_avail_idx must bump counter to 1",
1083        );
1084        c.record_invalid_avail_idx();
1085        assert_eq!(
1086            c.invalid_avail_idx_count(),
1087            2,
1088            "second record_invalid_avail_idx must bump counter to 2 \
1089                 (helper itself does not enforce single-bump; the \
1090                 caller's poison gate does)",
1091        );
1092        // Every other counter stays at zero.
1093        assert_eq!(
1094            c.io_errors(),
1095            0,
1096            "record_invalid_avail_idx must NOT bump io_errors \
1097                 (separate event class — guest spec violation \
1098                 vs IO failure)",
1099        );
1100        assert_eq!(
1101            c.throttled_count(),
1102            0,
1103            "record_invalid_avail_idx must NOT bump throttled_count",
1104        );
1105        assert_eq!(
1106            c.currently_throttled_gauge(),
1107            0,
1108            "record_invalid_avail_idx must NOT touch the throttle gauge",
1109        );
1110        assert_eq!(
1111            c.reads_completed(),
1112            0,
1113            "record_invalid_avail_idx must NOT bump reads_completed",
1114        );
1115        assert_eq!(
1116            c.writes_completed(),
1117            0,
1118            "record_invalid_avail_idx must NOT bump writes_completed",
1119        );
1120        assert_eq!(
1121            c.flushes_completed(),
1122            0,
1123            "record_invalid_avail_idx must NOT bump flushes_completed",
1124        );
1125    }
1126
1127    /// Pump every counter to a DISTINCT non-zero value, then call
1128    /// `snapshot()` and assert each Snapshot field equals the
1129    /// matching source value. The distinct-per-field setup is
1130    /// load-bearing: a copy-paste swap in `snapshot()` (e.g.
1131    /// `bytes_read: self.bytes_written.load(...)`) would route the
1132    /// wrong source into the post-mortem path and the only way to
1133    /// detect cross-wiring is to give every field a unique value
1134    /// so any swap surfaces as an assert mismatch.
1135    #[test]
1136    fn snapshot_captures_every_field_independently() {
1137        let c = VirtioBlkCounters::default();
1138        c.record_read(101);
1139        c.record_write(202);
1140        c.record_write(303);
1141        c.record_flush();
1142        c.record_flush();
1143        c.record_flush();
1144        c.record_throttled();
1145        c.record_throttled();
1146        c.record_throttled();
1147        c.record_throttled();
1148        c.record_io_error();
1149        c.record_io_error();
1150        c.record_io_error();
1151        c.record_io_error();
1152        c.record_io_error();
1153        c.record_invalid_avail_idx();
1154        c.record_invalid_avail_idx();
1155        c.record_invalid_avail_idx();
1156        c.record_invalid_avail_idx();
1157        c.record_invalid_avail_idx();
1158        c.record_invalid_avail_idx();
1159        // Pump the gauge to 7 so every Snapshot field below holds a
1160        // DISTINCT value (1..=7 for counts, 101 / 505 for bytes); a
1161        // copy-paste source-field swap inside `snapshot()` (e.g.
1162        // `currently_throttled_gauge: self.reads_completed()`) would
1163        // surface as an assert mismatch because no two source fields
1164        // collide. The helper docs note the 0-or-1 invariant is
1165        // enforced at the dispatch site, not in the helper — the
1166        // helper itself accepts N consecutive calls and bumps by N.
1167        for _ in 0..7 {
1168            c.record_throttle_pending_inc();
1169        }
1170
1171        let s = c.snapshot();
1172        assert_eq!(
1173            s.reads_completed, 1,
1174            "reads_completed source-of-truth check"
1175        );
1176        assert_eq!(s.bytes_read, 101, "bytes_read source-of-truth check");
1177        assert_eq!(
1178            s.writes_completed, 2,
1179            "writes_completed source-of-truth check"
1180        );
1181        assert_eq!(
1182            s.bytes_written, 505,
1183            "bytes_written source-of-truth check (202+303)"
1184        );
1185        assert_eq!(
1186            s.flushes_completed, 3,
1187            "flushes_completed source-of-truth check"
1188        );
1189        assert_eq!(
1190            s.throttled_count, 4,
1191            "throttled_count source-of-truth check"
1192        );
1193        assert_eq!(s.io_errors, 5, "io_errors source-of-truth check");
1194        assert_eq!(
1195            s.invalid_avail_idx_count, 6,
1196            "invalid_avail_idx_count source-of-truth check"
1197        );
1198        assert_eq!(
1199            s.currently_throttled_gauge, 7,
1200            "currently_throttled_gauge source-of-truth check"
1201        );
1202    }
1203
1204    /// Pin the all-zero Default snapshot: a future field added to
1205    /// `VirtioBlkCountersSnapshot` that doesn't initialise to 0 would
1206    /// break the "fresh device reports zero activity" contract that
1207    /// VmResult readers rely on. Parity with the virtio-net side's
1208    /// `default_snapshot_is_all_zero` in `src/vmm/virtio_net/tests.rs`.
1209    #[test]
1210    fn default_snapshot_is_all_zero() {
1211        let s = VirtioBlkCountersSnapshot::default();
1212        assert_eq!(s.reads_completed, 0);
1213        assert_eq!(s.writes_completed, 0);
1214        assert_eq!(s.flushes_completed, 0);
1215        assert_eq!(s.bytes_read, 0);
1216        assert_eq!(s.bytes_written, 0);
1217        assert_eq!(s.throttled_count, 0);
1218        assert_eq!(s.io_errors, 0);
1219        assert_eq!(s.currently_throttled_gauge, 0);
1220        assert_eq!(s.invalid_avail_idx_count, 0);
1221    }
1222
1223    /// Pin the derived `PartialEq` semantic: two snapshots compare
1224    /// equal iff every field matches, and a single differing field
1225    /// is sufficient to make them compare unequal. Cheap insurance
1226    /// against a maintainer adding a non-`PartialEq` field (e.g. a
1227    /// `Mutex<T>`, `Box<dyn Any>`, or any type that fails the
1228    /// `derive(PartialEq)` requirement) without weighing the cost —
1229    /// that change would force a custom impl and silently drop the
1230    /// field from the equality check.
1231    #[test]
1232    fn snapshot_partial_eq_pins_field_equivalence() {
1233        let a = VirtioBlkCountersSnapshot {
1234            reads_completed: 5,
1235            ..Default::default()
1236        };
1237        let b = VirtioBlkCountersSnapshot {
1238            reads_completed: 5,
1239            ..Default::default()
1240        };
1241        assert_eq!(a, b);
1242        let c = VirtioBlkCountersSnapshot {
1243            reads_completed: 6,
1244            ..Default::default()
1245        };
1246        assert_ne!(a, c);
1247        let d = VirtioBlkCountersSnapshot {
1248            currently_throttled_gauge: 1,
1249            ..a.clone()
1250        };
1251        assert_ne!(a, d, "single differing field must break equality");
1252    }
1253
1254    /// Serde JSON round-trip preserves every field. Snapshot types
1255    /// land in sidecar JSON / failure-dump artifacts (the
1256    /// `#[allow(dead_code)]` doc on `VmResult` anticipates external
1257    /// readers). A field-rename or `#[serde(rename = ...)]` slip-up
1258    /// here would produce JSON the next ktstr revision can't
1259    /// deserialize, silently breaking baseline replay.
1260    #[test]
1261    fn snapshot_roundtrips_through_json() {
1262        let original = VirtioBlkCountersSnapshot {
1263            reads_completed: 11,
1264            writes_completed: 22,
1265            flushes_completed: 33,
1266            bytes_read: 4400,
1267            bytes_written: 5500,
1268            throttled_count: 66,
1269            io_errors: 77,
1270            currently_throttled_gauge: 88,
1271            invalid_avail_idx_count: 99,
1272        };
1273        let json = serde_json::to_string(&original).expect("serialize");
1274        let parsed: VirtioBlkCountersSnapshot = serde_json::from_str(&json).expect("deserialize");
1275        assert_eq!(parsed, original);
1276    }
1277
1278    /// Round-trip `u64::MAX` and other 64-bit edge values through
1279    /// JSON to guard against a future serde-json mode swap that
1280    /// silently coerces u64 to f64 (IEEE 754 double, 53-bit
1281    /// mantissa). Today serde-json stores integers as a
1282    /// discriminated `Number` (i64/u64/f64) and preserves u64::MAX
1283    /// exactly; if a future revision opts into JSON-spec-strict
1284    /// Number-as-f64 semantics, values above 2^53-1 would lose
1285    /// precision on round-trip and this test would fail before any
1286    /// downstream consumer silently sees a truncated bytes_read /
1287    /// bytes_written counter (each can legitimately reach u64::MAX
1288    /// for a long-lived high-IO device).
1289    #[test]
1290    fn snapshot_roundtrips_u64_max_precision() {
1291        let original = VirtioBlkCountersSnapshot {
1292            reads_completed: u64::MAX,
1293            writes_completed: u64::MAX - 1,
1294            flushes_completed: (1u64 << 53),
1295            bytes_read: (1u64 << 53) + 1,
1296            bytes_written: (1u64 << 60),
1297            throttled_count: u64::MAX / 2,
1298            io_errors: u64::MAX / 3,
1299            currently_throttled_gauge: u64::MAX / 5,
1300            invalid_avail_idx_count: u64::MAX / 7,
1301        };
1302        let json = serde_json::to_string(&original).expect("serialize");
1303        let parsed: VirtioBlkCountersSnapshot = serde_json::from_str(&json).expect("deserialize");
1304        assert_eq!(parsed, original);
1305        // Spot-check the most-fragile fields directly so a precision
1306        // loss surfaces with a clear u64::MAX-specific failure message
1307        // rather than a generic struct-comparison mismatch.
1308        // Every spot-check below is on a value that is NOT exactly
1309        // representable in f64 — a hypothetical regression that swaps
1310        // serde_json's Number backing to f64 would fail at least one
1311        // spot-check with a specific value mismatch. (Note: not every
1312        // bulk-struct field meets this bar — e.g. `bytes_written =
1313        // 1<<60` is exactly representable in f64 — but the bulk
1314        // struct equality at `assert_eq!(parsed, original)` above
1315        // still catches it via the precision-fragile siblings.)
1316        assert_eq!(parsed.reads_completed, u64::MAX);
1317        // The 2^53 boundary is where f64 mantissa precision runs out;
1318        // 2^53 + 1 is the smallest integer NOT exactly representable
1319        // in f64 (it rounds to 2^53), so this is the canonical canary.
1320        assert_eq!(parsed.bytes_read, (1u64 << 53) + 1);
1321        // u64::MAX / 2 = 0x7FFF_FFFF_FFFF_FFFF — far above the 2^53
1322        // mantissa boundary and not a power-of-2, so f64 backing
1323        // would round it to a different value on round-trip.
1324        assert_eq!(parsed.throttled_count, u64::MAX / 2);
1325    }
1326}