ktstr/vmm/virtio_net/
counters.rs

1//! Virtio-net cumulative event counters and their serializable
2//! snapshot, split from device.rs for module locality. Self-contained:
3//! the counter fields are AtomicU64 and the snapshot derives serde; the
4//! MAX_FRAME_SIZE / VIRTIO_NET_HDR_LEN names below appear only in doc
5//! comments referring to device.rs constants.
6use std::sync::atomic::{AtomicU64, Ordering};
7
8// ---------------------------------------------------------------------------
9// Counters (host-side observability)
10// ---------------------------------------------------------------------------
11
/// Per-device counters surfaced to the host monitor. All atomic so
/// the monitor can read them without locking the device struct.
///
/// Mirrors the [`super::super::virtio_blk::VirtioBlkCounters`] pattern:
/// `record_*` helper methods enforce field-pairing invariants, and
/// per-field `pub fn` accessors perform `Relaxed` loads. Counters are
/// cumulative for the device's lifetime — `VirtioNet::reset()` does
/// NOT zero them, so an operator monitoring `tx_packets` etc. observes
/// a monotonically non-decreasing series across guest re-binds.
///
/// # Counter taxonomy
///
/// All counters here are **per-event cumulative**. There are no
/// per-request live gauges in v0 — the loopback path is synchronous
/// (no deferred RX, no throttle) so there is no "currently waiting"
/// state to gauge. A future async backend (TAP, AF_PACKET) would add
/// a `currently_deferred_rx_gauge` mirroring virtio-blk's
/// `currently_throttled_gauge`.
#[derive(Debug, Default)]
pub struct VirtioNetCounters {
    /// Cumulative count of TX chains the device accepted from the
    /// guest, parsed cleanly, AND successfully marked used (TX-side
    /// `add_used` returned Ok). A TX chain rejected for malformed
    /// shape (short header, wrong direction) bumps `tx_chain_invalid`
    /// only. A parsed TX chain whose `add_used` then fails bumps
    /// `tx_add_used_failures` only. So `tx_packets` reflects chains
    /// the guest can actually observe as completed.
    ///
    /// Each TX chain the device accepts lands in exactly one
    /// observable outcome:
    ///
    /// - successful loopback delivery (bumps `rx_packets`);
    /// - dropped because the RX queue had no buffer (bumps
    ///   `tx_dropped_no_rx_buffer`);
    /// - dropped because the RX queue was poisoned (bumps
    ///   `tx_dropped_rx_poisoned`);
    /// - RX chain-shape rejection during loopback (bumps
    ///   `rx_chain_invalid`);
    /// - RX guest-memory `write_slice` failure during loopback (bumps
    ///   `rx_write_failed` — chain shape was fine but the
    ///   descriptor's GPA was unmapped);
    /// - RX `add_used` failure (bumps `rx_add_used_failures`);
    /// - TX-side chain-shape rejection at parse time (bumps
    ///   `tx_chain_invalid`);
    /// - TX frame larger than `MAX_FRAME_SIZE` (bumps
    ///   `tx_oversize_dropped`); or
    /// - TX `add_used` failure (bumps `tx_add_used_failures`).
    ///
    /// `tx_packets` reflects only the chains where the TX-side
    /// `add_used` actually succeeded; `tx_packets - rx_packets` is NOT
    /// a generic shortfall formula because chains lost on the TX side
    /// (`tx_chain_invalid`, `tx_oversize_dropped`,
    /// `tx_add_used_failures`) never bumped `tx_packets` in the first
    /// place.
    ///
    /// NOTE(review): an earlier wording of this list said "no TX
    /// add_used attempted" for the `tx_chain_invalid` outcome, while
    /// the `tx_chain_invalid` field doc says the chain is still
    /// marked used — confirm against device.rs which one is current.
    pub(crate) tx_packets: AtomicU64,
    /// Cumulative bytes of L2 frame data accepted from successfully
    /// completed TX chains (i.e. those that bumped `tx_packets`).
    /// Excludes the 12-byte virtio header. Paired with `tx_packets`
    /// via [`Self::record_tx_completed`].
    pub(crate) tx_bytes: AtomicU64,
    /// Cumulative count of RX chains the device successfully wrote
    /// (header + frame) AND successfully marked used (`add_used`
    /// returned Ok AND the used-ring index advanced). RX chains
    /// where `add_used` failed bump `rx_add_used_failures` only —
    /// the guest never observes the publish, so it would be wrong
    /// to count it as a delivery.
    /// Paired with `rx_bytes` via [`Self::record_rx_delivered`].
    pub(crate) rx_packets: AtomicU64,
    /// Cumulative bytes of L2 frame data successfully delivered to
    /// the guest's RX chains (i.e. paired with `rx_packets`).
    /// Excludes the 12-byte virtio header. On a chain whose RX
    /// buffer was smaller than `header + frame`, this counter
    /// reflects the actual bytes written into the descriptor minus
    /// the header — NOT the source `frame_len`. An operator sees
    /// the real bytes the guest can read, not the bytes the device
    /// intended to deliver.
    pub(crate) rx_bytes: AtomicU64,
    /// Cumulative count of successfully-captured TX frames the
    /// device could not deliver to RX because the RX queue was
    /// empty. Per-event counter; a guest that never posts RX buffers
    /// and floods TX produces one bump per dropped TX frame. The TX
    /// chain is still marked used (the guest sees TX completion via
    /// `tx_packets`); the frame never arrives on RX (no `rx_packets`
    /// bump). Distinct from `tx_chain_invalid` (TX chain shape
    /// rejected before any RX delivery was attempted).
    pub(crate) tx_dropped_no_rx_buffer: AtomicU64,
    /// Cumulative count of successfully-captured TX frames the device
    /// dropped because the RX queue was POISONED — a prior structural
    /// `Error::InvalidAvailRingIndex` left `queue_poisoned[RXQ]` set.
    /// Per-event counter. Like `tx_dropped_no_rx_buffer`, the TX chain
    /// is still marked used (the guest sees TX completion via
    /// `tx_packets`); the frame never reaches RX (no `rx_packets`
    /// bump). Distinct from `tx_dropped_no_rx_buffer` so an operator
    /// can tell "RX queue was simply empty (transient back-pressure)"
    /// from "RX queue is wedged on a guest avail-ring violation" —
    /// the latter does not clear until the guest issues a virtio
    /// reset. Bumped on BOTH RX-poison loopback outcomes: the queue
    /// was just poisoned this drain (`JustRxPoisoned`) or was already
    /// poisoned from a prior kick (`RxAlreadyPoisoned`).
    pub(crate) tx_dropped_rx_poisoned: AtomicU64,
    /// Cumulative count of TX chains rejected for malformed shape:
    /// missing header, write-only descriptor in TX (TX descriptors
    /// must be device-readable), header-read failure. The TX chain
    /// is still marked used so the guest doesn't hang on the
    /// request, but the frame is dropped without an RX delivery and
    /// neither `tx_packets` nor `rx_packets` is bumped. Per-event
    /// counter.
    pub(crate) tx_chain_invalid: AtomicU64,
    /// Cumulative count of TX chains DROPPED because the captured
    /// post-header frame data exceeded `MAX_FRAME_SIZE` (the largest
    /// L2 frame the guest's `max_mtu` permits). The TX chain is still
    /// marked used so the guest doesn't hang, but the frame is dropped
    /// — NOT truncated — and neither `tx_packets` nor `rx_packets` is
    /// bumped (mutually exclusive with `tx_packets` per chain, like
    /// `tx_chain_invalid`). Per-event counter. Distinct from
    /// `tx_chain_invalid`: the chain shape was well-formed (readable
    /// descriptors, full 12-byte header) — it was simply too large.
    /// Silently truncating an over-size frame would corrupt traffic
    /// the guest believes it transmitted intact, so the device drops
    /// the whole frame and surfaces the event here. A non-zero value
    /// means the guest emitted a frame larger than its advertised
    /// `max_mtu` allows — a guest bug or a hostile descriptor chain.
    pub(crate) tx_oversize_dropped: AtomicU64,
    /// Cumulative count of RX chains rejected for malformed shape on
    /// the loopback delivery side: read-only descriptor in RX (RX
    /// descriptors must be device-writable) or attacker-controlled
    /// `desc.addr() + take` overflow (the descriptor's address itself
    /// is malformed). The RX chain is still marked used (with
    /// `len = 0`) so the guest's network-stack equivalent of a
    /// hung-task watchdog doesn't fire on a stuck request.
    /// Per-event counter; bumped exactly once per chain rejected for
    /// shape (the `tx_dropped_no_rx_buffer` counter is NOT also
    /// bumped — they are mutually exclusive failure modes, see
    /// [`Self::record_rx_chain_invalid`]).
    ///
    /// **Distinct from [`Self::rx_write_failed`]**: a guest-memory
    /// `write_slice` failure (header or frame bytes) means the
    /// chain's SHAPE was acceptable but the GPA targeted by a
    /// device-writable descriptor isn't mapped — that bumps
    /// `rx_write_failed`, NOT this counter. Operators
    /// distinguishing "guest sent malformed RX chain" from "guest's
    /// posted RX buffer points at unmapped memory" need the two
    /// counters separated.
    pub(crate) rx_chain_invalid: AtomicU64,
    /// Cumulative count of RX chains where the chain shape was valid
    /// (every descriptor was device-writable, addresses didn't
    /// overflow) but a guest-memory `write_slice` to one of the
    /// descriptors failed — typically because the descriptor's GPA
    /// is unmapped. Either the 12-byte header `write_slice` or the
    /// frame-data `write_slice` can fail; both bump this counter.
    /// The RX chain is still marked used (with `len = 0`) so the
    /// guest doesn't hang on the request. Per-event counter;
    /// bumped exactly once per chain whose write actually failed
    /// (chain-shape rejections route to `rx_chain_invalid`
    /// instead — the two counters are mutually exclusive per
    /// chain).
    ///
    /// Distinct from `rx_chain_invalid` so an operator's failure
    /// dump can separate "guest violated the RX descriptor-direction
    /// rule" from "guest posted a buffer at an unmapped GPA". A
    /// non-zero `rx_write_failed` with `rx_chain_invalid == 0`
    /// points at GPA / page-table breakage rather than driver-side
    /// malformation; the inverse points at driver-side direction
    /// violations or address-overflow attacks.
    pub(crate) rx_write_failed: AtomicU64,
    /// Cumulative count of `add_used` failures on the TX queue. A
    /// non-zero value means the queue's used-ring address is
    /// unmapped or otherwise inaccessible — distinct from a chain-
    /// shape rejection (which uses `tx_chain_invalid`). Per-event
    /// counter. Operators monitoring `tx_add_used_failures > 0`
    /// know the queue itself is broken and the guest has not seen
    /// any TX completion since the failure started; the typical
    /// recovery path is a virtio reset (write `STATUS=0`). Distinct
    /// from `tx_chain_invalid` so an operator can tell "guest sent
    /// malformed frame" from "queue itself is broken".
    pub(crate) tx_add_used_failures: AtomicU64,
    /// Cumulative count of `add_used` failures on the RX queue. As
    /// with `tx_add_used_failures`, indicates a queue-state failure
    /// (used-ring unmapped) distinct from chain-shape rejection.
    /// Bumped on the RX side from both the malformed-chain branch
    /// and the successful-frame-write branch when the trailing
    /// `add_used` fails — both branches mean the device tried to
    /// publish a used-ring entry and the publish itself failed.
    pub(crate) rx_add_used_failures: AtomicU64,
    /// Cumulative count of `Error::InvalidAvailRingIndex` events
    /// observed across all queues. Bumped each time the
    /// virtio-queue iter() rejects an avail.idx whose distance from
    /// `next_avail` exceeds the queue size — a hostile or buggy
    /// guest condition.
    ///
    /// Per-event counter (NOT per-request): the per-queue poison
    /// flag short-circuits further attempts on the same queue, so
    /// the false→true transition produces exactly one bump per
    /// poison event. Without the flag, every QUEUE_NOTIFY kick
    /// would re-enter `iter()`, observe the same error, log via
    /// `error!()`, return None from the swallowing default impl,
    /// and re-bump this counter — three concrete problems:
    /// (a) the per-event counter taxonomy is violated (counter
    /// reflects kick rate rather than poison event rate),
    /// (b) the operator has no signal that the device is wedged
    /// (no NEEDS_RESET, no STATUS bit change), and (c) every kick
    /// floods the host log with the same error line. The poison
    /// flag fixes all three. Note: this is NOT a "livelock" —
    /// virtio-net has no enable_notification/disable_notification
    /// bracket, so each kick re-trips the error ONCE per MMIO
    /// exit, then returns. The harm is observability + log spam,
    /// not unbounded CPU consumption.
    ///
    /// Successive QUEUE_NOTIFY kicks against an unresetted
    /// poisoned queue take the entry-gate short-circuit and
    /// produce zero additional bumps until the guest performs a
    /// virtio reset.
    pub(crate) invalid_avail_idx_count: AtomicU64,
    /// Cumulative count of successful control-vq `VIRTIO_NET_CTRL_MQ` /
    /// `VQ_PAIRS_SET` commands (the device updated `curr_queue_pairs` and
    /// wrote `VIRTIO_NET_OK`). Per-event counter — observability of how many
    /// times the guest (re)activated a multiqueue pair count.
    pub(crate) ctrl_mq_set: AtomicU64,
    /// Cumulative count of control-vq chains the device could not satisfy:
    /// malformed shape (no device-writable status descriptor, too few
    /// readable command bytes), an unknown `(class, cmd)`, or a
    /// `virtqueue_pairs` outside `[1, queue_pairs]`. The device answers
    /// `VIRTIO_NET_ERR` when a writable status descriptor exists, else drops
    /// the chain (no `add_used` — the guest cannot observe a status it posted
    /// no buffer for). Per-event hostile/buggy-guest counter; the control-vq
    /// analog of `tx_chain_invalid`.
    pub(crate) ctrl_chain_invalid: AtomicU64,
    /// Cumulative count of control-vq status-write or `add_used` failures
    /// (the status byte's GPA or the used-ring address is unmapped).
    /// Queue-state breakage distinct from `ctrl_chain_invalid` (a malformed
    /// request); the control-vq analog of `tx_add_used_failures`.
    pub(crate) ctrl_add_used_failures: AtomicU64,
}
236
237impl VirtioNetCounters {
238    /// Record TX-side completion: a parsed TX chain whose
239    /// `add_used` returned Ok. Bumps `tx_packets` + `tx_bytes`.
240    /// MUST be called AFTER the TX `add_used` succeeds — calling
241    /// it before would let the counter lie if the publish fails
242    /// (the guest would never observe the completion).
243    pub(crate) fn record_tx_completed(&self, frame_bytes: u64) {
244        self.tx_packets.fetch_add(1, Ordering::Relaxed);
245        self.tx_bytes.fetch_add(frame_bytes, Ordering::Relaxed);
246    }
247
248    /// Record successful RX delivery (frame written to a guest
249    /// descriptor chain, `add_used` returned Ok). Bumps
250    /// `rx_packets` + `rx_bytes`. MUST be called AFTER the RX
251    /// `add_used` succeeds — if the publish fails, the guest never
252    /// observes the frame and the counter would lie. The byte count
253    /// is the actual L2 bytes written into the descriptor (i.e.
254    /// `bytes_written - VIRTIO_NET_HDR_LEN`), which differs from
255    /// the source `frame_len` when the guest's RX buffer was
256    /// smaller than `header + frame`.
257    pub(crate) fn record_rx_delivered(&self, frame_bytes: u64) {
258        self.rx_packets.fetch_add(1, Ordering::Relaxed);
259        self.rx_bytes.fetch_add(frame_bytes, Ordering::Relaxed);
260    }
261
262    /// Record one TX chain dropped because the RX queue is empty
263    /// (the TX-side already completed via [`Self::record_tx_completed`];
264    /// this counter records the RX-delivery failure).
265    pub(crate) fn record_tx_dropped_no_rx_buffer(&self) {
266        self.tx_dropped_no_rx_buffer.fetch_add(1, Ordering::Relaxed);
267    }
268
269    /// Record one successfully-captured TX frame dropped because the
270    /// RX queue is poisoned (the TX-side still completes via
271    /// [`Self::record_tx_completed`] when its `add_used` succeeds;
272    /// this counter records the RX-delivery failure). Distinct from
273    /// [`Self::record_tx_dropped_no_rx_buffer`] (empty RX queue,
274    /// transient) — a poisoned RX queue stays wedged until the guest
275    /// resets the device.
276    pub(crate) fn record_tx_dropped_rx_poisoned(&self) {
277        self.tx_dropped_rx_poisoned.fetch_add(1, Ordering::Relaxed);
278    }
279
280    /// Record one TX chain rejected for malformed shape (short
281    /// header, wrong direction, header-read failure). The TX chain
282    /// is marked used but neither `tx_packets` nor `rx_packets` is
283    /// bumped — this is the protocol-violation path.
284    pub(crate) fn record_tx_chain_invalid(&self) {
285        self.tx_chain_invalid.fetch_add(1, Ordering::Relaxed);
286    }
287
288    /// Record one TX chain dropped because its post-header frame data
289    /// exceeded `MAX_FRAME_SIZE`. The TX chain is marked used but
290    /// neither `tx_packets` nor `rx_packets` is bumped — the frame is
291    /// dropped, not truncated. Distinct from
292    /// [`Self::record_tx_chain_invalid`]: the chain shape was valid,
293    /// it was simply over the maximum frame size the guest's `max_mtu`
294    /// permits.
295    pub(crate) fn record_tx_oversize_dropped(&self) {
296        self.tx_oversize_dropped.fetch_add(1, Ordering::Relaxed);
297    }
298
299    /// Record one RX chain rejected for malformed shape on the
300    /// loopback delivery side (read-only descriptor or
301    /// address-overflow on the descriptor's GPA). Mutually exclusive
302    /// with [`Self::record_tx_dropped_no_rx_buffer`]: a chain is
303    /// either missing entirely (queue empty →
304    /// `tx_dropped_no_rx_buffer`) or present but shape-malformed
305    /// (this counter). Mutually exclusive PER CHAIN with
306    /// [`Self::record_rx_write_failed`]: a chain is either
307    /// shape-rejected (this counter) or write-rejected
308    /// (`rx_write_failed`); the caller routes each malformed RX
309    /// chain to exactly one of the two so the per-event counter
310    /// taxonomy stays 1:1 with chains.
311    pub(crate) fn record_rx_chain_invalid(&self) {
312        self.rx_chain_invalid.fetch_add(1, Ordering::Relaxed);
313    }
314
315    /// Record one RX chain whose shape was valid (every descriptor
316    /// was device-writable, no address overflow) but whose guest-
317    /// memory `write_slice` failed mid-walk — header or frame
318    /// bytes hit an unmapped GPA. Mutually exclusive PER CHAIN with
319    /// [`Self::record_rx_chain_invalid`]: a chain rejected for
320    /// shape NEVER also bumps this counter, and vice versa. The
321    /// caller routes via the module-scope `InvalidReason` enum (set
322    /// in `write_rx_chain`, routed in `finalize_rx`).
323    pub(crate) fn record_rx_write_failed(&self) {
324        self.rx_write_failed.fetch_add(1, Ordering::Relaxed);
325    }
326
327    /// Record one `add_used` failure on the TX queue. Distinct from
328    /// `record_tx_chain_invalid` so operators can tell queue-state
329    /// breakage from chain-shape rejection.
330    pub(crate) fn record_tx_add_used_failure(&self) {
331        self.tx_add_used_failures.fetch_add(1, Ordering::Relaxed);
332    }
333
334    /// Record one `add_used` failure on the RX queue. Distinct from
335    /// `record_rx_chain_invalid` so operators can tell queue-state
336    /// breakage from chain-shape rejection.
337    pub(crate) fn record_rx_add_used_failure(&self) {
338        self.rx_add_used_failures.fetch_add(1, Ordering::Relaxed);
339    }
340
341    /// Record one observed `Error::InvalidAvailRingIndex` event
342    /// from `Queue::iter`. Called by `process_tx_loopback` /
343    /// `pop_rx_chain` (the RX-pull phase of `try_loopback_to_rx`) /
344    /// `process_ctrl_queue` (the control vq)
345    /// when the avail ring's `idx` is more than `queue.size` ahead
346    /// of `next_avail` — a virtio-spec
347    /// violation by the guest. The caller also sets
348    /// `VirtioNet::queue_poisoned` so a single hostile-guest event
349    /// produces exactly one bump regardless of how many subsequent
350    /// kicks land before the next reset (subsequent drains
351    /// short-circuit on the poison flag and never re-call `iter`).
352    pub(crate) fn record_invalid_avail_idx(&self) {
353        self.invalid_avail_idx_count.fetch_add(1, Ordering::Relaxed);
354    }
355
356    /// Record one successful control-vq `VQ_PAIRS_SET` (the device updated
357    /// `curr_queue_pairs` and wrote `VIRTIO_NET_OK`). Bumped only after the
358    /// status write succeeds.
359    pub(crate) fn record_ctrl_mq_set(&self) {
360        self.ctrl_mq_set.fetch_add(1, Ordering::Relaxed);
361    }
362
363    /// Record one control-vq chain the device could not satisfy (malformed
364    /// shape, unknown `(class, cmd)`, or out-of-range `virtqueue_pairs`).
365    pub(crate) fn record_ctrl_chain_invalid(&self) {
366        self.ctrl_chain_invalid.fetch_add(1, Ordering::Relaxed);
367    }
368
369    /// Record one control-vq status-write or `add_used` failure (queue-state
370    /// breakage). Distinct from `record_ctrl_chain_invalid`.
371    pub(crate) fn record_ctrl_add_used_failure(&self) {
372        self.ctrl_add_used_failures.fetch_add(1, Ordering::Relaxed);
373    }
374
375    /// Read the cumulative count of TX chains successfully looped to
376    /// RX. Per-event counter: bumped exactly once per TX chain that
377    /// completed both halves of the loopback.
378    pub fn tx_packets(&self) -> u64 {
379        self.tx_packets.load(Ordering::Relaxed)
380    }
381
382    /// Read the cumulative bytes of L2 frame data successfully looped
383    /// to RX. Excludes the 12-byte virtio header.
384    pub fn tx_bytes(&self) -> u64 {
385        self.tx_bytes.load(Ordering::Relaxed)
386    }
387
388    /// Read the cumulative count of RX chains delivered to the guest.
389    /// Equal to `tx_packets()` in v0's pure-loopback mode.
390    pub fn rx_packets(&self) -> u64 {
391        self.rx_packets.load(Ordering::Relaxed)
392    }
393
394    /// Read the cumulative bytes of L2 frame data delivered to the
395    /// guest's RX chains. Excludes the 12-byte virtio header.
396    pub fn rx_bytes(&self) -> u64 {
397        self.rx_bytes.load(Ordering::Relaxed)
398    }
399
400    /// Read the cumulative count of TX chains dropped because the RX
401    /// queue had no buffer.
402    pub fn tx_dropped_no_rx_buffer(&self) -> u64 {
403        self.tx_dropped_no_rx_buffer.load(Ordering::Relaxed)
404    }
405
406    /// Read the cumulative count of successfully-captured TX frames
407    /// dropped because the RX queue was poisoned. Distinct from
408    /// [`Self::tx_dropped_no_rx_buffer`] (empty RX queue): a poisoned
409    /// RX queue is wedged until a guest virtio reset.
410    pub fn tx_dropped_rx_poisoned(&self) -> u64 {
411        self.tx_dropped_rx_poisoned.load(Ordering::Relaxed)
412    }
413
414    /// Read the cumulative count of TX chains rejected for malformed
415    /// shape (missing/short header, wrong direction, header read
416    /// failure).
417    pub fn tx_chain_invalid(&self) -> u64 {
418        self.tx_chain_invalid.load(Ordering::Relaxed)
419    }
420
421    /// Read the cumulative count of TX chains dropped for exceeding
422    /// `MAX_FRAME_SIZE`. Distinct from [`Self::tx_chain_invalid`]
423    /// (malformed shape): an over-size chain was well-formed but too
424    /// large, and is dropped rather than truncated.
425    pub fn tx_oversize_dropped(&self) -> u64 {
426        self.tx_oversize_dropped.load(Ordering::Relaxed)
427    }
428
429    /// Read the cumulative count of RX chains rejected for malformed
430    /// shape (read-only descriptor on the receive side, or
431    /// attacker-controlled address overflow on the descriptor's
432    /// GPA). Distinct from [`Self::rx_write_failed`] (chain shape
433    /// was fine but a guest-memory `write_slice` hit an unmapped
434    /// GPA mid-walk).
435    pub fn rx_chain_invalid(&self) -> u64 {
436        self.rx_chain_invalid.load(Ordering::Relaxed)
437    }
438
439    /// Read the cumulative count of RX chains whose shape was valid
440    /// but whose guest-memory `write_slice` failed mid-walk
441    /// (header or frame bytes hit an unmapped GPA). Distinct from
442    /// [`Self::rx_chain_invalid`] (chain-shape rejection); the two
443    /// are mutually exclusive per chain so an operator's failure
444    /// dump can separate "guest violated the RX descriptor-direction
445    /// rule" from "guest posted a buffer at an unmapped GPA".
446    pub fn rx_write_failed(&self) -> u64 {
447        self.rx_write_failed.load(Ordering::Relaxed)
448    }
449
450    /// Read the cumulative count of TX `add_used` failures (queue's
451    /// used-ring address unmapped or otherwise inaccessible).
452    /// Non-zero means the TX queue itself is structurally broken;
453    /// distinct from `tx_chain_invalid` (chain-shape rejection).
454    pub fn tx_add_used_failures(&self) -> u64 {
455        self.tx_add_used_failures.load(Ordering::Relaxed)
456    }
457
458    /// Read the cumulative count of RX `add_used` failures.
459    /// Non-zero means the RX queue itself is structurally broken;
460    /// distinct from `rx_chain_invalid` (chain-shape rejection).
461    pub fn rx_add_used_failures(&self) -> u64 {
462        self.rx_add_used_failures.load(Ordering::Relaxed)
463    }
464
465    /// Read the cumulative count of `Error::InvalidAvailRingIndex`
466    /// events the device has observed. Per-event counter (NOT
467    /// per-request): the queue-poison flag short-circuits subsequent
468    /// kicks against the same hostile state, so one guest fault
469    /// produces exactly one bump regardless of how many notifications
470    /// follow before reset. A non-zero value means the guest violated
471    /// virtio-v1.2 §2.7.13.3 — the device is in the "structurally
472    /// broken queue" state and will not service IO until the guest
473    /// issues a virtio reset.
474    pub fn invalid_avail_idx_count(&self) -> u64 {
475        self.invalid_avail_idx_count.load(Ordering::Relaxed)
476    }
477
478    /// Read the cumulative count of successful control-vq `VQ_PAIRS_SET`
479    /// commands.
480    pub fn ctrl_mq_set(&self) -> u64 {
481        self.ctrl_mq_set.load(Ordering::Relaxed)
482    }
483
484    /// Read the cumulative count of control-vq chains the device could not
485    /// satisfy (malformed shape, unknown command, or out-of-range pairs).
486    pub fn ctrl_chain_invalid(&self) -> u64 {
487        self.ctrl_chain_invalid.load(Ordering::Relaxed)
488    }
489
490    /// Read the cumulative count of control-vq status-write / `add_used`
491    /// failures (queue-state breakage).
492    pub fn ctrl_add_used_failures(&self) -> u64 {
493        self.ctrl_add_used_failures.load(Ordering::Relaxed)
494    }
495
496    /// Freeze every atomic into a plain-u64 snapshot for the
497    /// host-side post-mortem path in [`crate::vmm::VmResult`].
498    /// virtio-net is single-threaded — `process_tx_loopback` runs
499    /// inline on the kicking vCPU thread, so the sole writers to
500    /// these counters are the vCPUs themselves. By the time
501    /// `collect_results` reaches the snapshot site every vCPU
502    /// thread has joined, so no writer remains and the relaxed
503    /// loads observe the final cumulative state.
504    pub fn snapshot(&self) -> VirtioNetCountersSnapshot {
505        VirtioNetCountersSnapshot {
506            tx_packets: self.tx_packets(),
507            tx_bytes: self.tx_bytes(),
508            rx_packets: self.rx_packets(),
509            rx_bytes: self.rx_bytes(),
510            tx_dropped_no_rx_buffer: self.tx_dropped_no_rx_buffer(),
511            tx_dropped_rx_poisoned: self.tx_dropped_rx_poisoned(),
512            tx_chain_invalid: self.tx_chain_invalid(),
513            tx_oversize_dropped: self.tx_oversize_dropped(),
514            rx_chain_invalid: self.rx_chain_invalid(),
515            rx_write_failed: self.rx_write_failed(),
516            tx_add_used_failures: self.tx_add_used_failures(),
517            rx_add_used_failures: self.rx_add_used_failures(),
518            invalid_avail_idx_count: self.invalid_avail_idx_count(),
519            ctrl_mq_set: self.ctrl_mq_set(),
520            ctrl_chain_invalid: self.ctrl_chain_invalid(),
521            ctrl_add_used_failures: self.ctrl_add_used_failures(),
522        }
523    }
524}
525
/// Plain-u64 snapshot of `VirtioNetCounters` taken at VM-result
/// construction time. Mirrors every atomic field by name.
///
/// Decouples [`crate::vmm::VmResult`] from the internal
/// atomic-shared writer state — consumers see immutable owned
/// data they can `Clone`, compare, and round-trip through serde
/// without handling atomics. virtio-net is
/// single-threaded — the vCPU thread continues to bump the
/// atomics inline from `process_tx_loopback` via the
/// `VirtioNetCounters` `record_*` mutators; only the
/// result-construction path moves to the snapshot.
///
/// Field semantics match the atomic source one-for-one — see
/// `VirtioNetCounters` for the per-counter taxonomy. The one-line
/// summaries below are reminders only; the atomic source's field
/// docs are authoritative.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct VirtioNetCountersSnapshot {
    /// TX chains whose TX-side `add_used` succeeded.
    pub tx_packets: u64,
    /// L2 frame bytes of the chains counted in `tx_packets`
    /// (virtio header excluded).
    pub tx_bytes: u64,
    /// RX chains written and successfully marked used.
    pub rx_packets: u64,
    /// L2 frame bytes actually written for the chains counted in
    /// `rx_packets` (virtio header excluded).
    pub rx_bytes: u64,
    /// Captured TX frames dropped because the RX queue was empty.
    pub tx_dropped_no_rx_buffer: u64,
    /// Captured TX frames dropped because the RX queue was poisoned.
    pub tx_dropped_rx_poisoned: u64,
    /// TX chains rejected for malformed shape.
    pub tx_chain_invalid: u64,
    /// TX chains dropped for exceeding `MAX_FRAME_SIZE`.
    pub tx_oversize_dropped: u64,
    /// RX chains rejected for malformed shape during loopback.
    pub rx_chain_invalid: u64,
    /// RX chains with valid shape whose guest-memory write failed.
    pub rx_write_failed: u64,
    /// `add_used` failures on the TX queue.
    pub tx_add_used_failures: u64,
    /// `add_used` failures on the RX queue.
    pub rx_add_used_failures: u64,
    /// `Error::InvalidAvailRingIndex` events observed across all queues.
    pub invalid_avail_idx_count: u64,
    /// Successful control-vq `VQ_PAIRS_SET` commands.
    pub ctrl_mq_set: u64,
    /// Control-vq chains the device could not satisfy.
    pub ctrl_chain_invalid: u64,
    /// Control-vq status-write or `add_used` failures.
    pub ctrl_add_used_failures: u64,
}
559
560impl VirtioNetCountersSnapshot {
561    /// Sum per-NIC snapshots into one device-level total. Every field is a
562    /// monotonic counter, so the cross-NIC fold is a field-wise saturating sum
563    /// (saturating per the project's overflow-safe-arithmetic rule, though a
564    /// real run never approaches u64::MAX). Returns `None` for an empty
565    /// iterator, so a NIC-less run reports no counters — preserving the
566    /// single-NIC `Option<VirtioNetCountersSnapshot>` semantics of
567    /// [`crate::vmm::VmResult::virtio_net_counters`]. A multi-NIC guest's N
568    /// per-NIC snapshots aggregate here; per-NIC IRQ-delivery observability
569    /// comes from the per-CPU / per-IRQ(GSI) metrics axis, not these
570    /// device-internal loopback counters, so the cross-NIC sum loses no
571    /// capability.
572    pub(crate) fn aggregate(
573        snapshots: impl IntoIterator<Item = VirtioNetCountersSnapshot>,
574    ) -> Option<VirtioNetCountersSnapshot> {
575        let mut iter = snapshots.into_iter();
576        let mut acc = iter.next()?;
577        for s in iter {
578            acc.tx_packets = acc.tx_packets.saturating_add(s.tx_packets);
579            acc.tx_bytes = acc.tx_bytes.saturating_add(s.tx_bytes);
580            acc.rx_packets = acc.rx_packets.saturating_add(s.rx_packets);
581            acc.rx_bytes = acc.rx_bytes.saturating_add(s.rx_bytes);
582            acc.tx_dropped_no_rx_buffer = acc
583                .tx_dropped_no_rx_buffer
584                .saturating_add(s.tx_dropped_no_rx_buffer);
585            acc.tx_dropped_rx_poisoned = acc
586                .tx_dropped_rx_poisoned
587                .saturating_add(s.tx_dropped_rx_poisoned);
588            acc.tx_chain_invalid = acc.tx_chain_invalid.saturating_add(s.tx_chain_invalid);
589            acc.tx_oversize_dropped = acc
590                .tx_oversize_dropped
591                .saturating_add(s.tx_oversize_dropped);
592            acc.rx_chain_invalid = acc.rx_chain_invalid.saturating_add(s.rx_chain_invalid);
593            acc.rx_write_failed = acc.rx_write_failed.saturating_add(s.rx_write_failed);
594            acc.tx_add_used_failures = acc
595                .tx_add_used_failures
596                .saturating_add(s.tx_add_used_failures);
597            acc.rx_add_used_failures = acc
598                .rx_add_used_failures
599                .saturating_add(s.rx_add_used_failures);
600            acc.invalid_avail_idx_count = acc
601                .invalid_avail_idx_count
602                .saturating_add(s.invalid_avail_idx_count);
603            acc.ctrl_mq_set = acc.ctrl_mq_set.saturating_add(s.ctrl_mq_set);
604            acc.ctrl_chain_invalid = acc.ctrl_chain_invalid.saturating_add(s.ctrl_chain_invalid);
605            acc.ctrl_add_used_failures = acc
606                .ctrl_add_used_failures
607                .saturating_add(s.ctrl_add_used_failures);
608        }
609        Some(acc)
610    }
611}
612
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a snapshot whose 16 fields hold `base + 1` through
    /// `base + 16`, in the order written below. No two fields share a value,
    /// so a fold that reads or writes the wrong field produces a total that
    /// `aggregate_sums_every_field` can tell apart from the right one.
    fn snap(base: u64) -> VirtioNetCountersSnapshot {
        let nth = |offset: u64| base + offset;
        VirtioNetCountersSnapshot {
            tx_packets: nth(1),
            tx_bytes: nth(2),
            rx_packets: nth(3),
            rx_bytes: nth(4),
            tx_dropped_no_rx_buffer: nth(5),
            tx_dropped_rx_poisoned: nth(6),
            tx_chain_invalid: nth(7),
            tx_oversize_dropped: nth(8),
            rx_chain_invalid: nth(9),
            rx_write_failed: nth(10),
            tx_add_used_failures: nth(11),
            rx_add_used_failures: nth(12),
            invalid_avail_idx_count: nth(13),
            ctrl_mq_set: nth(14),
            ctrl_chain_invalid: nth(15),
            ctrl_add_used_failures: nth(16),
        }
    }

    /// No snapshots in means no total out.
    #[test]
    fn aggregate_empty_is_none() {
        let total = VirtioNetCountersSnapshot::aggregate(std::iter::empty());
        assert_eq!(total, None, "a NIC-less run reports no counters");
    }

    /// A lone snapshot must pass through the fold untouched.
    #[test]
    fn aggregate_single_is_identity() {
        let only = snap(1000);
        let total = VirtioNetCountersSnapshot::aggregate(std::iter::once(only.clone()));
        assert_eq!(total, Some(only), "one NIC aggregates to itself");
    }

    /// Two distinct snapshots must add up field by field.
    #[test]
    fn aggregate_sums_every_field() {
        let (lhs, rhs) = (snap(1000), snap(2000));
        // The expected total is spelled out per field and compared as a whole
        // struct, which proves all 16 fields fold (none dropped, none
        // cross-folded). It is computed first so both inputs can then be
        // moved into `aggregate` without cloning.
        let want = VirtioNetCountersSnapshot {
            tx_packets: lhs.tx_packets + rhs.tx_packets,
            tx_bytes: lhs.tx_bytes + rhs.tx_bytes,
            rx_packets: lhs.rx_packets + rhs.rx_packets,
            rx_bytes: lhs.rx_bytes + rhs.rx_bytes,
            tx_dropped_no_rx_buffer: lhs.tx_dropped_no_rx_buffer + rhs.tx_dropped_no_rx_buffer,
            tx_dropped_rx_poisoned: lhs.tx_dropped_rx_poisoned + rhs.tx_dropped_rx_poisoned,
            tx_chain_invalid: lhs.tx_chain_invalid + rhs.tx_chain_invalid,
            tx_oversize_dropped: lhs.tx_oversize_dropped + rhs.tx_oversize_dropped,
            rx_chain_invalid: lhs.rx_chain_invalid + rhs.rx_chain_invalid,
            rx_write_failed: lhs.rx_write_failed + rhs.rx_write_failed,
            tx_add_used_failures: lhs.tx_add_used_failures + rhs.tx_add_used_failures,
            rx_add_used_failures: lhs.rx_add_used_failures + rhs.rx_add_used_failures,
            invalid_avail_idx_count: lhs.invalid_avail_idx_count + rhs.invalid_avail_idx_count,
            ctrl_mq_set: lhs.ctrl_mq_set + rhs.ctrl_mq_set,
            ctrl_chain_invalid: lhs.ctrl_chain_invalid + rhs.ctrl_chain_invalid,
            ctrl_add_used_failures: lhs.ctrl_add_used_failures + rhs.ctrl_add_used_failures,
        };
        let got = VirtioNetCountersSnapshot::aggregate([lhs, rhs])
            .expect("two snapshots aggregate to Some");
        assert_eq!(got, want, "every field is a field-wise sum");
    }

    /// Fixture: a snapshot in which all 16 fields already sit at `u64::MAX`.
    fn maxed() -> VirtioNetCountersSnapshot {
        let ceiling = u64::MAX;
        VirtioNetCountersSnapshot {
            tx_packets: ceiling,
            tx_bytes: ceiling,
            rx_packets: ceiling,
            rx_bytes: ceiling,
            tx_dropped_no_rx_buffer: ceiling,
            tx_dropped_rx_poisoned: ceiling,
            tx_chain_invalid: ceiling,
            tx_oversize_dropped: ceiling,
            rx_chain_invalid: ceiling,
            rx_write_failed: ceiling,
            tx_add_used_failures: ceiling,
            rx_add_used_failures: ceiling,
            invalid_avail_idx_count: ceiling,
            ctrl_mq_set: ceiling,
            ctrl_chain_invalid: ceiling,
            ctrl_add_used_failures: ceiling,
        }
    }

    /// Overflow must clamp on every field.
    #[test]
    fn aggregate_saturates_every_field_on_overflow() {
        // A wrapped counter would read as a phantom near-zero total, so each
        // fold has to stop at u64::MAX. Start with all 16 fields at the
        // ceiling and fold in snap(0), which adds 1..=16 — at least 1 to each
        // field. Any single fold that is not saturating ends up below
        // u64::MAX (or overflows outright) and breaks the whole-struct
        // equality, so this one assertion covers all 16 folds.
        let folded = VirtioNetCountersSnapshot::aggregate([maxed(), snap(0)])
            .expect("two snapshots aggregate");
        assert_eq!(folded, maxed(), "every field must saturate at u64::MAX, not wrap");
    }
}