ktstr/vmm/virtio_net/counters.rs
1//! Virtio-net cumulative event counters and their serializable
2//! snapshot, split from device.rs for module locality. Self-contained:
3//! the counter fields are AtomicU64 and the snapshot derives serde; the
4//! MAX_FRAME_SIZE / VIRTIO_NET_HDR_LEN names below appear only in doc
5//! comments referring to device.rs constants.
6use std::sync::atomic::{AtomicU64, Ordering};
7
8// ---------------------------------------------------------------------------
9// Counters (host-side observability)
10// ---------------------------------------------------------------------------
11
/// Per-device counters surfaced to the host monitor. All atomic so
/// the monitor can read them without locking the device struct.
///
/// Mirrors the [`super::super::virtio_blk::VirtioBlkCounters`] pattern:
/// `record_*` helper methods enforce field-pairing invariants, and
/// per-field `pub fn` accessors perform `Relaxed` loads. Counters are
/// cumulative for the device's lifetime — `VirtioNet::reset()` does
/// NOT zero them, so an operator monitoring `tx_packets` etc. observes
/// a monotonically non-decreasing series across guest re-binds.
///
/// # Counter taxonomy
///
/// All counters here are **per-event cumulative**. There are no
/// per-request live gauges in v0 — the loopback path is synchronous
/// (no deferred RX, no throttle) so there is no "currently waiting"
/// state to gauge. A future async backend (TAP, AF_PACKET) would add
/// a `currently_deferred_rx_gauge` mirroring virtio-blk's
/// `currently_throttled_gauge`.
#[derive(Debug, Default)]
pub struct VirtioNetCounters {
    /// Cumulative count of TX chains the device accepted from the
    /// guest, parsed cleanly, AND successfully marked used (TX-side
    /// `add_used` returned Ok). `tx_packets` therefore reflects only
    /// chains the guest can actually observe as completed.
    ///
    /// TX-side outcomes that do NOT bump `tx_packets`:
    ///
    /// - malformed chain shape (short header, wrong direction,
    ///   header-read failure) bumps `tx_chain_invalid` only;
    /// - post-header frame data larger than `MAX_FRAME_SIZE` bumps
    ///   `tx_oversize_dropped` only;
    /// - a parsed chain whose TX `add_used` fails bumps
    ///   `tx_add_used_failures` only.
    ///
    /// Loopback (RX-side) outcomes for a captured TX frame:
    ///
    /// - successful delivery bumps `rx_packets`;
    /// - RX queue had no buffer: `tx_dropped_no_rx_buffer`;
    /// - RX queue was poisoned: `tx_dropped_rx_poisoned`;
    /// - RX chain-shape rejection: `rx_chain_invalid`;
    /// - RX guest-memory `write_slice` failure (chain shape was fine
    ///   but the descriptor's GPA was unmapped): `rx_write_failed`;
    /// - RX `add_used` failure: `rx_add_used_failures`.
    ///
    /// `tx_packets - rx_packets` is NOT a generic shortfall formula
    /// because chains lost on the TX side (`tx_chain_invalid`,
    /// `tx_oversize_dropped`, `tx_add_used_failures`) never bumped
    /// `tx_packets` in the first place.
    pub(crate) tx_packets: AtomicU64,
    /// Cumulative bytes of L2 frame data accepted from successfully
    /// completed TX chains (i.e. those that bumped `tx_packets`).
    /// Excludes the 12-byte virtio header. Paired with `tx_packets`
    /// via [`Self::record_tx_completed`].
    pub(crate) tx_bytes: AtomicU64,
    /// Cumulative count of RX chains the device successfully wrote
    /// (header + frame) AND successfully marked used (`add_used`
    /// returned Ok AND the used-ring index advanced). RX chains
    /// where `add_used` failed bump `rx_add_used_failures` only —
    /// the guest never observes the publish, so it would be wrong
    /// to count it as a delivery.
    /// Paired with `rx_bytes` via [`Self::record_rx_delivered`].
    pub(crate) rx_packets: AtomicU64,
    /// Cumulative bytes of L2 frame data successfully delivered to
    /// the guest's RX chains (i.e. paired with `rx_packets`).
    /// Excludes the 12-byte virtio header. On a chain whose RX
    /// buffer was smaller than `header + frame`, this counter
    /// reflects the actual bytes written into the descriptor minus
    /// the header — NOT the source `frame_len`. An operator sees
    /// the real bytes the guest can read, not the bytes the device
    /// intended to deliver.
    pub(crate) rx_bytes: AtomicU64,
    /// Cumulative count of successfully-captured TX frames the
    /// device could not deliver to RX because the RX queue was
    /// empty. Per-event counter; a guest that never posts RX buffers
    /// and floods TX produces one bump per dropped TX frame. The TX
    /// chain is still marked used (the guest sees TX completion via
    /// `tx_packets`); the frame never arrives on RX (no `rx_packets`
    /// bump). Distinct from `tx_chain_invalid` (TX chain shape
    /// rejected before any RX delivery was attempted).
    pub(crate) tx_dropped_no_rx_buffer: AtomicU64,
    /// Cumulative count of successfully-captured TX frames the device
    /// dropped because the RX queue was POISONED — a prior structural
    /// `Error::InvalidAvailRingIndex` left `queue_poisoned[RXQ]` set.
    /// Per-event counter. Like `tx_dropped_no_rx_buffer`, the TX chain
    /// is still marked used (the guest sees TX completion via
    /// `tx_packets`); the frame never reaches RX (no `rx_packets`
    /// bump). Distinct from `tx_dropped_no_rx_buffer` so an operator
    /// can tell "RX queue was simply empty (transient back-pressure)"
    /// from "RX queue is wedged on a guest avail-ring violation" —
    /// the latter does not clear until the guest issues a virtio
    /// reset. Bumped on BOTH RX-poison loopback outcomes: the queue
    /// was just poisoned this drain (`JustRxPoisoned`) or was already
    /// poisoned from a prior kick (`RxAlreadyPoisoned`).
    pub(crate) tx_dropped_rx_poisoned: AtomicU64,
    /// Cumulative count of TX chains rejected for malformed shape:
    /// missing header, write-only descriptor in TX (TX descriptors
    /// must be device-readable), header-read failure. The TX chain
    /// is still marked used so the guest doesn't hang on the
    /// request, but the frame is dropped without an RX delivery and
    /// neither `tx_packets` nor `rx_packets` is bumped. Per-event
    /// counter.
    pub(crate) tx_chain_invalid: AtomicU64,
    /// Cumulative count of TX chains DROPPED because the captured
    /// post-header frame data exceeded `MAX_FRAME_SIZE` (the largest
    /// L2 frame the guest's `max_mtu` permits). The TX chain is still
    /// marked used so the guest doesn't hang, but the frame is dropped
    /// — NOT truncated — and neither `tx_packets` nor `rx_packets` is
    /// bumped (mutually exclusive with `tx_packets` per chain, like
    /// `tx_chain_invalid`). Per-event counter. Distinct from
    /// `tx_chain_invalid`: the chain shape was well-formed (readable
    /// descriptors, full 12-byte header) — it was simply too large.
    /// Silently truncating an over-size frame would corrupt traffic
    /// the guest believes it transmitted intact, so the device drops
    /// the whole frame and surfaces the event here. A non-zero value
    /// means the guest emitted a frame larger than its advertised
    /// `max_mtu` allows — a guest bug or a hostile descriptor chain.
    pub(crate) tx_oversize_dropped: AtomicU64,
    /// Cumulative count of RX chains rejected for malformed shape on
    /// the loopback delivery side: read-only descriptor in RX (RX
    /// descriptors must be device-writable) or attacker-controlled
    /// `desc.addr() + take` overflow (the descriptor's address itself
    /// is malformed). The RX chain is still marked used (with
    /// `len = 0`) so the guest's network-stack equivalent of a
    /// hung-task watchdog doesn't fire on a stuck request.
    /// Per-event counter; bumped exactly once per chain rejected for
    /// shape (the `tx_dropped_no_rx_buffer` counter is NOT also
    /// bumped — they are mutually exclusive failure modes, see
    /// [`Self::record_rx_chain_invalid`]).
    ///
    /// **Distinct from [`Self::rx_write_failed`]**: a guest-memory
    /// `write_slice` failure (header or frame bytes) means the
    /// chain's SHAPE was acceptable but the GPA targeted by a
    /// device-writable descriptor isn't mapped — that bumps
    /// `rx_write_failed`, NOT this counter. Operators
    /// distinguishing "guest sent malformed RX chain" from "guest's
    /// posted RX buffer points at unmapped memory" need the two
    /// counters separated.
    pub(crate) rx_chain_invalid: AtomicU64,
    /// Cumulative count of RX chains where the chain shape was valid
    /// (every descriptor was device-writable, addresses didn't
    /// overflow) but a guest-memory `write_slice` to one of the
    /// descriptors failed — typically because the descriptor's GPA
    /// is unmapped. Either the 12-byte header `write_slice` or the
    /// frame-data `write_slice` can fail; both bump this counter.
    /// The RX chain is still marked used (with `len = 0`) so the
    /// guest doesn't hang on the request. Per-event counter;
    /// bumped exactly once per chain whose write actually failed
    /// (chain-shape rejections route to `rx_chain_invalid`
    /// instead — the two counters are mutually exclusive per
    /// chain).
    ///
    /// Distinct from `rx_chain_invalid` so an operator's failure
    /// dump can separate "guest violated the RX descriptor-direction
    /// rule" from "guest posted a buffer at an unmapped GPA". A
    /// non-zero `rx_write_failed` with `rx_chain_invalid == 0`
    /// points at GPA / page-table breakage rather than driver-side
    /// malformation; the inverse points at driver-side direction
    /// violations or address-overflow attacks.
    pub(crate) rx_write_failed: AtomicU64,
    /// Cumulative count of `add_used` failures on the TX queue. A
    /// non-zero value means the queue's used-ring address is
    /// unmapped or otherwise inaccessible — distinct from a chain-
    /// shape rejection (which uses `tx_chain_invalid`). Per-event
    /// counter. Operators monitoring `tx_add_used_failures > 0`
    /// know the queue itself is broken and the guest has not seen
    /// any TX completion since the failure started; the typical
    /// recovery path is a virtio reset (write `STATUS=0`). Distinct
    /// from `tx_chain_invalid` so an operator can tell "guest sent
    /// malformed frame" from "queue itself is broken".
    pub(crate) tx_add_used_failures: AtomicU64,
    /// Cumulative count of `add_used` failures on the RX queue. As
    /// with `tx_add_used_failures`, indicates a queue-state failure
    /// (used-ring unmapped) distinct from chain-shape rejection.
    /// Bumped on the RX side from both the malformed-chain branch
    /// and the successful-frame-write branch when the trailing
    /// `add_used` fails — both branches mean the device tried to
    /// publish a used-ring entry and the publish itself failed.
    pub(crate) rx_add_used_failures: AtomicU64,
    /// Cumulative count of `Error::InvalidAvailRingIndex` events
    /// observed across all queues. Bumped each time the
    /// virtio-queue iter() rejects an avail.idx whose distance from
    /// `next_avail` exceeds the queue size — a hostile or buggy
    /// guest condition.
    ///
    /// Per-event counter (NOT per-request): the per-queue poison
    /// flag short-circuits further attempts on the same queue, so
    /// the false→true transition produces exactly one bump per
    /// poison event. Without the flag, every QUEUE_NOTIFY kick
    /// would re-enter `iter()`, observe the same error, log via
    /// `error!()`, return None from the swallowing default impl,
    /// and re-bump this counter — three concrete problems:
    /// (a) the per-event counter taxonomy is violated (counter
    /// reflects kick rate rather than poison event rate),
    /// (b) the operator has no signal that the device is wedged
    /// (no NEEDS_RESET, no STATUS bit change), and (c) every kick
    /// floods the host log with the same error line. The poison
    /// flag fixes all three. Note: this is NOT a "livelock" —
    /// virtio-net has no enable_notification/disable_notification
    /// bracket, so each kick re-trips the error ONCE per MMIO
    /// exit, then returns. The harm is observability + log spam,
    /// not unbounded CPU consumption.
    ///
    /// Successive QUEUE_NOTIFY kicks against an unresetted
    /// poisoned queue take the entry-gate short-circuit and
    /// produce zero additional bumps until the guest performs a
    /// virtio reset.
    pub(crate) invalid_avail_idx_count: AtomicU64,
    /// Cumulative count of successful control-vq `VIRTIO_NET_CTRL_MQ` /
    /// `VQ_PAIRS_SET` commands (the device updated `curr_queue_pairs` and
    /// wrote `VIRTIO_NET_OK`). Per-event counter — observability of how many
    /// times the guest (re)activated a multiqueue pair count.
    pub(crate) ctrl_mq_set: AtomicU64,
    /// Cumulative count of control-vq chains the device could not satisfy:
    /// malformed shape (no device-writable status descriptor, too few
    /// readable command bytes), an unknown `(class, cmd)`, or a
    /// `virtqueue_pairs` outside `[1, queue_pairs]`. The device answers
    /// `VIRTIO_NET_ERR` when a writable status descriptor exists, else drops
    /// the chain (no `add_used` — the guest cannot observe a status it posted
    /// no buffer for). Per-event hostile/buggy-guest counter; the control-vq
    /// analog of `tx_chain_invalid`.
    pub(crate) ctrl_chain_invalid: AtomicU64,
    /// Cumulative count of control-vq status-write or `add_used` failures
    /// (the status byte's GPA or the used-ring address is unmapped).
    /// Queue-state breakage distinct from `ctrl_chain_invalid` (a malformed
    /// request); the control-vq analog of `tx_add_used_failures`.
    pub(crate) ctrl_add_used_failures: AtomicU64,
}
236
237impl VirtioNetCounters {
238 /// Record TX-side completion: a parsed TX chain whose
239 /// `add_used` returned Ok. Bumps `tx_packets` + `tx_bytes`.
240 /// MUST be called AFTER the TX `add_used` succeeds — calling
241 /// it before would let the counter lie if the publish fails
242 /// (the guest would never observe the completion).
243 pub(crate) fn record_tx_completed(&self, frame_bytes: u64) {
244 self.tx_packets.fetch_add(1, Ordering::Relaxed);
245 self.tx_bytes.fetch_add(frame_bytes, Ordering::Relaxed);
246 }
247
248 /// Record successful RX delivery (frame written to a guest
249 /// descriptor chain, `add_used` returned Ok). Bumps
250 /// `rx_packets` + `rx_bytes`. MUST be called AFTER the RX
251 /// `add_used` succeeds — if the publish fails, the guest never
252 /// observes the frame and the counter would lie. The byte count
253 /// is the actual L2 bytes written into the descriptor (i.e.
254 /// `bytes_written - VIRTIO_NET_HDR_LEN`), which differs from
255 /// the source `frame_len` when the guest's RX buffer was
256 /// smaller than `header + frame`.
257 pub(crate) fn record_rx_delivered(&self, frame_bytes: u64) {
258 self.rx_packets.fetch_add(1, Ordering::Relaxed);
259 self.rx_bytes.fetch_add(frame_bytes, Ordering::Relaxed);
260 }
261
262 /// Record one TX chain dropped because the RX queue is empty
263 /// (the TX-side already completed via [`Self::record_tx_completed`];
264 /// this counter records the RX-delivery failure).
265 pub(crate) fn record_tx_dropped_no_rx_buffer(&self) {
266 self.tx_dropped_no_rx_buffer.fetch_add(1, Ordering::Relaxed);
267 }
268
269 /// Record one successfully-captured TX frame dropped because the
270 /// RX queue is poisoned (the TX-side still completes via
271 /// [`Self::record_tx_completed`] when its `add_used` succeeds;
272 /// this counter records the RX-delivery failure). Distinct from
273 /// [`Self::record_tx_dropped_no_rx_buffer`] (empty RX queue,
274 /// transient) — a poisoned RX queue stays wedged until the guest
275 /// resets the device.
276 pub(crate) fn record_tx_dropped_rx_poisoned(&self) {
277 self.tx_dropped_rx_poisoned.fetch_add(1, Ordering::Relaxed);
278 }
279
280 /// Record one TX chain rejected for malformed shape (short
281 /// header, wrong direction, header-read failure). The TX chain
282 /// is marked used but neither `tx_packets` nor `rx_packets` is
283 /// bumped — this is the protocol-violation path.
284 pub(crate) fn record_tx_chain_invalid(&self) {
285 self.tx_chain_invalid.fetch_add(1, Ordering::Relaxed);
286 }
287
288 /// Record one TX chain dropped because its post-header frame data
289 /// exceeded `MAX_FRAME_SIZE`. The TX chain is marked used but
290 /// neither `tx_packets` nor `rx_packets` is bumped — the frame is
291 /// dropped, not truncated. Distinct from
292 /// [`Self::record_tx_chain_invalid`]: the chain shape was valid,
293 /// it was simply over the maximum frame size the guest's `max_mtu`
294 /// permits.
295 pub(crate) fn record_tx_oversize_dropped(&self) {
296 self.tx_oversize_dropped.fetch_add(1, Ordering::Relaxed);
297 }
298
299 /// Record one RX chain rejected for malformed shape on the
300 /// loopback delivery side (read-only descriptor or
301 /// address-overflow on the descriptor's GPA). Mutually exclusive
302 /// with [`Self::record_tx_dropped_no_rx_buffer`]: a chain is
303 /// either missing entirely (queue empty →
304 /// `tx_dropped_no_rx_buffer`) or present but shape-malformed
305 /// (this counter). Mutually exclusive PER CHAIN with
306 /// [`Self::record_rx_write_failed`]: a chain is either
307 /// shape-rejected (this counter) or write-rejected
308 /// (`rx_write_failed`); the caller routes each malformed RX
309 /// chain to exactly one of the two so the per-event counter
310 /// taxonomy stays 1:1 with chains.
311 pub(crate) fn record_rx_chain_invalid(&self) {
312 self.rx_chain_invalid.fetch_add(1, Ordering::Relaxed);
313 }
314
315 /// Record one RX chain whose shape was valid (every descriptor
316 /// was device-writable, no address overflow) but whose guest-
317 /// memory `write_slice` failed mid-walk — header or frame
318 /// bytes hit an unmapped GPA. Mutually exclusive PER CHAIN with
319 /// [`Self::record_rx_chain_invalid`]: a chain rejected for
320 /// shape NEVER also bumps this counter, and vice versa. The
321 /// caller routes via the module-scope `InvalidReason` enum (set
322 /// in `write_rx_chain`, routed in `finalize_rx`).
323 pub(crate) fn record_rx_write_failed(&self) {
324 self.rx_write_failed.fetch_add(1, Ordering::Relaxed);
325 }
326
327 /// Record one `add_used` failure on the TX queue. Distinct from
328 /// `record_tx_chain_invalid` so operators can tell queue-state
329 /// breakage from chain-shape rejection.
330 pub(crate) fn record_tx_add_used_failure(&self) {
331 self.tx_add_used_failures.fetch_add(1, Ordering::Relaxed);
332 }
333
334 /// Record one `add_used` failure on the RX queue. Distinct from
335 /// `record_rx_chain_invalid` so operators can tell queue-state
336 /// breakage from chain-shape rejection.
337 pub(crate) fn record_rx_add_used_failure(&self) {
338 self.rx_add_used_failures.fetch_add(1, Ordering::Relaxed);
339 }
340
341 /// Record one observed `Error::InvalidAvailRingIndex` event
342 /// from `Queue::iter`. Called by `process_tx_loopback` /
343 /// `pop_rx_chain` (the RX-pull phase of `try_loopback_to_rx`) /
344 /// `process_ctrl_queue` (the control vq)
345 /// when the avail ring's `idx` is more than `queue.size` ahead
346 /// of `next_avail` — a virtio-spec
347 /// violation by the guest. The caller also sets
348 /// `VirtioNet::queue_poisoned` so a single hostile-guest event
349 /// produces exactly one bump regardless of how many subsequent
350 /// kicks land before the next reset (subsequent drains
351 /// short-circuit on the poison flag and never re-call `iter`).
352 pub(crate) fn record_invalid_avail_idx(&self) {
353 self.invalid_avail_idx_count.fetch_add(1, Ordering::Relaxed);
354 }
355
356 /// Record one successful control-vq `VQ_PAIRS_SET` (the device updated
357 /// `curr_queue_pairs` and wrote `VIRTIO_NET_OK`). Bumped only after the
358 /// status write succeeds.
359 pub(crate) fn record_ctrl_mq_set(&self) {
360 self.ctrl_mq_set.fetch_add(1, Ordering::Relaxed);
361 }
362
363 /// Record one control-vq chain the device could not satisfy (malformed
364 /// shape, unknown `(class, cmd)`, or out-of-range `virtqueue_pairs`).
365 pub(crate) fn record_ctrl_chain_invalid(&self) {
366 self.ctrl_chain_invalid.fetch_add(1, Ordering::Relaxed);
367 }
368
369 /// Record one control-vq status-write or `add_used` failure (queue-state
370 /// breakage). Distinct from `record_ctrl_chain_invalid`.
371 pub(crate) fn record_ctrl_add_used_failure(&self) {
372 self.ctrl_add_used_failures.fetch_add(1, Ordering::Relaxed);
373 }
374
375 /// Read the cumulative count of TX chains successfully looped to
376 /// RX. Per-event counter: bumped exactly once per TX chain that
377 /// completed both halves of the loopback.
378 pub fn tx_packets(&self) -> u64 {
379 self.tx_packets.load(Ordering::Relaxed)
380 }
381
382 /// Read the cumulative bytes of L2 frame data successfully looped
383 /// to RX. Excludes the 12-byte virtio header.
384 pub fn tx_bytes(&self) -> u64 {
385 self.tx_bytes.load(Ordering::Relaxed)
386 }
387
388 /// Read the cumulative count of RX chains delivered to the guest.
389 /// Equal to `tx_packets()` in v0's pure-loopback mode.
390 pub fn rx_packets(&self) -> u64 {
391 self.rx_packets.load(Ordering::Relaxed)
392 }
393
394 /// Read the cumulative bytes of L2 frame data delivered to the
395 /// guest's RX chains. Excludes the 12-byte virtio header.
396 pub fn rx_bytes(&self) -> u64 {
397 self.rx_bytes.load(Ordering::Relaxed)
398 }
399
400 /// Read the cumulative count of TX chains dropped because the RX
401 /// queue had no buffer.
402 pub fn tx_dropped_no_rx_buffer(&self) -> u64 {
403 self.tx_dropped_no_rx_buffer.load(Ordering::Relaxed)
404 }
405
406 /// Read the cumulative count of successfully-captured TX frames
407 /// dropped because the RX queue was poisoned. Distinct from
408 /// [`Self::tx_dropped_no_rx_buffer`] (empty RX queue): a poisoned
409 /// RX queue is wedged until a guest virtio reset.
410 pub fn tx_dropped_rx_poisoned(&self) -> u64 {
411 self.tx_dropped_rx_poisoned.load(Ordering::Relaxed)
412 }
413
414 /// Read the cumulative count of TX chains rejected for malformed
415 /// shape (missing/short header, wrong direction, header read
416 /// failure).
417 pub fn tx_chain_invalid(&self) -> u64 {
418 self.tx_chain_invalid.load(Ordering::Relaxed)
419 }
420
421 /// Read the cumulative count of TX chains dropped for exceeding
422 /// `MAX_FRAME_SIZE`. Distinct from [`Self::tx_chain_invalid`]
423 /// (malformed shape): an over-size chain was well-formed but too
424 /// large, and is dropped rather than truncated.
425 pub fn tx_oversize_dropped(&self) -> u64 {
426 self.tx_oversize_dropped.load(Ordering::Relaxed)
427 }
428
429 /// Read the cumulative count of RX chains rejected for malformed
430 /// shape (read-only descriptor on the receive side, or
431 /// attacker-controlled address overflow on the descriptor's
432 /// GPA). Distinct from [`Self::rx_write_failed`] (chain shape
433 /// was fine but a guest-memory `write_slice` hit an unmapped
434 /// GPA mid-walk).
435 pub fn rx_chain_invalid(&self) -> u64 {
436 self.rx_chain_invalid.load(Ordering::Relaxed)
437 }
438
439 /// Read the cumulative count of RX chains whose shape was valid
440 /// but whose guest-memory `write_slice` failed mid-walk
441 /// (header or frame bytes hit an unmapped GPA). Distinct from
442 /// [`Self::rx_chain_invalid`] (chain-shape rejection); the two
443 /// are mutually exclusive per chain so an operator's failure
444 /// dump can separate "guest violated the RX descriptor-direction
445 /// rule" from "guest posted a buffer at an unmapped GPA".
446 pub fn rx_write_failed(&self) -> u64 {
447 self.rx_write_failed.load(Ordering::Relaxed)
448 }
449
450 /// Read the cumulative count of TX `add_used` failures (queue's
451 /// used-ring address unmapped or otherwise inaccessible).
452 /// Non-zero means the TX queue itself is structurally broken;
453 /// distinct from `tx_chain_invalid` (chain-shape rejection).
454 pub fn tx_add_used_failures(&self) -> u64 {
455 self.tx_add_used_failures.load(Ordering::Relaxed)
456 }
457
458 /// Read the cumulative count of RX `add_used` failures.
459 /// Non-zero means the RX queue itself is structurally broken;
460 /// distinct from `rx_chain_invalid` (chain-shape rejection).
461 pub fn rx_add_used_failures(&self) -> u64 {
462 self.rx_add_used_failures.load(Ordering::Relaxed)
463 }
464
465 /// Read the cumulative count of `Error::InvalidAvailRingIndex`
466 /// events the device has observed. Per-event counter (NOT
467 /// per-request): the queue-poison flag short-circuits subsequent
468 /// kicks against the same hostile state, so one guest fault
469 /// produces exactly one bump regardless of how many notifications
470 /// follow before reset. A non-zero value means the guest violated
471 /// virtio-v1.2 §2.7.13.3 — the device is in the "structurally
472 /// broken queue" state and will not service IO until the guest
473 /// issues a virtio reset.
474 pub fn invalid_avail_idx_count(&self) -> u64 {
475 self.invalid_avail_idx_count.load(Ordering::Relaxed)
476 }
477
478 /// Read the cumulative count of successful control-vq `VQ_PAIRS_SET`
479 /// commands.
480 pub fn ctrl_mq_set(&self) -> u64 {
481 self.ctrl_mq_set.load(Ordering::Relaxed)
482 }
483
484 /// Read the cumulative count of control-vq chains the device could not
485 /// satisfy (malformed shape, unknown command, or out-of-range pairs).
486 pub fn ctrl_chain_invalid(&self) -> u64 {
487 self.ctrl_chain_invalid.load(Ordering::Relaxed)
488 }
489
490 /// Read the cumulative count of control-vq status-write / `add_used`
491 /// failures (queue-state breakage).
492 pub fn ctrl_add_used_failures(&self) -> u64 {
493 self.ctrl_add_used_failures.load(Ordering::Relaxed)
494 }
495
496 /// Freeze every atomic into a plain-u64 snapshot for the
497 /// host-side post-mortem path in [`crate::vmm::VmResult`].
498 /// virtio-net is single-threaded — `process_tx_loopback` runs
499 /// inline on the kicking vCPU thread, so the sole writers to
500 /// these counters are the vCPUs themselves. By the time
501 /// `collect_results` reaches the snapshot site every vCPU
502 /// thread has joined, so no writer remains and the relaxed
503 /// loads observe the final cumulative state.
504 pub fn snapshot(&self) -> VirtioNetCountersSnapshot {
505 VirtioNetCountersSnapshot {
506 tx_packets: self.tx_packets(),
507 tx_bytes: self.tx_bytes(),
508 rx_packets: self.rx_packets(),
509 rx_bytes: self.rx_bytes(),
510 tx_dropped_no_rx_buffer: self.tx_dropped_no_rx_buffer(),
511 tx_dropped_rx_poisoned: self.tx_dropped_rx_poisoned(),
512 tx_chain_invalid: self.tx_chain_invalid(),
513 tx_oversize_dropped: self.tx_oversize_dropped(),
514 rx_chain_invalid: self.rx_chain_invalid(),
515 rx_write_failed: self.rx_write_failed(),
516 tx_add_used_failures: self.tx_add_used_failures(),
517 rx_add_used_failures: self.rx_add_used_failures(),
518 invalid_avail_idx_count: self.invalid_avail_idx_count(),
519 ctrl_mq_set: self.ctrl_mq_set(),
520 ctrl_chain_invalid: self.ctrl_chain_invalid(),
521 ctrl_add_used_failures: self.ctrl_add_used_failures(),
522 }
523 }
524}
525
526/// Plain-u64 snapshot of `VirtioNetCounters` taken at VM-result
527/// construction time. Mirrors every atomic field by name.
528///
529/// Decouples [`crate::vmm::VmResult`] from the internal
530/// atomic-shared writer state — consumers see immutable owned
531/// data they can `Clone`, compare, and round-trip through serde
532/// without the `Arc<AtomicU64>` ceremony. virtio-net is
533/// single-threaded — the vCPU thread continues to bump the
534/// atomics inline from `process_tx_loopback` via the
535/// `VirtioNetCounters` `record_*` mutators; only the
536/// result-construction path moves to the snapshot.
537///
538/// Field semantics match the atomic source one-for-one — see
539/// `VirtioNetCounters` for the per-counter taxonomy.
540#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
541pub struct VirtioNetCountersSnapshot {
542 pub tx_packets: u64,
543 pub tx_bytes: u64,
544 pub rx_packets: u64,
545 pub rx_bytes: u64,
546 pub tx_dropped_no_rx_buffer: u64,
547 pub tx_dropped_rx_poisoned: u64,
548 pub tx_chain_invalid: u64,
549 pub tx_oversize_dropped: u64,
550 pub rx_chain_invalid: u64,
551 pub rx_write_failed: u64,
552 pub tx_add_used_failures: u64,
553 pub rx_add_used_failures: u64,
554 pub invalid_avail_idx_count: u64,
555 pub ctrl_mq_set: u64,
556 pub ctrl_chain_invalid: u64,
557 pub ctrl_add_used_failures: u64,
558}
559
560impl VirtioNetCountersSnapshot {
561 /// Sum per-NIC snapshots into one device-level total. Every field is a
562 /// monotonic counter, so the cross-NIC fold is a field-wise saturating sum
563 /// (saturating per the project's overflow-safe-arithmetic rule, though a
564 /// real run never approaches u64::MAX). Returns `None` for an empty
565 /// iterator, so a NIC-less run reports no counters — preserving the
566 /// single-NIC `Option<VirtioNetCountersSnapshot>` semantics of
567 /// [`crate::vmm::VmResult::virtio_net_counters`]. A multi-NIC guest's N
568 /// per-NIC snapshots aggregate here; per-NIC IRQ-delivery observability
569 /// comes from the per-CPU / per-IRQ(GSI) metrics axis, not these
570 /// device-internal loopback counters, so the cross-NIC sum loses no
571 /// capability.
572 pub(crate) fn aggregate(
573 snapshots: impl IntoIterator<Item = VirtioNetCountersSnapshot>,
574 ) -> Option<VirtioNetCountersSnapshot> {
575 let mut iter = snapshots.into_iter();
576 let mut acc = iter.next()?;
577 for s in iter {
578 acc.tx_packets = acc.tx_packets.saturating_add(s.tx_packets);
579 acc.tx_bytes = acc.tx_bytes.saturating_add(s.tx_bytes);
580 acc.rx_packets = acc.rx_packets.saturating_add(s.rx_packets);
581 acc.rx_bytes = acc.rx_bytes.saturating_add(s.rx_bytes);
582 acc.tx_dropped_no_rx_buffer = acc
583 .tx_dropped_no_rx_buffer
584 .saturating_add(s.tx_dropped_no_rx_buffer);
585 acc.tx_dropped_rx_poisoned = acc
586 .tx_dropped_rx_poisoned
587 .saturating_add(s.tx_dropped_rx_poisoned);
588 acc.tx_chain_invalid = acc.tx_chain_invalid.saturating_add(s.tx_chain_invalid);
589 acc.tx_oversize_dropped = acc
590 .tx_oversize_dropped
591 .saturating_add(s.tx_oversize_dropped);
592 acc.rx_chain_invalid = acc.rx_chain_invalid.saturating_add(s.rx_chain_invalid);
593 acc.rx_write_failed = acc.rx_write_failed.saturating_add(s.rx_write_failed);
594 acc.tx_add_used_failures = acc
595 .tx_add_used_failures
596 .saturating_add(s.tx_add_used_failures);
597 acc.rx_add_used_failures = acc
598 .rx_add_used_failures
599 .saturating_add(s.rx_add_used_failures);
600 acc.invalid_avail_idx_count = acc
601 .invalid_avail_idx_count
602 .saturating_add(s.invalid_avail_idx_count);
603 acc.ctrl_mq_set = acc.ctrl_mq_set.saturating_add(s.ctrl_mq_set);
604 acc.ctrl_chain_invalid = acc.ctrl_chain_invalid.saturating_add(s.ctrl_chain_invalid);
605 acc.ctrl_add_used_failures = acc
606 .ctrl_add_used_failures
607 .saturating_add(s.ctrl_add_used_failures);
608 }
609 Some(acc)
610 }
611}
612
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a snapshot by evaluating `value_at` at each field's 1-based
    /// declaration slot (`tx_packets` = 1 … `ctrl_add_used_failures` = 16).
    /// Single source of truth for the slot ↔ field mapping shared by every
    /// fixture in this module.
    fn from_slots(value_at: impl Fn(u64) -> u64) -> VirtioNetCountersSnapshot {
        VirtioNetCountersSnapshot {
            tx_packets: value_at(1),
            tx_bytes: value_at(2),
            rx_packets: value_at(3),
            rx_bytes: value_at(4),
            tx_dropped_no_rx_buffer: value_at(5),
            tx_dropped_rx_poisoned: value_at(6),
            tx_chain_invalid: value_at(7),
            tx_oversize_dropped: value_at(8),
            rx_chain_invalid: value_at(9),
            rx_write_failed: value_at(10),
            tx_add_used_failures: value_at(11),
            rx_add_used_failures: value_at(12),
            invalid_avail_idx_count: value_at(13),
            ctrl_mq_set: value_at(14),
            ctrl_chain_invalid: value_at(15),
            ctrl_add_used_failures: value_at(16),
        }
    }

    /// A snapshot whose 16 fields each carry a DISTINCT offset from `base`,
    /// so a copy-paste misfold (summing the wrong field) yields a detectably
    /// wrong total in `aggregate_sums_every_field`.
    fn snap(base: u64) -> VirtioNetCountersSnapshot {
        from_slots(|slot| base + slot)
    }

    /// A snapshot with EVERY field at u64::MAX.
    fn maxed() -> VirtioNetCountersSnapshot {
        from_slots(|_| u64::MAX)
    }

    #[test]
    fn aggregate_empty_is_none() {
        let folded = VirtioNetCountersSnapshot::aggregate(std::iter::empty());
        assert_eq!(folded, None, "a NIC-less run reports no counters");
    }

    #[test]
    fn aggregate_single_is_identity() {
        let only = snap(1000);
        let folded = VirtioNetCountersSnapshot::aggregate([only.clone()]);
        assert_eq!(folded, Some(only), "one NIC aggregates to itself");
    }

    #[test]
    fn aggregate_sums_every_field() {
        const FIRST_BASE: u64 = 1000;
        const SECOND_BASE: u64 = 2000;

        let got =
            VirtioNetCountersSnapshot::aggregate([snap(FIRST_BASE), snap(SECOND_BASE)])
                .expect("two snapshots aggregate to Some");
        // The expected total is computed per slot from the two inputs'
        // values; equality over the whole struct proves all 16 fields fold
        // (none dropped, none cross-folded), because every slot's total is
        // distinct.
        let want = from_slots(|slot| (FIRST_BASE + slot) + (SECOND_BASE + slot));
        assert_eq!(got, want, "every field is a field-wise sum");
    }

    #[test]
    fn aggregate_saturates_every_field_on_overflow() {
        // saturating_add must clamp at u64::MAX for EVERY field, never wrap
        // to a small value (a wrapped counter reads as a phantom near-zero
        // total). Seed all 16 at u64::MAX and fold a second snapshot that
        // adds >= 1 to each (snap(0) => fields 1..=16): a single field
        // reverted to wrapping/plain add would wrap below u64::MAX and fail
        // the whole-struct equality, so this guards the saturating contract
        // on all 16 folds.
        let ceiling = maxed();
        let got = VirtioNetCountersSnapshot::aggregate([ceiling.clone(), snap(0)])
            .expect("two snapshots aggregate");
        assert_eq!(
            got, ceiling,
            "every field must saturate at u64::MAX, not wrap"
        );
    }
}