ktstr/vmm/
disk_config.rs

1//! Disk configuration for virtio-blk devices.
2//!
3//! [`Filesystem::Raw`] gives the guest an unformatted block device at
4//! `/dev/vda` (a fresh sparse `tempfile()` backing per test). No mount
5//! happens.
6//!
7//! [`Filesystem::Btrfs`] is the entry point for the disk-template
8//! lifecycle. Selecting it routes through
9//! [`crate::vmm::disk_template::ensure_template`]: on cache miss
10//! the framework boots a one-shot template VM that runs
11//! `mkfs.btrfs` against `/dev/vda`, caches the formatted image
12//! under the ktstr cache root, and per-test boots reflink-copy
13//! that template via `FICLONE` so each per-test filesystem starts
//! pre-formatted with zero host-side mkfs cost. The host never
//! execs mkfs against a real backing file — the host's `mkfs.btrfs`
//! binary is packed into the template VM's initramfs and run there
//! against `/dev/vda`, so the guest kernel is the on-disk-format
//! authority.
17//! See [`crate::vmm::disk_template`] for the full cache and
18//! template-VM driver implementation.
19//!
20//! `DiskConfig` is the descriptor — passed by value, copious
21//! defaults, no path field (the framework owns the per-test backing
22//! file's lifecycle).
23
24use std::num::NonZeroU64;
25
/// Filesystem to format the backing file with.
///
/// `Raw` matches the actual on-disk state: no formatting happens, the
/// guest sees `/dev/vda` as a raw unformatted block device.
///
/// Non-`Raw` variants activate the template-cache lifecycle (see
/// module docs). Selecting one requires the ktstr cache directory
/// to live on a reflink-capable filesystem (btrfs or xfs) — the
/// per-test fan-out uses `FICLONE` to clone the cached template
/// image and would fail on tmpfs/ext4. The host must also have the
/// formatter named by `Self::mkfs_binary_name` on `PATH` at
/// template-build time so the template-VM initramfs can pack it.
///
/// Variants serialize under their `snake_case` names (see the
/// `#[serde(rename_all = "snake_case")]` attribute below).
#[derive(
    Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize,
)]
#[serde(rename_all = "snake_case")]
pub enum Filesystem {
    /// No filesystem; raw block device. The guest sees `/dev/vda` as
    /// an unformatted volume of the configured capacity. Default.
    #[default]
    Raw,
    /// btrfs filesystem. Per-test backing is a reflink clone of a
    /// host-cached, guest-formatted btrfs image at the configured
    /// capacity. On cache miss
    /// `crate::vmm::disk_template::ensure_template` boots a one-shot
    /// template VM that runs `mkfs.btrfs /dev/vda` inside the guest,
    /// caches the formatted image under the ktstr cache root, and
    /// returns the cached path; on cache hit it returns the already
    /// cached path. Each per-test boot then has
    /// `crate::vmm::disk_template::clone_to_per_test` FICLONE-clone
    /// the cached template into a per-test tempfile under the same
    /// cache filesystem. The cache directory must live on a btrfs/xfs
    /// mount, and `mkfs.btrfs` must be on the host `PATH` at
    /// template-build time. See `crate::vmm::disk_template`.
    Btrfs,
}
61
62impl Filesystem {
63    /// Short identifier used in cache keys and diagnostics. The
64    /// values are intentionally short (≤8 chars), kebab-free, and
65    /// stable across rebuilds — they participate in on-disk cache
66    /// path names, so renaming a variant invalidates already-cached
67    /// templates. New variants must add a new tag rather than
68    /// reusing one.
69    pub(crate) fn cache_tag(self) -> &'static str {
70        match self {
71            Filesystem::Raw => "raw",
72            Filesystem::Btrfs => "btrfs",
73        }
74    }
75
76    /// Userspace mkfs binary name to pack into the template-VM
77    /// initramfs for variants that require pre-formatting.
78    ///
79    /// Returns `Some(name)` for variants whose template-build VM
80    /// execs an `mkfs.<fstype>` against `/dev/vda` inside the guest;
81    /// `None` for variants that need no formatter (`Raw`). The
82    /// exhaustive match here forces every future `Filesystem`
83    /// variant to wire its mkfs lookup at compile time —
84    /// [`crate::vmm::disk_template::locate_host_mkfs`] takes the
85    /// returned name verbatim and PATH-resolves it, so a new
86    /// variant that forgets to declare a binary surfaces as a
87    /// non-exhaustive-match build error rather than as a runtime
88    /// "binary not found" diagnostic at template-build time.
89    ///
90    /// # Per-variant wiring points
91    ///
92    /// A new `Filesystem` variant that requires pre-formatting wires up
93    /// four per-variant match arms. All are exhaustive matches over
94    /// `Filesystem`, so a missing arm is a non-exhaustive-match build
95    /// error rather than a runtime surprise; this paragraph exists so an
96    /// implementer reading `mkfs_binary_name` sees the companions up
97    /// front:
98    ///
99    /// - `mkfs_binary_name` (here) — the `mkfs.<fstype>` binary name.
100    /// - `mkfs_package_hint` in `src/vmm/disk_template/mod.rs` — the
101    ///   distro-package hint surfaced in the "binary not found"
102    ///   diagnostic (e.g. `btrfs-progs` for `Btrfs`), used by
103    ///   [`crate::vmm::disk_template::locate_host_mkfs`].
104    /// - [`superblock_magic`](Self::superblock_magic) — the on-disk
105    ///   magic that content-validates a built or cached image. A
106    ///   pre-formatting variant that returns `None` there skips
107    ///   content-validation, silently reviving the unformatted-image
108    ///   bug class for that variant.
109    /// - [`cache_tag`](Self::cache_tag) — the short identifier baked
110    ///   into the on-disk cache key.
111    pub(crate) fn mkfs_binary_name(self) -> Option<&'static str> {
112        match self {
113            Filesystem::Raw => None,
114            Filesystem::Btrfs => Some("mkfs.btrfs"),
115        }
116    }
117
118    /// On-disk superblock magic for content-validating a cached or
119    /// freshly-built template image, as `(byte_offset, magic_u64)`.
120    ///
121    /// The host reads 8 bytes at `byte_offset`, interprets them
122    /// little-endian, and compares to `magic_u64`. A 0-byte / all-zero
123    /// image — an unformatted staging file a prior build published, or a
124    /// torn write — reads back `0` and is rejected before it can strand
125    /// every per-test clone with a `-EINVAL` mount (the guest kernel's
126    /// superblock validator rejects the missing magic). Returns `None`
127    /// for variants with no on-disk filesystem ([`Filesystem::Raw`]),
128    /// which are never content-validated.
129    ///
130    /// btrfs: `magic` is a `__le64` whose value is `BTRFS_MAGIC =
131    /// 0x4D5F53665248425F` ("_BHRfS_M"). It sits at offset `0x10040` —
132    /// the superblock starts at `BTRFS_SUPER_INFO_OFFSET (65536)` and
133    /// `struct btrfs_super_block` places `magic` after `csum[32]` +
134    /// `fsid[16]` + `bytenr (8)` + `flags (8)` = +64. (Verified against
135    /// the kernel `include/uapi/linux/btrfs_tree.h` struct + the
136    /// `BTRFS_MAGIC` / `BTRFS_CSUM_SIZE` / `BTRFS_FSID_SIZE` defines.)
137    /// The guest kernel rejects a wrong magic in `btrfs_validate_super`
138    /// (fs/btrfs/disk-io.c) with `-EINVAL`; this host check fails the
139    /// same image up front.
140    pub(crate) fn superblock_magic(self) -> Option<(u64, u64)> {
141        match self {
142            Filesystem::Raw => None,
143            Filesystem::Btrfs => Some((0x1_0040, 0x4D5F_5366_5248_425F)),
144        }
145    }
146}
147
/// IO throttle for one disk. Each field caps a separate dimension;
/// `None` disables that dimension's throttle. All `None` =
/// unthrottled (the device runs at host-pread/pwrite speed).
///
/// Burst capacity is the token-bucket capacity (the amount of IO a
/// full bucket lets through before throttling kicks in). Refill
/// rate is the steady-state allowance (`iops` / `bytes_per_sec`).
/// When `*_burst_capacity` is `None`, the bucket capacity equals the
/// refill rate, i.e. one second's worth of refill — the historical
/// default. Setting a burst capacity larger than the refill rate
/// models a device that tolerates transient spikes (e.g. a steady
/// rate of 1000 IOPS with a 5000-op burst capacity lets a full
/// bucket absorb 5000 ops — five seconds' worth of refill — before
/// throttling kicks in). A burst capacity without a corresponding
/// rate is meaningless (a bucket that never refills);
/// [`DiskThrottle::validate`] rejects it.
///
/// Throttle exhaustion stalls the request internally and retries via
/// a timer — it is not surfaced to the guest as `VIRTIO_BLK_S_IOERR`.
///
/// # Worked example: cloud-style 1000 IOPS / 10 MiB/s with 5× burst
///
/// Model a "1000 IOPS sustained, tolerate a brief unrestricted
/// spike from a quiescent device" disk:
///
/// ```
/// use ktstr::prelude::*;
///
/// let disk = DiskConfig::default()
///     // Steady-state allowance — bucket refill rate.
///     .iops(1_000)
///     // Peak burst — bucket capacity. 5× the refill rate (5_000 ops)
///     // is the maximum number of unrestricted ops the device will
///     // absorb from a full bucket before throttling kicks in.
///     .iops_burst_capacity(5_000)
///     // Steady-state bandwidth: 10 MiB/s = 10 * 1024 * 1024 bytes/s.
///     .bytes_per_sec(10 * 1024 * 1024)
///     // Bandwidth burst — 5× the rate, mirroring the iops ratio.
///     .bytes_burst_capacity(50 * 1024 * 1024);
/// disk.throttle.validate().expect("burst >= rate, rate set");
/// ```
///
/// At VM build time the buckets are seeded full (start of the test =
/// "quiescent device"); a burst-friendly workload draws the bucket
/// down at peak rate until empty, then is rate-limited to the refill
/// rate from then on.
///
/// The `5_000`-op burst capacity is NOT "5 seconds at 1000 IOPS"
/// in any real-time sense — the bucket drains at whatever rate the
/// guest workload submits ops, which is usually much faster than
/// the refill rate. A workload submitting 10_000 IOPS empties the
/// 5_000-op bucket in ~0.5s, after which the device steady-states
/// at the 1000-IOPS refill rate. The "5 seconds" figure is only
/// reached in one specific case: a workload submitting 2× the
/// refill rate drains a 5×-rate bucket at a net 1× rate, i.e. over
/// ~5 seconds. A workload submitting exactly the refill rate
/// (1000 IOPS) never drains the bucket at all. Most real workloads
/// drain bursts much faster than 5 seconds.
///
/// # Picking values
///
/// - **`iops`** — operations per second the device must sustain.
///   Includes reads, writes, and flushes (each = 1 op).
/// - **`bytes_per_sec`** — bandwidth the device must sustain for
///   read+write data combined. Flushes do not count toward
///   bandwidth.
/// - **`*_burst_capacity`** — how much IO a burst from a full bucket
///   may consume before throttling kicks in. `burst = N * rate`
///   banks N seconds' worth of refill; how long the burst lasts in
///   wall-clock time depends on how fast the workload submits IO
///   (see the worked example above). Leave `None` to default to
///   `burst = rate` (the pre-burst-feature behaviour).
///
/// # Constraint summary
///
/// Both rules are enforced by [`DiskThrottle::validate`] (run by
/// `init_virtio_blk` and its x86 sibling
/// `init_virtio_blk_pci` before the backing file is allocated):
///
/// - `*_burst_capacity` must be `>=` its refill rate (`iops` /
///   `bytes_per_sec`) when both are set; a capacity below the refill
///   rate would silently cap the steady-state at the lower capacity
///   instead of the configured rate.
/// - `*_burst_capacity` must not be set without its matching refill
///   rate; a one-shot bucket that never refills doesn't model any
///   useful throttle.
///
/// Clearing a refill rate via the builder (`iops(0)` /
/// `bytes_per_sec(0)`) auto-clears its matching `*_burst_capacity`
/// so the second rule never trips on a cleared-rate chain.
#[derive(
    Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize,
)]
pub struct DiskThrottle {
    /// Maximum operations per second (1 read = 1 op, 1 write = 1
    /// op, 1 flush = 1 op). Refill rate of the IOPS token bucket.
    ///
    /// Type-enforced nonzero: `Option<NonZeroU64>` makes
    /// `Some(0) = unlimited` impossible to express at the type
    /// level. To disable IOPS throttling, use `None` (or set 0
    /// through the builder, which the builder converts to `None`).
    pub iops: Option<NonZeroU64>,
    /// Maximum bytes per second across read+write data. Refill rate
    /// of the bandwidth token bucket.
    ///
    /// Type-enforced nonzero, same reasoning as `iops`.
    pub bytes_per_sec: Option<NonZeroU64>,
    /// IOPS bucket capacity (peak burst, in ops). When `None`,
    /// capacity equals the `iops` refill rate (one second's worth of
    /// refill). When `Some`, the value must be `>= iops` (a capacity
    /// below the refill rate would discard refilled tokens
    /// immediately and effectively reduce the steady-state rate);
    /// [`DiskThrottle::validate`] enforces this and also rejects a
    /// capacity set while `iops` is `None`.
    ///
    /// Values above `i64::MAX` are accepted but the `TokenBucket`
    /// seed is clamped to `i64::MAX` at construction — the effective
    /// initial burst is ~9.2 quintillion, immaterial for realistic
    /// settings.
    pub iops_burst_capacity: Option<NonZeroU64>,
    /// Bandwidth bucket capacity (peak burst, in bytes). When
    /// `None`, capacity equals the `bytes_per_sec` refill rate
    /// (one second's worth of refill). When `Some`, the value must
    /// be `>= bytes_per_sec`; [`DiskThrottle::validate`] enforces
    /// this and also rejects a capacity set while `bytes_per_sec`
    /// is `None`.
    ///
    /// Values above `i64::MAX` are accepted but the `TokenBucket`
    /// seed is clamped to `i64::MAX` at construction — the effective
    /// initial burst is ~9.2 exabytes, immaterial for realistic
    /// settings.
    pub bytes_burst_capacity: Option<NonZeroU64>,
}
277
/// Which throttle dimension a [`DiskThrottleValidationError`] is about.
///
/// `Iops` pairs the `iops` rate with `iops_burst_capacity`; `Bytes`
/// pairs `bytes_per_sec` with `bytes_burst_capacity`. Having the
/// dimension as a value lets callers drive a programmatic recovery
/// (e.g. clearing the offending burst) without parsing the rendered
/// error message.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ThrottleDimension {
    /// IOPS dimension — `iops` refill rate, `iops_burst_capacity`
    /// bucket capacity.
    Iops,
    /// Bandwidth dimension — `bytes_per_sec` refill rate,
    /// `bytes_burst_capacity` bucket capacity.
    Bytes,
}

impl ThrottleDimension {
    /// `(burst_field, rate_field)` name pair for this dimension.
    ///
    /// Keeping both names in one exhaustive table means a future
    /// variant cannot define one name and forget the other.
    fn field_names(self) -> (&'static str, &'static str) {
        match self {
            Self::Iops => ("iops_burst_capacity", "iops"),
            Self::Bytes => ("bytes_burst_capacity", "bytes_per_sec"),
        }
    }

    /// Name of the burst-capacity field for this dimension.
    ///
    /// Stable wire identifier: it matches both the [`DiskThrottle`]
    /// field name and the [`DiskConfig`] builder method name, so an
    /// error consumer can echo it back to the user as the field to
    /// change.
    pub fn burst_field(self) -> &'static str {
        let (burst, _rate) = self.field_names();
        burst
    }

    /// Name of the refill-rate field for this dimension; the
    /// counterpart of [`Self::burst_field`].
    pub fn rate_field(self) -> &'static str {
        let (_burst, rate) = self.field_names();
        rate
    }
}
315
/// Validation failure for [`DiskThrottle::validate`].
///
/// Returned by [`DiskThrottle::validate`] when a throttle/burst
/// chain violates the constraints documented on [`DiskThrottle`].
/// The `Display` impl carries the same actionable text the previous
/// `String`-returning shape did (with the ", or pass 0 to clear …"
/// remediation hint preserved) so callers that bubble the error
/// through `anyhow::Error` and match on the rendered message keep
/// working. Treat the `#[error(...)]` format strings below as a
/// stable interface for that reason: rewording them can break such
/// callers.
///
/// Tests that need to assert on a specific failure variant downcast
/// via `err.downcast_ref::<DiskThrottleValidationError>()` (when the
/// error is wrapped in `anyhow`) or pattern-match the enum directly.
/// The [`dimension()`](Self::dimension) accessor exposes which
/// dimension (iops/bytes) tripped the rule for callers that route
/// programmatic recovery (e.g. clear the offending
/// `*_burst_capacity` and retry).
#[derive(Clone, Debug, PartialEq, Eq, Hash, thiserror::Error)]
pub enum DiskThrottleValidationError {
    /// `*_burst_capacity` is set to a value strictly below the
    /// corresponding refill rate (`iops` / `bytes_per_sec`). A
    /// bucket with capacity below its refill rate cannot hold a full
    /// second of refilled tokens, so the effective steady-state rate
    /// would silently be the capacity, not the configured rate.
    ///
    /// The field names in the message come from
    /// [`ThrottleDimension::burst_field`] and
    /// [`ThrottleDimension::rate_field`].
    #[error(
        "{burst_field} ({burst}) must be >= {rate_field} ({rate}), \
         or pass 0 to clear the burst override",
        burst_field = dimension.burst_field(),
        rate_field = dimension.rate_field(),
    )]
    BurstBelowRate {
        /// Throttle dimension this failure applies to.
        dimension: ThrottleDimension,
        /// The offending burst-capacity value.
        burst: u64,
        /// The refill rate the burst was compared against.
        rate: u64,
    },
    /// `*_burst_capacity` is set with no matching refill rate
    /// (`iops` / `bytes_per_sec` is `None`). A bucket that never
    /// refills is a one-shot allowance rather than a rate limit,
    /// which does not match any useful throttling model.
    #[error(
        "{burst_field} set without {rate_field} refill rate, \
         or pass 0 to clear the burst override",
        burst_field = dimension.burst_field(),
        rate_field = dimension.rate_field(),
    )]
    BurstWithoutRate {
        /// Throttle dimension this failure applies to.
        dimension: ThrottleDimension,
    },
}
369
370impl DiskThrottleValidationError {
371    /// Throttle dimension (iops/bytes) the failure applies to. Lets
372    /// callers route a programmatic recovery without parsing the
373    /// rendered message — e.g. "clear the offending burst override
374    /// and re-validate" can dispatch on this without string-matching
375    /// `iops_burst_capacity` vs `bytes_burst_capacity`.
376    pub fn dimension(&self) -> ThrottleDimension {
377        match self {
378            DiskThrottleValidationError::BurstBelowRate { dimension, .. } => *dimension,
379            DiskThrottleValidationError::BurstWithoutRate { dimension } => *dimension,
380        }
381    }
382}
383
384impl DiskThrottle {
385    /// Non-panicking validation of throttle/burst consistency.
386    ///
387    /// Rejects burst capacities below their corresponding refill
388    /// rate. A bucket with capacity below its refill rate cannot
389    /// hold a full second of refilled tokens, so the effective
390    /// steady-state rate would silently be the capacity, not the
391    /// configured rate — a user who sets `iops(1000).iops_burst_capacity(500)`
392    /// would expect 1000 IOPS and silently get 500.
393    ///
394    /// A burst capacity set without a corresponding rate is also
395    /// rejected: a bucket with no refill rate is functionally
396    /// unbounded one-shot capacity, which does not match any
397    /// useful throttling model.
398    ///
399    /// Returns [`DiskThrottleValidationError`] on failure — a typed
400    /// enum so callers can pattern-match the failure mode (e.g.
401    /// route a programmatic recovery via the
402    /// [`dimension()`](DiskThrottleValidationError::dimension)
403    /// accessor) rather than string-matching the rendered message.
404    /// The `Display` impl preserves the wording of the prior
405    /// `String`-returning shape, including the ", or pass 0 to
406    /// clear the burst override" remediation hint, so anyhow-bubbled
407    /// callers that match on the rendered text still work.
408    pub fn validate(&self) -> Result<(), DiskThrottleValidationError> {
409        if let Some(burst) = self.iops_burst_capacity {
410            match self.iops {
411                Some(rate) if burst < rate => {
412                    return Err(DiskThrottleValidationError::BurstBelowRate {
413                        dimension: ThrottleDimension::Iops,
414                        burst: burst.get(),
415                        rate: rate.get(),
416                    });
417                }
418                None => {
419                    return Err(DiskThrottleValidationError::BurstWithoutRate {
420                        dimension: ThrottleDimension::Iops,
421                    });
422                }
423                _ => {}
424            }
425        }
426        if let Some(burst) = self.bytes_burst_capacity {
427            match self.bytes_per_sec {
428                Some(rate) if burst < rate => {
429                    return Err(DiskThrottleValidationError::BurstBelowRate {
430                        dimension: ThrottleDimension::Bytes,
431                        burst: burst.get(),
432                        rate: rate.get(),
433                    });
434                }
435                None => {
436                    return Err(DiskThrottleValidationError::BurstWithoutRate {
437                        dimension: ThrottleDimension::Bytes,
438                    });
439                }
440                _ => {}
441            }
442        }
443        Ok(())
444    }
445}
446
/// Per-disk config. `Default` is a raw 256 MiB device on `/dev/vda`:
/// with [`Filesystem::Raw`] nothing is formatted and nothing is
/// auto-mounted.
///
/// No backing-file path field: the framework owns the per-test
/// backing file (`tempfile()` for `Raw`, FICLONE-cloned template
/// for `Btrfs`). See module docs.
#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct DiskConfig {
    /// Advertised capacity in mebibytes (MiB). `capacity_bytes()`
    /// computes `capacity_mib << 20`. 256 MiB default capacity.
    /// Sized to accommodate common guest filesystem formatters;
    /// smaller values are accepted but may cause `mkfs` failures
    /// inside the template VM (see
    /// `crate::vmm::disk_template::build_template_via_vm`) for
    /// `Filesystem::Btrfs`.
    pub capacity_mib: u32,
    /// Filesystem to format the per-test backing with. `Raw` leaves
    /// the device unformatted; `Btrfs` routes through the
    /// template-cache lifecycle.
    pub filesystem: Filesystem,
    /// IO throttle. Default unthrottled.
    pub throttle: DiskThrottle,
    /// Read-only at the device level — the device advertises
    /// VIRTIO_BLK_F_RO so the guest mounts read-only. Useful for
    /// tests that need protection against accidental writes.
    pub read_only: bool,
    /// Optional human-readable label for this disk. `None` (the
    /// default) is an anonymous disk addressable only by index. A
    /// name lets WorkType variants reference the disk symbolically
    /// (e.g. `"data"`, `"log"`) instead of by index, which keeps
    /// tests stable across topology rearrangements.
    ///
    /// Stored as `Option<&'static str>` so `DiskConfig` is
    /// const-constructible — `DiskConfig::DEFAULT.with_name("data")`
    /// works in a `static` or `const` initializer, which the
    /// `#[ktstr_test(disk = ...)]` macro relies on. The field is
    /// `#[serde(skip)]` because `&'static str` can't be deserialized
    /// from arbitrary input without leaking; the name is operator
    /// metadata that the framework computes on-the-fly from the
    /// declaration, not state that needs to round-trip through
    /// sidecar JSON. Sidecar consumers that need to associate a
    /// disk identity with serialized data should use the disk's
    /// index instead.
    #[serde(skip)]
    pub name: Option<&'static str>,
    /// Opt out of guest-side auto-mount. Default `false` means a
    /// non-`Raw` disk is auto-mounted at `/mnt/disk0` by the guest
    /// init (see
    /// `crate::vmm::rust_init::auto_mount_data_disks`); setting
    /// `true` suppresses the auto-mount cmdline tokens and leaves
    /// `/dev/vda` unmounted for the test author to handle. Has no
    /// effect for `Filesystem::Raw` disks (there is nothing to
    /// mount). The only honest reason to flip this is a test that
    /// wants to drive the mount path itself (e.g. exercise
    /// mount-option fuzzing or fail-injection on the kernel mount
    /// syscall).
    pub no_auto_mount: bool,
}
504
505impl Default for DiskConfig {
506    /// 256 MiB, [`Filesystem::Raw`], no throttle. The `Raw` default
507    /// keeps the on-host cost minimal — no template-VM build, no
508    /// cache directory required — and the per-test backing is a
509    /// fresh sparse `tempfile()` per VM (see
510    /// `crate::vmm::KtstrVm::init_virtio_blk`).
511    ///
512    /// # Memory footprint
513    ///
514    /// The 256 MiB sparse file lives under the host's `TMPDIR`
515    /// (`tempfile()`); actual host disk/RAM consumption equals the
516    /// bytes the guest writes, not the advertised capacity. On
517    /// tmpfs-backed `TMPDIR` (the default on most Linux distros), a
518    /// fully-written disk consumes 256 MiB of host **RAM** per test
519    /// — operators running large topologies should size host memory
520    /// accordingly or override `TMPDIR` to a disk-backed path.
521    fn default() -> Self {
522        Self::DEFAULT
523    }
524}
525
impl DiskConfig {
    /// Const-evaluable default — same values as [`Default::default`]
    /// (which returns this constant) but usable in `static` / `const`
    /// initializers. Required for the `#[ktstr_test(disk = ...)]`
    /// macro surface: the macro emits a `static` containing a
    /// `DiskConfig`, which must be const-constructible.
    ///
    /// Spread via `..DiskConfig::DEFAULT` in struct-update syntax,
    /// or chain const setters (`DiskConfig::DEFAULT.with_name("data")`).
    pub const DEFAULT: Self = Self {
        // 256 MiB advertised capacity.
        capacity_mib: 256,
        // Unformatted device: no template build, no cache directory.
        filesystem: Filesystem::Raw,
        // All throttle dimensions disabled.
        throttle: DiskThrottle::DEFAULT,
        // Read-write device.
        read_only: false,
        // Anonymous disk, addressable by index only.
        name: None,
        // Auto-mount stays enabled (it only applies to non-`Raw` disks).
        no_auto_mount: false,
    };
}
544
impl DiskThrottle {
    /// Const-evaluable default — all `None` (unthrottled), matching
    /// the derived [`Default::default`]. Required so
    /// `DiskConfig::DEFAULT` can be `const`.
    ///
    /// Keep this in sync with the struct: a field added to
    /// [`DiskThrottle`] must be listed here too (the struct literal
    /// fails to compile otherwise) with the same value the derived
    /// `Default` produces.
    pub const DEFAULT: Self = Self {
        // No IOPS cap.
        iops: None,
        // No bandwidth cap.
        bytes_per_sec: None,
        // No burst overrides; these are only meaningful alongside a rate.
        iops_burst_capacity: None,
        bytes_burst_capacity: None,
    };
}
556
557impl DiskConfig {
558    /// Set capacity in mebibytes (MiB). The argument is interpreted
559    /// as binary mebibytes per `Self::capacity_bytes`, not decimal
560    /// megabytes.
561    #[must_use = "builder methods consume self; bind the result"]
562    pub fn capacity_mib(mut self, mib: u32) -> Self {
563        self.capacity_mib = mib;
564        self
565    }
566
567    /// Select the on-disk filesystem.
568    ///
569    /// `Filesystem::Raw` (the default) leaves the device unformatted.
570    /// `Filesystem::Btrfs` routes through
571    /// `crate::vmm::disk_template::ensure_template`: on cache miss
572    /// the framework boots a one-shot template VM that runs
573    /// `mkfs.btrfs` inside the guest, caches the formatted image,
574    /// and per-test boots reflink-clone it. The lifecycle requires
575    /// a reflink-capable cache directory (btrfs or xfs) and a host
576    /// `mkfs.btrfs` binary on `PATH` at template-build time. See
577    /// the module-level docs and `crate::vmm::disk_template`.
578    ///
579    /// # Disk-template lifecycle
580    ///
581    /// For `Filesystem::Btrfs`, the per-test backing file is produced
582    /// in three stages — none of which the test author needs to drive
583    /// explicitly:
584    ///
585    /// 1. **Cache lookup** —
586    ///    `disk_template::ensure_template`
587    ///    keys off `(filesystem, capacity)` and returns the cached
588    ///    image path on hit. See the module docs at
589    ///    `crate::vmm::disk_template` for the cache-key encoding
590    ///    and on-disk layout.
591    /// 2. **Template build (cache miss)** —
592    ///    `disk_template::build_template_via_vm`
593    ///    boots a one-shot guest with the host's `mkfs.btrfs` packed
594    ///    into the initramfs; the guest formats `/dev/vda` against
595    ///    a sparse staging image, and the framework atomically moves
596    ///    the formatted image into the cache via
597    ///    `disk_template::store_atomic`.
598    ///    The host never execs `mkfs.btrfs` against a real backing
599    ///    file — the guest kernel is the on-disk-format authority.
600    /// 3. **Per-test fan-out** —
601    ///    `disk_template::clone_to_per_test`
602    ///    `FICLONE`-clones the cached image into a tempfile under
603    ///    the cache root. The clone is O(metadata) and copy-on-write
604    ///    at the extent level, so per-test writes never touch the
605    ///    cached template.
606    ///
607    /// Stage 3 requires the cache directory to live on a reflink-
608    /// capable filesystem (btrfs or xfs); see
609    /// `disk_template::verify_cache_dir_supports_reflink`
610    /// for the gate and
611    /// `crate::vmm::KtstrVmBuilder::disk` for the full
612    /// builder-side wiring.
613    #[must_use = "builder methods consume self; bind the result"]
614    pub fn filesystem(mut self, fs: Filesystem) -> Self {
615        self.filesystem = fs;
616        self
617    }
618
619    /// Set IOPS throttle. Passing 0 disables IOPS throttling
620    /// (equivalent to `None`). To throttle near-zero, use `iops(1)`.
621    /// There is no "block all IO" mode — the minimum throttled rate
622    /// is 1 op/sec. Any positive value is wrapped in `NonZeroU64`.
623    ///
624    /// Clearing the rate (`iops(0)`) also clears the matching
625    /// `iops_burst_capacity` — a burst capacity without a refill
626    /// rate is invalid (caught by [`DiskThrottle::validate`]) and
627    /// keeping a stale burst around after the user explicitly
628    /// disabled the rate is a footgun: the next `validate()` call
629    /// would fail with a less-helpful "burst without rate" error
630    /// rather than the user's intent (a fully-unthrottled bucket).
631    #[must_use = "builder methods consume self; bind the result"]
632    pub fn iops(mut self, iops: u64) -> Self {
633        self.throttle.iops = NonZeroU64::new(iops);
634        if self.throttle.iops.is_none() {
635            self.throttle.iops_burst_capacity = None;
636        }
637        self
638    }
639
640    /// Set bandwidth throttle (bytes per second). A zero value
641    /// disables bandwidth throttling (stored as `None`); any
642    /// positive value is wrapped in `NonZeroU64`.
643    ///
644    /// Clearing the rate (`bytes_per_sec(0)`) also clears the
645    /// matching `bytes_burst_capacity` for the same reason as
646    /// `iops` — a burst without a rate is invalid and stale-burst
647    /// retention turns a deliberate "drop the throttle" into a
648    /// validate-time failure.
649    #[must_use = "builder methods consume self; bind the result"]
650    pub fn bytes_per_sec(mut self, bytes_per_sec: u64) -> Self {
651        self.throttle.bytes_per_sec = NonZeroU64::new(bytes_per_sec);
652        if self.throttle.bytes_per_sec.is_none() {
653            self.throttle.bytes_burst_capacity = None;
654        }
655        self
656    }
657
658    /// Set IOPS burst capacity (token-bucket peak). A zero value
659    /// clears the burst override (stored as `None`), reverting to
660    /// the default 1-second burst (capacity equals refill rate).
661    /// Any positive value is wrapped in `NonZeroU64`.
662    ///
663    /// The capacity must be `>= iops` when both are set, and must
664    /// not be set without `iops`. Both rules are enforced by
665    /// [`DiskThrottle::validate`] at VM build time, not by the
666    /// builder — the builder is order-independent (a user may set
667    /// burst before rate). Tests should call `validate()` after
668    /// chaining, or construct an invalid config and observe the
669    /// error from VM build.
670    #[must_use = "builder methods consume self; bind the result"]
671    pub fn iops_burst_capacity(mut self, capacity: u64) -> Self {
672        self.throttle.iops_burst_capacity = NonZeroU64::new(capacity);
673        self
674    }
675
676    /// Set bandwidth burst capacity in bytes (token-bucket peak).
677    /// A zero value clears the burst override (stored as `None`),
678    /// reverting to the default 1-second burst. Any positive value
679    /// is wrapped in `NonZeroU64`.
680    ///
681    /// The capacity must be `>= bytes_per_sec` when both are set,
682    /// and must not be set without `bytes_per_sec`. Both rules are
683    /// enforced by [`DiskThrottle::validate`] at VM build time, not
684    /// by the builder.
685    #[must_use = "builder methods consume self; bind the result"]
686    pub fn bytes_burst_capacity(mut self, capacity: u64) -> Self {
687        self.throttle.bytes_burst_capacity = NonZeroU64::new(capacity);
688        self
689    }
690
691    /// Mark the disk read-only (advertises `VIRTIO_BLK_F_RO`).
692    /// Default is read-write; this builder takes no argument (no
693    /// boolean footgun) and only flips the flag on. To return to
694    /// read-write, drop the call or reconstruct from
695    /// `DiskConfig::default()`.
696    #[must_use = "builder methods consume self; bind the result"]
697    pub fn read_only(mut self) -> Self {
698        self.read_only = true;
699        self
700    }
701
702    /// Attach a human-readable label to this disk. WorkType variants
703    /// that need to address a specific disk (e.g. one of several
704    /// attached) can resolve the name instead of relying on
705    /// attachment order. Default is anonymous (`None`); calling
706    /// `.with_name(...)` sets it.
707    ///
708    /// The name also drives the guest auto-mount path: a disk
709    /// named `"data"` auto-mounts at `/mnt/data` instead of the
710    /// default `/mnt/disk0`. See [`Self::no_auto_mount`] to opt
711    /// out of auto-mount entirely.
712    ///
713    /// Takes `&'static str` so the builder is `const fn` —
714    /// `DiskConfig::DEFAULT.with_name("data")` can spread into a
715    /// `static` initializer. String literals are `&'static`; tests
716    /// needing a dynamic name should build the disk programmatically
717    /// rather than going through this builder.
718    #[must_use = "builder methods consume self; bind the result"]
719    pub const fn with_name(mut self, name: &'static str) -> Self {
720        self.name = Some(name);
721        self
722    }
723
724    /// Suppress the guest-side auto-mount of this disk. Default
725    /// behavior auto-mounts a non-`Raw` disk at the path returned
726    /// by `Self::auto_mount_path`; calling this method flips
727    /// the flag on. Useful for tests that want raw access to
728    /// `/dev/vda` after a host-driven mkfs (e.g. mount-option
729    /// fuzzing, deliberate mount-failure injection, manual
730    /// subvolume traversal).
731    ///
732    /// No-op for `Filesystem::Raw` disks (there is nothing to
733    /// mount). The flag is honored at cmdline-emission time in
734    /// `crate::vmm::KtstrVm::build_guest_cmdline` (via
735    /// `disk_auto_mount_cmdline_tokens`): when set, the
736    /// `KTSTR_DISK0_FS` / `KTSTR_DISK0_MOUNT` / `KTSTR_DISK0_RO`
737    /// tokens are not emitted, and the guest's
738    /// `crate::vmm::rust_init::auto_mount_data_disks` short-
739    /// circuits at the missing-token check.
740    #[must_use = "builder methods consume self; bind the result"]
741    pub fn no_auto_mount(mut self) -> Self {
742        self.no_auto_mount = true;
743        self
744    }
745
746    /// Resolve the guest-side mount path for this disk. Returns
747    /// `/mnt/<name>` when [`Self::name`] is set, `/mnt/disk0`
748    /// otherwise. Used by the cmdline emission to populate the
749    /// `KTSTR_DISK0_MOUNT` token consumed by the guest's
750    /// `crate::vmm::rust_init::auto_mount_data_disks`.
751    #[allow(dead_code)]
752    pub(crate) fn auto_mount_path(&self) -> String {
753        match self.name {
754            Some(n) => format!("/mnt/{n}"),
755            None => "/mnt/disk0".to_string(),
756        }
757    }
758
759    /// Capacity in bytes (`capacity_mib << 20`). Used by the device
760    /// for the config-space `capacity` field.
761    pub(crate) fn capacity_bytes(&self) -> u64 {
762        (self.capacity_mib as u64) << 20
763    }
764
765    /// Capacity in 512-byte sectors.
766    ///
767    /// `dead_code` allow: only the in-file `#[cfg(test)]` tests
768    /// consume this; the production virtio-blk path uses
769    /// [`Self::capacity_bytes`] and divides by `VIRTIO_BLK_SECTOR_SIZE`
770    /// at the device layer.
771    #[allow(dead_code)]
772    pub(crate) fn capacity_sectors(&self) -> u64 {
773        self.capacity_bytes() / 512
774    }
775}
776
777#[cfg(test)]
778mod tests {
779    use super::*;
780
781    #[test]
782    fn default_is_256mib_raw() {
783        let d = DiskConfig::default();
784        assert_eq!(d.capacity_mib, 256);
785        assert_eq!(d.filesystem, Filesystem::Raw);
786        assert_eq!(d.throttle, DiskThrottle::default());
787        assert!(!d.read_only);
788        assert!(d.name.is_none());
789    }
790
791    #[test]
792    fn capacity_helpers() {
793        let d = DiskConfig::default();
794        assert_eq!(d.capacity_bytes(), 256 * 1024 * 1024);
795        assert_eq!(d.capacity_sectors(), 524_288);
796
797        let d = DiskConfig::default().capacity_mib(512);
798        assert_eq!(d.capacity_bytes(), 512 * 1024 * 1024);
799        assert_eq!(d.capacity_sectors(), 1_048_576);
800    }
801
802    #[test]
803    fn filesystem_builder_sets_variant() {
804        let d = DiskConfig::default().filesystem(Filesystem::Btrfs);
805        assert_eq!(d.filesystem, Filesystem::Btrfs);
806        // Builder is overwriting (not OR-ing) — last call wins.
807        let d = d.filesystem(Filesystem::Raw);
808        assert_eq!(d.filesystem, Filesystem::Raw);
809    }
810
811    #[test]
812    fn builder_chain() {
813        let d = DiskConfig::default()
814            .capacity_mib(128)
815            .iops(1000)
816            .bytes_per_sec(10 * 1024 * 1024)
817            .read_only();
818        assert_eq!(d.capacity_mib, 128);
819        assert_eq!(d.filesystem, Filesystem::Raw);
820        assert_eq!(d.throttle.iops, NonZeroU64::new(1000));
821        assert_eq!(d.throttle.bytes_per_sec, NonZeroU64::new(10 * 1024 * 1024));
822        assert!(d.read_only);
823    }
824
825    #[test]
826    fn iops_zero_becomes_none() {
827        // The NonZeroU64 type makes Some(0) impossible. The builder
828        // accepts u64 for ergonomics and converts 0 → None
829        // (= unthrottled) at the type boundary.
830        let d = DiskConfig::default().iops(0);
831        assert!(d.throttle.iops.is_none());
832        let d = DiskConfig::default().bytes_per_sec(0);
833        assert!(d.throttle.bytes_per_sec.is_none());
834    }
835
836    #[test]
837    fn filesystem_default_is_raw() {
838        // Default::default() must produce a working v0 config — the
839        // `Filesystem::Raw` default matches the actual v0 behaviour
840        // (no formatting). #[default] attribute on the enum variant
841        // drives this; this test pins it so a future patch that
842        // adds a non-Raw variant and changes `#[default]` (regressing
843        // the "default works" guarantee) surfaces here.
844        assert_eq!(Filesystem::default(), Filesystem::Raw);
845    }
846
847    #[test]
848    fn filesystem_serde_snake_case() {
849        assert_eq!(serde_json::to_string(&Filesystem::Raw).unwrap(), r#""raw""#);
850        assert_eq!(
851            serde_json::to_string(&Filesystem::Btrfs).unwrap(),
852            r#""btrfs""#
853        );
854        let parsed: Filesystem = serde_json::from_str(r#""raw""#).unwrap();
855        assert_eq!(parsed, Filesystem::Raw);
856        let parsed: Filesystem = serde_json::from_str(r#""btrfs""#).unwrap();
857        assert_eq!(parsed, Filesystem::Btrfs);
858    }
859
860    #[test]
861    fn filesystem_cache_tag_round_trips_serde_name() {
862        // The cache_tag is the on-disk identifier used in the
863        // template-cache key. Pinning that it matches the serde
864        // serialization keeps the two name spaces aligned — a future
865        // `#[serde(rename = "...")]` change must update cache_tag in
866        // lock-step or the cache stops finding old entries.
867        for fs in [Filesystem::Raw, Filesystem::Btrfs] {
868            let json = serde_json::to_string(&fs).unwrap();
869            let stripped = json.trim_matches('"');
870            assert_eq!(fs.cache_tag(), stripped, "cache_tag drift for {fs:?}");
871        }
872    }
873
874    #[test]
875    fn throttle_default_is_unthrottled() {
876        let t = DiskThrottle::default();
877        assert!(t.iops.is_none());
878        assert!(t.bytes_per_sec.is_none());
879        assert!(t.iops_burst_capacity.is_none());
880        assert!(t.bytes_burst_capacity.is_none());
881    }
882
883    #[test]
884    fn iops_zero_serde_roundtrip() {
885        // Build with iops(0) → throttle.iops is None. Serialize +
886        // deserialize the config and confirm the field stays None.
887        // Pins the NonZeroU64 type-level invariant against a future
888        // serde-derive regression that might silently re-introduce
889        // a Some(0) representation (impossible by construction
890        // today, but a wrong-typed `Option<u64>` migration would
891        // bring it back).
892        let original = DiskConfig::default().iops(0).bytes_per_sec(0);
893        let json = serde_json::to_string(&original).expect("serialize");
894        let parsed: DiskConfig = serde_json::from_str(&json).expect("deserialize");
895        assert!(parsed.throttle.iops.is_none());
896        assert!(parsed.throttle.bytes_per_sec.is_none());
897        // Round-trip equality works because of the PartialEq derive
898        // on DiskConfig.
899        assert_eq!(parsed, original);
900    }
901
902    /// Full serde roundtrip with every field set to a non-default
903    /// value. Pin field-by-field equality after a JSON round trip so
904    /// a future `#[serde(rename = ...)]` or `#[serde(skip)]`
905    /// regression — the typical drift mode for serde-derived structs
906    /// — surfaces here loudly.
907    #[test]
908    fn disk_config_full_serde_roundtrip() {
909        let original = DiskConfig {
910            capacity_mib: 256,
911            filesystem: Filesystem::Raw,
912            throttle: DiskThrottle {
913                iops: NonZeroU64::new(2_500),
914                bytes_per_sec: NonZeroU64::new(50 * 1024 * 1024),
915                iops_burst_capacity: NonZeroU64::new(10_000),
916                bytes_burst_capacity: NonZeroU64::new(200 * 1024 * 1024),
917            },
918            read_only: true,
919            name: Some("data-disk"),
920            no_auto_mount: false,
921        };
922
923        let json = serde_json::to_string(&original).expect("serialize DiskConfig");
924        let parsed: DiskConfig = serde_json::from_str(&json).expect("deserialize DiskConfig");
925
926        // Wire-format canonicality — the serialized key must be
927        // `capacity_mib`, not the pre-rename `capacity_mb` (legacy)
928        // form. Catches a future `#[serde(rename = "capacity_mb")]`
929        // regression that flips the emitted key.
930        assert!(
931            json.contains("\"capacity_mib\""),
932            "JSON must use the canonical `capacity_mib` key: {json}"
933        );
934        assert!(
935            !json.contains("\"capacity_mb\""),
936            "JSON must NOT contain the pre-rename `capacity_mb` key: {json}"
937        );
938
939        // Deserialize-side break: a JSON with the legacy `capacity_mb`
940        // key must FAIL to parse. Catches `#[serde(alias = "capacity_mb")]`
941        // which is deserialize-only sugar — it leaves the serialized
942        // key alone (so the contains-checks above pass) but silently
943        // accepts the old name on read. Constructed by replacing the
944        // canonical key in the just-serialized JSON, keeping every
945        // other field/value identical so the legacy_json is well-formed
946        // in every respect except the one renamed key.
947        let legacy_json = json.replace("\"capacity_mib\"", "\"capacity_mb\"");
948        assert!(
949            serde_json::from_str::<DiskConfig>(&legacy_json).is_err(),
950            "deserialization must reject the pre-rename `capacity_mb` key \
951             — a regression that added `#[serde(alias = \"capacity_mb\")]` \
952             would silently accept old sidecars on read: legacy_json={legacy_json}"
953        );
954
955        // Whole-struct equality first — catches any field drift.
956        // `name` is stripped because it's `#[serde(skip)]` (see the
957        // dedicated assert at the bottom of this fn) and always
958        // round-trips to `None` regardless of original.
959        let original_for_eq = DiskConfig {
960            name: None,
961            ..original.clone()
962        };
963        assert_eq!(parsed, original_for_eq);
964        // Field-by-field follow-up — each line catches a distinct
965        // drift mode on its own (rename, skip, type-narrowing).
966        assert_eq!(parsed.capacity_mib, original.capacity_mib);
967        assert_eq!(parsed.filesystem, original.filesystem);
968        assert_eq!(parsed.throttle.iops, original.throttle.iops);
969        assert_eq!(
970            parsed.throttle.bytes_per_sec,
971            original.throttle.bytes_per_sec
972        );
973        assert_eq!(
974            parsed.throttle.iops_burst_capacity,
975            original.throttle.iops_burst_capacity
976        );
977        assert_eq!(
978            parsed.throttle.bytes_burst_capacity,
979            original.throttle.bytes_burst_capacity
980        );
981        assert_eq!(parsed.read_only, original.read_only);
982        // `name` is `#[serde(skip)]` since `&'static str` can't be
983        // deserialized from arbitrary input — the field round-trips
984        // to `None` regardless of original. Test authors that need
985        // disk identity in serialized output should use the disk's
986        // index instead. Pin this contract here so a future serde
987        // tweak (e.g. dropping the skip) surfaces in this assertion.
988        assert!(
989            parsed.name.is_none(),
990            "DiskConfig.name uses #[serde(skip)]; round-trip must produce None regardless of original (was {:?})",
991            original.name,
992        );
993    }
994
995    /// Roundtrip the unthrottled default (both throttle fields
996    /// `None`). Distinct from `iops_zero_serde_roundtrip` (which
997    /// builds via `.iops(0)/.bytes_per_sec(0)`): this exercises the
998    /// pure `DiskConfig::default()` shape, ensuring the `None`/`None`
999    /// throttle survives serialize→JSON→deserialize and that the
1000    /// whole-struct PartialEq holds across the round trip.
1001    #[test]
1002    fn disk_config_default_unthrottled_serde_roundtrip() {
1003        let original = DiskConfig::default();
1004        assert!(original.throttle.iops.is_none());
1005        assert!(original.throttle.bytes_per_sec.is_none());
1006        assert!(original.name.is_none());
1007
1008        let json = serde_json::to_string(&original).expect("serialize default DiskConfig");
1009        let parsed: DiskConfig =
1010            serde_json::from_str(&json).expect("deserialize default DiskConfig");
1011
1012        assert_eq!(parsed, original);
1013        assert_eq!(parsed.capacity_mib, original.capacity_mib);
1014        assert_eq!(parsed.filesystem, original.filesystem);
1015        assert!(parsed.throttle.iops.is_none());
1016        assert!(parsed.throttle.bytes_per_sec.is_none());
1017        assert!(parsed.throttle.iops_burst_capacity.is_none());
1018        assert!(parsed.throttle.bytes_burst_capacity.is_none());
1019        assert_eq!(parsed.read_only, original.read_only);
1020        assert!(parsed.name.is_none());
1021    }
1022
1023    #[test]
1024    fn with_name_builder_sets_label() {
1025        let d = DiskConfig::default().with_name("data-disk");
1026        assert_eq!(d.name, Some("data-disk"));
1027
1028        // Last call wins — the builder overwrites.
1029        let d = DiskConfig::default().with_name("first").with_name("second");
1030        assert_eq!(d.name, Some("second"));
1031    }
1032
1033    /// The `with_name` builder is `const fn`, so a `static DiskConfig`
1034    /// can be constructed by chaining setters off the const DEFAULT.
1035    /// This pins the const-construction property the #[ktstr_test]
1036    /// macro's `disk = ...` arm depends on.
1037    #[test]
1038    fn with_name_works_in_const_context() {
1039        const NAMED: DiskConfig = DiskConfig::DEFAULT.with_name("static-disk");
1040        assert_eq!(NAMED.name, Some("static-disk"));
1041        assert_eq!(NAMED.capacity_mib, 256);
1042    }
1043
1044    #[test]
1045    fn burst_capacity_builders_set_fields() {
1046        let d = DiskConfig::default()
1047            .iops(1_000)
1048            .iops_burst_capacity(5_000)
1049            .bytes_per_sec(10 * 1024 * 1024)
1050            .bytes_burst_capacity(50 * 1024 * 1024);
1051        assert_eq!(d.throttle.iops, NonZeroU64::new(1_000));
1052        assert_eq!(d.throttle.iops_burst_capacity, NonZeroU64::new(5_000));
1053        assert_eq!(d.throttle.bytes_per_sec, NonZeroU64::new(10 * 1024 * 1024));
1054        assert_eq!(
1055            d.throttle.bytes_burst_capacity,
1056            NonZeroU64::new(50 * 1024 * 1024)
1057        );
1058    }
1059
1060    #[test]
1061    fn burst_capacity_zero_becomes_none() {
1062        // Mirrors the iops/bytes_per_sec ergonomics: 0 → None at the
1063        // type boundary so callers can clear a previously-set burst
1064        // override without dropping back to a fresh `DiskConfig`.
1065        let d = DiskConfig::default()
1066            .iops(1_000)
1067            .iops_burst_capacity(5_000)
1068            .iops_burst_capacity(0);
1069        assert!(d.throttle.iops_burst_capacity.is_none());
1070
1071        let d = DiskConfig::default()
1072            .bytes_per_sec(1_000)
1073            .bytes_burst_capacity(5_000)
1074            .bytes_burst_capacity(0);
1075        assert!(d.throttle.bytes_burst_capacity.is_none());
1076    }
1077
1078    #[test]
1079    fn burst_capacity_default_is_none() {
1080        let d = DiskConfig::default();
1081        assert!(d.throttle.iops_burst_capacity.is_none());
1082        assert!(d.throttle.bytes_burst_capacity.is_none());
1083    }
1084
1085    /// Clearing the rate via `iops(0)` also clears the matching
1086    /// `iops_burst_capacity`. A burst capacity without a refill
1087    /// rate is invalid per [`DiskThrottle::validate`]; without
1088    /// this auto-clear, a `.iops(1000).iops_burst_capacity(5000)
1089    /// .iops(0)` chain would leave a stale burst that turns the
1090    /// next `validate()` into a "burst without rate" error
1091    /// instead of the user's intent (a fully-unthrottled bucket).
1092    #[test]
1093    fn clearing_iops_clears_iops_burst() {
1094        let d = DiskConfig::default()
1095            .iops(1_000)
1096            .iops_burst_capacity(5_000)
1097            .iops(0);
1098        assert!(d.throttle.iops.is_none());
1099        assert!(
1100            d.throttle.iops_burst_capacity.is_none(),
1101            "clearing iops must also clear iops_burst_capacity \
1102             so validate() doesn't fail with a stale-burst error",
1103        );
1104        // bytes side untouched — per-dimension independence.
1105        let d = DiskConfig::default()
1106            .bytes_per_sec(2_000)
1107            .bytes_burst_capacity(8_000)
1108            .iops(0);
1109        assert!(d.throttle.bytes_per_sec.is_some());
1110        assert!(d.throttle.bytes_burst_capacity.is_some());
1111    }
1112
1113    /// Clearing the rate via `bytes_per_sec(0)` also clears the
1114    /// matching `bytes_burst_capacity`. Mirror of
1115    /// `clearing_iops_clears_iops_burst`.
1116    #[test]
1117    fn clearing_bytes_per_sec_clears_bytes_burst() {
1118        let d = DiskConfig::default()
1119            .bytes_per_sec(2_000)
1120            .bytes_burst_capacity(8_000)
1121            .bytes_per_sec(0);
1122        assert!(d.throttle.bytes_per_sec.is_none());
1123        assert!(
1124            d.throttle.bytes_burst_capacity.is_none(),
1125            "clearing bytes_per_sec must also clear \
1126             bytes_burst_capacity",
1127        );
1128        // iops side untouched.
1129        let d = DiskConfig::default()
1130            .iops(1_000)
1131            .iops_burst_capacity(5_000)
1132            .bytes_per_sec(0);
1133        assert!(d.throttle.iops.is_some());
1134        assert!(d.throttle.iops_burst_capacity.is_some());
1135    }
1136
1137    /// After a `clear-rate`-then-validate chain, the result must
1138    /// validate cleanly. Pins the integration: setting both rate
1139    /// and burst, then clearing the rate, leaves the throttle in
1140    /// a state that `validate()` accepts (no orphan-burst error).
1141    #[test]
1142    fn clearing_rate_leaves_throttle_validate_clean() {
1143        let throttle = DiskConfig::default()
1144            .iops(1_000)
1145            .iops_burst_capacity(5_000)
1146            .bytes_per_sec(2_000)
1147            .bytes_burst_capacity(8_000)
1148            .iops(0)
1149            .bytes_per_sec(0)
1150            .throttle;
1151        assert!(throttle.iops.is_none());
1152        assert!(throttle.bytes_per_sec.is_none());
1153        assert!(throttle.iops_burst_capacity.is_none());
1154        assert!(throttle.bytes_burst_capacity.is_none());
1155        throttle
1156            .validate()
1157            .expect("post-clear throttle must validate clean");
1158    }
1159
1160    #[test]
1161    fn validate_accepts_burst_at_or_above_rate() {
1162        // burst == rate (the historical 1-second-burst behaviour
1163        // expressed explicitly).
1164        DiskConfig::default()
1165            .iops(1_000)
1166            .iops_burst_capacity(1_000)
1167            .throttle
1168            .validate()
1169            .expect("burst == iops accepted");
1170
1171        // burst > rate (multi-second burst).
1172        DiskConfig::default()
1173            .iops(1_000)
1174            .iops_burst_capacity(5_000)
1175            .bytes_per_sec(10 * 1024 * 1024)
1176            .bytes_burst_capacity(50 * 1024 * 1024)
1177            .throttle
1178            .validate()
1179            .expect("burst > rate accepted");
1180
1181        // No throttle set → trivially valid.
1182        DiskConfig::default()
1183            .throttle
1184            .validate()
1185            .expect("no throttle accepted");
1186
1187        // Rate set, burst unset → trivially valid (burst defaults to
1188        // rate-equivalent at wire-up time).
1189        DiskConfig::default()
1190            .iops(1_000)
1191            .bytes_per_sec(1_000_000)
1192            .throttle
1193            .validate()
1194            .expect("rate without burst accepted");
1195    }
1196
1197    #[test]
1198    fn validate_rejects_burst_below_rate() {
1199        let err = DiskConfig::default()
1200            .iops(1_000)
1201            .iops_burst_capacity(500)
1202            .throttle
1203            .validate()
1204            .expect_err("burst < iops rejected");
1205        assert_eq!(
1206            err,
1207            DiskThrottleValidationError::BurstBelowRate {
1208                dimension: ThrottleDimension::Iops,
1209                burst: 500,
1210                rate: 1_000,
1211            },
1212            "unexpected error variant",
1213        );
1214        let msg = err.to_string();
1215        assert!(
1216            msg.contains("iops_burst_capacity") && msg.contains("must be >="),
1217            "unexpected error message: {msg}",
1218        );
1219        assert!(
1220            msg.contains("pass 0 to clear"),
1221            "remediation hint missing: {msg}",
1222        );
1223
1224        let err = DiskConfig::default()
1225            .bytes_per_sec(10_000)
1226            .bytes_burst_capacity(5_000)
1227            .throttle
1228            .validate()
1229            .expect_err("burst < bytes_per_sec rejected");
1230        assert_eq!(
1231            err,
1232            DiskThrottleValidationError::BurstBelowRate {
1233                dimension: ThrottleDimension::Bytes,
1234                burst: 5_000,
1235                rate: 10_000,
1236            },
1237            "unexpected error variant",
1238        );
1239        let msg = err.to_string();
1240        assert!(
1241            msg.contains("bytes_burst_capacity") && msg.contains("must be >="),
1242            "unexpected error message: {msg}",
1243        );
1244        assert!(
1245            msg.contains("pass 0 to clear"),
1246            "remediation hint missing: {msg}",
1247        );
1248    }
1249
1250    /// Off-by-one boundary: `burst == rate - 1` must be rejected. Pins
1251    /// the strict `<` vs `<=` direction of the validate predicate
1252    /// against a future flip that would silently accept a steady-state
1253    /// rate one below the configured value.
1254    #[test]
1255    fn validate_rejects_burst_one_below_rate() {
1256        let err = DiskConfig::default()
1257            .iops(1_000)
1258            .iops_burst_capacity(999)
1259            .throttle
1260            .validate()
1261            .expect_err("iops burst one below rate must be rejected");
1262        assert_eq!(
1263            err,
1264            DiskThrottleValidationError::BurstBelowRate {
1265                dimension: ThrottleDimension::Iops,
1266                burst: 999,
1267                rate: 1_000,
1268            },
1269        );
1270        let msg = err.to_string();
1271        assert!(
1272            msg.contains("iops_burst_capacity") && msg.contains("must be >="),
1273            "unexpected error message: {msg}",
1274        );
1275
1276        let err = DiskConfig::default()
1277            .bytes_per_sec(1_000)
1278            .bytes_burst_capacity(999)
1279            .throttle
1280            .validate()
1281            .expect_err("bytes burst one below rate must be rejected");
1282        assert_eq!(
1283            err,
1284            DiskThrottleValidationError::BurstBelowRate {
1285                dimension: ThrottleDimension::Bytes,
1286                burst: 999,
1287                rate: 1_000,
1288            },
1289        );
1290        let msg = err.to_string();
1291        assert!(
1292            msg.contains("bytes_burst_capacity") && msg.contains("must be >="),
1293            "unexpected error message: {msg}",
1294        );
1295    }
1296
1297    /// Builder chain that sets a rate and burst then clears the rate
1298    /// via `iops(0)` must validate clean — clearing the rate also
1299    /// clears the matching burst (per the [`DiskConfig::iops`]
1300    /// auto-clear contract), so the resulting throttle is fully
1301    /// unthrottled and validate rejects nothing. Distinct from
1302    /// `clearing_rate_leaves_throttle_validate_clean` (which clears
1303    /// both rates simultaneously); this one isolates the iops-only
1304    /// clear path so a regression in just one auto-clear branch
1305    /// surfaces here.
1306    #[test]
1307    fn iops_clear_after_burst_set_validates_clean() {
1308        DiskConfig::default()
1309            .iops(1_000)
1310            .iops_burst_capacity(5_000)
1311            .iops(0)
1312            .throttle
1313            .validate()
1314            .expect("iops-cleared throttle must validate clean");
1315    }
1316
1317    #[test]
1318    fn validate_rejects_burst_without_rate() {
1319        let err = DiskConfig::default()
1320            .iops_burst_capacity(5_000)
1321            .throttle
1322            .validate()
1323            .expect_err("burst without iops rejected");
1324        assert_eq!(
1325            err,
1326            DiskThrottleValidationError::BurstWithoutRate {
1327                dimension: ThrottleDimension::Iops,
1328            },
1329        );
1330        let msg = err.to_string();
1331        assert!(
1332            msg.contains("iops_burst_capacity") && msg.contains("without iops"),
1333            "unexpected error message: {msg}",
1334        );
1335        assert!(
1336            msg.contains("pass 0 to clear"),
1337            "remediation hint missing: {msg}",
1338        );
1339
1340        let err = DiskConfig::default()
1341            .bytes_burst_capacity(5_000)
1342            .throttle
1343            .validate()
1344            .expect_err("burst without bytes_per_sec rejected");
1345        assert_eq!(
1346            err,
1347            DiskThrottleValidationError::BurstWithoutRate {
1348                dimension: ThrottleDimension::Bytes,
1349            },
1350        );
1351        let msg = err.to_string();
1352        assert!(
1353            msg.contains("bytes_burst_capacity") && msg.contains("without bytes_per_sec"),
1354            "unexpected error message: {msg}",
1355        );
1356        assert!(
1357            msg.contains("pass 0 to clear"),
1358            "remediation hint missing: {msg}",
1359        );
1360    }
1361
1362    /// `DiskThrottleValidationError::dimension()` exposes the
1363    /// throttle dimension (iops/bytes) the failure applies to so
1364    /// callers can route a programmatic recovery without parsing
1365    /// the rendered message. Pin the accessor's mapping over both
1366    /// variants × both dimensions so a future variant addition
1367    /// that forgets to populate the dimension surfaces here.
1368    #[test]
1369    fn validation_error_dimension_accessor() {
1370        let err = DiskThrottleValidationError::BurstBelowRate {
1371            dimension: ThrottleDimension::Iops,
1372            burst: 500,
1373            rate: 1_000,
1374        };
1375        assert_eq!(err.dimension(), ThrottleDimension::Iops);
1376
1377        let err = DiskThrottleValidationError::BurstBelowRate {
1378            dimension: ThrottleDimension::Bytes,
1379            burst: 500,
1380            rate: 1_000,
1381        };
1382        assert_eq!(err.dimension(), ThrottleDimension::Bytes);
1383
1384        let err = DiskThrottleValidationError::BurstWithoutRate {
1385            dimension: ThrottleDimension::Iops,
1386        };
1387        assert_eq!(err.dimension(), ThrottleDimension::Iops);
1388
1389        let err = DiskThrottleValidationError::BurstWithoutRate {
1390            dimension: ThrottleDimension::Bytes,
1391        };
1392        assert_eq!(err.dimension(), ThrottleDimension::Bytes);
1393    }
1394
1395    /// `ThrottleDimension::burst_field()` and `rate_field()` return
1396    /// the wire field names matching [`DiskThrottle`] / [`DiskConfig`]
1397    /// builder method names so error consumers can echo the offending
1398    /// field back to the user. Pin both directions so a rename of
1399    /// either field on `DiskThrottle` without a matching update here
1400    /// surfaces as a test failure rather than silently desync'd
1401    /// error messages.
1402    #[test]
1403    fn throttle_dimension_field_names() {
1404        assert_eq!(ThrottleDimension::Iops.burst_field(), "iops_burst_capacity");
1405        assert_eq!(ThrottleDimension::Iops.rate_field(), "iops");
1406        assert_eq!(
1407            ThrottleDimension::Bytes.burst_field(),
1408            "bytes_burst_capacity",
1409        );
1410        assert_eq!(ThrottleDimension::Bytes.rate_field(), "bytes_per_sec");
1411    }
1412
1413    /// Pin downcast through anyhow: `DiskThrottle::validate` returns
1414    /// `Result<(), DiskThrottleValidationError>`, but production
1415    /// callers (e.g. `init_virtio_blk`) wrap the
1416    /// failure in `anyhow::Error`. Library consumers that need to
1417    /// pattern-match on the failure variant must therefore
1418    /// `downcast_ref::<DiskThrottleValidationError>()` through the
1419    /// anyhow chain. Without this test, a future change to the
1420    /// callsite that loses the typed error (e.g. converting the
1421    /// inner error to `String` before bubbling, or replacing
1422    /// `anyhow::Error::new(e)` with `anyhow!("...{e}...")`) would
1423    /// silently break the typed-error contract for downstream
1424    /// callers — only surfacing as a regression at the consumer
1425    /// site, which doesn't exist in-tree yet.
1426    ///
1427    /// The chain wraps with `.context(...)` to mirror the production
1428    /// shape at `init_virtio_blk` (in
1429    /// `src/vmm/setup/mod.rs`) so the downcast walks through the same
1430    /// context layer real callers see.
1431    #[test]
1432    fn disk_throttle_validation_error_downcasts_through_anyhow() {
1433        let typed = DiskConfig::default()
1434            .iops(1_000)
1435            .iops_burst_capacity(500)
1436            .throttle
1437            .validate()
1438            .expect_err("burst < iops rejected");
1439        // Wrap in anyhow exactly like the production callsite does
1440        // (KtstrVm::init_virtio_blk in src/vmm/setup/mod.rs:
1441        // anyhow!(e).context("invalid disk throttle")).
1442        let wrapped = anyhow::anyhow!(typed).context("invalid disk throttle");
1443        // The typed variant must be reachable through the anyhow
1444        // chain via downcast_ref. Walk every cause.
1445        let recovered = wrapped
1446            .chain()
1447            .find_map(|c| c.downcast_ref::<DiskThrottleValidationError>())
1448            .expect(
1449                "DiskThrottleValidationError must remain downcastable through \
1450                 the production anyhow wrap; lost typing means library \
1451                 consumers cannot route programmatic recovery",
1452            );
1453        assert_eq!(
1454            *recovered,
1455            DiskThrottleValidationError::BurstBelowRate {
1456                dimension: ThrottleDimension::Iops,
1457                burst: 500,
1458                rate: 1_000,
1459            },
1460        );
1461        // Sanity: the rendered chain still contains the operator-
1462        // facing context so logs show "invalid disk throttle: ...".
1463        let rendered = format!("{wrapped:#}");
1464        assert!(
1465            rendered.contains("invalid disk throttle"),
1466            "anyhow context must survive the wrap: {rendered}",
1467        );
1468    }
1469
1470    /// `DiskThrottle::validate` checks the iops dimension first and
1471    /// short-circuits on the first failure. When BOTH dimensions
1472    /// hold violations, the iops failure is returned; the bytes
1473    /// failure surfaces only on a subsequent re-validate after the
1474    /// caller fixes the iops side. Pin this ordering so a refactor
1475    /// that aggregates errors (e.g. returns the first non-violating
1476    /// dimension's failure) or reverses the check order surfaces
1477    /// here. The test sets both dimensions intentionally violating
1478    /// and asserts the variant carries `ThrottleDimension::Iops` —
1479    /// any other variant is wrong.
1480    #[test]
1481    fn validate_first_failure_wins_iops_before_bytes() {
1482        let throttle = DiskConfig::default()
1483            .iops(1_000)
1484            .iops_burst_capacity(500) // iops violation: burst < rate
1485            .bytes_per_sec(10_000)
1486            .bytes_burst_capacity(5_000) // bytes violation: burst < rate
1487            .throttle;
1488        let err = throttle
1489            .validate()
1490            .expect_err("both-dimensions-bad must reject");
1491        assert_eq!(
1492            err,
1493            DiskThrottleValidationError::BurstBelowRate {
1494                dimension: ThrottleDimension::Iops,
1495                burst: 500,
1496                rate: 1_000,
1497            },
1498            "iops violation must surface first; refactor that aggregates \
1499             or reverses the check order would change this",
1500        );
1501        assert_eq!(err.dimension(), ThrottleDimension::Iops);
1502
1503        // Same shape with the BurstWithoutRate variant: setting
1504        // burst capacities on both dimensions with neither rate set
1505        // exercises the "missing rate" branch with both dimensions
1506        // violating.
1507        let throttle = DiskConfig::default()
1508            .iops_burst_capacity(5_000)
1509            .bytes_burst_capacity(8_000)
1510            .throttle;
1511        let err = throttle
1512            .validate()
1513            .expect_err("both-without-rate must reject");
1514        assert_eq!(
1515            err,
1516            DiskThrottleValidationError::BurstWithoutRate {
1517                dimension: ThrottleDimension::Iops,
1518            },
1519            "iops violation must surface first across both \
1520             BurstBelowRate and BurstWithoutRate variants",
1521        );
1522    }
1523
1524    /// Dedicated serde roundtrip for the burst fields. Distinct from
1525    /// the full-roundtrip test: that one constructs a `DiskThrottle`
1526    /// literal, this one drives the builder so a future builder
1527    /// regression that fails to populate the underlying fields would
1528    /// surface here even if struct-literal construction stayed
1529    /// correct.
1530    #[test]
1531    fn disk_config_burst_serde_roundtrip() {
1532        let original = DiskConfig::default()
1533            .iops(2_500)
1534            .iops_burst_capacity(10_000)
1535            .bytes_per_sec(50 * 1024 * 1024)
1536            .bytes_burst_capacity(200 * 1024 * 1024);
1537
1538        let json = serde_json::to_string(&original).expect("serialize burst DiskConfig");
1539        let parsed: DiskConfig = serde_json::from_str(&json).expect("deserialize burst DiskConfig");
1540
1541        assert_eq!(parsed, original);
1542        assert_eq!(parsed.throttle.iops, NonZeroU64::new(2_500));
1543        assert_eq!(parsed.throttle.iops_burst_capacity, NonZeroU64::new(10_000));
1544        assert_eq!(
1545            parsed.throttle.bytes_per_sec,
1546            NonZeroU64::new(50 * 1024 * 1024)
1547        );
1548        assert_eq!(
1549            parsed.throttle.bytes_burst_capacity,
1550            NonZeroU64::new(200 * 1024 * 1024)
1551        );
1552    }
1553
1554    #[test]
1555    fn disk_throttle_validation_error_hash_consistent_with_eq() {
1556        use std::collections::HashSet;
1557        let e1 = DiskThrottleValidationError::BurstWithoutRate {
1558            dimension: ThrottleDimension::Iops,
1559        };
1560        let e2 = DiskThrottleValidationError::BurstWithoutRate {
1561            dimension: ThrottleDimension::Iops,
1562        };
1563        let mut set: HashSet<DiskThrottleValidationError> = HashSet::new();
1564        set.insert(e1);
1565        assert!(set.contains(&e2));
1566    }
1567}