ktstr/vmm/disk_config.rs
1//! Disk configuration for virtio-blk devices.
2//!
3//! [`Filesystem::Raw`] gives the guest an unformatted block device at
4//! `/dev/vda` (a fresh sparse `tempfile()` backing per test). No mount
5//! happens.
6//!
7//! [`Filesystem::Btrfs`] is the entry point for the disk-template
8//! lifecycle. Selecting it routes through
9//! [`crate::vmm::disk_template::ensure_template`]: on cache miss
10//! the framework boots a one-shot template VM that runs
11//! `mkfs.btrfs` against `/dev/vda`, caches the formatted image
12//! under the ktstr cache root, and per-test boots reflink-copy
13//! that template via `FICLONE` so each per-test filesystem starts
14//! pre-formatted with zero host-side mkfs cost. The host never
15//! execs mkfs against a real backing file — the kernel's own mkfs
16//! (run inside the template VM) is the on-disk-format authority.
17//! See [`crate::vmm::disk_template`] for the full cache and
18//! template-VM driver implementation.
19//!
//! `DiskConfig` is the descriptor: it is passed by value, every field
//! has a default, and it carries no path field (the framework owns the
//! per-test backing file's lifecycle).
23
24use std::num::NonZeroU64;
25
26/// Filesystem to format the backing file with.
27///
28/// `Raw` matches the actual on-disk state: no formatting happens, the
29/// guest sees `/dev/vda` as a raw unformatted block device.
30///
31/// Non-`Raw` variants activate the template-cache lifecycle (see
32/// module docs). Selecting one requires the ktstr cache directory
33/// to live on a reflink-capable filesystem (btrfs or xfs) — the
34/// per-test fan-out uses `FICLONE` to clone the cached template
35/// image and would fail on tmpfs/ext4. The host must also have the
36/// formatter named by `Self::mkfs_binary_name` on `PATH` at
37/// template-build time so the template-VM initramfs can pack it.
38#[derive(
39 Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize,
40)]
41#[serde(rename_all = "snake_case")]
42pub enum Filesystem {
43 /// No filesystem; raw block device. The guest sees `/dev/vda` as
44 /// an unformatted volume of the configured capacity. Default.
45 #[default]
46 Raw,
47 /// btrfs filesystem. Per-test backing is a reflink clone of a
48 /// host-cached, guest-formatted btrfs image at the configured
49 /// capacity. On cache miss
50 /// `crate::vmm::disk_template::ensure_template` boots a one-shot
51 /// template VM that runs `mkfs.btrfs /dev/vda` inside the guest,
52 /// caches the formatted image under the ktstr cache root, and
53 /// returns the cached path. On cache hit
54 /// `crate::vmm::disk_template::clone_to_per_test` FICLONE-clones
55 /// the cached template into a per-test tempfile under the same
56 /// cache filesystem. The cache directory must live on a btrfs/xfs
57 /// mount, and `mkfs.btrfs` must be on the host `PATH` at
58 /// template-build time. See `crate::vmm::disk_template`.
59 Btrfs,
60}
61
62impl Filesystem {
63 /// Short identifier used in cache keys and diagnostics. The
64 /// values are intentionally short (≤8 chars), kebab-free, and
65 /// stable across rebuilds — they participate in on-disk cache
66 /// path names, so renaming a variant invalidates already-cached
67 /// templates. New variants must add a new tag rather than
68 /// reusing one.
69 pub(crate) fn cache_tag(self) -> &'static str {
70 match self {
71 Filesystem::Raw => "raw",
72 Filesystem::Btrfs => "btrfs",
73 }
74 }
75
76 /// Userspace mkfs binary name to pack into the template-VM
77 /// initramfs for variants that require pre-formatting.
78 ///
79 /// Returns `Some(name)` for variants whose template-build VM
80 /// execs an `mkfs.<fstype>` against `/dev/vda` inside the guest;
81 /// `None` for variants that need no formatter (`Raw`). The
82 /// exhaustive match here forces every future `Filesystem`
83 /// variant to wire its mkfs lookup at compile time —
84 /// [`crate::vmm::disk_template::locate_host_mkfs`] takes the
85 /// returned name verbatim and PATH-resolves it, so a new
86 /// variant that forgets to declare a binary surfaces as a
87 /// non-exhaustive-match build error rather than as a runtime
88 /// "binary not found" diagnostic at template-build time.
89 ///
90 /// # Per-variant wiring points
91 ///
92 /// A new `Filesystem` variant that requires pre-formatting wires up
93 /// four per-variant match arms. All are exhaustive matches over
94 /// `Filesystem`, so a missing arm is a non-exhaustive-match build
95 /// error rather than a runtime surprise; this paragraph exists so an
96 /// implementer reading `mkfs_binary_name` sees the companions up
97 /// front:
98 ///
99 /// - `mkfs_binary_name` (here) — the `mkfs.<fstype>` binary name.
100 /// - `mkfs_package_hint` in `src/vmm/disk_template/mod.rs` — the
101 /// distro-package hint surfaced in the "binary not found"
102 /// diagnostic (e.g. `btrfs-progs` for `Btrfs`), used by
103 /// [`crate::vmm::disk_template::locate_host_mkfs`].
104 /// - [`superblock_magic`](Self::superblock_magic) — the on-disk
105 /// magic that content-validates a built or cached image. A
106 /// pre-formatting variant that returns `None` there skips
107 /// content-validation, silently reviving the unformatted-image
108 /// bug class for that variant.
109 /// - [`cache_tag`](Self::cache_tag) — the short identifier baked
110 /// into the on-disk cache key.
111 pub(crate) fn mkfs_binary_name(self) -> Option<&'static str> {
112 match self {
113 Filesystem::Raw => None,
114 Filesystem::Btrfs => Some("mkfs.btrfs"),
115 }
116 }
117
118 /// On-disk superblock magic for content-validating a cached or
119 /// freshly-built template image, as `(byte_offset, magic_u64)`.
120 ///
121 /// The host reads 8 bytes at `byte_offset`, interprets them
122 /// little-endian, and compares to `magic_u64`. A 0-byte / all-zero
123 /// image — an unformatted staging file a prior build published, or a
124 /// torn write — reads back `0` and is rejected before it can strand
125 /// every per-test clone with a `-EINVAL` mount (the guest kernel's
126 /// superblock validator rejects the missing magic). Returns `None`
127 /// for variants with no on-disk filesystem ([`Filesystem::Raw`]),
128 /// which are never content-validated.
129 ///
130 /// btrfs: `magic` is a `__le64` whose value is `BTRFS_MAGIC =
131 /// 0x4D5F53665248425F` ("_BHRfS_M"). It sits at offset `0x10040` —
132 /// the superblock starts at `BTRFS_SUPER_INFO_OFFSET (65536)` and
133 /// `struct btrfs_super_block` places `magic` after `csum[32]` +
134 /// `fsid[16]` + `bytenr (8)` + `flags (8)` = +64. (Verified against
135 /// the kernel `include/uapi/linux/btrfs_tree.h` struct + the
136 /// `BTRFS_MAGIC` / `BTRFS_CSUM_SIZE` / `BTRFS_FSID_SIZE` defines.)
137 /// The guest kernel rejects a wrong magic in `btrfs_validate_super`
138 /// (fs/btrfs/disk-io.c) with `-EINVAL`; this host check fails the
139 /// same image up front.
140 pub(crate) fn superblock_magic(self) -> Option<(u64, u64)> {
141 match self {
142 Filesystem::Raw => None,
143 Filesystem::Btrfs => Some((0x1_0040, 0x4D5F_5366_5248_425F)),
144 }
145 }
146}
147
148/// IO throttle for one disk. Each field caps a separate dimension;
149/// `None` disables that dimension's throttle. All `None` =
150/// unthrottled (the device runs at host-pread/pwrite speed).
151///
152/// Burst capacity is the token-bucket capacity (peak instantaneous
153/// burst the device will absorb before throttling kicks in). Refill
154/// rate is the steady-state allowance (`iops` / `bytes_per_sec`).
155/// When `*_burst_capacity` is `None`, the bucket capacity equals the
156/// refill rate, giving a 1-second burst — the historical default.
157/// Setting a burst capacity larger than the refill rate models a
158/// device that tolerates transient spikes (e.g. a 1-second steady
159/// rate of 1000 IOPS with a 5000-IOPS burst capacity allows a
160/// 5-second-equivalent burst from a full bucket). A burst capacity
161/// without a corresponding rate is meaningless (a bucket that never
162/// refills); [`DiskThrottle::validate`] rejects it.
163///
164/// Throttle exhaustion stalls the request internally and retries via
165/// a timer — it is not surfaced to the guest as `VIRTIO_BLK_S_IOERR`.
166///
167/// # Worked example: cloud-style 1000 IOPS / 10 MiB·s with 5× burst
168///
169/// Model a "1000 IOPS sustained, tolerate a brief unrestricted
170/// spike from a quiescent device" disk:
171///
172/// ```
173/// use ktstr::prelude::*;
174///
175/// let disk = DiskConfig::default()
176/// // Steady-state allowance — bucket refill rate.
177/// .iops(1_000)
178/// // Peak burst — bucket capacity. 5× the refill rate (5_000 ops)
179/// // is the maximum number of unrestricted ops the device will
180/// // absorb from a full bucket before throttling kicks in.
181/// .iops_burst_capacity(5_000)
182/// // Steady-state bandwidth: 10 MiB/s = 10 * 1024 * 1024 bytes/s.
183/// .bytes_per_sec(10 * 1024 * 1024)
184/// // Bandwidth burst — 5× the rate, mirroring the iops ratio.
185/// .bytes_burst_capacity(50 * 1024 * 1024);
186/// disk.throttle.validate().expect("burst >= rate, rate set");
187/// ```
188///
189/// At VM build time the buckets are seeded full (start of the test =
190/// "quiescent device"); a burst-friendly workload draws the bucket
191/// down at peak rate until empty, then is rate-limited to the refill
192/// rate from then on.
193///
194/// The `5_000`-op burst capacity is NOT "5 seconds at 1000 IOPS"
195/// in any real-time sense — the bucket drains at whatever rate the
196/// guest workload submits ops, which is usually much faster than
197/// the refill rate. A workload submitting 10_000 IOPS empties the
198/// 5_000-op bucket in ~0.5s, after which the device steady-states
199/// at the 1000-IOPS refill rate. The "5 seconds" framing only
200/// applies as a hypothetical lower bound: a workload submitting
201/// exactly the refill rate (1000 IOPS) would never drain the
202/// bucket, and a workload submitting 2× the refill rate would
203/// drain a 5×-rate bucket over ~5 seconds. Most real workloads
204/// drain bursts much faster than that.
205///
206/// # Picking values
207///
208/// - **`iops`** — peak operations the device must sustain. Includes
209/// reads, writes, and flushes (each = 1 op).
210/// - **`bytes_per_sec`** — peak bandwidth the device must sustain
211/// for read+write data combined. Flushes do not count toward
212/// bandwidth.
213/// - **`*_burst_capacity`** — how long a burst from a full bucket
214/// should run before throttling kicks in. `burst = N * rate` gives
215/// ~N seconds of unrestricted IO from a quiescent device. Leave
216/// `None` to default to `burst = rate` (1-second burst, the
217/// pre-burst-feature behaviour).
218///
219/// # Constraint summary
220///
221/// Both rules are enforced by [`DiskThrottle::validate`] (run by
222/// `init_virtio_blk` and its x86 sibling
223/// `init_virtio_blk_pci` before the backing file is allocated):
224///
225/// - `*_burst_capacity` must be `>= *_refill_rate` when both are
226/// set; a capacity below the refill rate would silently cap the
227/// steady-state at the lower capacity instead of the configured
228/// rate.
229/// - `*_burst_capacity` must not be set without its matching refill
230/// rate; a one-shot bucket that never refills doesn't model any
231/// useful throttle.
232///
233/// Clearing a refill rate via the builder (`iops(0)` /
234/// `bytes_per_sec(0)`) auto-clears its matching `*_burst_capacity`
235/// so the second rule never trips on a cleared-rate chain.
236#[derive(
237 Clone, Copy, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize,
238)]
239pub struct DiskThrottle {
240 /// Maximum operations per second (1 read = 1 op, 1 write = 1
241 /// op, 1 flush = 1 op). Refill rate of the IOPS token bucket.
242 ///
243 /// Type-enforced nonzero: `Option<NonZeroU64>` makes
244 /// `Some(0) = unlimited` impossible to express at the type
245 /// level. To disable IOPS throttling, use `None` (or set 0
246 /// through the builder, which the builder converts to `None`).
247 pub iops: Option<NonZeroU64>,
248 /// Maximum bytes per second across read+write data. Refill rate
249 /// of the bandwidth token bucket.
250 ///
251 /// Type-enforced nonzero, same reasoning as `iops`.
252 pub bytes_per_sec: Option<NonZeroU64>,
253 /// IOPS bucket capacity (peak burst). When `None`, capacity
254 /// equals the `iops` refill rate (1-second burst). When `Some`,
255 /// the value must be `>= iops` (a capacity below the refill rate
256 /// would discard refilled tokens immediately and effectively
257 /// reduce the steady-state rate); [`DiskThrottle::validate`]
258 /// enforces this. Has no effect when `iops` is `None`.
259 ///
260 /// Values above `i64::MAX` are accepted but the `TokenBucket`
261 /// seed is clamped to `i64::MAX` at construction — the effective
262 /// initial burst is ~9.2 quintillion, immaterial for realistic
263 /// settings.
264 pub iops_burst_capacity: Option<NonZeroU64>,
265 /// Bandwidth bucket capacity (peak burst, in bytes). When
266 /// `None`, capacity equals the `bytes_per_sec` refill rate
267 /// (1-second burst). When `Some`, the value must be
268 /// `>= bytes_per_sec`. Has no effect when `bytes_per_sec` is
269 /// `None`.
270 ///
271 /// Values above `i64::MAX` are accepted but the `TokenBucket`
272 /// seed is clamped to `i64::MAX` at construction — the effective
273 /// initial burst is ~9.2 exabytes, immaterial for realistic
274 /// settings.
275 pub bytes_burst_capacity: Option<NonZeroU64>,
276}
277
/// Which throttle dimension a [`DiskThrottleValidationError`] is about.
///
/// `Iops` stands for the `iops` / `iops_burst_capacity` pair and
/// `Bytes` for the `bytes_per_sec` / `bytes_burst_capacity` pair.
/// Having a discriminant lets callers drive a programmatic recovery
/// (e.g. clearing the offending burst) without parsing the rendered
/// error message.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ThrottleDimension {
    /// IOPS dimension: `iops` is the refill rate and
    /// `iops_burst_capacity` the bucket capacity.
    Iops,
    /// Bandwidth dimension: `bytes_per_sec` is the refill rate and
    /// `bytes_burst_capacity` the bucket capacity.
    Bytes,
}

impl ThrottleDimension {
    /// `(burst_field, rate_field)` names for this dimension. Keeping
    /// both names in one exhaustive table means a new variant has to
    /// supply the pair together.
    fn field_names(self) -> (&'static str, &'static str) {
        match self {
            Self::Iops => ("iops_burst_capacity", "iops"),
            Self::Bytes => ("bytes_burst_capacity", "bytes_per_sec"),
        }
    }

    /// Name of the burst-capacity field for this dimension.
    ///
    /// This is a stable wire identifier: it matches both the
    /// [`DiskThrottle`] field name and the [`DiskConfig`] builder
    /// method name, so error consumers can echo it back to the user as
    /// the field they need to change.
    pub fn burst_field(self) -> &'static str {
        let (burst, _rate) = self.field_names();
        burst
    }

    /// Name of the refill-rate field for this dimension; the
    /// counterpart of [`Self::burst_field`].
    pub fn rate_field(self) -> &'static str {
        let (_burst, rate) = self.field_names();
        rate
    }
}
315
316/// Validation failure for [`DiskThrottle::validate`].
317///
318/// Returned by [`DiskThrottle::validate`] when a throttle/burst
319/// chain violates the constraints documented on [`DiskThrottle`].
320/// The `Display` impl carries the same actionable text the previous
321/// `String`-returning shape did (with the ", or pass 0 to clear …"
322/// remediation hint preserved) so callers that bubble the error
323/// through `anyhow::Error` and match on the rendered message keep
324/// working.
325///
326/// Tests that need to assert on a specific failure variant downcast
327/// via `err.downcast_ref::<DiskThrottleValidationError>()` (when the
328/// error is wrapped in `anyhow`) or pattern-match the enum directly.
329/// The [`dimension()`](Self::dimension) accessor exposes which
330/// dimension (iops/bytes) tripped the rule for callers that route
331/// programmatic recovery (e.g. clear the offending
332/// `*_burst_capacity` and retry).
333#[derive(Clone, Debug, PartialEq, Eq, Hash, thiserror::Error)]
334pub enum DiskThrottleValidationError {
335 /// `*_burst_capacity` is set to a value strictly below the
336 /// corresponding `*` refill rate. A bucket with capacity below
337 /// its refill rate cannot hold a full second of refilled
338 /// tokens, so the effective steady-state rate would silently be
339 /// the capacity, not the configured rate.
340 #[error(
341 "{burst_field} ({burst}) must be >= {rate_field} ({rate}), \
342 or pass 0 to clear the burst override",
343 burst_field = dimension.burst_field(),
344 rate_field = dimension.rate_field(),
345 )]
346 BurstBelowRate {
347 /// Throttle dimension this failure applies to.
348 dimension: ThrottleDimension,
349 /// The offending burst-capacity value.
350 burst: u64,
351 /// The refill rate the burst was compared against.
352 rate: u64,
353 },
354 /// `*_burst_capacity` is set with no matching `*` refill rate.
355 /// A bucket with no refill rate is a functionally unbounded
356 /// one-shot capacity, which does not match any useful
357 /// throttling model.
358 #[error(
359 "{burst_field} set without {rate_field} refill rate, \
360 or pass 0 to clear the burst override",
361 burst_field = dimension.burst_field(),
362 rate_field = dimension.rate_field(),
363 )]
364 BurstWithoutRate {
365 /// Throttle dimension this failure applies to.
366 dimension: ThrottleDimension,
367 },
368}
369
370impl DiskThrottleValidationError {
371 /// Throttle dimension (iops/bytes) the failure applies to. Lets
372 /// callers route a programmatic recovery without parsing the
373 /// rendered message — e.g. "clear the offending burst override
374 /// and re-validate" can dispatch on this without string-matching
375 /// `iops_burst_capacity` vs `bytes_burst_capacity`.
376 pub fn dimension(&self) -> ThrottleDimension {
377 match self {
378 DiskThrottleValidationError::BurstBelowRate { dimension, .. } => *dimension,
379 DiskThrottleValidationError::BurstWithoutRate { dimension } => *dimension,
380 }
381 }
382}
383
384impl DiskThrottle {
385 /// Non-panicking validation of throttle/burst consistency.
386 ///
387 /// Rejects burst capacities below their corresponding refill
388 /// rate. A bucket with capacity below its refill rate cannot
389 /// hold a full second of refilled tokens, so the effective
390 /// steady-state rate would silently be the capacity, not the
391 /// configured rate — a user who sets `iops(1000).iops_burst_capacity(500)`
392 /// would expect 1000 IOPS and silently get 500.
393 ///
394 /// A burst capacity set without a corresponding rate is also
395 /// rejected: a bucket with no refill rate is functionally
396 /// unbounded one-shot capacity, which does not match any
397 /// useful throttling model.
398 ///
399 /// Returns [`DiskThrottleValidationError`] on failure — a typed
400 /// enum so callers can pattern-match the failure mode (e.g.
401 /// route a programmatic recovery via the
402 /// [`dimension()`](DiskThrottleValidationError::dimension)
403 /// accessor) rather than string-matching the rendered message.
404 /// The `Display` impl preserves the wording of the prior
405 /// `String`-returning shape, including the ", or pass 0 to
406 /// clear the burst override" remediation hint, so anyhow-bubbled
407 /// callers that match on the rendered text still work.
408 pub fn validate(&self) -> Result<(), DiskThrottleValidationError> {
409 if let Some(burst) = self.iops_burst_capacity {
410 match self.iops {
411 Some(rate) if burst < rate => {
412 return Err(DiskThrottleValidationError::BurstBelowRate {
413 dimension: ThrottleDimension::Iops,
414 burst: burst.get(),
415 rate: rate.get(),
416 });
417 }
418 None => {
419 return Err(DiskThrottleValidationError::BurstWithoutRate {
420 dimension: ThrottleDimension::Iops,
421 });
422 }
423 _ => {}
424 }
425 }
426 if let Some(burst) = self.bytes_burst_capacity {
427 match self.bytes_per_sec {
428 Some(rate) if burst < rate => {
429 return Err(DiskThrottleValidationError::BurstBelowRate {
430 dimension: ThrottleDimension::Bytes,
431 burst: burst.get(),
432 rate: rate.get(),
433 });
434 }
435 None => {
436 return Err(DiskThrottleValidationError::BurstWithoutRate {
437 dimension: ThrottleDimension::Bytes,
438 });
439 }
440 _ => {}
441 }
442 }
443 Ok(())
444 }
445}
446
447/// Per-disk config. `Default` is raw 256 MiB device on `/dev/vda`;
448/// formatting and auto-mount are deferred.
449///
450/// No backing-file path field: the framework owns the per-test
451/// backing file (`tempfile()` for `Raw`, FICLONE-cloned template
452/// for `Btrfs`). See module docs.
453#[derive(Clone, Debug, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
454pub struct DiskConfig {
455 /// Advertised capacity in mebibytes (MiB). `capacity_bytes()`
456 /// computes `capacity_mib << 20`. 256 MiB default capacity.
457 /// Sized to accommodate common guest filesystem formatters;
458 /// smaller values are accepted but may cause `mkfs` failures
459 /// inside the template VM (see
460 /// `crate::vmm::disk_template::build_template_via_vm`) for
461 /// `Filesystem::Btrfs`.
462 pub capacity_mib: u32,
463 /// Filesystem to format the per-test backing with. `Raw` leaves
464 /// the device unformatted; `Btrfs` routes through the
465 /// template-cache lifecycle.
466 pub filesystem: Filesystem,
467 /// IO throttle. Default unthrottled.
468 pub throttle: DiskThrottle,
469 /// Read-only at the device level — the device advertises
470 /// VIRTIO_BLK_F_RO so the guest mounts read-only. Useful for
471 /// tests that need protection against accidental writes.
472 pub read_only: bool,
473 /// Optional human-readable label for this disk. `None` (the
474 /// default) is an anonymous disk addressable only by index. A
475 /// name lets WorkType variants reference the disk symbolically
476 /// (e.g. `"data"`, `"log"`) instead of by index, which keeps
477 /// tests stable across topology rearrangements.
478 ///
479 /// Stored as `Option<&'static str>` so `DiskConfig` is
480 /// const-constructible — `DiskConfig::DEFAULT.with_name("data")`
481 /// works in a `static` or `const` initializer, which the
482 /// `#[ktstr_test(disk = ...)]` macro relies on. The field is
483 /// `#[serde(skip)]` because `&'static str` can't be deserialized
484 /// from arbitrary input without leaking; the name is operator
485 /// metadata that the framework computes on-the-fly from the
486 /// declaration, not state that needs to round-trip through
487 /// sidecar JSON. Sidecar consumers that need to associate a
488 /// disk identity with serialized data should use the disk's
489 /// index instead.
490 #[serde(skip)]
491 pub name: Option<&'static str>,
492 /// Opt out of guest-side auto-mount. Default `false` means a
493 /// non-`Raw` disk is auto-mounted at `/mnt/disk0` by the guest
494 /// init (see
495 /// `crate::vmm::rust_init::auto_mount_data_disks`); setting
496 /// `true` suppresses the auto-mount cmdline tokens and leaves
497 /// `/dev/vda` raw to the test author. Has no effect for
498 /// `Filesystem::Raw` disks (there is nothing to mount). The
499 /// only honest reason to flip this is a test that wants to
500 /// drive the mount path itself (e.g. exercise mount-option
501 /// fuzzing or fail-injection on the kernel mount syscall).
502 pub no_auto_mount: bool,
503}
504
505impl Default for DiskConfig {
506 /// 256 MiB, [`Filesystem::Raw`], no throttle. The `Raw` default
507 /// keeps the on-host cost minimal — no template-VM build, no
508 /// cache directory required — and the per-test backing is a
509 /// fresh sparse `tempfile()` per VM (see
510 /// `crate::vmm::KtstrVm::init_virtio_blk`).
511 ///
512 /// # Memory footprint
513 ///
514 /// The 256 MiB sparse file lives under the host's `TMPDIR`
515 /// (`tempfile()`); actual host disk/RAM consumption equals the
516 /// bytes the guest writes, not the advertised capacity. On
517 /// tmpfs-backed `TMPDIR` (the default on most Linux distros), a
518 /// fully-written disk consumes 256 MiB of host **RAM** per test
519 /// — operators running large topologies should size host memory
520 /// accordingly or override `TMPDIR` to a disk-backed path.
521 fn default() -> Self {
522 Self::DEFAULT
523 }
524}
525
526impl DiskConfig {
527 /// Const-evaluable default — same values as [`Default::default`]
528 /// but usable in `static` / `const` initializers. Required for
529 /// the `#[ktstr_test(disk = ...)]` macro surface: the macro
530 /// emits a `static` containing a `DiskConfig`, which must be
531 /// const-constructible.
532 ///
533 /// Spread via `..DiskConfig::DEFAULT` in struct-update syntax,
534 /// or chain const setters (`DiskConfig::DEFAULT.with_name("data")`).
535 pub const DEFAULT: Self = Self {
536 capacity_mib: 256,
537 filesystem: Filesystem::Raw,
538 throttle: DiskThrottle::DEFAULT,
539 read_only: false,
540 name: None,
541 no_auto_mount: false,
542 };
543}
544
545impl DiskThrottle {
546 /// Const-evaluable default — all `None` (unthrottled), matching
547 /// [`Default::default`]. Required so `DiskConfig::DEFAULT` can be
548 /// `const`.
549 pub const DEFAULT: Self = Self {
550 iops: None,
551 bytes_per_sec: None,
552 iops_burst_capacity: None,
553 bytes_burst_capacity: None,
554 };
555}
556
557impl DiskConfig {
558 /// Set capacity in mebibytes (MiB). The argument is interpreted
559 /// as binary mebibytes per `Self::capacity_bytes`, not decimal
560 /// megabytes.
561 #[must_use = "builder methods consume self; bind the result"]
562 pub fn capacity_mib(mut self, mib: u32) -> Self {
563 self.capacity_mib = mib;
564 self
565 }
566
567 /// Select the on-disk filesystem.
568 ///
569 /// `Filesystem::Raw` (the default) leaves the device unformatted.
570 /// `Filesystem::Btrfs` routes through
571 /// `crate::vmm::disk_template::ensure_template`: on cache miss
572 /// the framework boots a one-shot template VM that runs
573 /// `mkfs.btrfs` inside the guest, caches the formatted image,
574 /// and per-test boots reflink-clone it. The lifecycle requires
575 /// a reflink-capable cache directory (btrfs or xfs) and a host
576 /// `mkfs.btrfs` binary on `PATH` at template-build time. See
577 /// the module-level docs and `crate::vmm::disk_template`.
578 ///
579 /// # Disk-template lifecycle
580 ///
581 /// For `Filesystem::Btrfs`, the per-test backing file is produced
582 /// in three stages — none of which the test author needs to drive
583 /// explicitly:
584 ///
585 /// 1. **Cache lookup** —
586 /// `disk_template::ensure_template`
587 /// keys off `(filesystem, capacity)` and returns the cached
588 /// image path on hit. See the module docs at
589 /// `crate::vmm::disk_template` for the cache-key encoding
590 /// and on-disk layout.
591 /// 2. **Template build (cache miss)** —
592 /// `disk_template::build_template_via_vm`
593 /// boots a one-shot guest with the host's `mkfs.btrfs` packed
594 /// into the initramfs; the guest formats `/dev/vda` against
595 /// a sparse staging image, and the framework atomically moves
596 /// the formatted image into the cache via
597 /// `disk_template::store_atomic`.
598 /// The host never execs `mkfs.btrfs` against a real backing
599 /// file — the guest kernel is the on-disk-format authority.
600 /// 3. **Per-test fan-out** —
601 /// `disk_template::clone_to_per_test`
602 /// `FICLONE`-clones the cached image into a tempfile under
603 /// the cache root. The clone is O(metadata) and copy-on-write
604 /// at the extent level, so per-test writes never touch the
605 /// cached template.
606 ///
607 /// Stage 3 requires the cache directory to live on a reflink-
608 /// capable filesystem (btrfs or xfs); see
609 /// `disk_template::verify_cache_dir_supports_reflink`
610 /// for the gate and
611 /// `crate::vmm::KtstrVmBuilder::disk` for the full
612 /// builder-side wiring.
613 #[must_use = "builder methods consume self; bind the result"]
614 pub fn filesystem(mut self, fs: Filesystem) -> Self {
615 self.filesystem = fs;
616 self
617 }
618
619 /// Set IOPS throttle. Passing 0 disables IOPS throttling
620 /// (equivalent to `None`). To throttle near-zero, use `iops(1)`.
621 /// There is no "block all IO" mode — the minimum throttled rate
622 /// is 1 op/sec. Any positive value is wrapped in `NonZeroU64`.
623 ///
624 /// Clearing the rate (`iops(0)`) also clears the matching
625 /// `iops_burst_capacity` — a burst capacity without a refill
626 /// rate is invalid (caught by [`DiskThrottle::validate`]) and
627 /// keeping a stale burst around after the user explicitly
628 /// disabled the rate is a footgun: the next `validate()` call
629 /// would fail with a less-helpful "burst without rate" error
630 /// rather than the user's intent (a fully-unthrottled bucket).
631 #[must_use = "builder methods consume self; bind the result"]
632 pub fn iops(mut self, iops: u64) -> Self {
633 self.throttle.iops = NonZeroU64::new(iops);
634 if self.throttle.iops.is_none() {
635 self.throttle.iops_burst_capacity = None;
636 }
637 self
638 }
639
640 /// Set bandwidth throttle (bytes per second). A zero value
641 /// disables bandwidth throttling (stored as `None`); any
642 /// positive value is wrapped in `NonZeroU64`.
643 ///
644 /// Clearing the rate (`bytes_per_sec(0)`) also clears the
645 /// matching `bytes_burst_capacity` for the same reason as
646 /// `iops` — a burst without a rate is invalid and stale-burst
647 /// retention turns a deliberate "drop the throttle" into a
648 /// validate-time failure.
649 #[must_use = "builder methods consume self; bind the result"]
650 pub fn bytes_per_sec(mut self, bytes_per_sec: u64) -> Self {
651 self.throttle.bytes_per_sec = NonZeroU64::new(bytes_per_sec);
652 if self.throttle.bytes_per_sec.is_none() {
653 self.throttle.bytes_burst_capacity = None;
654 }
655 self
656 }
657
658 /// Set IOPS burst capacity (token-bucket peak). A zero value
659 /// clears the burst override (stored as `None`), reverting to
660 /// the default 1-second burst (capacity equals refill rate).
661 /// Any positive value is wrapped in `NonZeroU64`.
662 ///
663 /// The capacity must be `>= iops` when both are set, and must
664 /// not be set without `iops`. Both rules are enforced by
665 /// [`DiskThrottle::validate`] at VM build time, not by the
666 /// builder — the builder is order-independent (a user may set
667 /// burst before rate). Tests should call `validate()` after
668 /// chaining, or construct an invalid config and observe the
669 /// error from VM build.
670 #[must_use = "builder methods consume self; bind the result"]
671 pub fn iops_burst_capacity(mut self, capacity: u64) -> Self {
672 self.throttle.iops_burst_capacity = NonZeroU64::new(capacity);
673 self
674 }
675
676 /// Set bandwidth burst capacity in bytes (token-bucket peak).
677 /// A zero value clears the burst override (stored as `None`),
678 /// reverting to the default 1-second burst. Any positive value
679 /// is wrapped in `NonZeroU64`.
680 ///
681 /// The capacity must be `>= bytes_per_sec` when both are set,
682 /// and must not be set without `bytes_per_sec`. Both rules are
683 /// enforced by [`DiskThrottle::validate`] at VM build time, not
684 /// by the builder.
685 #[must_use = "builder methods consume self; bind the result"]
686 pub fn bytes_burst_capacity(mut self, capacity: u64) -> Self {
687 self.throttle.bytes_burst_capacity = NonZeroU64::new(capacity);
688 self
689 }
690
691 /// Mark the disk read-only (advertises `VIRTIO_BLK_F_RO`).
692 /// Default is read-write; this builder takes no argument (no
693 /// boolean footgun) and only flips the flag on. To return to
694 /// read-write, drop the call or reconstruct from
695 /// `DiskConfig::default()`.
696 #[must_use = "builder methods consume self; bind the result"]
697 pub fn read_only(mut self) -> Self {
698 self.read_only = true;
699 self
700 }
701
702 /// Attach a human-readable label to this disk. WorkType variants
703 /// that need to address a specific disk (e.g. one of several
704 /// attached) can resolve the name instead of relying on
705 /// attachment order. Default is anonymous (`None`); calling
706 /// `.with_name(...)` sets it.
707 ///
708 /// The name also drives the guest auto-mount path: a disk
709 /// named `"data"` auto-mounts at `/mnt/data` instead of the
710 /// default `/mnt/disk0`. See [`Self::no_auto_mount`] to opt
711 /// out of auto-mount entirely.
712 ///
713 /// Takes `&'static str` so the builder is `const fn` —
714 /// `DiskConfig::DEFAULT.with_name("data")` can spread into a
715 /// `static` initializer. String literals are `&'static`; tests
716 /// needing a dynamic name should build the disk programmatically
717 /// rather than going through this builder.
718 #[must_use = "builder methods consume self; bind the result"]
719 pub const fn with_name(mut self, name: &'static str) -> Self {
720 self.name = Some(name);
721 self
722 }
723
724 /// Suppress the guest-side auto-mount of this disk. Default
725 /// behavior auto-mounts a non-`Raw` disk at the path returned
726 /// by `Self::auto_mount_path`; calling this method flips
727 /// the flag on. Useful for tests that want raw access to
728 /// `/dev/vda` after a host-driven mkfs (e.g. mount-option
729 /// fuzzing, deliberate mount-failure injection, manual
730 /// subvolume traversal).
731 ///
732 /// No-op for `Filesystem::Raw` disks (there is nothing to
733 /// mount). The flag is honored at cmdline-emission time in
734 /// `crate::vmm::KtstrVm::build_guest_cmdline` (via
735 /// `disk_auto_mount_cmdline_tokens`): when set, the
736 /// `KTSTR_DISK0_FS` / `KTSTR_DISK0_MOUNT` / `KTSTR_DISK0_RO`
737 /// tokens are not emitted, and the guest's
738 /// `crate::vmm::rust_init::auto_mount_data_disks` short-
739 /// circuits at the missing-token check.
740 #[must_use = "builder methods consume self; bind the result"]
741 pub fn no_auto_mount(mut self) -> Self {
742 self.no_auto_mount = true;
743 self
744 }
745
746 /// Resolve the guest-side mount path for this disk. Returns
747 /// `/mnt/<name>` when [`Self::name`] is set, `/mnt/disk0`
748 /// otherwise. Used by the cmdline emission to populate the
749 /// `KTSTR_DISK0_MOUNT` token consumed by the guest's
750 /// `crate::vmm::rust_init::auto_mount_data_disks`.
751 #[allow(dead_code)]
752 pub(crate) fn auto_mount_path(&self) -> String {
753 match self.name {
754 Some(n) => format!("/mnt/{n}"),
755 None => "/mnt/disk0".to_string(),
756 }
757 }
758
759 /// Capacity in bytes (`capacity_mib << 20`). Used by the device
760 /// for the config-space `capacity` field.
761 pub(crate) fn capacity_bytes(&self) -> u64 {
762 (self.capacity_mib as u64) << 20
763 }
764
765 /// Capacity in 512-byte sectors.
766 ///
767 /// `dead_code` allow: only the in-file `#[cfg(test)]` tests
768 /// consume this; the production virtio-blk path uses
769 /// [`Self::capacity_bytes`] and divides by `VIRTIO_BLK_SECTOR_SIZE`
770 /// at the device layer.
771 #[allow(dead_code)]
772 pub(crate) fn capacity_sectors(&self) -> u64 {
773 self.capacity_bytes() / 512
774 }
775}
776
777#[cfg(test)]
778mod tests {
779 use super::*;
780
/// Pins every field of `DiskConfig::default()`: 256 MiB, `Raw`,
/// unthrottled, read-write, anonymous, and auto-mount not
/// suppressed. A change to any default must show up here.
#[test]
fn default_is_256mib_raw() {
    let d = DiskConfig::default();
    assert_eq!(d.capacity_mib, 256);
    assert_eq!(d.filesystem, Filesystem::Raw);
    assert_eq!(d.throttle, DiskThrottle::default());
    assert!(!d.read_only);
    assert!(d.name.is_none());
    // Auto-mount suppression is opt-in via `no_auto_mount()`, so the
    // flag must start cleared.
    assert!(!d.no_auto_mount);
}
790
/// Checks `capacity_bytes` and `capacity_sectors` for the default
/// 256 MiB disk and for a disk resized to 512 MiB.
#[test]
fn capacity_helpers() {
    let default_disk = DiskConfig::default();
    let resized_disk = DiskConfig::default().capacity_mib(512);

    // (disk, expected bytes, expected 512-byte sectors)
    let cases = [
        (default_disk, 256u64 * 1024 * 1024, 524_288u64),
        (resized_disk, 512 * 1024 * 1024, 1_048_576),
    ];
    for (disk, want_bytes, want_sectors) in cases {
        assert_eq!(disk.capacity_bytes(), want_bytes);
        assert_eq!(disk.capacity_sectors(), want_sectors);
    }
}
801
/// The `filesystem` builder stores the requested variant and a later
/// call replaces the earlier one.
#[test]
fn filesystem_builder_sets_variant() {
    let as_btrfs = DiskConfig::default().filesystem(Filesystem::Btrfs);
    assert_eq!(as_btrfs.filesystem, Filesystem::Btrfs);

    // Overwrite semantics, not accumulation: the most recent call wins.
    let back_to_raw = as_btrfs.filesystem(Filesystem::Raw);
    assert_eq!(back_to_raw.filesystem, Filesystem::Raw);
}
810
/// Chains several builders and checks that each one landed in its
/// own field while the untouched `filesystem` stayed at `Raw`.
#[test]
fn builder_chain() {
    let DiskConfig {
        capacity_mib,
        filesystem,
        throttle,
        read_only,
        ..
    } = DiskConfig::default()
        .capacity_mib(128)
        .iops(1000)
        .bytes_per_sec(10 * 1024 * 1024)
        .read_only();

    assert_eq!(capacity_mib, 128);
    assert_eq!(filesystem, Filesystem::Raw);
    assert_eq!(throttle.iops, NonZeroU64::new(1000));
    assert_eq!(throttle.bytes_per_sec, NonZeroU64::new(10 * 1024 * 1024));
    assert!(read_only);
}
824
/// A zero rate passed to `iops` or `bytes_per_sec` is stored as
/// `None`.
#[test]
fn iops_zero_becomes_none() {
    // `Some(0)` cannot exist for a `NonZeroU64`. The builders take a
    // plain `u64` for convenience and turn 0 into `None`
    // (= unthrottled) at the type boundary.
    let iops_rate = DiskConfig::default().iops(0).throttle.iops;
    assert!(iops_rate.is_none());

    let byte_rate = DiskConfig::default()
        .bytes_per_sec(0)
        .throttle
        .bytes_per_sec;
    assert!(byte_rate.is_none());
}
835
/// `Filesystem::default()` must be `Raw`.
#[test]
fn filesystem_default_is_raw() {
    // `Raw` (no formatting) is the v0 behaviour, and
    // `Default::default()` has to keep producing a working v0 config.
    // The `#[default]` attribute on the enum variant decides this;
    // pinning it here makes a patch that moves `#[default]` to a
    // non-`Raw` variant fail visibly.
    let fallback = Filesystem::default();
    assert_eq!(fallback, Filesystem::Raw);
}
846
/// Each `Filesystem` variant serializes to its snake_case JSON string
/// and parses back from that same string.
#[test]
fn filesystem_serde_snake_case() {
    let cases = [
        (Filesystem::Raw, r#""raw""#),
        (Filesystem::Btrfs, r#""btrfs""#),
    ];
    for (variant, wire) in cases {
        assert_eq!(serde_json::to_string(&variant).unwrap(), wire);
        let parsed: Filesystem = serde_json::from_str(wire).unwrap();
        assert_eq!(parsed, variant);
    }
}
859
/// `cache_tag()` must equal the serde name of each variant.
#[test]
fn filesystem_cache_tag_round_trips_serde_name() {
    // `cache_tag` is the on-disk identifier used in the template-cache
    // key. Keeping it equal to the serde serialization keeps the two
    // name spaces aligned: a future `#[serde(rename = "...")]` must
    // update `cache_tag` in lock-step or the cache stops finding old
    // entries.
    [Filesystem::Raw, Filesystem::Btrfs]
        .into_iter()
        .for_each(|fs| {
            let quoted = serde_json::to_string(&fs).unwrap();
            // Drop the surrounding JSON quotes to get the bare name.
            let wire_name = quoted.trim_matches('"');
            assert_eq!(fs.cache_tag(), wire_name, "cache_tag drift for {fs:?}");
        });
}
873
/// `DiskThrottle::default()` leaves all four limits unset.
#[test]
fn throttle_default_is_unthrottled() {
    let DiskThrottle {
        iops,
        bytes_per_sec,
        iops_burst_capacity,
        bytes_burst_capacity,
    } = DiskThrottle::default();

    for limit in [
        iops,
        bytes_per_sec,
        iops_burst_capacity,
        bytes_burst_capacity,
    ] {
        assert!(limit.is_none());
    }
}
882
/// A config built with zero rates keeps `None` rates across a JSON
/// round trip.
#[test]
fn iops_zero_serde_roundtrip() {
    // `iops(0)` / `bytes_per_sec(0)` store `None`. Serialize and
    // deserialize, then confirm the fields are still `None`. This
    // guards the `NonZeroU64` invariant against a serde regression
    // that could bring back a `Some(0)` representation: impossible by
    // construction today, but a migration to a plain `Option<u64>`
    // would allow it again.
    let before = DiskConfig::default().iops(0).bytes_per_sec(0);
    let wire = serde_json::to_string(&before).expect("serialize");
    let after: DiskConfig = serde_json::from_str(&wire).expect("deserialize");

    let throttle = &after.throttle;
    assert!(throttle.iops.is_none());
    assert!(throttle.bytes_per_sec.is_none());

    // Whole-struct comparison relies on `DiskConfig: PartialEq`.
    assert_eq!(after, before);
}
901
/// Full serde roundtrip with every field set to a non-default
/// value. Pins field-by-field equality after a JSON round trip so
/// a future `#[serde(rename = ...)]` or `#[serde(skip)]`
/// regression (the typical drift mode for serde-derived structs)
/// surfaces here loudly.
#[test]
fn disk_config_full_serde_roundtrip() {
    // Every value below deliberately differs from
    // `DiskConfig::default()` (256 MiB, `Raw`, unthrottled,
    // read-write, anonymous, auto-mount enabled). A field that fell
    // back to its default while deserializing would otherwise still
    // compare equal to the original and hide the drift.
    let original = DiskConfig {
        capacity_mib: 512,
        filesystem: Filesystem::Btrfs,
        throttle: DiskThrottle {
            iops: NonZeroU64::new(2_500),
            bytes_per_sec: NonZeroU64::new(50 * 1024 * 1024),
            iops_burst_capacity: NonZeroU64::new(10_000),
            bytes_burst_capacity: NonZeroU64::new(200 * 1024 * 1024),
        },
        read_only: true,
        name: Some("data-disk"),
        no_auto_mount: true,
    };

    let json = serde_json::to_string(&original).expect("serialize DiskConfig");
    let parsed: DiskConfig = serde_json::from_str(&json).expect("deserialize DiskConfig");

    // Wire-format canonicality: the serialized key must be
    // `capacity_mib`, not the pre-rename `capacity_mb` (legacy)
    // form. Catches a future `#[serde(rename = "capacity_mb")]`
    // regression that flips the emitted key.
    assert!(
        json.contains("\"capacity_mib\""),
        "JSON must use the canonical `capacity_mib` key: {json}"
    );
    assert!(
        !json.contains("\"capacity_mb\""),
        "JSON must NOT contain the pre-rename `capacity_mb` key: {json}"
    );

    // Deserialize-side break: a JSON with the legacy `capacity_mb`
    // key must FAIL to parse. Catches `#[serde(alias = "capacity_mb")]`,
    // which is deserialize-only sugar: it leaves the serialized key
    // alone (so the contains-checks above pass) but silently accepts
    // the old name on read. Built by replacing the canonical key in
    // the just-serialized JSON, so the legacy JSON is well-formed in
    // every respect except the one renamed key.
    let legacy_json = json.replace("\"capacity_mib\"", "\"capacity_mb\"");
    assert!(
        serde_json::from_str::<DiskConfig>(&legacy_json).is_err(),
        "deserialization must reject the pre-rename `capacity_mb` key \
         — a regression that added `#[serde(alias = \"capacity_mb\")]` \
         would silently accept old sidecars on read: legacy_json={legacy_json}"
    );

    // Whole-struct equality first: catches any field drift.
    // `name` is stripped because it is `#[serde(skip)]` (see the
    // dedicated assert at the bottom of this fn) and always
    // round-trips to `None` regardless of the original.
    let original_for_eq = DiskConfig {
        name: None,
        ..original.clone()
    };
    assert_eq!(parsed, original_for_eq);

    // Field-by-field follow-up: each line catches a distinct drift
    // mode on its own (rename, skip, type-narrowing).
    assert_eq!(parsed.capacity_mib, original.capacity_mib);
    assert_eq!(parsed.filesystem, original.filesystem);
    assert_eq!(parsed.throttle.iops, original.throttle.iops);
    assert_eq!(
        parsed.throttle.bytes_per_sec,
        original.throttle.bytes_per_sec
    );
    assert_eq!(
        parsed.throttle.iops_burst_capacity,
        original.throttle.iops_burst_capacity
    );
    assert_eq!(
        parsed.throttle.bytes_burst_capacity,
        original.throttle.bytes_burst_capacity
    );
    assert_eq!(parsed.read_only, original.read_only);
    assert_eq!(parsed.no_auto_mount, original.no_auto_mount);

    // `name` is `#[serde(skip)]` since `&'static str` can't be
    // deserialized from arbitrary input, so the field round-trips
    // to `None` regardless of the original. Test authors that need
    // disk identity in serialized output should use the disk's
    // index instead. Pin this contract here so a future serde
    // tweak (e.g. dropping the skip) surfaces in this assertion.
    assert!(
        parsed.name.is_none(),
        "DiskConfig.name uses #[serde(skip)]; round-trip must produce None regardless of original (was {:?})",
        original.name,
    );
}
994
/// JSON round trip of the plain `DiskConfig::default()` value, whose
/// throttle fields are all `None`.
///
/// `iops_zero_serde_roundtrip` reaches the same shape through
/// `.iops(0)/.bytes_per_sec(0)`; this test starts from the untouched
/// default instead and checks that the unset throttle and the
/// whole-struct `PartialEq` both survive serialize → JSON →
/// deserialize.
#[test]
fn disk_config_default_unthrottled_serde_roundtrip() {
    let pristine = DiskConfig::default();
    // Preconditions: the default really is unthrottled and anonymous.
    assert!(pristine.throttle.iops.is_none());
    assert!(pristine.throttle.bytes_per_sec.is_none());
    assert!(pristine.name.is_none());

    let wire = serde_json::to_string(&pristine).expect("serialize default DiskConfig");
    let restored: DiskConfig =
        serde_json::from_str(&wire).expect("deserialize default DiskConfig");

    assert_eq!(restored, pristine);
    assert_eq!(restored.capacity_mib, pristine.capacity_mib);
    assert_eq!(restored.filesystem, pristine.filesystem);

    let throttle = &restored.throttle;
    assert!(throttle.iops.is_none());
    assert!(throttle.bytes_per_sec.is_none());
    assert!(throttle.iops_burst_capacity.is_none());
    assert!(throttle.bytes_burst_capacity.is_none());

    assert_eq!(restored.read_only, pristine.read_only);
    assert!(restored.name.is_none());
}
1022
/// `with_name` stores the label, and a second call replaces the
/// first.
#[test]
fn with_name_builder_sets_label() {
    let labelled = DiskConfig::default().with_name("data-disk");
    assert_eq!(labelled.name, Some("data-disk"));

    // Overwrite semantics: only the most recent label is kept.
    let relabelled = DiskConfig::default()
        .with_name("first")
        .with_name("second");
    assert_eq!(relabelled.name, Some("second"));
}
1032
/// `with_name` is a `const fn`, so a `DiskConfig` constant can be
/// built by chaining it off the const `DEFAULT`. The `#[ktstr_test]`
/// macro's `disk = ...` arm depends on that const-construction
/// property, which this test pins.
#[test]
fn with_name_works_in_const_context() {
    // Evaluated at compile time; a non-const `with_name` would fail
    // to build rather than fail at runtime.
    const LABELLED: DiskConfig = DiskConfig::DEFAULT.with_name("static-disk");

    assert_eq!(LABELLED.name, Some("static-disk"));
    assert_eq!(LABELLED.capacity_mib, 256);
}
1043
/// Setting both rates and both burst capacities through the builders
/// populates all four throttle fields with the given values.
#[test]
fn burst_capacity_builders_set_fields() {
    let throttle = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(5_000)
        .bytes_per_sec(10 * 1024 * 1024)
        .bytes_burst_capacity(50 * 1024 * 1024)
        .throttle;

    // (stored value, value passed to the builder)
    let checks = [
        (throttle.iops, 1_000u64),
        (throttle.iops_burst_capacity, 5_000),
        (throttle.bytes_per_sec, 10 * 1024 * 1024),
        (throttle.bytes_burst_capacity, 50 * 1024 * 1024),
    ];
    for (stored, requested) in checks {
        assert_eq!(stored, NonZeroU64::new(requested));
    }
}
1059
/// Passing 0 to a burst-capacity builder clears a previously set
/// burst.
#[test]
fn burst_capacity_zero_becomes_none() {
    // Same ergonomics as `iops` / `bytes_per_sec`: 0 becomes `None`
    // at the type boundary, so a caller can drop an earlier burst
    // override without starting again from a fresh `DiskConfig`.
    let iops_burst = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(5_000)
        .iops_burst_capacity(0)
        .throttle
        .iops_burst_capacity;
    assert!(iops_burst.is_none());

    let bytes_burst = DiskConfig::default()
        .bytes_per_sec(1_000)
        .bytes_burst_capacity(5_000)
        .bytes_burst_capacity(0)
        .throttle
        .bytes_burst_capacity;
    assert!(bytes_burst.is_none());
}
1077
/// A default `DiskConfig` carries no burst-capacity override in
/// either dimension.
#[test]
fn burst_capacity_default_is_none() {
    let throttle = DiskConfig::default().throttle;
    for burst in [throttle.iops_burst_capacity, throttle.bytes_burst_capacity] {
        assert!(burst.is_none());
    }
}
1084
/// `iops(0)` clears the matching `iops_burst_capacity` along with
/// the rate.
///
/// [`DiskThrottle::validate`] rejects a burst capacity that has no
/// refill rate. Without the auto-clear, the chain
/// `.iops(1000).iops_burst_capacity(5000).iops(0)` would keep a
/// stale burst, and the next `validate()` would report "burst
/// without rate" instead of honouring the caller's intent of a fully
/// unthrottled bucket.
#[test]
fn clearing_iops_clears_iops_burst() {
    let cleared = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(5_000)
        .iops(0)
        .throttle;
    assert!(cleared.iops.is_none());
    assert!(
        cleared.iops_burst_capacity.is_none(),
        "clearing iops must also clear iops_burst_capacity \
         so validate() doesn't fail with a stale-burst error",
    );

    // The dimensions are independent: clearing iops must leave the
    // bytes rate and burst in place.
    let bytes_side = DiskConfig::default()
        .bytes_per_sec(2_000)
        .bytes_burst_capacity(8_000)
        .iops(0)
        .throttle;
    assert!(bytes_side.bytes_per_sec.is_some());
    assert!(bytes_side.bytes_burst_capacity.is_some());
}
1112
/// `bytes_per_sec(0)` clears the matching `bytes_burst_capacity`
/// along with the rate. Counterpart of
/// `clearing_iops_clears_iops_burst` for the bytes dimension.
#[test]
fn clearing_bytes_per_sec_clears_bytes_burst() {
    let cleared = DiskConfig::default()
        .bytes_per_sec(2_000)
        .bytes_burst_capacity(8_000)
        .bytes_per_sec(0)
        .throttle;
    assert!(cleared.bytes_per_sec.is_none());
    assert!(
        cleared.bytes_burst_capacity.is_none(),
        "clearing bytes_per_sec must also clear \
         bytes_burst_capacity",
    );

    // Clearing the bytes rate must leave the iops rate and burst in
    // place.
    let iops_side = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(5_000)
        .bytes_per_sec(0)
        .throttle;
    assert!(iops_side.iops.is_some());
    assert!(iops_side.iops_burst_capacity.is_some());
}
1136
/// Integration check for the clear-rate path: set rate and burst in
/// both dimensions, clear both rates, and confirm that the resulting
/// throttle is empty and that `validate()` accepts it (no
/// orphan-burst error).
#[test]
fn clearing_rate_leaves_throttle_validate_clean() {
    let fully_set = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(5_000)
        .bytes_per_sec(2_000)
        .bytes_burst_capacity(8_000);
    let throttle = fully_set.iops(0).bytes_per_sec(0).throttle;

    for limit in [
        throttle.iops,
        throttle.bytes_per_sec,
        throttle.iops_burst_capacity,
        throttle.bytes_burst_capacity,
    ] {
        assert!(limit.is_none());
    }

    throttle
        .validate()
        .expect("post-clear throttle must validate clean");
}
1159
/// `validate()` accepts every configuration in which no burst is
/// below its rate and no burst is set without a rate.
#[test]
fn validate_accepts_burst_at_or_above_rate() {
    let cases = [
        // burst == rate: the historical 1-second-burst behaviour,
        // written out explicitly.
        (
            DiskConfig::default().iops(1_000).iops_burst_capacity(1_000),
            "burst == iops accepted",
        ),
        // burst > rate: a multi-second burst in both dimensions.
        (
            DiskConfig::default()
                .iops(1_000)
                .iops_burst_capacity(5_000)
                .bytes_per_sec(10 * 1024 * 1024)
                .bytes_burst_capacity(50 * 1024 * 1024),
            "burst > rate accepted",
        ),
        // Nothing set at all: trivially valid.
        (DiskConfig::default(), "no throttle accepted"),
        // Rates without bursts: trivially valid (the burst defaults
        // to a rate-equivalent value at wire-up time).
        (
            DiskConfig::default().iops(1_000).bytes_per_sec(1_000_000),
            "rate without burst accepted",
        ),
    ];

    for (config, why) in cases {
        config.throttle.validate().expect(why);
    }
}
1196
/// A burst capacity below its rate is rejected with
/// `BurstBelowRate`, in both dimensions. The rendered message must
/// name the burst field, state the `must be >=` rule, and carry the
/// "pass 0 to clear" remediation hint.
#[test]
fn validate_rejects_burst_below_rate() {
    // (config, expect_err text, expected error, burst field in message)
    let cases = [
        (
            DiskConfig::default().iops(1_000).iops_burst_capacity(500),
            "burst < iops rejected",
            DiskThrottleValidationError::BurstBelowRate {
                dimension: ThrottleDimension::Iops,
                burst: 500,
                rate: 1_000,
            },
            "iops_burst_capacity",
        ),
        (
            DiskConfig::default()
                .bytes_per_sec(10_000)
                .bytes_burst_capacity(5_000),
            "burst < bytes_per_sec rejected",
            DiskThrottleValidationError::BurstBelowRate {
                dimension: ThrottleDimension::Bytes,
                burst: 5_000,
                rate: 10_000,
            },
            "bytes_burst_capacity",
        ),
    ];

    for (config, why, expected, burst_field) in cases {
        let err = config.throttle.validate().expect_err(why);
        assert_eq!(err, expected, "unexpected error variant");

        let msg = err.to_string();
        assert!(
            msg.contains(burst_field) && msg.contains("must be >="),
            "unexpected error message: {msg}",
        );
        assert!(
            msg.contains("pass 0 to clear"),
            "remediation hint missing: {msg}",
        );
    }
}
1249
/// Boundary case: a burst of exactly `rate - 1` must be rejected.
/// This fixes the `<` versus `<=` direction of the validate
/// predicate, so a later flip that accepted a burst one below the
/// configured rate would fail here.
#[test]
fn validate_rejects_burst_one_below_rate() {
    // (config, expect_err text, expected error, burst field in message)
    let cases = [
        (
            DiskConfig::default().iops(1_000).iops_burst_capacity(999),
            "iops burst one below rate must be rejected",
            DiskThrottleValidationError::BurstBelowRate {
                dimension: ThrottleDimension::Iops,
                burst: 999,
                rate: 1_000,
            },
            "iops_burst_capacity",
        ),
        (
            DiskConfig::default()
                .bytes_per_sec(1_000)
                .bytes_burst_capacity(999),
            "bytes burst one below rate must be rejected",
            DiskThrottleValidationError::BurstBelowRate {
                dimension: ThrottleDimension::Bytes,
                burst: 999,
                rate: 1_000,
            },
            "bytes_burst_capacity",
        ),
    ];

    for (config, why, expected, burst_field) in cases {
        let err = config.throttle.validate().expect_err(why);
        assert_eq!(err, expected);

        let msg = err.to_string();
        assert!(
            msg.contains(burst_field) && msg.contains("must be >="),
            "unexpected error message: {msg}",
        );
    }
}
1296
/// Setting an iops rate and burst and then clearing the rate with
/// `iops(0)` must validate clean. Clearing the rate also clears the
/// matching burst (the [`DiskConfig::iops`] auto-clear contract), so
/// the throttle ends up fully unthrottled.
///
/// `clearing_rate_leaves_throttle_validate_clean` clears both rates
/// at once; this test exercises only the iops clear path so that a
/// regression in that single auto-clear branch is reported here.
#[test]
fn iops_clear_after_burst_set_validates_clean() {
    let throttle = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(5_000)
        .iops(0)
        .throttle;

    throttle
        .validate()
        .expect("iops-cleared throttle must validate clean");
}
1316
/// A burst capacity set without its rate is rejected with
/// `BurstWithoutRate`, in both dimensions. The rendered message must
/// name the burst field and the missing rate, and carry the
/// "pass 0 to clear" remediation hint.
#[test]
fn validate_rejects_burst_without_rate() {
    // (config, expect_err text, dimension, burst field, missing-rate phrase)
    let cases = [
        (
            DiskConfig::default().iops_burst_capacity(5_000),
            "burst without iops rejected",
            ThrottleDimension::Iops,
            "iops_burst_capacity",
            "without iops",
        ),
        (
            DiskConfig::default().bytes_burst_capacity(5_000),
            "burst without bytes_per_sec rejected",
            ThrottleDimension::Bytes,
            "bytes_burst_capacity",
            "without bytes_per_sec",
        ),
    ];

    for (config, why, dimension, burst_field, missing_rate) in cases {
        let err = config.throttle.validate().expect_err(why);
        assert_eq!(
            err,
            DiskThrottleValidationError::BurstWithoutRate { dimension },
        );

        let msg = err.to_string();
        assert!(
            msg.contains(burst_field) && msg.contains(missing_rate),
            "unexpected error message: {msg}",
        );
        assert!(
            msg.contains("pass 0 to clear"),
            "remediation hint missing: {msg}",
        );
    }
}
1361
/// `DiskThrottleValidationError::dimension()` reports which throttle
/// dimension (iops or bytes) a failure belongs to, so callers can
/// drive programmatic recovery without parsing the rendered message.
/// The mapping is pinned for both variants in both dimensions, so a
/// new variant that forgets to populate the dimension is caught.
#[test]
fn validation_error_dimension_accessor() {
    // (error value, dimension the accessor must report)
    let cases = [
        (
            DiskThrottleValidationError::BurstBelowRate {
                dimension: ThrottleDimension::Iops,
                burst: 500,
                rate: 1_000,
            },
            ThrottleDimension::Iops,
        ),
        (
            DiskThrottleValidationError::BurstBelowRate {
                dimension: ThrottleDimension::Bytes,
                burst: 500,
                rate: 1_000,
            },
            ThrottleDimension::Bytes,
        ),
        (
            DiskThrottleValidationError::BurstWithoutRate {
                dimension: ThrottleDimension::Iops,
            },
            ThrottleDimension::Iops,
        ),
        (
            DiskThrottleValidationError::BurstWithoutRate {
                dimension: ThrottleDimension::Bytes,
            },
            ThrottleDimension::Bytes,
        ),
    ];

    for (err, expected) in cases {
        assert_eq!(err.dimension(), expected);
    }
}
1394
/// `ThrottleDimension::burst_field()` and `rate_field()` return the
/// wire field names, which match the [`DiskThrottle`] fields and the
/// [`DiskConfig`] builder method names, so error consumers can echo
/// the offending field back to the user. Both accessors are pinned
/// for both dimensions: renaming a field on `DiskThrottle` without
/// updating these names fails here instead of silently
/// desynchronising the error messages.
#[test]
fn throttle_dimension_field_names() {
    // (name returned by the accessor, expected wire name)
    let pairs = [
        (ThrottleDimension::Iops.burst_field(), "iops_burst_capacity"),
        (ThrottleDimension::Iops.rate_field(), "iops"),
        (
            ThrottleDimension::Bytes.burst_field(),
            "bytes_burst_capacity",
        ),
        (ThrottleDimension::Bytes.rate_field(), "bytes_per_sec"),
    ];

    for (reported, expected) in pairs {
        assert_eq!(reported, expected);
    }
}
1412
/// Pins that the typed validation error stays reachable after being
/// wrapped in `anyhow`.
///
/// `DiskThrottle::validate` returns
/// `Result<(), DiskThrottleValidationError>`, but production callers
/// (e.g. `init_virtio_blk`) wrap the failure in `anyhow::Error`.
/// Library consumers that want to match on the failure variant
/// therefore have to `downcast_ref::<DiskThrottleValidationError>()`
/// through the anyhow chain. A callsite change that drops the typed
/// error (converting it to `String` before bubbling, or replacing
/// `anyhow::Error::new(e)` with `anyhow!("...{e}...")`) would break
/// that contract silently, and there is no in-tree consumer yet that
/// would notice.
///
/// The error is wrapped with `.context(...)` to mirror the
/// production shape at `init_virtio_blk` (in
/// `src/vmm/setup/mod.rs`), so the downcast walks through the same
/// context layer real callers see.
#[test]
fn disk_throttle_validation_error_downcasts_through_anyhow() {
    let invalid = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(500)
        .throttle;
    let typed = invalid.validate().expect_err("burst < iops rejected");

    // Same wrapping as the production callsite
    // (KtstrVm::init_virtio_blk in src/vmm/setup/mod.rs:
    // anyhow!(e).context("invalid disk throttle")).
    let wrapped = anyhow::anyhow!(typed).context("invalid disk throttle");

    // Walk every cause in the chain and stop at the first one that
    // downcasts to the typed error.
    let mut located = None;
    for cause in wrapped.chain() {
        if let Some(found) = cause.downcast_ref::<DiskThrottleValidationError>() {
            located = Some(found);
            break;
        }
    }
    let recovered = located.expect(
        "DiskThrottleValidationError must remain downcastable through \
         the production anyhow wrap; lost typing means library \
         consumers cannot route programmatic recovery",
    );
    assert_eq!(
        *recovered,
        DiskThrottleValidationError::BurstBelowRate {
            dimension: ThrottleDimension::Iops,
            burst: 500,
            rate: 1_000,
        },
    );

    // Sanity check: the rendered chain still includes the
    // operator-facing context, so logs show
    // "invalid disk throttle: ...".
    let rendered = format!("{wrapped:#}");
    assert!(
        rendered.contains("invalid disk throttle"),
        "anyhow context must survive the wrap: {rendered}",
    );
}
1469
/// `DiskThrottle::validate` checks the iops dimension first and
/// returns on the first failure. When both dimensions are invalid
/// the iops failure is the one reported; the bytes failure only
/// appears on a later `validate()` once the caller has fixed the
/// iops side. This ordering is pinned so that a refactor which
/// aggregates errors or reverses the check order is caught. Both
/// dimensions are made invalid on purpose and the returned error
/// must carry `ThrottleDimension::Iops`.
#[test]
fn validate_first_failure_wins_iops_before_bytes() {
    // Case 1: burst below rate in both dimensions.
    let both_below = DiskConfig::default()
        .iops(1_000)
        .iops_burst_capacity(500) // iops violation: burst < rate
        .bytes_per_sec(10_000)
        .bytes_burst_capacity(5_000) // bytes violation: burst < rate
        .throttle;
    let first_err = both_below
        .validate()
        .expect_err("both-dimensions-bad must reject");
    assert_eq!(
        first_err,
        DiskThrottleValidationError::BurstBelowRate {
            dimension: ThrottleDimension::Iops,
            burst: 500,
            rate: 1_000,
        },
        "iops violation must surface first; refactor that aggregates \
         or reverses the check order would change this",
    );
    assert_eq!(first_err.dimension(), ThrottleDimension::Iops);

    // Case 2: the same ordering for `BurstWithoutRate`. Burst
    // capacities are set in both dimensions and neither rate is, so
    // the "missing rate" branch is violated on both sides.
    let both_orphaned = DiskConfig::default()
        .iops_burst_capacity(5_000)
        .bytes_burst_capacity(8_000)
        .throttle;
    let orphan_err = both_orphaned
        .validate()
        .expect_err("both-without-rate must reject");
    assert_eq!(
        orphan_err,
        DiskThrottleValidationError::BurstWithoutRate {
            dimension: ThrottleDimension::Iops,
        },
        "iops violation must surface first across both \
         BurstBelowRate and BurstWithoutRate variants",
    );
}
1523
/// Serde round trip focused on the burst fields, built through the
/// builder API. The full-roundtrip test uses a `DiskThrottle` struct
/// literal; driving the builders here means a builder that stopped
/// populating its field would be caught even while struct-literal
/// construction stayed correct.
#[test]
fn disk_config_burst_serde_roundtrip() {
    let built = DiskConfig::default()
        .iops(2_500)
        .iops_burst_capacity(10_000)
        .bytes_per_sec(50 * 1024 * 1024)
        .bytes_burst_capacity(200 * 1024 * 1024);

    let wire = serde_json::to_string(&built).expect("serialize burst DiskConfig");
    let restored: DiskConfig = serde_json::from_str(&wire).expect("deserialize burst DiskConfig");

    assert_eq!(restored, built);

    // (value after the round trip, value passed to the builder)
    let throttle = &restored.throttle;
    let checks = [
        (throttle.iops, 2_500u64),
        (throttle.iops_burst_capacity, 10_000),
        (throttle.bytes_per_sec, 50 * 1024 * 1024),
        (throttle.bytes_burst_capacity, 200 * 1024 * 1024),
    ];
    for (stored, requested) in checks {
        assert_eq!(stored, NonZeroU64::new(requested));
    }
}
1553
/// Two equal `DiskThrottleValidationError` values must hash alike: a
/// `HashSet` holding one of them has to report that it contains the
/// other.
#[test]
fn disk_throttle_validation_error_hash_consistent_with_eq() {
    use std::collections::HashSet;

    let make_error = || DiskThrottleValidationError::BurstWithoutRate {
        dimension: ThrottleDimension::Iops,
    };

    let mut seen: HashSet<DiskThrottleValidationError> = HashSet::new();
    seen.insert(make_error());

    // A separately constructed but equal value must be found.
    let probe = make_error();
    assert!(seen.contains(&probe));
}
1567}