ktstr/cli/
parse.rs

1//! CLI argument parsers shared between `ktstr` and `cargo-ktstr`.
2//!
3//! Holds the topology-string and disk-size parsers along with the
4//! `--disk` help text. Lives outside `kernel_cmd` because the
5//! parsers are dispatch-time helpers, not clap-attribute fixtures.
6
7use anyhow::{Result, bail};
8
9/// Parse a comma-separated topology string into its four dimensions:
10/// `(numa_nodes, llcs, cores, threads)`. The canonical format is
11/// `"numa_nodes,llcs,cores,threads"` — the same shape accepted by the
12/// `ktstr shell --topology` and `cargo ktstr shell --topology` flags.
13///
14/// Validation:
15/// - Exactly four comma-separated components are required.
16/// - Each component must parse as `u32`. A parse failure names the
17///   failing field explicitly (e.g. `"invalid llcs value: 'abc'"`)
18///   so the user can see which dimension they mistyped without
19///   counting commas.
20/// - Every dimension must be at least 1 — a zero in any position
21///   produces an unusable VM topology, so we reject it up front.
22///
23/// Consolidating the parse + validate in one helper eliminates the
24/// identical 4-arm `parts[i].parse().map_err(...)` block that the two
25/// binary entry points (`src/bin/ktstr.rs` Command::Shell and
26/// `src/bin/cargo_ktstr/misc/shell.rs` `run_shell`) would otherwise drift on.
27/// Error shape is `anyhow::Error`; callers that need a `String` (like
28/// cargo-ktstr's `Result<(), String>` surface) bridge via
29/// `.map_err(|e| format!("{e:#}"))` at the call site.
30pub fn parse_topology_string(topology: &str) -> Result<(u32, u32, u32, u32)> {
31    let parts: Vec<&str> = topology.split(',').collect();
32    if parts.len() != 4 {
33        bail!(
34            "invalid topology '{topology}': expected 'numa_nodes,llcs,cores,threads' \
35             (e.g. '1,2,4,1')"
36        );
37    }
38    // Stable field order mirrors the 4-tuple return so a future
39    // field-rename lands consistently in one place.
40    let fields: [(&str, &str); 4] = [
41        ("numa_nodes", parts[0]),
42        ("llcs", parts[1]),
43        ("cores", parts[2]),
44        ("threads", parts[3]),
45    ];
46    let mut vals: [u32; 4] = [0; 4];
47    for (i, (name, raw)) in fields.iter().enumerate() {
48        vals[i] = raw
49            .parse::<u32>()
50            .map_err(|_| anyhow::anyhow!("invalid {name} value: '{raw}'"))?;
51    }
52    let [numa_nodes, llcs, cores, threads] = vals;
53    if numa_nodes == 0 || llcs == 0 || cores == 0 || threads == 0 {
54        bail!("invalid topology '{topology}': all values must be >= 1");
55    }
56    Ok((numa_nodes, llcs, cores, threads))
57}
58
59/// Parse a human-readable size string (e.g. `"256mib"`, `"10gib"`, `"1gib"`)
60/// into a count of mebibytes (MiB), rounded down. Returns `Err` when the
61/// suffix is unrecognized, the numeric portion fails to parse, the value
62/// is not a positive integer multiple of one MiB, or the result exceeds
63/// `u32::MAX` MiB (the [`crate::vmm::disk_config::DiskConfig::capacity_mib`]
64/// capacity).
65///
66/// Accepted suffixes (case-insensitive): `b`, `kib`, `mib`, `gib`. All
67/// IEC (powers of two): `kib`=2^10, `mib`=2^20, `gib`=2^30. SI variants
68/// (`kb`/`mb`/`gb`) are intentionally NOT accepted; they're rejected by
69/// a dedicated SI-suffix check at the top of the function — before any
70/// number-parsing or MiB-alignment runs — so the diagnostic names the
71/// IEC-only policy directly instead of leaking through as a misleading
72/// "numeric portion not an unsigned integer" message after the suffix
73/// strip eats the trailing `b`. IEC-only is unambiguous and consistent.
74/// The bare suffix-less form is also rejected so units are never
75/// implicit.
76///
77/// The output unit is MiB to match
78/// [`crate::vmm::disk_config::DiskConfig::capacity_mib`] (despite the
79/// field name, `DiskConfig::capacity_bytes` left-shifts by 20 — i.e.
80/// the field is MiB, not SI MB). A future rename of that field would
81/// land in this function in lockstep.
82pub fn parse_disk_size_mib(s: &str) -> Result<u32> {
83    let lower = s.trim().to_ascii_lowercase();
84    if lower.is_empty() {
85        bail!("invalid disk size '{s}': empty");
86    }
87    // Reject SI-suffix forms (kb/mb/gb) up front. The IEC-only
88    // policy keeps the contract unambiguous: 1mib means exactly
89    // 2^20 bytes, never 10^6. Without this short-circuit the
90    // generic `b` (byte) suffix below would chew off the trailing
91    // 'b' and then fail to parse e.g. "1k" as a u64, producing a
92    // misleading "numeric portion not an unsigned integer" error
93    // instead of the unit-list diagnostic the user needs.
94    if lower.ends_with("kb") || lower.ends_with("mb") || lower.ends_with("gb") {
95        bail!(
96            "invalid disk size '{s}': SI suffixes (kb/mb/gb) are \
97             not supported. Use one of b, kib, mib, gib \
98             (case-insensitive)."
99        );
100    }
101    let (num_str, suffix, unit_bytes): (&str, &str, u64) =
102        if let Some(rest) = lower.strip_suffix("gib") {
103            (rest, "gib", 1u64 << 30)
104        } else if let Some(rest) = lower.strip_suffix("mib") {
105            (rest, "mib", 1u64 << 20)
106        } else if let Some(rest) = lower.strip_suffix("kib") {
107            (rest, "kib", 1u64 << 10)
108        } else if let Some(rest) = lower.strip_suffix('b') {
109            (rest, "b", 1u64)
110        } else {
111            bail!(
112                "invalid disk size '{s}': missing unit suffix. Use one of \
113             b, kib, mib, gib (case-insensitive)."
114            );
115        };
116    let n = num_str.trim().parse::<u64>().map_err(|_| {
117        anyhow::anyhow!(
118            "invalid disk size '{s}': numeric portion '{num_str}' before \
119             '{suffix}' is not an unsigned integer"
120        )
121    })?;
122    let bytes = n
123        .checked_mul(unit_bytes)
124        .ok_or_else(|| anyhow::anyhow!("invalid disk size '{s}': {n}{suffix} overflows u64"))?;
125    if bytes == 0 {
126        bail!("invalid disk size '{s}': must be > 0");
127    }
128    let mib = 1u64 << 20;
129    if bytes % mib != 0 {
130        bail!(
131            "invalid disk size '{s}': {bytes} bytes is not a whole number \
132             of mebibytes (MiB). Round to a multiple of 1 MiB (= 1048576 \
133             bytes)."
134        );
135    }
136    let mib_count = bytes / mib;
137    if mib_count > u32::MAX as u64 {
138        bail!(
139            "invalid disk size '{s}': {mib_count} MiB exceeds u32::MAX \
140             (DiskConfig.capacity_mib is u32)"
141        );
142    }
143    Ok(mib_count as u32)
144}
145
/// Help text for the `--disk <SIZE>` shell flag. One shared constant
/// serves both `cargo ktstr shell` (`src/bin/cargo_ktstr/cli.rs`) and
/// `ktstr shell` (`src/bin/ktstr.rs`), so a wording change lands in a
/// single place. Mirrors the [`super::CPU_CAP_HELP`] pattern.
///
/// The text is a single line with no embedded newlines; each fragment
/// below carries its own trailing space.
pub const DISK_HELP: &str = concat!(
    "Attach a raw virtio-blk disk to /dev/vda. ",
    "Accepts a human-readable size with a unit suffix (case-insensitive): ",
    "b, kib, mib, gib. IEC-only — SI variants (kb/mb/gb) are rejected to ",
    "keep the contract unambiguous. The size must be a positive whole ",
    "number of MiB (e.g. 256mib, 1gib). Omit to boot without a disk.",
);
155
156/// Parse the `--disk <SIZE>` CLI argument into an
157/// [`Option<crate::vmm::disk_config::DiskConfig>`]. `None` input
158/// returns `Ok(None)` (no disk attached); a `Some(s)` input runs
159/// `s` through [`parse_disk_size_mib`] and wraps the result in a
160/// `DiskConfig` whose remaining fields fall through to
161/// [`crate::vmm::disk_config::DiskConfig::default`] (raw filesystem,
162/// no throttle, read-write). Shared between `cargo ktstr shell` and
163/// `ktstr shell` so both bins parse identically; a malformed size
164/// surfaces here at CLI-argument time, never mid-VM-setup.
165pub fn parse_disk_arg(s: Option<&str>) -> Result<Option<crate::vmm::disk_config::DiskConfig>> {
166    match s {
167        Some(raw) => {
168            let mib = parse_disk_size_mib(raw)?;
169            Ok(Some(crate::vmm::disk_config::DiskConfig {
170                capacity_mib: mib,
171                ..crate::vmm::disk_config::DiskConfig::default()
172            }))
173        }
174        None => Ok(None),
175    }
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// A canonical `"n,l,c,t"` string comes back as the four u32
    /// dimensions in positional order. Distinct values per position
    /// mean any reshuffle of (numa_nodes/llcs/cores/threads) flips
    /// this pin instead of silently swapping two dimensions.
    #[test]
    fn parse_topology_string_happy_path() {
        let dims = parse_topology_string("1,2,4,8").expect("valid");
        assert_eq!(dims, (1, 2, 4, 8));
    }

    /// Fewer than four components: the error echoes the input and
    /// spells out the canonical format.
    #[test]
    fn parse_topology_string_rejects_too_few_parts() {
        let rendered = format!(
            "{:#}",
            parse_topology_string("1,2,4").expect_err("3 parts must fail")
        );
        let expectations = [
            ("invalid topology '1,2,4'", "error must echo the bad input"),
            (
                "numa_nodes,llcs,cores,threads",
                "error must name the expected shape",
            ),
        ];
        for (needle, why) in expectations {
            assert!(rendered.contains(needle), "{why}: {rendered}");
        }
    }

    /// More than four components is rejected as well, making the
    /// component-count guard symmetric with the too-few case.
    #[test]
    fn parse_topology_string_rejects_too_many_parts() {
        let outcome = parse_topology_string("1,2,4,8,16");
        let err = outcome.expect_err("5 parts must fail");
        assert!(format!("{err:#}").contains("invalid topology"));
    }

    /// A non-numeric component is reported by FIELD name, not only by
    /// the bad token: mistyping the second dimension yields
    /// `"invalid llcs value: 'abc'"`. All four position-to-name
    /// mappings are pinned so a field-order refactor surfaces here.
    #[test]
    fn parse_topology_string_names_failing_field() {
        let cases = [
            ("abc,1,1,1", "numa_nodes"),
            ("1,abc,1,1", "llcs"),
            ("1,1,abc,1", "cores"),
            ("1,1,1,abc", "threads"),
        ];
        for (pos, (input, field)) in cases.iter().enumerate() {
            let err = parse_topology_string(input).expect_err("non-numeric must fail");
            let rendered = format!("{err:#}");
            let expected = format!("invalid {field} value: 'abc'");
            assert!(
                rendered.contains(&expected),
                "pos {pos}: error must name the `{field}` field, got: {rendered}",
            );
        }
    }

    /// A zero in any single position trips the `>= 1` guard. A zero
    /// topology would build a non-bootable VM, so the up-front
    /// rejection is a correctness requirement, not a style choice.
    #[test]
    fn parse_topology_string_rejects_zero_dimensions() {
        for pos in 0..4 {
            let input = (0..4)
                .map(|i| if i == pos { "0" } else { "1" })
                .collect::<Vec<_>>()
                .join(",");
            let rendered = format!(
                "{:#}",
                parse_topology_string(&input).expect_err("zero must fail")
            );
            assert!(
                rendered.contains(">= 1"),
                "pos {pos}: error must cite the >=1 rule: {rendered}",
            );
        }
    }

    /// `u32::MAX` is accepted in every position. Pins the u32 return
    /// type (not u16 / usize): a refactor that narrows it fails here
    /// rather than truncating large-host topology strings.
    #[test]
    fn parse_topology_string_accepts_u32_max() {
        let max = u32::MAX;
        let input = vec![max.to_string(); 4].join(",");
        let dims = parse_topology_string(&input).expect("u32::MAX valid");
        assert_eq!(dims, (max, max, max, max));
    }

    /// A value one past `u32::MAX` is numeric but does not fit, so it
    /// exercises the `parse::<u32>` overflow path. The error must
    /// still carry the field name and the offending token.
    #[test]
    fn parse_topology_string_rejects_u32_overflow() {
        let too_big = u64::from(u32::MAX) + 1;
        let input = format!("1,{too_big},4,1");
        let rendered = format!(
            "{:#}",
            parse_topology_string(&input).expect_err("overflow must fail")
        );
        let expected = format!("invalid llcs value: '{too_big}'");
        assert!(
            rendered.contains(&expected),
            "overflow must surface field + bad token: {rendered}",
        );
    }

    /// IEC suffixes (`kib`, `mib`, `gib`) map to whole MiB counts.
    /// Pins the binary-base interpretation of the IEC family.
    #[test]
    fn parse_disk_size_mib_iec_suffixes() {
        let cases = [
            ("256mib", 256u32),
            ("1gib", 1024),
            ("10GIB", 10 * 1024),
            ("1024kib", 1),
        ];
        for (input, expected_mib) in cases {
            assert_eq!(parse_disk_size_mib(input).unwrap(), expected_mib);
        }
    }

    /// SI suffixes (`kb`, `mb`, `gb`) in any letter case are rejected
    /// with the dedicated SI diagnostic. IEC-only is the unambiguous
    /// contract.
    #[test]
    fn parse_disk_size_mib_rejects_si_suffixes() {
        for input in ["1kb", "1mb", "1gb", "256MB", "10GB"] {
            let rendered = match parse_disk_size_mib(input) {
                Ok(mib) => panic!("SI suffix '{input}' must be rejected: {mib:?}"),
                Err(err) => format!("{err:#}"),
            };
            assert!(
                rendered.contains("SI suffixes"),
                "expected SI-rejection diagnostic for {input:?}, got: {rendered}",
            );
        }
    }

    /// Byte-suffix path: exactly one MiB worth of bytes succeeds, one
    /// byte short of that fails the alignment check.
    #[test]
    fn parse_disk_size_mib_byte_suffix() {
        let aligned = parse_disk_size_mib("1048576b").unwrap();
        assert_eq!(aligned, 1);

        let misaligned = parse_disk_size_mib("1048575b");
        let err = misaligned.expect_err("off-by-one byte must fail");
        assert!(format!("{err:#}").contains("not a whole number"));
    }

    /// Surrounding whitespace and mixed letter case are tolerated
    /// (the parser trims and ASCII-lowercases its input).
    #[test]
    fn parse_disk_size_mib_normalizes_input() {
        for (input, expected_mib) in [("  256MiB  ", 256u32), ("1GiB", 1024)] {
            assert_eq!(parse_disk_size_mib(input).unwrap(), expected_mib);
        }
    }

    /// A bare integer with no unit is rejected, and the diagnostic
    /// lists the accepted units so the user sees what to type.
    #[test]
    fn parse_disk_size_mib_rejects_missing_suffix() {
        let rendered = format!(
            "{:#}",
            parse_disk_size_mib("256").expect_err("bare integer must fail")
        );
        for needle in ["missing unit suffix", "kib", "mib", "gib"] {
            assert!(rendered.contains(needle));
        }
    }

    /// Empty and whitespace-only inputs are both rejected.
    #[test]
    fn parse_disk_size_mib_rejects_empty() {
        for input in ["", "   "] {
            assert!(parse_disk_size_mib(input).is_err());
        }
    }

    /// Zero is rejected — a 0-byte disk is a configuration footgun
    /// (every IO IOERRs per `VirtioBlk::with_options`).
    #[test]
    fn parse_disk_size_mib_rejects_zero() {
        let rendered = format!(
            "{:#}",
            parse_disk_size_mib("0mib").expect_err("zero must fail")
        );
        assert!(rendered.contains("must be > 0"));
    }

    /// A numeric portion that is not an unsigned integer (letters,
    /// a negative sign, a decimal fraction) is rejected.
    #[test]
    fn parse_disk_size_mib_rejects_garbage_number() {
        for input in ["abcmib", "-5mib", "3.5mib"] {
            assert!(parse_disk_size_mib(input).is_err());
        }
    }

    /// A unit outside the accepted list is rejected.
    #[test]
    fn parse_disk_size_mib_rejects_unknown_suffix() {
        let outcome = parse_disk_size_mib("1tb");
        let rendered = format!(
            "{:#}",
            outcome.expect_err("tb is not currently accepted")
        );
        // `tb` is none of the accepted units. Only the input-echoing
        // prefix is pinned, so the test holds whichever rejection
        // branch ends up producing the message.
        assert!(rendered.contains("invalid disk size '1tb'"));
    }

    /// A size above u32::MAX MiB is rejected (capacity_mib is u32).
    #[test]
    fn parse_disk_size_mib_rejects_u32_overflow() {
        // One MiB more than the u32 field can hold.
        let too_big_mib = u64::from(u32::MAX) + 1;
        let input = format!("{too_big_mib}mib");
        let rendered = format!(
            "{:#}",
            parse_disk_size_mib(&input).expect_err("> u32::MAX MiB must fail")
        );
        assert!(rendered.contains("exceeds u32::MAX"));
    }

    /// A size whose byte product overflows u64 is rejected before
    /// the MiB conversion runs.
    #[test]
    fn parse_disk_size_mib_rejects_u64_overflow() {
        // u64::MAX GiB is far beyond u64::MAX bytes.
        let input = format!("{}gib", u64::MAX);
        let rendered = format!(
            "{:#}",
            parse_disk_size_mib(&input).expect_err("u64 overflow must fail")
        );
        assert!(rendered.contains("overflows u64"));
    }

    /// Absent `--disk` flag → `Ok(None)`. Pins the no-disk default:
    /// a refactor that swaps in a `Some(default())` placeholder
    /// fails here instead of silently changing the boot shape (a
    /// disk where the user asked for none).
    #[test]
    fn parse_disk_arg_none_yields_no_disk() {
        let got = parse_disk_arg(None).expect("None input must not error");
        assert!(
            got.is_none(),
            "absent --disk must produce Ok(None), got: {got:?}",
        );
    }

    /// `--disk 256mib` → `Some(DiskConfig)` with `capacity_mib=256`
    /// and every other field equal to `DiskConfig::default()`.
    /// Pins the size-only path and guards the spread of non-size
    /// fields: if a default flips (read_only=true, throttle
    /// non-default), it shows up at the CLI parse boundary rather
    /// than mid-VM-setup.
    #[test]
    fn parse_disk_arg_some_size_uses_default_other_fields() {
        let expected = crate::vmm::disk_config::DiskConfig {
            capacity_mib: 256,
            ..crate::vmm::disk_config::DiskConfig::default()
        };
        let got = parse_disk_arg(Some("256mib"))
            .expect("256mib must parse")
            .expect("Some(...) input must yield Some(DiskConfig)");
        assert_eq!(
            got, expected,
            "parse_disk_arg(\"256mib\") must equal DiskConfig::default() \
             with capacity_mib=256: got {got:?}, expected {expected:?}",
        );
    }

    /// Malformed size → `Err`. A CLI typo must surface at argument
    /// time with a parse-error message, not mid-VM-setup or as a
    /// confusing zero-size disk.
    #[test]
    fn parse_disk_arg_garbage_propagates_size_error() {
        let outcome = parse_disk_arg(Some("garbage"));
        let err = outcome.expect_err("malformed size must propagate parse error");
        let rendered = format!("{err:#}");
        // Every `parse_disk_size_mib` failure message begins with
        // `invalid disk size '...'` (the input echoed back), so one
        // substring check covers all error paths. A message-format
        // change that drops the prefix would surface here instead of
        // being silently absorbed.
        assert!(
            rendered.contains("invalid disk size"),
            "expected size-parse diagnostic in disk-arg error, got: {rendered}",
        );
    }
}