ktstr/cli/parse.rs
1//! CLI argument parsers shared between `ktstr` and `cargo-ktstr`.
2//!
3//! Holds the topology-string and disk-size parsers along with the
4//! `--disk` help text. Lives outside `kernel_cmd` because the
5//! parsers are dispatch-time helpers, not clap-attribute fixtures.
6
7use anyhow::{Result, bail};
8
9/// Parse a comma-separated topology string into its four dimensions:
10/// `(numa_nodes, llcs, cores, threads)`. The canonical format is
11/// `"numa_nodes,llcs,cores,threads"` — the same shape accepted by the
12/// `ktstr shell --topology` and `cargo ktstr shell --topology` flags.
13///
14/// Validation:
15/// - Exactly four comma-separated components are required.
16/// - Each component must parse as `u32`. A parse failure names the
17/// failing field explicitly (e.g. `"invalid llcs value: 'abc'"`)
18/// so the user can see which dimension they mistyped without
19/// counting commas.
20/// - Every dimension must be at least 1 — a zero in any position
21/// produces an unusable VM topology, so we reject it up front.
22///
23/// Consolidating the parse + validate in one helper eliminates the
24/// identical 4-arm `parts[i].parse().map_err(...)` block that the two
25/// binary entry points (`src/bin/ktstr.rs` Command::Shell and
26/// `src/bin/cargo_ktstr/misc/shell.rs` `run_shell`) would otherwise drift on.
27/// Error shape is `anyhow::Error`; callers that need a `String` (like
28/// cargo-ktstr's `Result<(), String>` surface) bridge via
29/// `.map_err(|e| format!("{e:#}"))` at the call site.
30pub fn parse_topology_string(topology: &str) -> Result<(u32, u32, u32, u32)> {
31 let parts: Vec<&str> = topology.split(',').collect();
32 if parts.len() != 4 {
33 bail!(
34 "invalid topology '{topology}': expected 'numa_nodes,llcs,cores,threads' \
35 (e.g. '1,2,4,1')"
36 );
37 }
38 // Stable field order mirrors the 4-tuple return so a future
39 // field-rename lands consistently in one place.
40 let fields: [(&str, &str); 4] = [
41 ("numa_nodes", parts[0]),
42 ("llcs", parts[1]),
43 ("cores", parts[2]),
44 ("threads", parts[3]),
45 ];
46 let mut vals: [u32; 4] = [0; 4];
47 for (i, (name, raw)) in fields.iter().enumerate() {
48 vals[i] = raw
49 .parse::<u32>()
50 .map_err(|_| anyhow::anyhow!("invalid {name} value: '{raw}'"))?;
51 }
52 let [numa_nodes, llcs, cores, threads] = vals;
53 if numa_nodes == 0 || llcs == 0 || cores == 0 || threads == 0 {
54 bail!("invalid topology '{topology}': all values must be >= 1");
55 }
56 Ok((numa_nodes, llcs, cores, threads))
57}
58
59/// Parse a human-readable size string (e.g. `"256mib"`, `"10gib"`, `"1gib"`)
60/// into a count of mebibytes (MiB), rounded down. Returns `Err` when the
61/// suffix is unrecognized, the numeric portion fails to parse, the value
62/// is not a positive integer multiple of one MiB, or the result exceeds
63/// `u32::MAX` MiB (the [`crate::vmm::disk_config::DiskConfig::capacity_mib`]
64/// capacity).
65///
66/// Accepted suffixes (case-insensitive): `b`, `kib`, `mib`, `gib`. All
67/// IEC (powers of two): `kib`=2^10, `mib`=2^20, `gib`=2^30. SI variants
68/// (`kb`/`mb`/`gb`) are intentionally NOT accepted; they're rejected by
69/// a dedicated SI-suffix check at the top of the function — before any
70/// number-parsing or MiB-alignment runs — so the diagnostic names the
71/// IEC-only policy directly instead of leaking through as a misleading
72/// "numeric portion not an unsigned integer" message after the suffix
73/// strip eats the trailing `b`. IEC-only is unambiguous and consistent.
74/// The bare suffix-less form is also rejected so units are never
75/// implicit.
76///
77/// The output unit is MiB to match
78/// [`crate::vmm::disk_config::DiskConfig::capacity_mib`] (despite the
79/// field name, `DiskConfig::capacity_bytes` left-shifts by 20 — i.e.
80/// the field is MiB, not SI MB). A future rename of that field would
81/// land in this function in lockstep.
82pub fn parse_disk_size_mib(s: &str) -> Result<u32> {
83 let lower = s.trim().to_ascii_lowercase();
84 if lower.is_empty() {
85 bail!("invalid disk size '{s}': empty");
86 }
87 // Reject SI-suffix forms (kb/mb/gb) up front. The IEC-only
88 // policy keeps the contract unambiguous: 1mib means exactly
89 // 2^20 bytes, never 10^6. Without this short-circuit the
90 // generic `b` (byte) suffix below would chew off the trailing
91 // 'b' and then fail to parse e.g. "1k" as a u64, producing a
92 // misleading "numeric portion not an unsigned integer" error
93 // instead of the unit-list diagnostic the user needs.
94 if lower.ends_with("kb") || lower.ends_with("mb") || lower.ends_with("gb") {
95 bail!(
96 "invalid disk size '{s}': SI suffixes (kb/mb/gb) are \
97 not supported. Use one of b, kib, mib, gib \
98 (case-insensitive)."
99 );
100 }
101 let (num_str, suffix, unit_bytes): (&str, &str, u64) =
102 if let Some(rest) = lower.strip_suffix("gib") {
103 (rest, "gib", 1u64 << 30)
104 } else if let Some(rest) = lower.strip_suffix("mib") {
105 (rest, "mib", 1u64 << 20)
106 } else if let Some(rest) = lower.strip_suffix("kib") {
107 (rest, "kib", 1u64 << 10)
108 } else if let Some(rest) = lower.strip_suffix('b') {
109 (rest, "b", 1u64)
110 } else {
111 bail!(
112 "invalid disk size '{s}': missing unit suffix. Use one of \
113 b, kib, mib, gib (case-insensitive)."
114 );
115 };
116 let n = num_str.trim().parse::<u64>().map_err(|_| {
117 anyhow::anyhow!(
118 "invalid disk size '{s}': numeric portion '{num_str}' before \
119 '{suffix}' is not an unsigned integer"
120 )
121 })?;
122 let bytes = n
123 .checked_mul(unit_bytes)
124 .ok_or_else(|| anyhow::anyhow!("invalid disk size '{s}': {n}{suffix} overflows u64"))?;
125 if bytes == 0 {
126 bail!("invalid disk size '{s}': must be > 0");
127 }
128 let mib = 1u64 << 20;
129 if bytes % mib != 0 {
130 bail!(
131 "invalid disk size '{s}': {bytes} bytes is not a whole number \
132 of mebibytes (MiB). Round to a multiple of 1 MiB (= 1048576 \
133 bytes)."
134 );
135 }
136 let mib_count = bytes / mib;
137 if mib_count > u32::MAX as u64 {
138 bail!(
139 "invalid disk size '{s}': {mib_count} MiB exceeds u32::MAX \
140 (DiskConfig.capacity_mib is u32)"
141 );
142 }
143 Ok(mib_count as u32)
144}
145
/// Shared help text for the `--disk <SIZE>` shell flag.
///
/// Both `cargo ktstr shell` (`src/bin/cargo_ktstr/cli.rs`) and
/// `ktstr shell` (`src/bin/ktstr.rs`) reference this constant, so a
/// wording change lands in exactly one place. Follows the same pattern
/// as [`super::CPU_CAP_HELP`].
///
/// The text is a single line; it is split into fragments here only to
/// keep the source within the line-length limit. Each fragment except
/// the last ends with the space that separates it from the next.
pub const DISK_HELP: &str = concat!(
    "Attach a raw virtio-blk disk to /dev/vda. ",
    "Accepts a human-readable size with a unit suffix (case-insensitive): ",
    "b, kib, mib, gib. IEC-only — SI variants (kb/mb/gb) are rejected to ",
    "keep the contract unambiguous. The size must be a positive whole ",
    "number of MiB (e.g. 256mib, 1gib). Omit to boot without a disk.",
);
155
156/// Parse the `--disk <SIZE>` CLI argument into an
157/// [`Option<crate::vmm::disk_config::DiskConfig>`]. `None` input
158/// returns `Ok(None)` (no disk attached); a `Some(s)` input runs
159/// `s` through [`parse_disk_size_mib`] and wraps the result in a
160/// `DiskConfig` whose remaining fields fall through to
161/// [`crate::vmm::disk_config::DiskConfig::default`] (raw filesystem,
162/// no throttle, read-write). Shared between `cargo ktstr shell` and
163/// `ktstr shell` so both bins parse identically; a malformed size
164/// surfaces here at CLI-argument time, never mid-VM-setup.
165pub fn parse_disk_arg(s: Option<&str>) -> Result<Option<crate::vmm::disk_config::DiskConfig>> {
166 match s {
167 Some(raw) => {
168 let mib = parse_disk_size_mib(raw)?;
169 Ok(Some(crate::vmm::disk_config::DiskConfig {
170 capacity_mib: mib,
171 ..crate::vmm::disk_config::DiskConfig::default()
172 }))
173 }
174 None => Ok(None),
175 }
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Happy path: a canonical `"n,l,c,t"` string round-trips to the
    /// four u32 dimensions in positional order. Pins the field order
    /// so a future refactor that reshuffles (numa_nodes/llcs/cores/
    /// threads) → something else can't silently swap one dimension
    /// for another without flipping this pin.
    #[test]
    fn parse_topology_string_happy_path() {
        let (n, l, c, t) = parse_topology_string("1,2,4,8").expect("valid");
        assert_eq!((n, l, c, t), (1, 2, 4, 8));
    }

    /// Wrong component count: fewer than 4 parts names the expected
    /// shape in the error so the user sees the canonical format.
    #[test]
    fn parse_topology_string_rejects_too_few_parts() {
        let err = parse_topology_string("1,2,4").expect_err("3 parts must fail");
        let rendered = format!("{err:#}");
        assert!(
            rendered.contains("invalid topology '1,2,4'"),
            "error must echo the bad input: {rendered}",
        );
        assert!(
            rendered.contains("numa_nodes,llcs,cores,threads"),
            "error must name the expected shape: {rendered}",
        );
    }

    /// Too MANY parts is rejected the same way. Pairs with the
    /// too-few case so the guard is symmetric.
    #[test]
    fn parse_topology_string_rejects_too_many_parts() {
        let err = parse_topology_string("1,2,4,8,16").expect_err("5 parts must fail");
        assert!(format!("{err:#}").contains("invalid topology"));
    }

    /// A non-numeric component fails with a message that names the
    /// offending FIELD, not just the bad token — a user who mistypes
    /// the second dimension sees `"invalid llcs value: 'abc'"` and
    /// knows immediately which dimension needs fixing. Pin all four
    /// position-to-name mappings so a field-order refactor surfaces
    /// here.
    #[test]
    fn parse_topology_string_names_failing_field() {
        for (pos, field) in [(0, "numa_nodes"), (1, "llcs"), (2, "cores"), (3, "threads")] {
            let mut parts = ["1"; 4];
            parts[pos] = "abc";
            let input = parts.join(",");
            let err = parse_topology_string(&input).expect_err("non-numeric must fail");
            let rendered = format!("{err:#}");
            assert!(
                rendered.contains(&format!("invalid {field} value: 'abc'")),
                "pos {pos}: error must name the `{field}` field, got: {rendered}",
            );
        }
    }

    /// Zero in any position fails the `>= 1` guard with the
    /// "all values must be >= 1" phrasing. A zero topology would
    /// build a non-bootable VM, so rejecting it up-front is a
    /// correctness requirement, not a style choice.
    #[test]
    fn parse_topology_string_rejects_zero_dimensions() {
        for pos in 0..4 {
            let mut parts = ["1"; 4];
            parts[pos] = "0";
            let input = parts.join(",");
            let err = parse_topology_string(&input).expect_err("zero must fail");
            let rendered = format!("{err:#}");
            assert!(
                rendered.contains(">= 1"),
                "pos {pos}: error must cite the >=1 rule: {rendered}",
            );
        }
    }

    /// Upper bound: u32::MAX in every position parses successfully.
    /// Pins the return-type decision (u32, not u16 / usize) so a
    /// future refactor that narrows the type surfaces here rather
    /// than truncating large-host topology strings.
    #[test]
    fn parse_topology_string_accepts_u32_max() {
        let big = u32::MAX;
        let input = format!("{big},{big},{big},{big}");
        let (n, l, c, t) = parse_topology_string(&input).expect("u32::MAX valid");
        assert_eq!((n, l, c, t), (big, big, big, big));
    }

    /// u32 overflow (value above u32::MAX) fails with the field
    /// name, not a generic parse error. Exercises the `parse::<u32>`
    /// failure path rather than only the non-numeric path.
    #[test]
    fn parse_topology_string_rejects_u32_overflow() {
        let too_big = (u32::MAX as u64) + 1;
        let input = format!("1,{too_big},4,1");
        let err = parse_topology_string(&input).expect_err("overflow must fail");
        assert!(
            format!("{err:#}").contains(&format!("invalid llcs value: '{too_big}'")),
            "overflow must surface field + bad token: {err:#}",
        );
    }

    /// IEC suffixes (`kib`, `mib`, `gib`) round-trip to whole MiB
    /// counts. Pins the binary-base interpretation of the IEC family.
    #[test]
    fn parse_disk_size_mib_iec_suffixes() {
        assert_eq!(parse_disk_size_mib("256mib").unwrap(), 256);
        assert_eq!(parse_disk_size_mib("1gib").unwrap(), 1024);
        assert_eq!(parse_disk_size_mib("10GIB").unwrap(), 10 * 1024);
        assert_eq!(parse_disk_size_mib("1024kib").unwrap(), 1);
    }

    /// SI suffixes (`kb`, `mb`, `gb`) are rejected by the dedicated
    /// SI check so the user sees the IEC-only diagnostic instead of a
    /// confusing numeric-parse failure. IEC-only is the unambiguous
    /// contract.
    #[test]
    fn parse_disk_size_mib_rejects_si_suffixes() {
        for input in ["1kb", "1mb", "1gb", "256MB", "10GB"] {
            // `match` instead of `expect_err(&format!(..))` so the
            // failure message is only built when the test actually fails.
            let err = match parse_disk_size_mib(input) {
                Ok(mib) => panic!("SI suffix '{input}' must be rejected, got {mib} MiB"),
                Err(err) => err,
            };
            let rendered = format!("{err:#}");
            assert!(
                rendered.contains("SI suffixes"),
                "expected SI-rejection diagnostic for {input:?}, got: {rendered}",
            );
        }
    }

    /// Bare `b` with a value that aligns to a MiB succeeds; a value
    /// off-by-one fails. Pins the byte-suffix path.
    #[test]
    fn parse_disk_size_mib_byte_suffix() {
        assert_eq!(parse_disk_size_mib("1048576b").unwrap(), 1);
        let err = parse_disk_size_mib("1048575b").expect_err("off-by-one byte must fail");
        assert!(format!("{err:#}").contains("not a whole number"));
    }

    /// Whitespace + mixed case in the input are tolerated by trim +
    /// to_ascii_lowercase.
    #[test]
    fn parse_disk_size_mib_normalizes_input() {
        assert_eq!(parse_disk_size_mib(" 256MiB ").unwrap(), 256);
        assert_eq!(parse_disk_size_mib("1GiB").unwrap(), 1024);
    }

    /// Missing suffix is rejected with a unit-list diagnostic so the
    /// user sees what's accepted.
    #[test]
    fn parse_disk_size_mib_rejects_missing_suffix() {
        let err = parse_disk_size_mib("256").expect_err("bare integer must fail");
        let rendered = format!("{err:#}");
        assert!(rendered.contains("missing unit suffix"));
        assert!(rendered.contains("kib"));
        assert!(rendered.contains("mib"));
        assert!(rendered.contains("gib"));
    }

    /// Empty / whitespace-only input is rejected up front by the
    /// dedicated empty check, not by falling through to the
    /// missing-suffix or numeric-parse diagnostics.
    #[test]
    fn parse_disk_size_mib_rejects_empty() {
        for input in ["", " "] {
            let err = parse_disk_size_mib(input).expect_err("empty input must fail");
            let rendered = format!("{err:#}");
            assert!(
                rendered.contains(": empty"),
                "expected the empty-input diagnostic for {input:?}, got: {rendered}",
            );
        }
    }

    /// Zero is rejected — a 0-byte disk is a configuration footgun
    /// (every IO IOERRs per `VirtioBlk::with_options`).
    #[test]
    fn parse_disk_size_mib_rejects_zero() {
        let err = parse_disk_size_mib("0mib").expect_err("zero must fail");
        assert!(format!("{err:#}").contains("must be > 0"));
    }

    /// Non-numeric, negative, and fractional prefixes are rejected by
    /// the numeric-portion parse. Pinning the diagnostic (rather than
    /// a bare `is_err()`) makes sure the inputs fail on the intended
    /// code path.
    #[test]
    fn parse_disk_size_mib_rejects_garbage_number() {
        for input in ["abcmib", "-5mib", "3.5mib"] {
            let err = parse_disk_size_mib(input).expect_err("non-integer prefix must fail");
            let rendered = format!("{err:#}");
            assert!(
                rendered.contains("is not an unsigned integer"),
                "expected numeric-portion diagnostic for {input:?}, got: {rendered}",
            );
        }
    }

    /// Unknown suffix is rejected.
    #[test]
    fn parse_disk_size_mib_rejects_unknown_suffix() {
        let err = parse_disk_size_mib("1tb").expect_err("tb is not currently accepted");
        let rendered = format!("{err:#}");
        // Last matching strip_suffix is "b", which leaves "1t" as the
        // numeric portion and surfaces the parse error there.
        assert!(rendered.contains("invalid disk size '1tb'"));
        assert!(
            rendered.contains("numeric portion '1t' before 'b'"),
            "expected the numeric-portion diagnostic for the `b` suffix, got: {rendered}",
        );
    }

    /// A value that overflows u32::MAX MiB is rejected (capacity_mib is
    /// u32), while exactly u32::MAX MiB is still accepted — pins both
    /// sides of the boundary.
    #[test]
    fn parse_disk_size_mib_rejects_u32_overflow() {
        // Exactly u32::MAX MiB fits.
        let at_limit = format!("{}mib", u32::MAX);
        assert_eq!(parse_disk_size_mib(&at_limit).unwrap(), u32::MAX);
        // (u32::MAX + 1) MiB
        let too_big_mib = (u32::MAX as u64) + 1;
        let input = format!("{too_big_mib}mib");
        let err = parse_disk_size_mib(&input).expect_err("> u32::MAX MiB must fail");
        assert!(format!("{err:#}").contains("exceeds u32::MAX"));
    }

    /// A value whose byte product overflows u64 is rejected before
    /// the MiB conversion runs.
    #[test]
    fn parse_disk_size_mib_rejects_u64_overflow() {
        // u64::MAX gib is way past u64::MAX bytes.
        let input = format!("{}gib", u64::MAX);
        let err = parse_disk_size_mib(&input).expect_err("u64 overflow must fail");
        assert!(format!("{err:#}").contains("overflows u64"));
    }

    /// Absent `--disk` flag → `Ok(None)`. Pins the
    /// no-disk-attached default so a future refactor that flips the
    /// arm to a `Some(default())` placeholder fails the test
    /// instead of silently changing the boot shape (a disk where
    /// the user asked for none).
    #[test]
    fn parse_disk_arg_none_yields_no_disk() {
        let got = parse_disk_arg(None).expect("None input must not error");
        assert!(
            got.is_none(),
            "absent --disk must produce Ok(None), got: {got:?}",
        );
    }

    /// `--disk 256mib` → `Some(DiskConfig)` with `capacity_mib=256`
    /// and the remaining fields equal to `DiskConfig::default()`.
    /// Pins the size-only fast path (the only shape `parse_disk_arg`
    /// accepts today) and guards against drift in the spread of
    /// non-size fields — if a future change flips a default
    /// (read_only=true, throttle non-default), this test surfaces it
    /// at the CLI parse boundary rather than mid-VM-setup.
    #[test]
    fn parse_disk_arg_some_size_uses_default_other_fields() {
        let got = parse_disk_arg(Some("256mib"))
            .expect("256mib must parse")
            .expect("Some(...) input must yield Some(DiskConfig)");
        let expected = crate::vmm::disk_config::DiskConfig {
            capacity_mib: 256,
            ..crate::vmm::disk_config::DiskConfig::default()
        };
        assert_eq!(
            got, expected,
            "parse_disk_arg(\"256mib\") must equal DiskConfig::default() \
             with capacity_mib=256: got {got:?}, expected {expected:?}",
        );
    }

    /// Malformed size → `Err`. Pins that a CLI typo surfaces at
    /// argument time with a parse-error message, not mid-VM-setup
    /// or as a confusing zero-size disk.
    #[test]
    fn parse_disk_arg_garbage_propagates_size_error() {
        let err =
            parse_disk_arg(Some("garbage")).expect_err("malformed size must propagate parse error");
        let rendered = format!("{err:#}");
        // Every `parse_disk_size_mib` bail prefixes its message with
        // `invalid disk size '...'` (the input echoed back), so a
        // single-substring check is sufficient — every error path
        // satisfies it. A future message-format change that drops the
        // prefix would surface here instead of being silently absorbed.
        assert!(
            rendered.contains("invalid disk size"),
            "expected size-parse diagnostic in disk-arg error, got: {rendered}",
        );
    }
}