ktstr/workload/config/
workload.rs

1//! Top-level workload configuration: the [`WorkloadConfig`] struct,
2//! its `Default`, and the chainable builder methods on
3//! `impl WorkloadConfig`.
4//!
5//! WorkloadConfig is the primary group's shape — what each worker
6//! does (`work_type`), how many (`num_workers`), what scheduler
7//! policy / memory policy / nice / clone mode they get, and an
8//! optional `composed` list of secondary [`WorkSpec`] groups that
9//! spawn alongside.
10//!
11//! Validation lives on [`WorkloadConfig::validate`]: it gates
12//! invariants that must hold BEFORE any worker context exists —
13//! currently `num_workers > 0` (primary + every composed entry;
14//! rejects vacuously-passing zero-worker workloads where every
15//! assertion would trivially pass) and `mem_policy` empty-nodemask
16//! rejection (primary + every composed entry).
17
18use std::borrow::Cow;
19
20use super::super::{AffinityIntent, WorkType};
21use super::{CloneMode, MemPolicy, MpolFlags, SchedPolicy, WorkSpec};
22
/// Configuration for spawning a group of worker processes.
///
/// The top-level fields describe the primary group;
/// [`Self::composed`] optionally lists secondary [`WorkSpec`] groups
/// that are spawned alongside it. Start from
/// `WorkloadConfig::default()` and chain the setters on this type,
/// then call [`Self::validate`] to reject zero-worker groups and
/// empty-nodemask memory policies before spawning.
//
// PartialEq (not Eq): the [`Self::composed`] field is
// `Vec<WorkSpec>`, and `WorkSpec` is `PartialEq`-only because of
// its `workers_pct: Option<f64>` field — see the derive comment on
// [`WorkSpec`] for the IEEE-754 NaN rationale. Production
// WorkSpec values are NaN-free at construction (the
// `WorkSpec::workers_pct` builder rejects NaN), so the inherited
// f64 semantics do not surface at typical call sites.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
// See [`WorkType`]'s `#[serde(bound(...))]` comment — embedding
// `WorkType` propagates the same lifetime-bound issue, so we pass
// through the same explicit empty bound.
#[serde(bound(deserialize = ""))]
pub struct WorkloadConfig {
    /// Number of worker processes to fork. Concrete `usize` (not
    /// `Option`): `WorkloadConfig` is the spawn-time configuration
    /// passed to `WorkloadHandle::spawn`, by which point the worker
    /// count must be known. The Option-to-usize coalescing happens
    /// upstream at `resolve_num_workers`, which
    /// reads [`crate::workload::WorkSpec::num_workers`]
    /// (`Option<usize>` — `None` falls back to
    /// `Ctx::workers_per_cgroup`) and produces the resolved value
    /// passed to `Self::for_scenario_engine`. The type asymmetry
    /// is deliberate: `WorkSpec` is the user-facing declarative
    /// spec where `None` means "inherit the cgroup-level default",
    /// `WorkloadConfig` is the spawn-time concrete config where
    /// `usize` is the only sensible type.
    ///
    /// Defaults to `1`. [`Self::validate`] rejects `0`.
    pub num_workers: usize,
    /// Per-worker affinity intent. Resolved at spawn time via the
    /// same gate as composed entries (see [`Self::composed`]):
    /// [`AffinityIntent::Inherit`] (resolved to
    /// `ResolvedAffinity::None`),
    /// [`AffinityIntent::Exact`] (resolved to
    /// `ResolvedAffinity::Fixed`), and
    /// [`AffinityIntent::RandomSubset`] (resolved to
    /// `ResolvedAffinity::Random` — sampling deferred per-worker
    /// at spawn time) are accepted at `WorkloadHandle::spawn`.
    /// Topology-aware variants (`SingleCpu`, `LlcAligned`,
    /// `CrossCgroup`, `SmtSiblingPair`) require scenario context
    /// and are rejected with an actionable diagnostic.
    /// Type-unified with [`WorkSpec::affinity`] so a test author
    /// writes the same affinity expression at the top level and
    /// inside `composed` entries.
    ///
    /// Defaults to [`AffinityIntent::Inherit`].
    pub affinity: AffinityIntent,
    /// What each worker does. Defaults to [`WorkType::SpinWait`].
    pub work_type: WorkType,
    /// Linux scheduling policy. Defaults to
    /// [`SchedPolicy::Normal`].
    pub sched_policy: SchedPolicy,
    /// NUMA memory placement policy. Defaults to
    /// [`MemPolicy::Default`]. [`Self::validate`] runs
    /// [`MemPolicy::validate`] on this field (and on every composed
    /// entry's policy) so a variant that needs a nodemask but
    /// carries an empty node set is reported before spawn.
    pub mem_policy: MemPolicy,
    /// Optional mode flags for `set_mempolicy(2)`. Defaults to
    /// [`MpolFlags::NONE`].
    pub mpol_flags: MpolFlags,
    /// Per-worker nice value applied via `setpriority(2)` after
    /// fork, before the work loop. Range `-20..=19` per `MIN_NICE`
    /// / `MAX_NICE` in `kernel/sys.c`'s `setpriority` syscall;
    /// values outside this window are clamped kernel-side. `None`
    /// (the default) skips the syscall entirely so the worker
    /// inherits the parent's nice value; `Some(n)` invokes
    /// `setpriority(PRIO_PROCESS, 0, n)` unconditionally — a user
    /// who wants the worker to land on nice 0 regardless of the
    /// parent's nice (or a cgroup-level default stored at
    /// [`CgroupDef::default_nice`](crate::scenario::ops::CgroupDef::default_nice))
    /// writes `Some(0)`, distinct from `None`.
    ///
    /// Values below the calling task's current nice require
    /// `CAP_SYS_NICE` (the kernel's `can_nice` check fires on
    /// `niceval < task_nice(p)`, not only on negatives — the
    /// `set_one_prio` gate at `kernel/sys.c` returns `EACCES` to
    /// unprivileged callers when `is_nice_reduction` rejects the
    /// requested value). With `Some(0)` on a parent at `nice=5`,
    /// `setpriority` returns `EACCES` without the capability.
    /// `None` (inherit) is always safe. Failures are logged once
    /// via stderr and do not abort the worker — the
    /// scheduling-policy and affinity sites use the same idiom.
    pub nice: Option<i32>,
    /// How to create each worker. Defaults to [`CloneMode::Fork`].
    pub clone_mode: CloneMode,
    /// Worker process name set via `prctl(PR_SET_NAME)` after fork.
    /// The setter rejects > 15 bytes (TASK_COMM_LEN-1) at
    /// construction so the operator sees the cap at the call site
    /// instead of debugging a truncated comm. `None` inherits the
    /// binary name. Mirrors [`WorkSpec::comm`] so the primary group
    /// exposes the same scheduler-matcher knob composed entries
    /// already do.
    pub comm: Option<Cow<'static, str>>,
    /// Effective UID set via `setresuid(uid, uid, uid)` after fork.
    /// `None` inherits the parent's euid. Mirrors [`WorkSpec::uid`].
    pub uid: Option<u32>,
    /// Effective GID set via `setresgid(gid, gid, gid)` after fork.
    /// `None` inherits the parent's egid. Mirrors [`WorkSpec::gid`].
    pub gid: Option<u32>,
    /// Restrict worker affinity to the CPUs of this NUMA node.
    /// Applied via `sched_setaffinity` after fork. Mirrors
    /// [`WorkSpec::numa_node`]. `None` (the default) applies no
    /// node restriction.
    pub numa_node: Option<u32>,
    /// Secondary worker groups spawned alongside the primary group
    /// described by the top-level fields. Each entry is a
    /// [`WorkSpec`] with its own `work_type`, `num_workers`,
    /// `sched_policy`, `affinity`, etc. Composed groups are spawned
    /// in declaration order after the primary group; their workers
    /// run concurrently with the primary's for the lifetime of the
    /// `WorkloadHandle`. The default (an empty vec) skips the
    /// composed pass and behaves exactly as the pre-composition
    /// spawn.
    ///
    /// All groups share the same stop signal —
    /// `WorkloadHandle::stop_and_collect` terminates primary plus
    /// every composed group atomically. Per-group stop is not
    /// supported.
    ///
    /// Reports carry `WorkerReport::group_idx` = 0 for the primary
    /// group and 1..=N for composed entries in declaration order.
    ///
    /// # Worked example
    ///
    /// Build a multi-group workload — primary `SpinWait(2)` plus
    /// one `PipeIo(2)` composed group plus one `YieldHeavy(1)`
    /// composed group — using either the replacing
    /// [`composed`](Self::composed) setter or the appending
    /// [`push_composed`](Self::push_composed) chain:
    ///
    /// ```
    /// use ktstr::workload::{WorkSpec, WorkType, WorkloadConfig};
    ///
    /// // Append style: each call adds one group to the existing list.
    /// let cfg = WorkloadConfig::default()
    ///     .work_type(WorkType::SpinWait)
    ///     .workers(2)
    ///     .push_composed(
    ///         WorkSpec::default()
    ///             .work_type(WorkType::pipe_io(64))
    ///             .workers(2),
    ///     )
    ///     .push_composed(
    ///         WorkSpec::default()
    ///             .work_type(WorkType::YieldHeavy)
    ///             .workers(1),
    ///     );
    /// assert_eq!(cfg.composed.len(), 2);
    ///
    /// // Replace style: one call passes every composed group at once.
    /// let cfg2 = WorkloadConfig::default()
    ///     .work_type(WorkType::SpinWait)
    ///     .workers(2)
    ///     .composed([
    ///         WorkSpec::default().work_type(WorkType::pipe_io(64)).workers(2),
    ///         WorkSpec::default().work_type(WorkType::YieldHeavy).workers(1),
    ///     ]);
    /// assert_eq!(cfg2.composed.len(), 2);
    /// ```
    ///
    /// # Resolution rules at spawn time
    ///
    /// Composed [`WorkSpec`] entries must specify
    /// [`WorkSpec::num_workers`] (`Some(n)`); the `None` default
    /// resolved by the scenario engine via
    /// `Ctx::workers_per_cgroup` is unreachable from
    /// `WorkloadHandle::spawn` and is rejected with an actionable
    /// diagnostic.
    ///
    /// Composed [`WorkSpec::affinity`] accepts the no-context
    /// variants [`AffinityIntent::Inherit`] (resolved to
    /// `ResolvedAffinity::None`), [`AffinityIntent::Exact`]
    /// (resolved to `ResolvedAffinity::Fixed`), and
    /// [`AffinityIntent::RandomSubset`] (resolved to
    /// `ResolvedAffinity::Random` — sampling deferred per-worker
    /// at spawn time). The topology-aware variants (`SingleCpu`,
    /// `LlcAligned`, `CrossCgroup`, `SmtSiblingPair`) are rejected
    /// because spawn() has no access to the
    /// [`crate::topology::TestTopology`] / cpuset state that the
    /// scenario engine threads in.
    ///
    /// Composed entries inherit the parent
    /// [`WorkloadConfig::clone_mode`] — the dispatch path
    /// (fork vs thread) is a workload-wide property, so
    /// [`WorkSpec`] carries no `clone_mode` field of its own.
    ///
    /// Composition is single-level — a [`WorkSpec`] inside
    /// `composed` has no `composed` field of its own.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub composed: Vec<WorkSpec>,
}
206
207impl Default for WorkloadConfig {
208    fn default() -> Self {
209        Self {
210            num_workers: 1,
211            affinity: AffinityIntent::Inherit,
212            work_type: WorkType::SpinWait,
213            sched_policy: SchedPolicy::Normal,
214            mem_policy: MemPolicy::Default,
215            mpol_flags: MpolFlags::NONE,
216            nice: None,
217            clone_mode: CloneMode::Fork,
218            comm: None,
219            uid: None,
220            gid: None,
221            numa_node: None,
222            composed: Vec::new(),
223        }
224    }
225}
226
227impl WorkloadConfig {
228    /// Construct a `WorkloadConfig` for scenario-engine spawn
229    /// dispatch (apply_setup non-pcomm, Op::Spawn). The signature
230    /// pins `clone_mode =
231    /// CloneMode::Fork` in the constructor body so callers can't
232    /// accidentally route a Thread-mode workload through these
233    /// sites — a Thread-mode spawn would migrate the scenario
234    /// runner's tgid into the test cgroup when move_tasks fires,
235    /// per `kernel/cgroup/cgroup.c::cgroup_procs_write_start`
236    /// (cgroup.procs writes are process-scoped). The
237    /// previously-needed `debug_assert_eq!(wl.clone_mode, Fork)`
238    /// guards collapse into the type system.
239    ///
240    /// `work_type` is taken as an arg (not pulled from `work.work_type`)
241    /// because apply_setup's per-WorkSpec resolution layers a
242    /// `ctx.work_type_override` over the spec-declared type; passing
243    /// the resolved value keeps that call site honest.
244    ///
245    /// `affinity` is taken as an arg (not `work.affinity`) because
246    /// all three call sites pre-resolve the intent via
247    /// `intent_for_spawn` against the cgroup cpuset before
248    /// dispatch.
249    ///
250    /// **Do NOT pass `work.num_workers.unwrap()` / `work.affinity` /
251    /// `work.work_type` directly** — that bypasses the resolution
252    /// layer (workers_pct → ceil(cpuset * pct), cgroup-cpuset-aware
253    /// intent_for_spawn, work_type override) and silently produces
254    /// wrong counts / wrong affinity / wrong type. Always pass the
255    /// resolved values from `resolve_num_workers` /
256    /// `intent_for_spawn` / `resolve_work_type`; the args/fields
257    /// asymmetry is deliberate and the resolution layer is
258    /// load-bearing.
259    ///
260    /// `composed` always empty: the scenario-engine spawn dispatch
261    /// emits one WorkloadConfig per WorkSpec from the resolved
262    /// `non_pcomm_works` list, so composition is upstream. A future
263    /// "composed in single spawn" optimization would need a
264    /// sibling constructor, not a parameterized one.
265    ///
266    /// Bails when `work.pcomm.is_some()`: the scenario-engine spawn
267    /// dispatch (apply_setup non-pcomm path, Op::Spawn) forks one
268    /// process per worker and does not
269    /// route through `spawn_pcomm_cgroup`, so `task->group_leader->comm`
270    /// would be left at the binary name rather than the requested
271    /// pcomm value — workers spawn but scheduler matchers filtering
272    /// on the group_leader comm see zero matches. Mirrors the
273    /// `composed[i].pcomm.is_some()` bail at
274    /// `WorkloadHandle::spawn`. Test authors wanting pcomm route via
275    /// `CgroupDef::pcomm` + apply_setup pcomm-aware fan-out, or call
276    /// `WorkloadHandle::spawn_pcomm_cgroup` directly.
277    pub(crate) fn for_scenario_engine(
278        work: &WorkSpec,
279        num_workers: usize,
280        affinity: AffinityIntent,
281        work_type: WorkType,
282    ) -> anyhow::Result<Self> {
283        if work.pcomm.is_some() {
284            anyhow::bail!(
285                "WorkSpec::pcomm is unsupported in the scenario-engine \
286                 spawn dispatch (apply_setup non-pcomm path, \
287                 Op::Spawn) — those sites fork \
288                 one process per worker rather than threading inside \
289                 a pcomm container, so `task->group_leader->comm` \
290                 would stay at the binary name. To run with a \
291                 specific group-leader comm, declare \
292                 `CgroupDef::pcomm` (apply_setup picks up the pcomm-\
293                 aware coalesce path) or call \
294                 `WorkloadHandle::spawn_pcomm_cgroup` directly.",
295            );
296        }
297        Ok(Self {
298            num_workers,
299            affinity,
300            work_type,
301            sched_policy: work.sched_policy,
302            mem_policy: work.mem_policy.clone(),
303            mpol_flags: work.mpol_flags,
304            nice: work.nice,
305            clone_mode: CloneMode::Fork,
306            comm: work.comm.clone(),
307            uid: work.uid,
308            gid: work.gid,
309            numa_node: work.numa_node,
310            composed: Vec::new(),
311        })
312    }
313
314    /// Validate the config before spawn. Fails loud on invariants
315    /// that the worker-spawn path otherwise handles by silent
316    /// degradation — in particular `mem_policy` variants that
317    /// require a non-empty nodemask (Bind / Interleave / PreferredMany /
318    /// WeightedInterleave with an empty BTreeSet).
319    ///
320    /// # Why a config-layer gate
321    ///
322    /// `apply_mempolicy_with_flags` (called from the worker's hot
323    /// path in BOTH forked-child and thread-mode contexts) currently
324    /// handles an empty node-set by logging to `stderr` and
325    /// returning — the worker silently proceeds with default kernel
326    /// placement instead of the requested NUMA binding. That
327    /// silent-skip is a silent-drop bug (the test reports success
328    /// while the actual workload ran with the wrong placement).
329    ///
330    /// A hypothetical fix-it-in-the-worker design — `libc::_exit(1)`
331    /// on an empty node-set inside the worker — was rejected because
332    /// it is unsound for thread-mode workers: `_exit` invokes
333    /// `exit_group(2)` (verified at kernel/exit.c::do_group_exit →
334    /// `zap_other_threads`) which terminates EVERY thread in the
335    /// caller's tgid. A thread-mode worker shares its tgid with the
336    /// test runner, so an inner `_exit(1)` would kill the runner.
337    /// Rejecting at the config layer keeps the failure visible as a
338    /// returnable `Result` BEFORE any worker context exists,
339    /// regardless of clone-mode dispatch, and avoids the exit_group
340    /// hazard entirely.
341    ///
342    /// # What is validated
343    ///
344    /// Two gates, in order:
345    /// 1. `num_workers > 0` on the primary group and on every
346    ///    composed [`WorkSpec`] entry — zero workers emit no
347    ///    `WorkerReport`s and downstream assertions would vacuously
348    ///    pass. Composed entries also route through
349    ///    `WorkloadHandle::spawn` (via `GroupParams::from_composed`)
350    ///    directly, bypassing the scenario-engine's
351    ///    `resolve_num_workers` resolver, so the gate must live
352    ///    here to catch `composed[i].num_workers=0` before the spawn
353    ///    cascade forks anything.
354    /// 2. `mem_policy` on the primary group and on every composed
355    ///    [`WorkSpec`] entry.
356    ///
357    /// Per-entry errors name the offending slot (`"primary"` or
358    /// `"composed[N] (group_idx M)"`) so the test author can
359    /// locate the misconfigured group. Gate (1) runs first so the
360    /// more-fundamental "no workers" diagnostic surfaces before a
361    /// secondary mem_policy failure (which becomes moot when no
362    /// worker exists to bind).
363    ///
364    /// # Scope
365    ///
366    /// Validates `mem_policy` and `num_workers > 0`. Other field
367    /// invariants are validated at their own use sites:
368    /// `workers_pct` via `WorkSpec::resolve_workers_pct`,
369    /// [`WorkType`] payloads via per-variant constructors and
370    /// `validate_workload_admission`, [`AffinityIntent`] topology
371    /// rules at the scenario-engine `resolve_affinity_for_cgroup`
372    /// resolver. This method is the home for invariants that must
373    /// hold BEFORE any worker context (threads, forks, cgroups)
374    /// exists — `mem_policy` qualifies because of the silent-skip +
375    /// `exit_group` hazard noted above; `num_workers == 0`
376    /// qualifies because every downstream gate becomes
377    /// vacuous-pass. Future fields with the same
378    /// "must-fail-before-spawn" shape belong here too.
379    ///
380    /// # Return type
381    ///
382    /// Returns [`anyhow::Result`] (composite-layer convention used
383    /// by sibling composite validators
384    /// `crate::test_support::entry::KtstrTestEntry::validate` and
385    /// `crate::test_support::entry::TopologyConstraints::validate`
386    /// — they wrap leaf validators that return
387    /// `Result<(), String>` with slot-context). The leaf validator
388    /// [`MemPolicy::validate`] returns `Result<(), String>` to match
389    /// the leaf convention used by every per-spec validator in the
390    /// project.
391    pub fn validate(&self) -> anyhow::Result<()> {
392        // num_workers gate runs FIRST so the operator sees the more-
393        // fundamental "no workers" diagnostic before mem_policy
394        // failures (which become moot when no worker exists to bind).
395        if self.num_workers == 0 {
396            anyhow::bail!(
397                "WorkloadConfig.num_workers=0 is not allowed — \
398                 zero workers emit no WorkerReports and downstream \
399                 assertions would vacuously pass. Use at least 1 \
400                 worker or drop the WorkloadConfig entirely."
401            );
402        }
403        for (idx, spec) in self.composed.iter().enumerate() {
404            // composed entries route through `WorkloadHandle::spawn`
405            // (via `GroupParams::from_composed`) directly, bypassing
406            // the scenario engine's `resolve_num_workers` —
407            // the gate must live here for the spawn entry to catch
408            // composed[i].num_workers=0 before forking.
409            if spec.num_workers == Some(0) {
410                anyhow::bail!(
411                    "WorkloadConfig.composed[{idx}].num_workers=0 \
412                     (group_idx {}): zero workers in a composed group \
413                     emit no WorkerReports for the group; drop the \
414                     entry or use >= 1 worker",
415                    idx + 1,
416                );
417            }
418        }
419        self.mem_policy
420            .validate()
421            .map_err(|e| anyhow::anyhow!("WorkloadConfig.mem_policy (primary group): {e}",))?;
422        for (idx, spec) in self.composed.iter().enumerate() {
423            spec.mem_policy.validate().map_err(|e| {
424                anyhow::anyhow!(
425                    "WorkloadConfig.composed[{idx}].mem_policy (group_idx {}): {e}",
426                    idx + 1,
427                )
428            })?;
429        }
430        Ok(())
431    }
432
433    /// Set the number of worker processes.
434    #[must_use = "builder methods consume self; bind the result"]
435    pub fn workers(mut self, n: usize) -> Self {
436        self.num_workers = n;
437        self
438    }
439
440    /// Set the per-worker affinity intent.
441    ///
442    /// At `WorkloadHandle::spawn`, [`AffinityIntent::Inherit`],
443    /// [`AffinityIntent::Exact`], and [`AffinityIntent::RandomSubset`]
444    /// are accepted; topology-aware variants (`SingleCpu`,
445    /// `LlcAligned`, `CrossCgroup`, `SmtSiblingPair`) require
446    /// scenario context and are rejected.
447    ///
448    /// Idiomatic short form for an exact CPU set:
449    /// `cfg.affinity(AffinityIntent::exact([0, 1]))`.
450    #[must_use = "builder methods consume self; bind the result"]
451    pub fn affinity(mut self, a: AffinityIntent) -> Self {
452        self.affinity = a;
453        self
454    }
455
456    /// Set the work type.
457    #[must_use = "builder methods consume self; bind the result"]
458    pub fn work_type(mut self, wt: WorkType) -> Self {
459        self.work_type = wt;
460        self
461    }
462
463    /// Set the Linux scheduling policy.
464    #[must_use = "builder methods consume self; bind the result"]
465    pub fn sched_policy(mut self, p: SchedPolicy) -> Self {
466        self.sched_policy = p;
467        self
468    }
469
470    /// Set the NUMA memory placement policy.
471    #[must_use = "builder methods consume self; bind the result"]
472    pub fn mem_policy(mut self, p: MemPolicy) -> Self {
473        self.mem_policy = p;
474        self
475    }
476
477    /// Set the NUMA memory policy mode flags.
478    #[must_use = "builder methods consume self; bind the result"]
479    pub fn mpol_flags(mut self, f: MpolFlags) -> Self {
480        self.mpol_flags = f;
481        self
482    }
483
484    /// Set the per-worker nice value applied via `setpriority(2)`.
485    ///
486    /// Stores `Some(n)` on the config; the spawn pipeline calls
487    /// `setpriority(PRIO_PROCESS, 0, n)` unconditionally (including
488    /// `n == 0`). The "skip the syscall, inherit the parent's nice"
489    /// state is the type-level default `None` — set the field via
490    /// `..Default::default()` (or leave the builder unchained) when
491    /// you want inherit semantics. Values below the calling task's
492    /// current nice require `CAP_SYS_NICE`; see
493    /// [`WorkloadConfig::nice`] for the full `can_nice` rule.
494    #[must_use = "builder methods consume self; bind the result"]
495    pub fn nice(mut self, n: i32) -> Self {
496        self.nice = Some(n);
497        self
498    }
499
500    /// Set the clone mode used when spawning each worker.
501    ///
502    /// [`CloneMode::Fork`] (the default) preserves historical
503    /// behavior. See [`CloneMode`] for the full menu and dispatch
504    /// status.
505    #[must_use = "builder methods consume self; bind the result"]
506    pub fn clone_mode(mut self, m: CloneMode) -> Self {
507        self.clone_mode = m;
508        self
509    }
510
511    /// Set the worker process name via `prctl(PR_SET_NAME)`.
512    ///
513    /// # Panics
514    ///
515    /// Panics on programmer-error inputs — mirrors
516    /// [`crate::workload::WorkSpec::pcomm`]'s `# Panics`:
517    /// - Empty string.
518    /// - Interior NUL byte (prctl C-string truncation).
519    /// - More than 15 bytes (`TASK_COMM_LEN - 1` cap).
520    ///
521    /// See
522    /// `validate_task_comm_string`
523    /// for the centralized rationale; `name.len()` is the BYTE
524    /// length (UTF-8 multi-byte chars count as their byte width).
525    #[must_use = "builder methods consume self; bind the result"]
526    pub fn comm(mut self, name: impl Into<Cow<'static, str>>) -> Self {
527        let name: Cow<'static, str> = name.into();
528        crate::workload::validate_task_comm_string("WorkloadConfig::comm", &name);
529        self.comm = Some(name);
530        self
531    }
532
533    /// Set the worker's effective UID via `setresuid`.
534    #[must_use = "builder methods consume self; bind the result"]
535    pub fn uid(mut self, uid: u32) -> Self {
536        self.uid = Some(uid);
537        self
538    }
539
540    /// Set the worker's effective GID via `setresgid`.
541    #[must_use = "builder methods consume self; bind the result"]
542    pub fn gid(mut self, gid: u32) -> Self {
543        self.gid = Some(gid);
544        self
545    }
546
547    /// Restrict worker affinity to a NUMA node's CPU set.
548    #[must_use = "builder methods consume self; bind the result"]
549    pub fn numa_node(mut self, node: u32) -> Self {
550        self.numa_node = Some(node);
551        self
552    }
553
554    /// Replace the composed worker groups (replacing setter).
555    ///
556    /// Pass an iterator of [`WorkSpec`] entries; the existing
557    /// `composed` vec is REPLACED with the supplied entries. Each
558    /// will be spawned as an independent group alongside the
559    /// primary described by the top-level fields. Pass an empty
560    /// iterator to clear any previously-set composed groups.
561    ///
562    /// Use this when you have all groups in hand at once. To add
563    /// one group at a time to an existing list, use the appending
564    /// [`push_composed`](Self::push_composed) instead.
565    ///
566    /// See [`Self::composed`] for the resolution rules applied to
567    /// each entry's `num_workers` / `affinity` fields at spawn time.
568    #[must_use = "builder methods consume self; bind the result"]
569    pub fn composed(mut self, specs: impl IntoIterator<Item = WorkSpec>) -> Self {
570        self.composed = specs.into_iter().collect();
571        self
572    }
573
574    /// Append a single composed worker group to the existing list
575    /// (appending setter).
576    ///
577    /// The supplied [`WorkSpec`] is PUSHED onto the existing
578    /// `composed` vec; previously-set groups are preserved.
579    /// Convenience for chained construction:
580    /// `cfg.push_composed(a).push_composed(b)` produces
581    /// `composed: [a, b]`.
582    ///
583    /// Use this when building the group list incrementally. To
584    /// replace the entire list in one call, use the replacing
585    /// [`composed`](Self::composed) instead.
586    #[must_use = "builder methods consume self; bind the result"]
587    pub fn push_composed(mut self, spec: WorkSpec) -> Self {
588        self.composed.push(spec);
589        self
590    }
591}
592
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Requires `cfg.validate()` to fail (panicking with `why` if it
    /// succeeds) and returns the error rendered with `{:#}`.
    fn rejection_message(cfg: &WorkloadConfig, why: &str) -> String {
        let err = cfg.validate().expect_err(why);
        format!("{err:#}")
    }

    /// A zero-worker primary group is refused, and the message
    /// carries the vacuous-pass rationale.
    #[test]
    fn validate_rejects_zero_num_workers_on_primary() {
        let cfg = WorkloadConfig::default().workers(0);
        let msg = rejection_message(&cfg, "num_workers=0 must bail");
        for needle in ["num_workers=0 is not allowed", "vacuously pass"] {
            assert!(msg.contains(needle), "{msg}");
        }
    }

    /// A zero-worker composed entry is refused, and the message
    /// names both the vec index and the 1-based group_idx.
    #[test]
    fn validate_rejects_zero_num_workers_on_composed_entry() {
        let cfg = WorkloadConfig::default()
            .workers(1)
            .push_composed(WorkSpec::default().workers(0));
        let msg = rejection_message(&cfg, "composed[0].num_workers=0 must bail");
        assert!(
            msg.contains("composed[0]"),
            "must cite the entry idx: {msg}"
        );
        assert!(msg.contains("group_idx 1"), "1-indexed group_idx: {msg}");
    }

    /// Non-zero worker counts on the primary and on a composed
    /// entry pass validation.
    #[test]
    fn validate_accepts_one_or_more_workers_on_primary_and_composed() {
        let cfg = WorkloadConfig::default()
            .workers(1)
            .push_composed(WorkSpec::default().workers(2));
        cfg.validate().expect("1+composed(2) must validate ok");
    }

    /// With BOTH zero workers and an invalid mem_policy, the
    /// worker-count gate must fire first, so the operator's primary
    /// diagnostic is the more fundamental "no workers" rather than
    /// the secondary "bad mempolicy" message.
    #[test]
    fn validate_rejects_zero_workers_before_mempolicy() {
        let empty_nodes = BTreeSet::new(); // invalid for Bind
        let cfg = WorkloadConfig::default()
            .workers(0)
            .mem_policy(MemPolicy::Bind(empty_nodes));
        let msg = rejection_message(
            &cfg,
            "zero workers + bad policy must bail on num_workers first",
        );
        assert!(
            msg.contains("num_workers=0"),
            "zero-workers msg surfaces: {msg}"
        );
        assert!(!msg.contains("mem_policy"), "mempolicy msg deferred: {msg}");
    }

    /// The `comm` builder panics on an empty name.
    #[test]
    #[should_panic(expected = "WorkloadConfig::comm: empty string rejected")]
    fn workload_config_comm_rejects_empty() {
        let base = WorkloadConfig::default();
        let _ = base.comm("");
    }

    /// The `comm` builder panics on a name with an interior NUL.
    #[test]
    #[should_panic(expected = "interior NUL byte")]
    fn workload_config_comm_rejects_interior_nul() {
        let base = WorkloadConfig::default();
        let _ = base.comm("foo\0bar");
    }

    /// Per-builder boundary pin: if a future refactor re-routes
    /// WorkloadConfig::comm around the shared
    /// `validate_task_comm_string` helper, this test surfaces it
    /// even when the helper-level tests still pass.
    #[test]
    fn workload_config_comm_accepts_15_byte_boundary() {
        let boundary = "a".repeat(15);
        let cfg = WorkloadConfig::default().comm(boundary.clone());
        assert_eq!(cfg.comm.as_deref(), Some(boundary.as_str()));
    }

    /// One byte past the boundary makes the `comm` builder panic.
    #[test]
    #[should_panic(expected = "16 bytes")]
    fn workload_config_comm_rejects_16_byte_overflow() {
        let too_long = "a".repeat(16);
        let _ = WorkloadConfig::default().comm(too_long);
    }
}