ktstr/workload/config/
mod.rs

1//! Declarative configuration types for the workload pipeline.
2//!
3//! Holds every type a test author writes (or that round-trips through
4//! serde) without crossing the kernel boundary itself: [`WorkloadConfig`]
5//! and its [`WorkSpec`] composed entries, the per-knob enums
6//! ([`SchedPolicy`], [`SchedClass`], [`MemPolicy`], [`MpolFlags`],
7//! [`CloneMode`], [`FutexLockMode`], [`WakeMechanism`], [`ReapMode`],
8//! [`AluWidth`]),
9//! the [`defaults`] constants `WorkType::from_name` consults, the
10//! [`humantime_serde_helper`] module the duration fields cite, and the
11//! [`resolve_work_type`] selector. The corresponding kernel-call
12//! helpers live in the [`spawn`](super::spawn) submodule
13//! (`apply_mempolicy_with_flags`, `apply_nice`, `build_nodemask`)
14//! and the [`worker`](super::worker) submodule
15//! (`set_sched_policy` in `worker/sched.rs`).
16//!
17//! Types are re-exported from the parent module via `pub use config::*`,
18//! so existing `crate::workload::WorkloadConfig` paths continue to
19//! resolve.
20
21use super::WorkType;
22
23mod mempolicy;
24mod sched;
25mod work;
26mod workload;
27
28pub use mempolicy::{MemPolicy, MpolFlags};
29pub use sched::{AluWidth, FutexLockMode, ReapMode, SchedClass, SchedPolicy, WakeMechanism};
30pub use work::WorkSpec;
31pub(crate) use work::validate_task_comm_string;
32pub use workload::WorkloadConfig;
33
34/// Serde helper for [`std::time::Duration`] using human-readable
35/// strings (`"100ms"`, `"5s"`, `"1h30m"`) instead of the default
36/// `{secs, nanos}` object.
37///
38/// Wire format chosen so persisted [`WorkSpec`] / [`WorkloadConfig`]
39/// values are operator-readable: a test author who exports a config
40/// can edit `"work_per_hop": "100us"` directly without translating
41/// from `{secs: 0, nanos: 100_000}`.
42///
43/// Reuses the [`humantime`] crate already pulled in for CLI flag
44/// parsing — no new dependency. Use via `#[serde(with =
45/// "humantime_serde_helper")]` on `Duration` fields.
46pub(crate) mod humantime_serde_helper {
47    use std::time::Duration;
48
49    pub fn serialize<S: serde::Serializer>(d: &Duration, s: S) -> Result<S::Ok, S::Error> {
50        s.serialize_str(&humantime::format_duration(*d).to_string())
51    }
52
53    pub fn deserialize<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Duration, D::Error> {
54        let s = <String as serde::Deserialize>::deserialize(d)?;
55        humantime::parse_duration(&s).map_err(serde::de::Error::custom)
56    }
57}
58
59/// Named defaults for the parametric [`WorkType`] variants, used by
60/// [`WorkType::from_name`]. Extracting the magic numbers here
61/// provides a named home for the default values so tests and docs
62/// (e.g. `doc/guide/src/architecture/workers.md`) can cite them by
63/// constant name instead of each tracking a scattered integer
64/// literal. Every value carries a single-line comment naming the
65/// knob and its unit; the const names mirror the
66/// `{variant_snake}_{field}` convention so renames show up as
67/// compile errors in both sites.
68pub mod defaults {
69    // Bursty
70    pub const BURSTY_BURST_DURATION: std::time::Duration = std::time::Duration::from_millis(50);
71    pub const BURSTY_SLEEP_DURATION: std::time::Duration = std::time::Duration::from_millis(100);
72    // PipeIo
73    pub const PIPE_IO_BURST_ITERS: u64 = 1024;
74    // FutexPingPong
75    pub const FUTEX_PING_PONG_SPIN_ITERS: u64 = 1024;
76    // CachePressure / CacheYield / CachePipe share buffer shape
77    pub const CACHE_PRESSURE_SIZE_KIB: usize = 32;
78    pub const CACHE_PRESSURE_STRIDE: usize = 64;
79    pub const CACHE_YIELD_SIZE_KIB: usize = 32;
80    pub const CACHE_YIELD_STRIDE: usize = 64;
81    pub const CACHE_PIPE_SIZE_KIB: usize = 32;
82    pub const CACHE_PIPE_BURST_ITERS: u64 = 1024;
83    // FutexFanOut
84    pub const FUTEX_FAN_OUT_FAN_OUT: usize = 4;
85    pub const FUTEX_FAN_OUT_SPIN_ITERS: u64 = 1024;
86    // AffinityChurn
87    pub const AFFINITY_CHURN_SPIN_ITERS: u64 = 1024;
88    // CrossAffinityChurn
89    pub const CROSS_AFFINITY_CHURN_SPIN_ITERS: u64 = 1024;
90    // PolicyChurn
91    pub const POLICY_CHURN_SPIN_ITERS: u64 = 1024;
92    // FanOutCompute
93    pub const FAN_OUT_COMPUTE_FAN_OUT: usize = 4;
94    pub const FAN_OUT_COMPUTE_CACHE_FOOTPRINT_KIB: usize = 256;
95    pub const FAN_OUT_COMPUTE_OPERATIONS: usize = 5;
96    pub const FAN_OUT_COMPUTE_SLEEP_USEC: u64 = 100;
97    // PageFaultChurn
98    pub const PAGE_FAULT_CHURN_REGION_KIB: usize = 4096;
99    pub const PAGE_FAULT_CHURN_TOUCHES_PER_CYCLE: usize = 256;
100    pub const PAGE_FAULT_CHURN_SPIN_ITERS: u64 = 64;
101    // MutexContention
102    pub const MUTEX_CONTENTION_CONTENDERS: usize = 4;
103    pub const MUTEX_CONTENTION_HOLD_ITERS: u64 = 256;
104    pub const MUTEX_CONTENTION_WORK_ITERS: u64 = 1024;
105    // ThunderingHerd
106    pub const THUNDERING_HERD_WAITERS: usize = 7;
107    pub const THUNDERING_HERD_BATCHES: u64 = 1_000;
108    pub const THUNDERING_HERD_INTER_BATCH_MS: u64 = 5;
109    // PriorityInversion
110    pub const PRIORITY_INVERSION_HIGH_COUNT: usize = 1;
111    pub const PRIORITY_INVERSION_MEDIUM_COUNT: usize = 1;
112    pub const PRIORITY_INVERSION_LOW_COUNT: usize = 1;
113    pub const PRIORITY_INVERSION_HOLD_ITERS: u64 = 4096;
114    pub const PRIORITY_INVERSION_WORK_ITERS: u64 = 1024;
115    pub const PRIORITY_INVERSION_PI_MODE: super::FutexLockMode = super::FutexLockMode::Plain;
116    // ProducerConsumerImbalance
117    pub const PRODUCER_CONSUMER_PRODUCERS: usize = 2;
118    pub const PRODUCER_CONSUMER_CONSUMERS: usize = 1;
119    pub const PRODUCER_CONSUMER_PRODUCE_RATE_HZ: u64 = 1_000;
120    pub const PRODUCER_CONSUMER_CONSUME_ITERS: u64 = 4_096;
121    pub const PRODUCER_CONSUMER_QUEUE_DEPTH_TARGET: u64 = 1024;
122    // RtStarvation
123    pub const RT_STARVATION_RT_WORKERS: usize = 1;
124    pub const RT_STARVATION_CFS_WORKERS: usize = 1;
125    pub const RT_STARVATION_RT_PRIORITY: i32 = 50;
126    pub const RT_STARVATION_BURST_ITERS: u64 = 1024;
127    // AsymmetricWaker
128    pub const ASYMMETRIC_WAKER_BURST_ITERS: u64 = 1024;
129    // WakeChain
130    pub const WAKE_CHAIN_DEPTH: usize = 4;
131    pub const WAKE_CHAIN_WAKE: super::WakeMechanism = super::WakeMechanism::Pipe;
132    pub const WAKE_CHAIN_WORK_PER_HOP: std::time::Duration = std::time::Duration::from_micros(100);
133    // NumaWorkingSetSweep
134    pub const NUMA_WORKING_SET_SWEEP_REGION_KIB: usize = 4_096;
135    pub const NUMA_WORKING_SET_SWEEP_SWEEP_PERIOD_MS: u64 = 100;
136    // CgroupChurn
137    pub const CGROUP_CHURN_GROUPS: usize = 2;
138    pub const CGROUP_CHURN_CYCLE_MS: u64 = 100;
139    // CgroupAttachStorm — sentinel sibling-cgroup name used by
140    // `from_name("CgroupAttachStorm")`; the author sets a real `dest`
141    // (matching an `Op::add_cgroup`) before the storm migrates anything.
142    pub const CGROUP_ATTACH_STORM_DEST: &str = "dest";
143    // SignalStorm
144    pub const SIGNAL_STORM_SIGNALS_PER_ITER: u64 = 16;
145    pub const SIGNAL_STORM_WORK_ITERS: u64 = 1024;
146    // PreemptStorm
147    pub const PREEMPT_STORM_CFS_WORKERS: usize = 2;
148    pub const PREEMPT_STORM_RT_BURST_ITERS: u64 = 1024;
149    pub const PREEMPT_STORM_RT_SLEEP_US: u64 = 1_000;
150    // EpollStorm
151    pub const EPOLL_STORM_PRODUCERS: usize = 1;
152    pub const EPOLL_STORM_CONSUMERS: usize = 2;
153    pub const EPOLL_STORM_EVENTS_PER_BURST: u64 = 32;
154    // NumaMigrationChurn
155    pub const NUMA_MIGRATION_CHURN_PERIOD_MS: u64 = 100;
156    // IdleChurn
157    pub const IDLE_CHURN_BURST_DURATION: std::time::Duration = std::time::Duration::from_millis(1);
158    pub const IDLE_CHURN_SLEEP_DURATION: std::time::Duration = std::time::Duration::from_millis(5);
159    /// Default for `WorkType::IdleChurn`'s `precise_timing` field.
160    /// `false` keeps the inherited 50µs `current->timer_slack_ns`
161    /// the variant doc describes; opt-in callers set the field to
162    /// `true` directly to call `prctl(PR_SET_TIMERSLACK, 1)`.
163    pub const IDLE_CHURN_PRECISE_TIMING: bool = false;
164    // TimerLatency
165    /// Default [`crate::workload::WorkType::TimerLatency`] inter-wake interval
166    /// (µs): 1000 = 1kHz, matching `cyclictest`'s default tick rate. The
167    /// absolute deadline advances by this each cycle.
168    pub const TIMER_LATENCY_INTERVAL_US: u64 = 1000;
169    // NetTraffic
170    /// Default [`crate::workload::WorkType::NetTraffic`] inter-frame interval
171    /// (µs): 0 = continuous (maximum TX-kick / softirq rate).
172    pub const NET_TRAFFIC_INTERVAL_US: u64 = 0;
173    /// Default [`crate::workload::WorkType::NetTraffic`] Ethernet frame size
174    /// (bytes): 60 = `ETH_ZLEN`, the minimum L2 frame sans FCS.
175    pub const NET_TRAFFIC_FRAME_BYTES: u16 = 60;
176    // IrqWake
177    /// Default [`crate::workload::WorkType::IrqWake`] inter-frame interval (µs):
178    /// 1000 (1 kHz) paces the sender so the receiver drains its queue and
179    /// genuinely blocks between frames — giving a usable (non-degenerate) wake
180    /// reservoir. `interval_us == 0` maximizes softirq load (serviced by
181    /// `ksoftirqd`) but degenerates the wake reservoir — see the variant doc.
182    pub const IRQ_WAKE_INTERVAL_US: u64 = 1000;
183    /// Default [`crate::workload::WorkType::IrqWake`] Ethernet frame size (bytes):
184    /// 60 = `ETH_ZLEN`, the minimum L2 frame sans FCS.
185    pub const IRQ_WAKE_FRAME_BYTES: u16 = 60;
186    // AluHot
187    /// Default for `WorkType::AluHot`'s `width` field. `Widest`
188    /// resolves to the widest data-path the host supports at
189    /// worker entry — see [`super::AluWidth`] for the resolution
190    /// order.
191    pub const ALU_HOT_WIDTH: super::AluWidth = super::AluWidth::Widest;
192    // IpcVariance
193    /// Multiply-chain steps per hot phase in `WorkType::IpcVariance`.
194    /// At IPC 2.0 / 2 GHz this spans ~50µs — long enough that the
195    /// scheduler's IPC-window observer sees a steady high-IPC
196    /// signal before the cold phase flips it.
197    pub const IPC_VARIANCE_HOT_ITERS: u64 = 100_000;
198    /// Random cache-line touches per cold phase in
199    /// `WorkType::IpcVariance`. 1024 touches across a 512KB
200    /// working set on a typical x86 core takes ~100µs (LLC) to
201    /// ~1ms (DRAM-spill).
202    pub const IPC_VARIANCE_COLD_ITERS: u64 = 1024;
203    /// Hot+cold pair iterations per outer loop in
204    /// `WorkType::IpcVariance`. 64 keeps per-stop-check
205    /// overhead at <2% while bounding shutdown latency to one
206    /// outer iteration (~10ms with the defaults above).
207    pub const IPC_VARIANCE_PERIOD_ITERS: u64 = 64;
208}
209
210/// Resolve a work type with an optional override.
211///
212/// Returns a clone of `override_wt` when `swappable` is true, an
213/// override is provided, and the override's group size (if any)
214/// divides `num_workers`. Otherwise returns a clone of `base`. When
215/// `override_wt` is `None`, always returns `base` regardless of
216/// `swappable`.
217pub(crate) fn resolve_work_type(
218    base: &WorkType,
219    override_wt: Option<&WorkType>,
220    swappable: bool,
221    num_workers: usize,
222) -> WorkType {
223    if !swappable {
224        return base.clone();
225    }
226    match override_wt {
227        Some(wt) => {
228            if let Some(gs) = wt.worker_group_size()
229                && !num_workers.is_multiple_of(gs)
230            {
231                return base.clone();
232            }
233            wt.clone()
234        }
235        None => base.clone(),
236    }
237}
238
239/// How `WorkloadHandle::spawn` creates worker tasks.
240///
241/// `Fork` is the default — the existing `fork(2)` path with
242/// separate address space, separate thread group, and `waitpid`
243/// reaping. `Thread` switches to [`std::thread::spawn`] for workers
244/// that share the test runner's tgid.
245///
246/// # `WorkType` × `CloneMode` compatibility
247///
248/// Most [`WorkType`] variants compose with both clone modes. The
249/// only exception is surfaced at spawn time by
250/// `WorkloadHandle::spawn`:
251///
252/// | WorkType                | Fork | Thread |
253/// |-------------------------|------|--------|
254/// | All variants (default)  | OK   | OK     |
255/// | [`WorkType::ForkExit`]  | OK   | reject |
256///
257/// `ForkExit + Thread` is rejected because the worker body calls
258/// `libc::fork()` from inside a thread of the multi-threaded harness:
259/// the fork duplicates only the calling thread, so any lock another
260/// thread holds at fork time stays locked forever in the child. The
261/// child only `_exit`s here — and `fork()` omits `CLONE_THREAD`, so the
262/// child is its own singleton tgid; its `_exit` invokes `exit_group(2)`,
263/// but that tgid has no sibling threads to tear down, so it ends only
264/// the child — but the fork/exit lifecycle is faithfully exercised only
265/// when each worker is its own process. Use [`CloneMode::Fork`] for
266/// [`WorkType::ForkExit`].
267///
268/// Other Thread-mode interactions worth knowing:
269///
270/// - [`WorkType::NiceSweep`]: `setpriority(PRIO_PROCESS, 0, …)`
271///   targets the calling task only (`kernel/sys.c::sys_setpriority`
272///   `case PRIO_PROCESS: if (who == 0) p = current`), so each
273///   sibling thread independently sweeps its own nice. Allowed.
274/// - [`WorkType::AffinityChurn`]: `sched_setaffinity(0, …)`
275///   addresses the calling thread by kernel rule
276///   (`kernel/sched/syscalls.c::sched_setaffinity`). Allowed; no
277///   cross-thread interference.
278/// - [`WorkType::PolicyChurn`]: `sched_setscheduler(0, …)` is also
279///   per-task. Allowed.
280/// - [`WorkType::AsymmetricWaker`] with an RT class: legal but
281///   the harness still runs as its original (likely SCHED_NORMAL)
282///   policy; only the worker thread is RT.
283#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
284#[serde(rename_all = "snake_case")]
285pub enum CloneMode {
286    /// Plain `fork(2)`: separate address space, separate thread
287    /// group (`p->tgid = p->pid`), reaped via `waitpid`. The default
288    /// — preserves existing `WorkloadHandle::spawn` behavior.
289    #[default]
290    Fork,
291    /// Same thread group as the spawning process. Implementation
292    /// uses [`std::thread::spawn`]; the Rust thread runtime owns
293    /// all clone-flag selection internally. Reaped via
294    /// [`std::thread::JoinHandle`]. Workers share `tgid`,
295    /// signal-handler table, and address space with the parent —
296    /// observers like `task_struct->group_leader`, `tgid`,
297    /// `real_parent` all match the parent's.
298    Thread,
299}
300
301#[cfg(test)]
302mod tests {
303    use super::super::AffinityIntent;
304    use super::super::types::WorkType;
305    use super::*;
306    use std::collections::BTreeSet;
307    use std::time::Duration;
308
#[test]
fn sched_policy_debug_shows_variant_and_priority() {
    let fifo_dbg = format!("{:?}", SchedPolicy::Fifo(50));
    assert!(fifo_dbg.contains("Fifo"), "must show variant name");
    assert!(fifo_dbg.contains("50"), "must show priority value");

    let rr_dbg = format!("{:?}", SchedPolicy::RoundRobin(99));
    assert!(rr_dbg.contains("RoundRobin"), "must show variant name");
    assert!(rr_dbg.contains("99"), "must show priority value");

    // Two priorities of the same variant must be distinguishable in
    // the Debug rendering.
    assert_ne!(
        format!("{:?}", SchedPolicy::Fifo(1)),
        format!("{:?}", SchedPolicy::Fifo(10)),
        "different priorities must produce different debug output"
    );
}
#[test]
fn sched_policy_copy_preserves_priority() {
    let source = SchedPolicy::Fifo(42);
    let duplicate = source; // Copy
    let SchedPolicy::Fifo(prio) = duplicate else {
        panic!("copy must preserve variant and priority");
    };
    assert_eq!(prio, 42);
}
334    // -- SchedPolicy constructors --
335
#[test]
fn sched_policy_fifo_constructor() {
    // The lowercase constructor must build the `Fifo` variant and
    // carry the priority through untouched.
    let built = SchedPolicy::fifo(50);
    let SchedPolicy::Fifo(prio) = built else {
        panic!("expected Fifo");
    };
    assert_eq!(prio, 50);
}
#[test]
fn sched_policy_rr_constructor() {
    // The lowercase constructor must build the `RoundRobin` variant
    // and carry the priority through untouched.
    if let SchedPolicy::RoundRobin(prio) = SchedPolicy::round_robin(25) {
        assert_eq!(prio, 25);
    } else {
        panic!("expected RoundRobin");
    }
}
350    // -- MemPolicy tests --
351
#[test]
fn mempolicy_default_node_set_empty() {
    // `Default` names no nodes.
    let policy = MemPolicy::Default;
    assert!(policy.node_set().is_empty());
}
#[test]
fn mempolicy_local_node_set_empty() {
    // `Local` names no nodes.
    let policy = MemPolicy::Local;
    assert!(policy.node_set().is_empty());
}
#[test]
fn mempolicy_bind_node_set() {
    // `Bind` reports exactly the nodes it was built with.
    let policy = MemPolicy::Bind(BTreeSet::from([0, 2]));
    assert_eq!(policy.node_set(), [0, 2].into_iter().collect());
}
#[test]
fn mempolicy_preferred_node_set() {
    // A single preferred node shows up as a one-element set.
    let policy = MemPolicy::Preferred(1);
    let nodes = policy.node_set();
    assert_eq!(nodes, [1].into_iter().collect());
}
#[test]
fn mempolicy_interleave_node_set() {
    // `Interleave` reports exactly the nodes it was built with.
    let policy = MemPolicy::Interleave(BTreeSet::from([0, 1, 3]));
    assert_eq!(policy.node_set(), [0, 1, 3].into_iter().collect());
}
#[test]
fn mempolicy_preferred_many_node_set() {
    // The `preferred_many` constructor keeps every node it is given.
    let policy = MemPolicy::preferred_many([0, 2]);
    let nodes = policy.node_set();
    assert_eq!(nodes, [0, 2].into_iter().collect());
}
#[test]
fn mempolicy_weighted_interleave_node_set() {
    // The `weighted_interleave` constructor keeps every node it is
    // given.
    let policy = MemPolicy::weighted_interleave([1, 3]);
    let nodes = policy.node_set();
    assert_eq!(nodes, [1, 3].into_iter().collect());
}
#[test]
fn mempolicy_validate_bind_empty() {
    let empty_bind = MemPolicy::Bind(BTreeSet::new());
    let err = empty_bind.validate().unwrap_err();

    let names_variant_and_content = ["Bind", "NUMA node"]
        .iter()
        .all(|needle| err.contains(*needle));
    assert!(
        names_variant_and_content,
        "diagnostic must name the variant and required content: {err}",
    );

    // Pin the actionable trailer: it must point at the constructor a
    // copy-paste fix would use. Trimming the message back to its terse
    // form would silently regress the documented inline-fix UX (see
    // MemPolicy::validate doc).
    let names_constructor = err.contains("MemPolicy::bind(");
    assert!(
        names_constructor,
        "diagnostic must name the recommended constructor: {err}",
    );
}
#[test]
fn mempolicy_validate_interleave_empty() {
    let empty_interleave = MemPolicy::Interleave(BTreeSet::new());
    let err = empty_interleave.validate().unwrap_err();

    let names_variant_and_content = ["Interleave", "NUMA node"]
        .iter()
        .all(|needle| err.contains(*needle));
    assert!(
        names_variant_and_content,
        "diagnostic must name the variant and required content: {err}",
    );

    // The trailer must point at the lowercase constructor.
    let names_constructor = err.contains("MemPolicy::interleave(");
    assert!(
        names_constructor,
        "diagnostic must name the recommended constructor: {err}",
    );
}
#[test]
fn mempolicy_validate_preferred_many_empty() {
    let empty_preferred_many = MemPolicy::PreferredMany(BTreeSet::new());
    let err = empty_preferred_many.validate().unwrap_err();

    let names_variant_and_content = ["PreferredMany", "NUMA node"]
        .iter()
        .all(|needle| err.contains(*needle));
    assert!(
        names_variant_and_content,
        "diagnostic must name the variant and required content: {err}",
    );

    // The trailer must point at the lowercase constructor.
    let names_constructor = err.contains("MemPolicy::preferred_many(");
    assert!(
        names_constructor,
        "diagnostic must name the recommended constructor: {err}",
    );
}
#[test]
fn mempolicy_validate_weighted_interleave_empty() {
    let empty_weighted = MemPolicy::WeightedInterleave(BTreeSet::new());
    let err = empty_weighted.validate().unwrap_err();

    let names_variant_and_content = ["WeightedInterleave", "NUMA node"]
        .iter()
        .all(|needle| err.contains(*needle));
    assert!(
        names_variant_and_content,
        "diagnostic must name the variant and required content: {err}",
    );

    let names_constructor = err.contains("MemPolicy::weighted_interleave(");
    assert!(
        names_constructor,
        "diagnostic must name the recommended constructor: {err}",
    );

    // Regression guard: an earlier WeightedInterleave trailer
    // suggested `MemPolicy::Interleave([...])` — capital I, the tuple
    // variant — which does not compile, because
    // `Interleave(BTreeSet<usize>)` cannot be constructed from a
    // literal array. The right suggestion is the lowercase
    // `interleave(...)` function constructor; this assertion pins the
    // fix.
    let suggests_tuple_variant = err.contains("MemPolicy::Interleave([");
    assert!(
        !suggests_tuple_variant,
        "diagnostic must not suggest the non-compiling capital-I Interleave variant with a literal array: {err}",
    );
}
#[test]
fn mempolicy_validate_preferred_many_ok() {
    // A non-empty node list passes validation.
    let policy = MemPolicy::preferred_many([0]);
    assert!(policy.validate().is_ok());
}
#[test]
fn mempolicy_validate_weighted_interleave_ok() {
    // A non-empty node list passes validation.
    let policy = MemPolicy::weighted_interleave([0, 1]);
    assert!(policy.validate().is_ok());
}
463
#[test]
fn workload_config_validate_accepts_default() {
    // The out-of-the-box configuration must pass its own validation.
    let cfg = WorkloadConfig::default();
    cfg.validate()
        .expect("WorkloadConfig::default must self-validate (mem_policy=Default)");
}
470
#[test]
fn workload_config_validate_rejects_invalid_primary_mempolicy() {
    let empty_bind = MemPolicy::Bind(BTreeSet::new());
    let cfg = WorkloadConfig::default().mem_policy(empty_bind);

    let msg = cfg
        .validate()
        .expect_err("empty Bind nodemask on primary must reject")
        .to_string();

    let names_slot_variant_constraint = ["primary", "Bind", "NUMA node"]
        .iter()
        .all(|needle| msg.contains(*needle));
    assert!(
        names_slot_variant_constraint,
        "diagnostic must name the slot (primary), the variant (Bind), and the constraint (NUMA node): got {msg}",
    );
}
483
#[test]
fn workload_config_validate_rejects_invalid_composed_mempolicy() {
    let empty_interleave = MemPolicy::Interleave(BTreeSet::new());
    let bad = WorkSpec::default()
        .work_type(WorkType::SpinWait)
        .mem_policy(empty_interleave);
    let cfg = WorkloadConfig::default().composed(vec![bad]);

    let msg = cfg
        .validate()
        .expect_err("empty Interleave nodemask on composed[0] must reject")
        .to_string();

    let names_entry_group_variant = ["composed[0]", "group_idx 1", "Interleave"]
        .iter()
        .all(|needle| msg.contains(*needle));
    assert!(
        names_entry_group_variant,
        "diagnostic must name composed[0] + group_idx 1 + Interleave: got {msg}",
    );
}
501
#[test]
fn workload_config_validate_accepts_valid_composed_mempolicy() {
    // A composed entry with a non-empty Bind nodemask must validate.
    let bind_node_zero = MemPolicy::Bind(BTreeSet::from([0]));
    let ok = WorkSpec::default()
        .work_type(WorkType::SpinWait)
        .mem_policy(bind_node_zero);
    let cfg = WorkloadConfig::default().composed(vec![ok]);
    cfg.validate()
        .expect("non-empty Bind on composed[0] must validate");
}
511
/// Pins the `?` short-circuit behavior of the composed-validation
/// loop. The config carries three composed entries: composed[0] is
/// valid, composed[1] is an invalid Bind, and composed[2] is an
/// invalid Interleave. Only the first invalid entry (composed[1]) may
/// surface; composed[2] must NOT appear in the diagnostic. Switching
/// to an error-accumulator pattern (try_fold into a Vec, partition,
/// etc.) would change which composed[N] is reported and silently
/// invert the test author's debugging order. Editor note:
/// `.collect::<Result<_, _>>()` also stops at the first Err, so
/// replacing the for-loop with collect would not trip this test —
/// only a true accumulator would.
#[test]
fn workload_config_validate_short_circuits_first_invalid_composed() {
    // Every entry is a SpinWait spec that differs only in its policy.
    let spin_with = |policy: MemPolicy| {
        WorkSpec::default()
            .work_type(WorkType::SpinWait)
            .mem_policy(policy)
    };
    let valid_spec = spin_with(MemPolicy::Bind(BTreeSet::from([0])));
    let invalid_bind = spin_with(MemPolicy::Bind(BTreeSet::new()));
    let invalid_interleave = spin_with(MemPolicy::Interleave(BTreeSet::new()));

    let cfg =
        WorkloadConfig::default().composed(vec![valid_spec, invalid_bind, invalid_interleave]);
    let msg = cfg
        .validate()
        .expect_err("multi-composed with invalid entries must reject")
        .to_string();

    assert!(
        msg.contains("composed[1]"),
        "diagnostic must name the FIRST invalid composed entry (composed[1]): got {msg}",
    );
    assert!(
        msg.contains("Bind"),
        "diagnostic must name the first failing variant (Bind): got {msg}",
    );
    // This negative assertion is LOAD-BEARING on the short-circuit
    // semantics (`?` in the validate loop returns on the first Err),
    // not on the wording of the wrap. A future "errors-trailing-
    // suggestions" rewrite that mentions composed.len(), or reformats
    // the wrap to list sibling indices, would silently break this
    // guard. The right response then is to assert on the structural
    // property (e.g. the count of anyhow::Error frames) rather than to
    // relax the substring check.
    assert!(
        !msg.contains("composed[2]"),
        "short-circuit must not surface the second invalid entry (composed[2]): got {msg}",
    );
}
#[test]
fn mpol_flags_union() {
    // OR-ing two flags yields the OR of their raw bit values.
    let combined = MpolFlags::STATIC_NODES | MpolFlags::NUMA_BALANCING;
    assert_eq!(combined.bits(), (1 << 13) | (1 << 15));
}
#[test]
fn mpol_flags_none_is_zero() {
    // The empty flag set has no bits set.
    let none_bits = MpolFlags::NONE.bits();
    assert_eq!(none_bits, 0);
}
#[test]
fn work_mpol_flags_builder() {
    // The builder must store the flags it is handed.
    let spec = WorkSpec::default().mpol_flags(MpolFlags::STATIC_NODES);
    let stored = spec.mpol_flags;
    assert_eq!(stored, MpolFlags::STATIC_NODES);
}
#[test]
fn mpol_flags_contains_identity() {
    // Every value contains itself: empty, single-flag and multi-flag.
    let both = MpolFlags::STATIC_NODES | MpolFlags::NUMA_BALANCING;
    assert!(both.contains(both));
    assert!(MpolFlags::STATIC_NODES.contains(MpolFlags::STATIC_NODES));
    assert!(MpolFlags::NONE.contains(MpolFlags::NONE));
}
#[test]
fn mpol_flags_contains_superset_is_true_for_subset() {
    // A two-flag value contains each of its constituent flags.
    let both = MpolFlags::STATIC_NODES | MpolFlags::NUMA_BALANCING;
    for part in [MpolFlags::STATIC_NODES, MpolFlags::NUMA_BALANCING] {
        assert!(both.contains(part));
    }
}
#[test]
fn mpol_flags_contains_subset_is_false_for_superset() {
    // A single flag never contains a value with more bits than it has.
    let both = MpolFlags::STATIC_NODES | MpolFlags::NUMA_BALANCING;
    for single in [MpolFlags::STATIC_NODES, MpolFlags::NUMA_BALANCING] {
        assert!(!single.contains(both));
    }
}
#[test]
fn mpol_flags_contains_empty_is_always_true() {
    // `(x & 0) == 0` is true for every x, so any MpolFlags value —
    // NONE included — is a superset of NONE.
    let both = MpolFlags::STATIC_NODES | MpolFlags::NUMA_BALANCING;
    for superset in [MpolFlags::NONE, MpolFlags::STATIC_NODES, both] {
        assert!(superset.contains(MpolFlags::NONE));
    }
}
#[test]
fn mpol_flags_none_does_not_contain_any_set_flag() {
    // The empty set contains none of the individual flags.
    let individual_flags = [
        MpolFlags::STATIC_NODES,
        MpolFlags::RELATIVE_NODES,
        MpolFlags::NUMA_BALANCING,
    ];
    for flag in individual_flags {
        assert!(!MpolFlags::NONE.contains(flag));
    }
}
#[test]
fn mpol_flags_contains_rejects_disjoint_flag() {
    // Two single-flag values with no bit in common must fail
    // `contains` whichever one is the receiver.
    let static_nodes = MpolFlags::STATIC_NODES;
    let numa_balancing = MpolFlags::NUMA_BALANCING;
    assert!(!static_nodes.contains(MpolFlags::NUMA_BALANCING));
    assert!(!numa_balancing.contains(MpolFlags::STATIC_NODES));
}
#[test]
fn mpol_flags_contains_rejects_partial_overlap() {
    // Sharing only some bits is not enough: `contains` requires every
    // bit of `other` to be set in `self`. The two values below share
    // NUMA_BALANCING but each has one flag the other lacks.
    let static_and_balancing = MpolFlags::STATIC_NODES | MpolFlags::NUMA_BALANCING;
    let relative_and_balancing = MpolFlags::RELATIVE_NODES | MpolFlags::NUMA_BALANCING;
    assert!(!static_and_balancing.contains(relative_and_balancing));
    assert!(!relative_and_balancing.contains(static_and_balancing));
}
626    // -- CloneMode tests --
627
#[test]
fn clone_mode_default_is_fork() {
    // Fork is the historical behavior; any other default would
    // silently change the spawn path of every existing caller.
    let mode = CloneMode::default();
    assert!(matches!(mode, CloneMode::Fork));
}
#[test]
fn workload_config_default_clone_mode_is_fork() {
    // A default config must select the fork spawn path.
    let cfg = WorkloadConfig::default();
    let is_fork = matches!(cfg.clone_mode, CloneMode::Fork);
    assert!(is_fork);
}
#[test]
fn workload_config_clone_mode_builder() {
    // The builder must store the requested clone mode.
    let threaded = WorkloadConfig::default().clone_mode(CloneMode::Thread);
    let is_thread = matches!(threaded.clone_mode, CloneMode::Thread);
    assert!(is_thread);
}
#[test]
fn work_mem_policy_builder() {
    // The builder must store the policy variant it is handed.
    let bind_node_zero = MemPolicy::Bind(BTreeSet::from([0]));
    let spec = WorkSpec::default().mem_policy(bind_node_zero);
    assert!(matches!(spec.mem_policy, MemPolicy::Bind(_)));
}
#[test]
fn work_default_mempolicy_is_default() {
    // A default `WorkSpec` carries `MemPolicy::Default`.
    let spec = WorkSpec::default();
    let is_default_policy = matches!(spec.mem_policy, MemPolicy::Default);
    assert!(is_default_policy);
}
#[test]
fn workload_config_default_mempolicy() {
    // A default `WorkloadConfig` carries `MemPolicy::Default`.
    let cfg = WorkloadConfig::default();
    let is_default_policy = matches!(cfg.mem_policy, MemPolicy::Default);
    assert!(is_default_policy);
}
/// The top-level `comm` / `uid` / `gid` / `numa_node` knobs mirror the
/// matcher knobs that already exist on [`WorkSpec`]. This test checks
/// that all four default to `None`.
#[test]
fn workload_config_default_matcher_fields_are_none() {
    let cfg = WorkloadConfig::default();
    let WorkloadConfig {
        comm,
        uid,
        gid,
        numa_node,
        ..
    } = &cfg;
    assert!(comm.is_none());
    assert!(uid.is_none());
    assert!(gid.is_none());
    assert!(numa_node.is_none());
}
#[test]
fn workload_config_matcher_field_builders() {
    // Each matcher builder must set its own field to `Some(value)`.
    let cfg = WorkloadConfig::default()
        .comm("ktstr-worker")
        .uid(1001)
        .gid(1002)
        .numa_node(0);

    let comm = cfg.comm.as_deref();
    assert_eq!(comm, Some("ktstr-worker"));
    assert_eq!(cfg.uid, Some(1001));
    assert_eq!(cfg.gid, Some(1002));
    assert_eq!(cfg.numa_node, Some(0));
}
/// Round-trips a default `WorkloadConfig` through JSON so that every
/// field's serde handling is exercised together — a field silently
/// missing a derive would show up here.
#[test]
fn workload_config_default_roundtrips() {
    let first_pass = serde_json::to_string(&WorkloadConfig::default()).unwrap();
    let decoded: WorkloadConfig = serde_json::from_str(&first_pass).unwrap();
    // WorkloadConfig has no PartialEq, so equality is checked on the
    // re-serialized text instead of on the values.
    let second_pass = serde_json::to_string(&decoded).unwrap();
    assert_eq!(first_pass, second_pass);
}
694
695    // -- resolve_work_type --
696
697    #[test]
698    fn resolve_work_type_not_swappable() {
699        let base = WorkType::SpinWait;
700        let over = WorkType::YieldHeavy;
701        let result = resolve_work_type(&base, Some(&over), false, 4);
702        assert!(matches!(result, WorkType::SpinWait));
703    }
704    #[test]
705    fn resolve_work_type_swappable_applies_override() {
706        let base = WorkType::SpinWait;
707        let over = WorkType::YieldHeavy;
708        let result = resolve_work_type(&base, Some(&over), true, 4);
709        assert!(matches!(result, WorkType::YieldHeavy));
710    }
711    #[test]
712    fn resolve_work_type_swappable_no_override() {
713        let base = WorkType::SpinWait;
714        let result = resolve_work_type(&base, None, true, 4);
715        assert!(matches!(result, WorkType::SpinWait));
716    }
717    #[test]
718    fn resolve_work_type_group_size_mismatch() {
719        let base = WorkType::SpinWait;
720        let over = WorkType::pipe_io(100); // group_size = 2
721        let result = resolve_work_type(&base, Some(&over), true, 3); // 3 not divisible by 2
722        assert!(matches!(result, WorkType::SpinWait));
723    }
724    #[test]
725    fn resolve_work_type_group_size_match() {
726        let base = WorkType::SpinWait;
727        let over = WorkType::pipe_io(100); // group_size = 2
728        let result = resolve_work_type(&base, Some(&over), true, 4); // 4 divisible by 2
729        assert!(matches!(result, WorkType::PipeIo { .. }));
730    }
731
732    // -- WorkSpec builder --
733
734    #[test]
735    fn work_builder_chain() {
736        let w = WorkSpec::default()
737            .workers(8)
738            .work_type(WorkType::bursty(
739                Duration::from_millis(10),
740                Duration::from_millis(20),
741            ))
742            .sched_policy(SchedPolicy::Batch)
743            .affinity(AffinityIntent::SingleCpu)
744            .nice(7);
745        assert_eq!(w.num_workers, Some(8));
746        if let WorkType::Bursty {
747            burst_duration,
748            sleep_duration,
749        } = w.work_type
750        {
751            assert_eq!(burst_duration, Duration::from_millis(10));
752            assert_eq!(sleep_duration, Duration::from_millis(20));
753        } else {
754            panic!("expected Bursty variant; got {:?}", w.work_type);
755        }
756        assert!(matches!(w.sched_policy, SchedPolicy::Batch));
757        assert!(matches!(w.affinity, AffinityIntent::SingleCpu));
758        assert_eq!(w.nice, Some(7));
759    }
760    #[test]
761    fn work_default_values() {
762        let w = WorkSpec::default();
763        assert_eq!(w.num_workers, None);
764        assert!(matches!(w.work_type, WorkType::SpinWait));
765        assert!(matches!(w.sched_policy, SchedPolicy::Normal));
766        assert!(matches!(w.affinity, AffinityIntent::Inherit));
767        // Default nice is None — same skip semantics as
768        // [`WorkloadConfig::nice`].
769        assert_eq!(w.nice, None);
770    }
771
772    /// GAP 9: pin that `SchedPolicy::fifo` / `round_robin` /
773    /// `deadline` are usable in const context. A regression where
774    /// any of the three dropped `const` (e.g. switched from `Self {
775    /// .. }` to a builder) would silently break static
776    /// `KtstrTestEntry` declarations that bake a fixed policy.
777    #[test]
778    fn sched_policy_constructors_usable_in_const_context() {
779        const F: SchedPolicy = SchedPolicy::fifo(50);
780        const RR: SchedPolicy = SchedPolicy::round_robin(99);
781        const DL: SchedPolicy = SchedPolicy::deadline(
782            Duration::from_millis(10),
783            Duration::from_millis(20),
784            Duration::from_millis(30),
785        );
786        assert!(matches!(F, SchedPolicy::Fifo(50)));
787        assert!(matches!(RR, SchedPolicy::RoundRobin(99)));
788        assert!(matches!(
789            DL,
790            SchedPolicy::Deadline {
791                runtime,
792                deadline,
793                period
794            } if runtime == Duration::from_millis(10)
795                && deadline == Duration::from_millis(20)
796                && period == Duration::from_millis(30)
797        ));
798    }
799
800    /// GAP 10: pin `SchedPolicy::default() == Normal` and that
801    /// every variant roundtrips through serde unchanged. Default
802    /// drift would silently re-class every WorkSpec that omits
803    /// `sched_policy`; serde drift would break captured config
804    /// replay across all 7 variants.
805    #[test]
806    fn sched_policy_default_is_normal_and_serde_roundtrip_per_variant() {
807        let d: SchedPolicy = Default::default();
808        assert!(matches!(d, SchedPolicy::Normal));
809
810        let variants = [
811            SchedPolicy::Normal,
812            SchedPolicy::Batch,
813            SchedPolicy::Idle,
814            SchedPolicy::Fifo(50),
815            SchedPolicy::RoundRobin(99),
816            SchedPolicy::Deadline {
817                runtime: Duration::from_millis(10),
818                deadline: Duration::from_millis(20),
819                period: Duration::from_millis(30),
820            },
821            SchedPolicy::Ext,
822        ];
823        for original in &variants {
824            let bytes = serde_json::to_vec(original).expect("serialize");
825            let restored: SchedPolicy = serde_json::from_slice(&bytes).expect("deserialize");
826            assert_eq!(restored, *original, "roundtrip drift for {original:?}");
827        }
828    }
829
830    /// `SchedClass::Ext` resolves to `SchedPolicy::Ext` (the SCHED_EXT
831    /// syscall path), closing the prior gap where it mapped to `Normal`.
832    /// Pins the coarse-class -> policy mapping the verifier's SCHED_EXT
833    /// dispatch probe relies on.
834    #[test]
835    fn sched_class_ext_maps_to_sched_policy_ext() {
836        assert_eq!(SchedClass::Ext.to_policy(), SchedPolicy::Ext);
837    }
838}