ktstr/scenario/
stress.rs

1//! Stress and edge-case scenario implementations.
2
3use super::ops::{CgroupDef, CpusetSpec, HoldSpec, Op, Setup, Step, execute_steps};
4use super::{CgroupGroup, Ctx, collect_all, spawn_diverse};
5use crate::assert::AssertResult;
6use crate::workload::*;
7use anyhow::Result;
8use std::collections::BTreeSet;
9use std::thread;
10use std::time::{Duration, Instant};
11
12fn per_cpu_defs(ctx: &super::Ctx) -> Vec<CgroupDef> {
13    let all = ctx.topo.all_cpus();
14    if all.is_empty() {
15        return Vec::new();
16    }
17    let n = (all.len() - 1).min(64);
18    (0..n)
19        .map(|i| {
20            CgroupDef::named(format!("many_{i}"))
21                .cpuset(CpusetSpec::exact([all[i]]))
22                .workers(1)
23        })
24        .collect()
25}
26
27fn cgroup_per_cpu_steps(ctx: &Ctx) -> Vec<Step> {
28    vec![Step {
29        setup: Setup::Factory(per_cpu_defs),
30        ops: vec![],
31        hold: HoldSpec::fixed(Duration::from_secs(1) + ctx.duration),
32    }]
33}
34
35/// One cgroup per CPU, each with a single pinned worker. Stresses
36/// the scheduler with up to 64 cgroups on disjoint single-CPU cpusets.
37pub fn custom_cgroup_per_cpu(ctx: &Ctx) -> Result<AssertResult> {
38    execute_steps(ctx, cgroup_per_cpu_steps(ctx))
39}
40
41fn reuse_defs(ctx: &super::Ctx) -> Vec<CgroupDef> {
42    let all = ctx.topo.all_cpus();
43    if all.is_empty() {
44        return Vec::new();
45    }
46    let n = (all.len() - 1).min(15);
47    let half = n / 2;
48    (0..half)
49        .map(|i| {
50            CgroupDef::named(format!("reuse_{i}"))
51                .cpuset(CpusetSpec::exact([all[i % all.len()]]))
52                .workers(1)
53        })
54        .collect()
55}
56
57fn cgroup_exhaust_reuse_steps(ctx: &Ctx) -> Vec<Step> {
58    let all = ctx.topo.all_cpus();
59    if all.is_empty() {
60        return Vec::new();
61    }
62    let n = (all.len() - 1).min(15);
63    let half = n / 2;
64
65    // Phase 1 ops: create empty cgroups with cpusets but no workers.
66    // Uses raw AddCgroup + SetCpuset ops (not CgroupDefs) because
67    // CgroupDef always spawns workers via apply_setup.
68    let mut exhaust_ops = Vec::new();
69    for i in 0..n {
70        let name = format!("exhaust_{i}");
71        exhaust_ops.push(Op::add_cgroup(name.clone()));
72        exhaust_ops.push(Op::set_cpuset(
73            name,
74            CpusetSpec::exact([all[i % all.len()]]),
75        ));
76    }
77
78    let mut remove_ops = Vec::new();
79    for i in 0..half {
80        remove_ops.push(Op::remove_cgroup(format!("exhaust_{i}")));
81    }
82
83    vec![
84        // Phase 1: create N exhaust cgroups (no workers — they just occupy slots).
85        Step::new(exhaust_ops, HoldSpec::fixed(Duration::from_secs(1))),
86        // Phase 2: remove first half.
87        Step::new(remove_ops, HoldSpec::fixed(Duration::from_secs(1))),
88        // Phase 3: create replacement cgroups with workers.
89        Step {
90            setup: Setup::Factory(reuse_defs),
91            ops: vec![],
92            hold: HoldSpec::fixed(ctx.duration),
93        },
94    ]
95}
96
97/// Exhaust cgroup slots with empty cpuset-pinned cgroups, remove half,
98/// then create replacement cgroups with workers to test slot reuse.
99pub fn custom_cgroup_exhaust_reuse(ctx: &Ctx) -> Result<AssertResult> {
100    execute_steps(ctx, cgroup_exhaust_reuse_steps(ctx))
101}
102
103/// Per-CPU pinned workers + custom gap assertion (max_gap_ms > 1500).
104/// Not expressible via Op/Step's standard assert pipeline.
105pub fn custom_cgroup_dsq_contention(ctx: &Ctx) -> Result<AssertResult> {
106    // Multiple CPUs sharing a DSQ under bursty wake patterns. Lockless
107    // peek can miss tasks when store visibility ordering delays the
108    // first_task pointer update. Without a fallback to the locked
109    // consume path, CPUs go idle and never retry.
110    let all = ctx.topo.all_cpus();
111    if all.len() < 4 {
112        return Ok(AssertResult::skip("need >=4 CPUs"));
113    }
114    let last = all.len() - 1;
115
116    let mut _guard = CgroupGroup::new(ctx.cgroups);
117    _guard.add_cgroup("cg_0", &all[..last].iter().copied().collect())?;
118    thread::sleep(ctx.settle);
119
120    let n_unpinned = (last * 3).max(8);
121    let mut h_cgroup = WorkloadHandle::spawn(&WorkloadConfig {
122        num_workers: n_unpinned,
123        work_type: WorkType::bursty(Duration::from_millis(10), Duration::from_millis(5)),
124        ..Default::default()
125    })?;
126    ctx.cgroups.move_tasks("cg_0", &h_cgroup.worker_pids())?;
127
128    let n_pinned = last.min(4);
129    let mut pinned_handles = Vec::new();
130    for &cpu in all.iter().take(n_pinned) {
131        let h = WorkloadHandle::spawn(&WorkloadConfig {
132            num_workers: 1,
133            affinity: AffinityIntent::Exact([cpu].into_iter().collect()),
134            work_type: WorkType::bursty(Duration::from_millis(10), Duration::from_millis(5)),
135            ..Default::default()
136        })?;
137        ctx.cgroups.move_tasks("cg_0", &h.worker_pids())?;
138        pinned_handles.push(h);
139    }
140
141    h_cgroup.start();
142    for h in &mut pinned_handles {
143        h.start();
144    }
145    thread::sleep(ctx.duration);
146
147    let mut r = AssertResult::pass();
148    {
149        let reports = h_cgroup.stop_and_collect();
150        r.merge(ctx.assert.assert_cgroup(&reports, None));
151    }
152    for h in pinned_handles {
153        let reports = h.stop_and_collect();
154        for w in &reports {
155            if w.max_gap_ms > 1500 {
156                r.record_fail(crate::assert::AssertDetail::new(
157                    crate::assert::DetailKind::Stuck,
158                    format!(
159                        "pinned worker {} on CPU {} had {}ms gap (dispatch contention stall)",
160                        w.tid,
161                        w.cpus_used.iter().next().unwrap_or(&0),
162                        w.max_gap_ms
163                    ),
164                ));
165            }
166        }
167        r.merge(ctx.assert.assert_cgroup(&reports, None));
168    }
169    Ok(r)
170}
171
172/// Uses spawn_diverse helper for 5 different workload types across cgroups.
173/// Dynamic cgroup count and workload rotation logic is not Op/Step compatible.
174pub fn custom_cgroup_workload_variety(ctx: &Ctx) -> Result<AssertResult> {
175    // 5 diverse workload types (SpinWait, Bursty, IoSyncWrite, Mixed, YieldHeavy) across 5 cgroups, no flags. Exercises base dispatch across the diverse work-type set.
176    if ctx.topo.all_cpus().len() < 6 {
177        return Ok(AssertResult::skip("need >=6 CPUs for 5 cgroups"));
178    }
179    let names: Vec<String> = (0..5).map(|i| format!("cg_{i}")).collect();
180    let mut _guard = CgroupGroup::new(ctx.cgroups);
181    for n in &names {
182        _guard.add_cgroup_no_cpuset(n)?;
183    }
184    thread::sleep(ctx.settle);
185    let name_refs: Vec<&str> = names.iter().map(|s| s.as_str()).collect();
186    let handles = spawn_diverse(ctx, &name_refs)?;
187    thread::sleep(ctx.duration);
188    Ok(collect_all(handles, &ctx.assert))
189}
190
191/// Uses spawn_diverse for workload variety + manual cpuset partitioning.
192pub fn custom_cgroup_cpuset_workload_variety(ctx: &Ctx) -> Result<AssertResult> {
193    // 3 diverse workload types (SpinWait, Bursty, IoSyncWrite) across 3 cpuset-partitioned cgroups.
194    let all = ctx.topo.all_cpus();
195    if all.len() < 6 {
196        return Ok(AssertResult::skip("need >=6 CPUs"));
197    }
198    let last = all.len() - 1;
199    let chunk = last / 3;
200    let names = ["cg_0", "cg_1", "cg_2"];
201    let mut _guard = CgroupGroup::new(ctx.cgroups);
202    for (i, n) in names.iter().enumerate() {
203        let start = i * chunk;
204        let end = if i == 2 { last } else { (i + 1) * chunk };
205        _guard.add_cgroup(n, &all[start..end].iter().copied().collect())?;
206    }
207    thread::sleep(ctx.settle);
208    let handles = spawn_diverse(ctx, &names)?;
209    thread::sleep(ctx.duration);
210    Ok(collect_all(handles, &ctx.assert))
211}
212
213/// spawn_diverse + dynamic cgroup add/remove mid-run.
214pub fn custom_cgroup_dynamic_workload_variety(ctx: &Ctx) -> Result<AssertResult> {
215    // Dynamic cgroup ops with diverse workloads.
216    if ctx.topo.all_cpus().len() < 5 {
217        return Ok(AssertResult::skip("need >=5 CPUs for dynamic cgroup add"));
218    }
219    let names: Vec<String> = (0..3).map(|i| format!("cg_{i}")).collect();
220    let mut _guard = CgroupGroup::new(ctx.cgroups);
221    for n in &names {
222        _guard.add_cgroup_no_cpuset(n)?;
223    }
224    thread::sleep(ctx.settle);
225    let name_refs: Vec<&str> = names.iter().map(|s| s.as_str()).collect();
226    let mut handles = spawn_diverse(ctx, &name_refs)?;
227    thread::sleep(ctx.duration / 3);
228    // Add cgroups with more workload types
229    _guard.add_cgroup_no_cpuset("cg_3")?;
230    let mut h = WorkloadHandle::spawn(&WorkloadConfig {
231        num_workers: 4,
232        work_type: WorkType::bursty(Duration::from_millis(100), Duration::from_millis(50)),
233        ..Default::default()
234    })?;
235    ctx.cgroups.move_tasks("cg_3", &h.worker_pids())?;
236    h.start();
237    handles.push(h);
238    thread::sleep(ctx.duration / 3);
239    // Remove cg_3 — guard still tracks it, but explicit removal
240    // during the scenario is fine; guard's drop will skip missing cgroups.
241    // The cg_3 reports must flow through the scenario's assertion
242    // checks alongside the surviving handles' reports — discarding
243    // the WorkerReports lost the entire mid-run cg_3 workload from
244    // the scenario verdict, masking starvation/affinity violations
245    // that occurred during cg_3's duration/3 hold.
246    let cg3_result: Option<AssertResult> = handles.pop().map(|h| {
247        let reports = h.stop_and_collect();
248        ctx.assert.assert_cgroup_with_numa(&reports, None, None)
249    });
250    // Best-effort early teardown: the CgroupGroup guard's Drop
251    // skips missing cgroups, so a transient ENOENT (already
252    // unwound by a sibling step) or EBUSY (drain races a stray
253    // task migration) here is a no-op for cleanup correctness.
254    // Bailing would abort the scenario before the final 1/3
255    // hold runs.
256    if let Err(e) = ctx.cgroups.remove_cgroup("cg_3") {
257        tracing::warn!(err = %format!("{e:#}"), "stress: early remove_cgroup(cg_3) failed; guard Drop will retry on scenario teardown");
258    }
259    thread::sleep(ctx.duration / 3);
260    let mut r = collect_all(handles, &ctx.assert);
261    if let Some(cg3) = cg3_result {
262        r.merge(cg3);
263    }
264    Ok(r)
265}
266
267/// LLC-specific cpusets + tight flip loop. Uses Instant::now() deadline
268/// loop and LLC-aligned BTreeSets computed at runtime. Not Op/Step compatible.
269pub fn custom_cgroup_cpuset_cross_llc_race(ctx: &Ctx) -> Result<AssertResult> {
270    // Need at least 2 LLCs to flip cpusets across LLC boundaries.
271    if ctx.topo.num_llcs() < 2 {
272        return Ok(AssertResult::skip("need >=2 LLCs"));
273    }
274    let llc0_full: BTreeSet<usize> = ctx.topo.llc_aligned_cpuset(0);
275    let llc1_full: BTreeSet<usize> = ctx.topo.llc_aligned_cpuset(1);
276    if llc0_full.is_empty() {
277        return Ok(AssertResult::skip("LLC0 has no CPUs"));
278    }
279    // LLC1 is also load-bearing — the cross-LLC flip below assigns
280    // cg_1 to llc1 and cg_0 to a cross0=llc1 set on flipped
281    // iterations. An empty llc1 makes both writes degenerate
282    // (cg_0 set_cpuset to empty errors, cg_1 has no CPUs to run
283    // workers on) and there is no race to probe. Skip rather than
284    // silently degrade.
285    if llc1_full.is_empty() {
286        return Ok(AssertResult::skip("LLC1 has no CPUs"));
287    }
288
289    // Hold one LLC0 CPU out of both cgroups' cpusets: it is filtered
290    // from llc0 below and never appears in llc0/llc1/cross0/cross1, so
291    // the cross-LLC flip never packs every LLC0 CPU, leaving host headroom.
292    let reserved = *llc0_full.iter().next().unwrap();
293    let llc0: BTreeSet<usize> = llc0_full
294        .iter()
295        .copied()
296        .filter(|c| *c != reserved)
297        .collect();
298    let llc1: BTreeSet<usize> = llc1_full.clone();
299    if llc0.is_empty() {
300        return Ok(AssertResult::skip(
301            "LLC0 too small after reserving for cg_0",
302        ));
303    }
304
305    // Two cgroups, initially each on its own LLC.
306    let mut _guard = CgroupGroup::new(ctx.cgroups);
307    _guard.add_cgroup("cg_0", &llc0)?;
308    _guard.add_cgroup("cg_1", &llc1)?;
309    thread::sleep(Duration::from_secs(2));
310
311    // Oversubscribe both cgroups — lots of enqueue pressure.
312    let n = llc0.len().max(4) * 8;
313    let mut h0 = WorkloadHandle::spawn(&WorkloadConfig {
314        num_workers: n,
315        work_type: WorkType::Mixed,
316        ..Default::default()
317    })?;
318    ctx.cgroups.move_tasks("cg_0", &h0.worker_pids())?;
319    let mut h1 = WorkloadHandle::spawn(&WorkloadConfig {
320        num_workers: n,
321        work_type: WorkType::Mixed,
322        ..Default::default()
323    })?;
324    ctx.cgroups.move_tasks("cg_1", &h1.worker_pids())?;
325    h0.start();
326    h1.start();
327
328    // Rapidly flip cpusets across LLC boundaries to race with LLC assignment.
329    // Build cross-LLC sets (excluding the reserved CPU).
330    let cross0: BTreeSet<usize> = llc1.iter().copied().collect();
331    let cross1: BTreeSet<usize> = llc0.iter().copied().collect();
332    let deadline = Instant::now() + ctx.duration;
333    let mut flip = false;
334    while Instant::now() < deadline {
335        // Best-effort cpuset flips: a transient EBUSY (kernel
336        // updating the same hierarchy in-flight) or ENOENT (cgroup
337        // removed by a sibling step) on either write leaves the
338        // cgroup on its prior cpuset for this iteration. The next
339        // 200ms iteration retries with a fresh write, so a single
340        // dropped flip is observed by the scheduler as a longer
341        // hold on the prior layout — exactly the cross-LLC race
342        // pathology this scenario probes. Bailing on the first
343        // failed write would mask the race rather than expose it.
344        if flip {
345            // cg_0 on LLC1 CPUs, cg_1 on LLC0 CPUs — cross-LLC
346            if let Err(e) = ctx.cgroups.set_cpuset("cg_0", &cross0) {
347                tracing::warn!(err = %format!("{e:#}"), "cross-LLC race: set_cpuset cg_0 cross0 failed; flip skipped");
348            }
349            if let Err(e) = ctx.cgroups.set_cpuset("cg_1", &cross1) {
350                tracing::warn!(err = %format!("{e:#}"), "cross-LLC race: set_cpuset cg_1 cross1 failed; flip skipped");
351            }
352        } else {
353            // cg_0 on LLC0 CPUs, cg_1 on LLC1 CPUs — aligned
354            if let Err(e) = ctx.cgroups.set_cpuset("cg_0", &llc0) {
355                tracing::warn!(err = %format!("{e:#}"), "cross-LLC race: set_cpuset cg_0 llc0 failed; flip skipped");
356            }
357            if let Err(e) = ctx.cgroups.set_cpuset("cg_1", &llc1) {
358                tracing::warn!(err = %format!("{e:#}"), "cross-LLC race: set_cpuset cg_1 llc1 failed; flip skipped");
359            }
360        }
361        flip = !flip;
362        // Short sleep to let rebalancing/reconfiguration run between flips.
363        thread::sleep(Duration::from_millis(200));
364    }
365
366    let mut r = AssertResult::pass();
367    r.merge(ctx.assert.assert_cgroup(&h0.stop_and_collect(), None));
368    r.merge(ctx.assert.assert_cgroup(&h1.stop_and_collect(), None));
369    Ok(r)
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cgroup::CgroupManager;
    use crate::topology::TestTopology;

    /// Builds a `Ctx` over the given manager and topology with a 2s
    /// duration, a 100ms settle and the default assertion checks.
    fn ctx_for_test<'a>(cgroups: &'a CgroupManager, topo: &'a TestTopology) -> Ctx<'a> {
        Ctx {
            cgroups,
            topo,
            duration: Duration::from_secs(2),
            workers_per_cgroup: 1,
            sched_pid: Some(1),
            settle: Duration::from_millis(100),
            work_type_override: None,
            assert: crate::assert::Assert::default_checks(),
            wait_for_map_write: false,
            current_step: std::sync::Arc::new(std::sync::atomic::AtomicU16::new(0)),
            entry_name: None,
            variant_hash: 0,
        }
    }

    /// Runs `step`'s `Setup::Factory` against `ctx` and returns its defs;
    /// panics with "`what` should use Factory setup" for any other setup.
    #[track_caller]
    fn factory_defs(step: &Step, ctx: &Ctx<'_>, what: &str) -> Vec<CgroupDef> {
        match step.setup {
            Setup::Factory(build) => build(ctx),
            Setup::Defs(_) => panic!("{what} should use Factory setup"),
        }
    }

    /// Counts the ops in `step` that satisfy `pred`.
    fn count_ops(step: &Step, pred: impl Fn(&Op) -> bool) -> usize {
        step.ops.iter().filter(|&op| pred(op)).count()
    }

    /// Asserts that `outcome` is a skip carrying exactly one detail whose
    /// message equals `expected`.
    #[track_caller]
    fn assert_single_skip(outcome: &AssertResult, expected: &str) {
        assert!(outcome.is_skip());
        let msgs: Vec<&str> = outcome
            .skip_details()
            .map(|d| d.message.as_str())
            .collect();
        assert_eq!(msgs, vec![expected]);
    }

    /// Asserts the three-phase shape of `cgroup_exhaust_reuse_steps`:
    /// `n` paired AddCgroup+SetCpuset ops, `half` RemoveCgroup ops, and a
    /// factory phase with no ops that yields `half` single-worker defs.
    #[track_caller]
    fn assert_exhaust_reuse_counts(ctx: &Ctx<'_>, n: usize, half: usize) {
        let steps = cgroup_exhaust_reuse_steps(ctx);
        assert_eq!(steps.len(), 3);

        // Phase 1: every cgroup contributes one AddCgroup and one SetCpuset.
        let adds = count_ops(&steps[0], |o| matches!(o, Op::AddCgroup { .. }));
        let sets = count_ops(&steps[0], |o| matches!(o, Op::SetCpuset { .. }));
        assert_eq!(adds, n);
        assert_eq!(sets, n);
        assert_eq!(steps[0].ops.len(), 2 * n);

        // Phase 2: only removals, one per cgroup in the first half.
        let removes = count_ops(&steps[1], |o| matches!(o, Op::RemoveCgroup { .. }));
        assert_eq!(removes, half);
        assert_eq!(steps[1].ops.len(), half);

        // Phase 3: factory-built replacement defs, no ops.
        assert!(steps[2].ops.is_empty());
        let defs = factory_defs(&steps[2], ctx, "phase 3");
        assert_eq!(defs.len(), half);
        for d in &defs {
            assert_eq!(d.works[0].num_workers, Some(1));
        }
    }

    #[test]
    fn per_cpu_factory_produces_cgroup_per_cpu_capped_at_64() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 4, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        let steps = cgroup_per_cpu_steps(&ctx);
        assert_eq!(steps.len(), 1);
        assert!(steps[0].ops.is_empty());
        let defs = factory_defs(&steps[0], &ctx, "per_cpu");
        // n = (all_cpus - 1).min(64) with 4 CPUs → 3 defs.
        assert_eq!(defs.len(), 3);
        for (i, d) in defs.iter().enumerate() {
            assert_eq!(d.name, format!("many_{i}"));
            assert!(d.cpuset.is_some());
            assert_eq!(d.works[0].num_workers, Some(1));
        }
    }

    /// 8 CPUs: n = (8-1).min(15) = 7 cgroups and half = 7/2 = 3.
    #[test]
    fn exhaust_reuse_builds_three_phases_with_matching_add_remove_counts() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 8, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        assert_exhaust_reuse_counts(&ctx, 7, 3);
    }

    /// Pins the `.min(64)` clamp arm in `per_cpu_defs`: with 66 CPUs,
    /// `n = (66 - 1).min(64) = 64` while an unclamped `len - 1` would be
    /// 65, so asserting exactly 64 defs genuinely kills a deleted/wrong
    /// clamp (the 4-CPU test only exercises the `len - 1` path).
    #[test]
    fn per_cpu_factory_clamps_at_64_for_topology_above_65_cpus() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo =
            TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 66, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        // 66 single-LLC CPUs: (66-1)=65 clamps DOWN to 64; without the
        // .min(64) the factory would emit 65 defs.
        assert_eq!(ctx.topo.all_cpus().len(), 66);

        let steps = cgroup_per_cpu_steps(&ctx);
        let defs = factory_defs(&steps[0], &ctx, "per_cpu");
        assert_eq!(defs.len(), 64);
        assert_eq!(defs.last().unwrap().name, "many_63");
        assert_eq!(defs[0].cpuset, Some(CpusetSpec::exact([0])));
        assert_eq!(defs[63].cpuset, Some(CpusetSpec::exact([63])));
    }

    /// Pins the per-def cpuset CONTENT (`Exact([all[i]])`) and the
    /// `Duration::from_secs(1) + ctx.duration` hold arithmetic, which the
    /// basic 4-CPU test only covers with an `is_some()` check.
    #[test]
    fn per_cpu_factory_pins_exact_single_cpu_cpuset_and_hold() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 4, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        let steps = cgroup_per_cpu_steps(&ctx);
        assert_eq!(
            steps[0].hold,
            HoldSpec::Fixed(Duration::from_secs(1) + ctx.duration)
        );

        let defs = factory_defs(&steps[0], &ctx, "per_cpu");
        assert_eq!(defs.len(), 3);
        for (i, d) in defs.iter().enumerate() {
            assert_eq!(d.cpuset, Some(CpusetSpec::exact([i])));
        }
    }

    /// Pins the `.min(15)` clamp and odd-`n` half-division in
    /// `cgroup_exhaust_reuse_steps` / `reuse_defs`: with 17 CPUs,
    /// `n = (17-1).min(15) = 15` (an unclamped `len - 1` would be 16)
    /// and `half = 15/2 = 7`, distinct from the 8-CPU fixture's 7/3
    /// counts — so the asserted 15/30/7/7 kill a deleted/wrong clamp.
    #[test]
    fn exhaust_reuse_clamps_n_at_15_and_half_at_7() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo =
            TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 17, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        assert_exhaust_reuse_counts(&ctx, 15, 7);
    }

    /// Pins the `< 4 CPUs` early-skip arm of `custom_cgroup_dsq_contention`
    /// (returns before any cgroup op or workload spawn, so host-isolable).
    #[test]
    fn dsq_contention_skips_below_4_cpus() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 2, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        let r = custom_cgroup_dsq_contention(&ctx).unwrap();
        assert_single_skip(&r, "need >=4 CPUs");
    }

    /// Pins the `< 6 CPUs` early-skip arm of `custom_cgroup_workload_variety`
    /// (returns before `spawn_diverse`).
    #[test]
    fn workload_variety_skips_below_6_cpus() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 4, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        let r = custom_cgroup_workload_variety(&ctx).unwrap();
        assert_single_skip(&r, "need >=6 CPUs for 5 cgroups");
    }

    /// Pins the `< 6 CPUs` early-skip arm of
    /// `custom_cgroup_cpuset_workload_variety`. Asserts the distinct
    /// message ("need >=6 CPUs") so it cannot be swapped with the
    /// non-cpuset variety fn's "need >=6 CPUs for 5 cgroups".
    #[test]
    fn cpuset_workload_variety_skips_below_6_cpus() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 4, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        let r = custom_cgroup_cpuset_workload_variety(&ctx).unwrap();
        assert_single_skip(&r, "need >=6 CPUs");
    }

    /// Pins the `< 5 CPUs` early-skip arm of
    /// `custom_cgroup_dynamic_workload_variety`. 4 CPUs hits this
    /// 5-threshold skip (the fn has only the one gate).
    #[test]
    fn dynamic_workload_variety_skips_below_5_cpus() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 4, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        let r = custom_cgroup_dynamic_workload_variety(&ctx).unwrap();
        assert_single_skip(&r, "need >=5 CPUs for dynamic cgroup add");
    }

    /// Pins the `num_llcs() < 2` early-skip arm of
    /// `custom_cgroup_cpuset_cross_llc_race` (single-LLC topology
    /// returns before any LLC-aligned cpuset or cgroup op).
    #[test]
    fn cross_llc_race_skips_with_single_llc() {
        let cgroups = CgroupManager::new("/nonexistent");
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 1, 4, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        assert_eq!(ctx.topo.num_llcs(), 1);
        let r = custom_cgroup_cpuset_cross_llc_race(&ctx).unwrap();
        assert_single_skip(&r, "need >=2 LLCs");
    }

    /// Pins the deepest host-reachable skip in
    /// `custom_cgroup_cpuset_cross_llc_race`: the
    /// `LLC0 too small after reserving for cg_0` arm. A 2-LLC topology
    /// with one CPU per LLC passes the `num_llcs() >= 2` and
    /// non-empty-LLC gates, then reserving LLC0's only CPU leaves LLC0
    /// empty.
    #[test]
    fn cross_llc_race_skips_when_llc0_single_cpu_after_reserve() {
        let cgroups = CgroupManager::new("/nonexistent");
        // numa=1, llcs=2, cores=1, threads=1 -> 2 CPUs, LLC0={0}, LLC1={1}.
        let topo = TestTopology::from_vm_topology(&crate::vmm::topology::Topology::new(1, 2, 1, 1));
        let ctx = ctx_for_test(&cgroups, &topo);

        assert_eq!(ctx.topo.num_llcs(), 2);
        assert_eq!(ctx.topo.llc_aligned_cpuset(0), BTreeSet::from([0]));
        assert_eq!(ctx.topo.llc_aligned_cpuset(1), BTreeSet::from([1]));

        let r = custom_cgroup_cpuset_cross_llc_race(&ctx).unwrap();
        assert_single_skip(&r, "LLC0 too small after reserving for cg_0");
    }
}