ktstr/scenario/
performance.rs

1//! Performance and benchmarking scenario implementations.
2
3use super::Ctx;
4use super::ops::{CgroupDef, CpusetSpec, Step, execute_steps_with};
5use crate::assert::{Assert, AssertResult};
6use crate::workload::*;
7use anyhow::Result;
8
9/// CachePressure vs SpinWait cgroups under work conservation.
10///
11/// One cgroup runs CachePressure workers (L1-strided RMW, cache-hot) and
12/// the other runs SpinWait workers (cache-cold). Checks throughput
13/// fairness across workers (CV <= 1.0) to catch gross placement imbalance.
14pub fn custom_cache_pressure_imbalance(ctx: &Ctx) -> Result<AssertResult> {
15    let checks = Assert::default_checks().max_throughput_cv(1.0);
16
17    let steps = vec![Step::with_defs(
18        vec![
19            ctx.cgroup_def("cg_0")
20                .work_type(WorkType::cache_pressure(32, 64)),
21            CgroupDef::named("cg_1").workers(ctx.topo.total_cpus()),
22        ],
23        ctx.settled_hold(1.0),
24    )];
25
26    execute_steps_with(ctx, steps, Some(&checks))
27}
28
29/// CacheYield workers testing wake-affine placement after voluntary preemption.
30///
31/// All workers run CacheYield (strided RMW then sched_yield). After yield,
32/// the scheduler must decide where to place the waking task. Two cgroups on
33/// LLC-aligned cpusets make cross-LLC migration observable. Checks wake
34/// latency CV (consistent placement) and throughput fairness.
35pub fn custom_cache_yield_wake_affine(ctx: &Ctx) -> Result<AssertResult> {
36    if ctx.topo.num_llcs() < 2 {
37        return Ok(AssertResult::skip("need >=2 LLCs"));
38    }
39
40    let checks = Assert::default_checks()
41        .max_wake_latency_cv(50.0)
42        .max_throughput_cv(1.0);
43
44    let steps = vec![Step::with_defs(
45        vec![
46            ctx.cgroup_def("cg_0")
47                .cpuset(CpusetSpec::llc(0))
48                .work_type(WorkType::cache_yield(32, 64)),
49            ctx.cgroup_def("cg_1")
50                .cpuset(CpusetSpec::llc(1))
51                .work_type(WorkType::cache_yield(32, 64)),
52        ],
53        ctx.settled_hold(1.0),
54    )];
55
56    execute_steps_with(ctx, steps, Some(&checks))
57}
58
59/// CachePipe vs SpinWait cgroups under work conservation.
60///
61/// One cgroup runs CachePipe workers (cache-hot burst then pipe exchange,
62/// combining cache pressure with cross-CPU wake placement). The other runs
63/// SpinWait at full CPU count. Checks wake latency CV to catch erratic
64/// pipe wake placement.
65pub fn custom_cache_pipe_io_compute_imbalance(ctx: &Ctx) -> Result<AssertResult> {
66    let n_pipe = ctx.workers_per_cgroup;
67    // CachePipe requires even workers.
68    let n_pipe = if !n_pipe.is_multiple_of(2) {
69        n_pipe + 1
70    } else {
71        n_pipe
72    };
73
74    let checks = Assert::default_checks().max_wake_latency_cv(15.0);
75
76    let steps = vec![Step::with_defs(
77        vec![
78            CgroupDef::named("cg_0")
79                .workers(n_pipe)
80                .work_type(WorkType::cache_pipe(32, 1024)),
81            CgroupDef::named("cg_1").workers(ctx.topo.total_cpus()),
82        ],
83        ctx.settled_hold(1.0),
84    )];
85
86    execute_steps_with(ctx, steps, Some(&checks))
87}
88
89/// 1:N fan-out wake pattern.
90///
91/// One cgroup runs FutexFanOut workers: each group has 1 messenger that
92/// does CPU work then wakes 4 receivers via FUTEX_WAKE. Receivers measure
93/// wake-to-run latency. A second cgroup runs SpinWait workers to create
94/// CPU contention. Checks wake latency CV to catch inconsistent
95/// receiver placement.
96pub fn custom_fan_out_wake(ctx: &Ctx) -> Result<AssertResult> {
97    let fan_out = 4usize;
98    let group_size = fan_out + 1;
99    // Round down to nearest multiple of group_size, at least one group.
100    let n_fan_out = (ctx.workers_per_cgroup / group_size).max(1) * group_size;
101
102    let checks = Assert::default_checks()
103        .max_wake_latency_cv(10.0)
104        .max_spread_pct(50.0);
105
106    let steps = vec![Step::with_defs(
107        vec![
108            CgroupDef::named("cg_0")
109                .workers(n_fan_out)
110                .work_type(WorkType::futex_fan_out(fan_out, 1024)),
111            CgroupDef::named("cg_1").workers(ctx.topo.total_cpus()),
112        ],
113        ctx.settled_hold(1.0),
114    )];
115
116    execute_steps_with(ctx, steps, Some(&checks))
117}
118
119/// Fan-out messenger/worker with compute workload.
120///
121/// One cgroup runs FanOutCompute workers: each group has 1 messenger that
122/// stamps a wake timestamp then wakes 4 receivers via FUTEX_WAKE.
123/// Receivers measure wake-to-run latency, sleep, and do matrix multiply
124/// work. A second cgroup runs SpinWait workers to create CPU contention.
125/// Checks wake latency CV to catch inconsistent receiver placement.
126pub fn custom_fan_out_compute(ctx: &Ctx) -> Result<AssertResult> {
127    let fan_out = 4usize;
128    let group_size = fan_out + 1;
129    let n_workers = (ctx.workers_per_cgroup / group_size).max(1) * group_size;
130
131    let checks = Assert::default_checks()
132        .max_wake_latency_cv(10.0)
133        .max_spread_pct(50.0);
134
135    let steps = vec![Step::with_defs(
136        vec![
137            CgroupDef::named("cg_0")
138                .workers(n_workers)
139                .work_type(WorkType::fan_out_compute(fan_out, 256, 5, 100)),
140            CgroupDef::named("cg_1").workers(ctx.topo.total_cpus()),
141        ],
142        ctx.settled_hold(1.0),
143    )];
144
145    execute_steps_with(ctx, steps, Some(&checks))
146}