ktstr/scenario/nested.rs
1//! Nested cgroup hierarchy scenario implementations.
2
3use super::backdrop::Backdrop;
4use super::ops::{CgroupDef, HoldSpec, Op, Step, execute_scenario, execute_steps};
5use super::{CgroupGroup, Ctx, collect_all, dfl_wl, setup_cgroups};
6use crate::assert::AssertResult;
7use crate::workload::*;
8use anyhow::Result;
9use std::collections::BTreeSet;
10use std::thread;
11use std::time::{Duration, Instant};
12
13/// Four nested sub-cgroups up to three levels deep with steady workload.
14pub fn custom_nested_cgroup_steady(ctx: &Ctx) -> Result<AssertResult> {
15 let steps = vec![
16 Step::with_defs(
17 vec![
18 CgroupDef::named("cg_0/sub_a"),
19 CgroupDef::named("cg_0/sub_b"),
20 CgroupDef::named("cg_1/sub_b"),
21 CgroupDef::named("cg_1/sub_a/deep"),
22 ],
23 HoldSpec::fixed(Duration::from_secs(2) + ctx.duration),
24 )
25 .set_ops(vec![
26 Op::add_cgroup("cg_0"),
27 Op::add_cgroup("cg_1"),
28 Op::add_cgroup("cg_1/sub_a"),
29 ]),
30 ];
31
32 execute_steps(ctx, steps)
33}
34
35/// Move workers through nested hierarchy: sub -> parent ->
36/// cross-hierarchy sub -> parent.
37///
38/// The four cgroups (`cg_0/sub` with workers; `cg_0`, `cg_1`, and
39/// `cg_1/sub` as empty move targets) all persist for the full
40/// scenario — declaring them on the Backdrop is what lets every
41/// Step's `MoveAllTasks` resolve the target cgroup without the
42/// previous Step's teardown rmdir'ing it. Workers spawn inside
43/// `cg_0/sub` via [`CgroupDef`]; the empty peer cgroups go through
44/// [`Backdrop::extend_ops`] so no implicit worker spawn happens
45/// there.
46pub fn custom_nested_cgroup_task_move(ctx: &Ctx) -> Result<AssertResult> {
47 let backdrop = Backdrop::new()
48 .push_cgroup(CgroupDef::named("cg_0/sub"))
49 .extend_ops(vec![
50 Op::add_cgroup("cg_0"),
51 Op::add_cgroup("cg_1"),
52 Op::add_cgroup("cg_1/sub"),
53 ]);
54 let steps = vec![
55 // Settle: hold once so workers run inside cg_0/sub before
56 // the first MoveAllTasks. Matches the legacy 2s + duration/4
57 // budget the pre-refactor single-Step version used.
58 Step::new(
59 vec![],
60 HoldSpec::fixed(Duration::from_secs(2) + ctx.duration / 4),
61 ),
62 Step::new(
63 vec![Op::move_all_tasks("cg_0/sub", "cg_0")],
64 HoldSpec::frac(0.25),
65 ),
66 Step::new(
67 vec![Op::move_all_tasks("cg_0", "cg_1/sub")],
68 HoldSpec::frac(0.25),
69 ),
70 Step::new(
71 vec![Op::move_all_tasks("cg_1/sub", "cg_1")],
72 HoldSpec::frac(0.25),
73 ),
74 ];
75
76 execute_scenario(ctx, backdrop, steps)
77}
78
79/// Rapid nested cgroup create/destroy with dynamic names. Custom logic
80/// for dynamic naming.
81pub fn custom_nested_cgroup_rapid_churn(ctx: &Ctx) -> Result<AssertResult> {
82 let (handles, mut guard) = setup_cgroups(ctx, 2, &dfl_wl(ctx))?;
83 let deadline = Instant::now() + ctx.duration;
84 let mut i = 0usize;
85 // Cap on the number of distinct ephemeral cgroup names. The
86 // parent and 'deep' child remove paths are both best-effort
87 // (see comments below); without a cap a long scenario with
88 // persistent EBUSY/ENOENT churn would accumulate one cgroup
89 // per iteration in the cgroupfs tree until the
90 // `setup_cgroups` guard's Drop reaps them at scenario
91 // teardown. Reusing the same 100 names via `i % 100` bounds
92 // the peak resident leaked-cgroup count to at most 100
93 // parents (plus their `deep` children on the every-3rd
94 // iterations) while still exercising the rapid
95 // create→remove churn the test is designed to drive.
96 // `create_cgroup` is idempotent on a name whose dir already
97 // exists (`if !p.exists()` in `CgroupManager::create_cgroup`),
98 // so a cycle that lapped a still-resident sibling is a no-op
99 // re-create rather than an error. Mirrors the cap in the
100 // single-level sibling `custom_cgroup_rapid_churn` in
101 // `scenario/dynamic.rs`.
102 //
103 // Each parent (and 'deep' child on every-3rd iterations) is
104 // registered in the `setup_cgroups` guard via
105 // `add_cgroup_no_cpuset` so its Drop reaps any cgroup whose
106 // best-effort remove_cgroup below failed. The reverse-iterate
107 // contract in `CgroupGroup::drop` removes children before
108 // parents (matters here: a `deep` push always happens after
109 // its parent's push within the same iteration, so reverse
110 // iteration tears down the child first — preventing the
111 // ENOTEMPTY that an already-leaked deep would otherwise
112 // produce when the guard tries to remove its parent).
113 const MAX_EPHEMERAL_NAMES: usize = 100;
114 while Instant::now() < deadline {
115 let path = format!("cg_0/churn_{}", i % MAX_EPHEMERAL_NAMES);
116 guard.add_cgroup_no_cpuset(&path)?;
117 if i.is_multiple_of(3) {
118 let deep = format!("{path}/deep");
119 guard.add_cgroup_no_cpuset(&deep)?;
120 thread::sleep(Duration::from_millis(50));
121 // Best-effort teardown of the nested 'deep' child
122 // before its parent: a transient EBUSY from the
123 // kernel's drain path or ENOENT if the parent's
124 // removal below races and reaps it leaves the path
125 // for the parent to clean up. The setup_cgroups
126 // guard reaps any leaked cgroups at scenario
127 // teardown.
128 if let Err(e) = ctx.cgroups.remove_cgroup(&deep) {
129 tracing::warn!(cgroup = %deep, err = %format!("{e:#}"), "nested churn: remove_cgroup(deep) failed; parent removal or guard Drop will reap");
130 }
131 }
132 thread::sleep(Duration::from_millis(50));
133 // Parent removal in the same churn loop. EBUSY (a child
134 // 'deep' cgroup is still being torn down by its own
135 // remove_cgroup above) or ENOENT (already gone) here
136 // leaves the cgroup for the guard's Drop to reap on
137 // scenario teardown. Bailing would truncate the churn
138 // workload mid-run and mask hierarchy races.
139 if let Err(e) = ctx.cgroups.remove_cgroup(&path) {
140 tracing::warn!(cgroup = %path, err = %format!("{e:#}"), "nested churn: remove_cgroup(path) failed; guard Drop will reap on scenario teardown");
141 }
142 i = i.wrapping_add(1);
143 }
144 Ok(collect_all(handles, &ctx.assert))
145}
146
147/// Nested cgroups with cpusets. `create_cgroup` auto-enables
148/// controllers on intermediate cgroup `subtree_control` for
149/// nested paths.
150pub fn custom_nested_cgroup_cpuset(ctx: &Ctx) -> Result<AssertResult> {
151 let all = ctx.topo.all_cpus();
152 if all.len() < 4 {
153 return Ok(AssertResult::skip("need >=4 CPUs"));
154 }
155 let mid = all.len() / 2;
156 let set_a: BTreeSet<usize> = all[..mid].iter().copied().collect();
157
158 let mut _guard = CgroupGroup::new(ctx.cgroups);
159 _guard.add_cgroup("cg_0", &set_a)?;
160 thread::sleep(Duration::from_secs(2));
161
162 let sub_set: BTreeSet<usize> = all[..mid / 2].iter().copied().collect();
163 _guard.add_cgroup("cg_0/narrow", &sub_set)?;
164
165 let wl = WorkloadConfig {
166 num_workers: ctx.workers_per_cgroup,
167 ..Default::default()
168 };
169 let mut h = WorkloadHandle::spawn(&wl)?;
170 ctx.cgroups
171 .move_tasks("cg_0/narrow", &h.worker_pids_for_cgroup_procs()?)?;
172 h.start();
173
174 thread::sleep(ctx.duration);
175 let reports = h.stop_and_collect();
176 let mut r = AssertResult::pass();
177 r.merge(ctx.assert.assert_cgroup(&reports, Some(&sub_set)));
178 Ok(r)
179}
180
181/// Nested sub-cgroups with heavy SpinWait vs light Bursty load imbalance.
182pub fn custom_nested_cgroup_imbalance(ctx: &Ctx) -> Result<AssertResult> {
183 let steps = vec![
184 Step::with_defs(
185 vec![
186 CgroupDef::named("cg_0/sub_a").workers(8),
187 CgroupDef::named("cg_1/sub_b")
188 .workers(2)
189 .work_type(WorkType::bursty(
190 Duration::from_millis(50),
191 Duration::from_millis(100),
192 )),
193 ],
194 ctx.settled_hold(1.0),
195 )
196 .set_ops(vec![Op::add_cgroup("cg_0"), Op::add_cgroup("cg_1")]),
197 ];
198
199 execute_steps(ctx, steps)
200}
201
202/// Three-level nested hierarchy with workers at leaf cgroups.
203pub fn custom_nested_cgroup_no_ctrl(ctx: &Ctx) -> Result<AssertResult> {
204 let steps = vec![
205 Step::with_defs(
206 vec![
207 CgroupDef::named("cg_0/sub_a/deep"),
208 CgroupDef::named("cg_1/sub_b"),
209 ],
210 ctx.settled_hold(1.0),
211 )
212 .set_ops(vec![
213 Op::add_cgroup("cg_0"),
214 Op::add_cgroup("cg_0/sub_a"),
215 Op::add_cgroup("cg_1"),
216 ]),
217 ];
218
219 execute_steps(ctx, steps)
220}