/// Per-thread resource profile assembled by the capture layer from procfs,
/// `sched_getaffinity`, the taskstats genetlink path, and (for
/// jemalloc-linked processes) the per-thread jemalloc TSD counters.
///
/// The struct is `#[non_exhaustive]`, so fields may be added later.
// NOTE(review): the trailing "Show 98 fields" / "Expand description" text on
// the opening and closing lines looks like rustdoc UI residue from the page
// extraction rather than part of the declaration — confirm against the source.
#[non_exhaustive]pub struct ThreadState {Show 98 fields
// --- Identity: task/process ids, names, cgroup path, and the start-time
// --- sentinel (stored as a raw u64, used by the ghost-thread filter).
pub tid: u32,
pub tgid: u32,
pub pcomm: String,
pub comm: String,
pub cgroup: String,
pub start_time_clock_ticks: u64,
// --- Categorical / ordinal point-in-time scalars, plus the task-state letter
// --- ('~' is the absent-value sentinel) and the sched_ext enrollment flag.
pub policy: CategoricalString,
pub nice: OrdinalI32,
pub cpu_affinity: CpuSet,
pub processor: OrdinalI32,
pub state: char,
pub ext_enabled: bool,
// --- /proc/<tid>/schedstat fields 1-3 (cumulative).
pub run_time_ns: MonotonicNs,
pub wait_time_ns: MonotonicNs,
pub timeslices: MonotonicCount,
// --- Context-switch counters from /proc/<tid>/status.
pub voluntary_csw: MonotonicCount,
pub nonvoluntary_csw: MonotonicCount,
// --- Wakeup and migration counters (mostly /proc/<tid>/sched keys; several
// --- are documented as zero on kernels without CONFIG_SCHEDSTATS).
pub nr_wakeups: MonotonicCount,
pub nr_wakeups_local: MonotonicCount,
pub nr_wakeups_remote: MonotonicCount,
pub nr_wakeups_sync: MonotonicCount,
pub nr_wakeups_migrate: MonotonicCount,
pub nr_wakeups_affine: MonotonicCount,
pub nr_wakeups_affine_attempts: MonotonicCount,
pub nr_migrations: MonotonicCount,
pub nr_forced_migrations: MonotonicCount,
pub nr_failed_migrations_affine: MonotonicCount,
pub nr_failed_migrations_running: MonotonicCount,
pub nr_failed_migrations_hot: MonotonicCount,
// --- Schedstat wait / sleep / block / iowait totals and the lifetime
// --- `*_max` peaks, read from /proc/<tid>/sched. `voluntary_sleep_ns` is
// --- computed at capture as sum_sleep_runtime - sum_block_runtime.
pub wait_sum: MonotonicNs,
pub wait_count: MonotonicCount,
pub wait_max: PeakNs,
pub voluntary_sleep_ns: MonotonicNs,
pub sleep_max: PeakNs,
pub block_sum: MonotonicNs,
pub block_max: PeakNs,
pub iowait_sum: MonotonicNs,
pub iowait_count: MonotonicCount,
pub exec_max: PeakNs,
pub slice_max: PeakNs,
// --- jemalloc per-thread TSD counters (zero when the probe could not read
// --- them, e.g. the target is not linked against jemalloc).
pub allocated_bytes: Bytes,
pub deallocated_bytes: Bytes,
// --- /proc/<tid>/stat fields: faults (10, 12), CPU time in USER_HZ ticks
// --- (14, 15), scheduler priority (18), and real-time priority (40).
pub minflt: MonotonicCount,
pub majflt: MonotonicCount,
pub utime_clock_ticks: ClockTicks,
pub stime_clock_ticks: ClockTicks,
pub priority: OrdinalI32,
pub rt_priority: OrdinalU32,
// --- Core-scheduling forceidle total, then the two instantaneous gauges.
// --- `nr_threads` is populated only on the thread leader (tid == tgid).
pub core_forceidle_sum: MonotonicNs,
pub fair_slice_ns: GaugeNs,
pub nr_threads: GaugeCount,
// --- Per-process smaps_rollup key/value map in KiB; populated only on the
// --- thread leader, empty map otherwise.
pub smaps_rollup_kib: BTreeMap<String, u64>,
// --- I/O accounting counters. rchar / wchar / syscr are documented as gated
// --- by CONFIG_TASK_IO_ACCOUNTING; the remaining fields presumably come from
// --- the same /proc/<tid>/io source — TODO confirm against their field docs.
pub rchar: Bytes,
pub wchar: Bytes,
pub syscr: MonotonicCount,
pub syscw: MonotonicCount,
pub read_bytes: Bytes,
pub write_bytes: Bytes,
pub cancelled_write_bytes: Bytes,
// --- Taskstats delay-accounting buckets. Each bucket (cpu, blkio, swapin,
// --- freepages, thrashing, compact, wpcopy, irq) carries a count, a
// --- cumulative total, and max / min per-event extrema.
pub cpu_delay_count: MonotonicCount,
pub cpu_delay_total_ns: MonotonicNs,
pub cpu_delay_max_ns: PeakNs,
pub cpu_delay_min_ns: PeakNs,
pub blkio_delay_count: MonotonicCount,
pub blkio_delay_total_ns: MonotonicNs,
pub blkio_delay_max_ns: PeakNs,
pub blkio_delay_min_ns: PeakNs,
pub swapin_delay_count: MonotonicCount,
pub swapin_delay_total_ns: MonotonicNs,
pub swapin_delay_max_ns: PeakNs,
pub swapin_delay_min_ns: PeakNs,
pub freepages_delay_count: MonotonicCount,
pub freepages_delay_total_ns: MonotonicNs,
pub freepages_delay_max_ns: PeakNs,
pub freepages_delay_min_ns: PeakNs,
pub thrashing_delay_count: MonotonicCount,
pub thrashing_delay_total_ns: MonotonicNs,
pub thrashing_delay_max_ns: PeakNs,
pub thrashing_delay_min_ns: PeakNs,
pub compact_delay_count: MonotonicCount,
pub compact_delay_total_ns: MonotonicNs,
pub compact_delay_max_ns: PeakNs,
pub compact_delay_min_ns: PeakNs,
pub wpcopy_delay_count: MonotonicCount,
pub wpcopy_delay_total_ns: MonotonicNs,
pub wpcopy_delay_max_ns: PeakNs,
pub wpcopy_delay_min_ns: PeakNs,
pub irq_delay_count: MonotonicCount,
pub irq_delay_total_ns: MonotonicNs,
pub irq_delay_max_ns: PeakNs,
pub irq_delay_min_ns: PeakNs,
// --- Memory watermarks (lifetime high-water peaks).
pub hiwater_rss_bytes: PeakBytes,
pub hiwater_vm_bytes: PeakBytes,
// --- Capture-provenance flags. NOTE(review): their field docs are not in
// --- view here; presumably they record which capture paths (taskstats, delay
// --- accounting, xacct, jemalloc probe) actually produced data — confirm.
pub taskstats_measured: bool,
pub cpu_delay_active: bool,
pub delay_block_active: bool,
pub xacct_active: bool,
pub jemalloc_measured: bool,
}Expand description
Per-thread resource profile.
Populated by the capture layer from /proc/<tid>/{sched,schedstat,status,io,stat,comm,cgroup,smaps_rollup}, sched_getaffinity, the taskstats
genetlink path (delay-accounting + memory-watermark fields),
and (for jemalloc-linked processes only, via ptrace +
process_vm_readv) the per-thread tsd_s.thread_allocated /
thread_deallocated TLS counters.
Field families (mirrors the module-level breakdown, with the registry-pairing reductions named):
- Cumulative counters and totals (the majority): wakeups,
  migrations, csw, run/wait/sleep/block/iowait time,
  schedstat counts, page-fault counters, syscall counters,
  byte counters, the taskstats per-bucket *_count and
  *_delay_total_ns, and the jemalloc per-thread
  allocated/deallocated TSD counters. Probe-timing invariant
  modulo monotonic forward progress; reduced via the Sum* rules.
- Lifetime high-water peaks: schedstat *_max family, every
  taskstats *_delay_max_ns / *_delay_min_ns, and the memory
  watermarks (Self::hiwater_rss_bytes, Self::hiwater_vm_bytes).
  Non-decreasing-over-time but per-event extrema, so
  non-summable across threads; the registry reduces them via
  MaxPeak / MaxPeakBytes.
- Instantaneous gauges (sensitive to probe timing):
  Self::nr_threads (signal_struct->nr_threads snapshot),
  Self::fair_slice_ns (instantaneous p->se.slice), and
  Self::state (task_state_array letter). Two probes of the same
  thread at different instants can legitimately produce
  different values. Reduced via MaxGaugeCount / MaxGaugeNs /
  ModeChar.
- Categorical / ordinal scalars (point-in-time snapshots):
  Self::policy, Self::nice, Self::priority, Self::processor,
  Self::rt_priority, plus the identity strings (Self::pcomm,
  Self::comm, Self::cgroup) and the
  crate::metric_types::CpuSet Self::cpu_affinity. Sampled at
  capture time and can change at runtime (e.g.
  sched_setaffinity mid-run flips processor and cpu_affinity);
  reduced via Mode* / Range* / Affinity.
Same family taxonomy as the module-level block at the top of
the file; the per-field docs flag the family on each entry
and the registry’s AggRule pairing makes the
“category-mismatched aggregation is a compile error”
invariant load-bearing.
Default is implemented manually rather than derived because
the Self::state field needs '~' (the absent-value
sentinel) instead of '\0' (the char Default). See the
field doc on Self::state for why: '\0' lex-compares
SMALLER than every real kernel state letter, which would
poison crate::ctprof_compare::AggRule::ModeChar
tie-breaks toward “absent” whenever a default-constructed
thread sat alongside a real one in a group.
Fields (Non-exhaustive)§
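This struct is marked as non-exhaustive. Non-exhaustive structs could have additional fields added in future. Therefore, non-exhaustive structs cannot be constructed in external crates using the traditional Struct { .. } syntax; cannot be matched against without a wildcard ..; and struct update syntax will not work.
tid: u32
Kernel task id. Ephemeral across runs; not used as a grouping axis.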
tgid: u32
Thread group id (process id). Ephemeral across runs.
pcomm: String
Process name, read from /proc/<tgid>/comm. Stable across
runs on the same build. Feeds the grouping key under
--group-by pcomm (default), where it flows through the
token-based crate::ctprof_compare::pattern_key
normalizer so ephemeral worker pools (worker-0,
worker-1, …) collapse into a single worker-{N}
bucket; pass --no-thread-normalize to group by literal
pcomm. Also feeds the smaps_rollup join key (with the same
normalization rules) so per-process memory rows survive
PID churn across snapshots.
comm: String
Thread name, read from /proc/<tid>/comm. Stable when the
runtime assigns deterministic names (worker pools, async
runtimes). Feeds the grouping key under --group-by comm,
where it flows through the token-based
crate::ctprof_compare::pattern_key normalizer (same
rules as pcomm). Pass --no-thread-normalize to group by
literal comm, or --group-by comm-exact for the same
effect on this axis only (smaps still normalizes).
cgroup: String
Cgroup v2 path.
§Namespace semantics
The path is read verbatim from /proc/<tid>/cgroup and
is therefore relative to the CGROUP NAMESPACE ROOT the
capturing process sees — NOT relative to the
system-global v2 mount root. A process outside the
capturing namespace would see the same cgroup under a
different path (prefixed with the namespace-root ancestors
the inner view hides); a process inside a nested cgroup
namespace sees a truncated path. Cross-namespace
comparison requires external canonicalization (e.g.
resolving via cgroup.procs inode chains or walking
/proc/<tid>/ns/cgroup to the common root) — the
capture layer deliberately does NOT attempt this because
the resolution depends on capture-site privilege and
namespace visibility that varies per caller.
Kept as cgroup (not renamed to cgroup_ns_relative)
for consistency with GroupBy::Cgroup,
cgroup_flatten, cgroup_stats, and every CLI flag
that threads the same concept through the comparison
layer; a rename would cascade through every pinned
string in the compare pipeline without improving the
semantic guarantee. This doc is the canonical
documentation of the namespace-relative contract.
start_time_clock_ticks: u64
/proc/<tid>/stat field 22 (start_time) in USER_HZ
clock ticks since system boot. The kernel exports this
field in USER_HZ units (defined in
include/asm-generic/param.h as USER_HZ == 100 on
every architecture the capture layer targets — x86_64
and aarch64) — NOT raw internal jiffies, which scale
with CONFIG_HZ. Cross-host comparison between x86_64 and
aarch64 is meaningful because USER_HZ is the same 100 on
both, so a diff between two hosts on different CONFIG_HZ
settings still compares correctly. Seconds-since-boot
is simply start_time_clock_ticks / 100 on those
architectures. Other in-tree architectures carry
different USER_HZ (alpha defines 1024, for instance);
a future port must either restate the divisor or
normalise at capture time. fs/proc/array.c::do_task_stat
is where the kernel writes the field to procfs.
Stored as raw u64, NOT wrapped in
crate::metric_types::ClockTicks, because this field
is an identity / ghost-thread sentinel rather than a
metric that flows through the aggregation pipeline. The
ghost-filter in capture_with / capture_pid_with
keys on start_time_clock_ticks == 0 (alongside an
empty comm) to drop ThreadStates assembled from a
tid that exited mid-capture, which is cleaner against a
raw u64 than against a wrapped sentinel.
policy: CategoricalString
Scheduling policy (SCHED_OTHER, SCHED_FIFO, SCHED_RR,
SCHED_BATCH, SCHED_IDLE, SCHED_DEADLINE, SCHED_EXT). Stored
as the canonical name string rather than the kernel
integer so comparison output is human-readable without a
reverse-lookup table. Wrapped in
crate::metric_types::CategoricalString so the
aggregation pipeline reduces by mode (most-frequent value)
rather than a category-mismatched sum or max.
nice: OrdinalI32
Nice value in the standard [-20, 19] range. Signed i32
because the range includes negative values and
parse_stat extracts the field via get_i32 on
procfs’s decimal text — the inner type matches the
extraction path and the kernel-visible range without
coercion. Wrapped in crate::metric_types::OrdinalI32
so the aggregation pipeline reduces by [min, max] range
rather than sum.
cpu_affinity: CpuSet
Allowed CPU set from sched_getaffinity. Sorted ascending.
Comparison aggregates via union across the group and
renders as “N cpus (range)” or “mixed” for heterogeneous
sets — see crate::ctprof_compare::AffinitySummary.
Wrapped in crate::metric_types::CpuSet so the
aggregation pipeline routes through the dedicated
affinity-summary reduction rather than a numeric path.
processor: OrdinalI32
Last CPU the thread executed on. /proc/<tid>/stat field
39 (task_cpu(task) in fs/proc/array.c::do_task_stat,
emitted via seq_put_decimal_ll). Signed for symmetry
with Self::nice; the kernel emits non-negative values
only — task_cpu (defined unsigned int in
include/linux/sched.h) zero-extends through the
seq_put_decimal_ll widening to s64. 0 is the
absent-value default (collisions with a legitimate CPU 0
are distinguished by inspecting cpu_affinity).
Wrapped in crate::metric_types::OrdinalI32 so the
aggregation pipeline reduces by [min, max] range across
the group.
state: char
Single-letter task state from /proc/<tid>/status State:
line. Real kernel chars are R, S, D, T, t, X,
Z, P, I (see fs/proc/array.c::task_state_array,
emitted via get_task_state). '~' is the absent-value
sentinel — visually distinct from every real kernel char
so a downstream consumer can distinguish “no state read”
from a real value. When '~' appears in compare output,
the /proc/<tid>/status read failed (thread likely
exited mid-capture).
ThreadState::default(), the capture-time
unwrap_or_else(default_state_char) fallback, and
serde(default) deserialize of a partial JSON record all
produce '~' (NOT '\0', the bare char Default). The
manual Default impl on ThreadState, the
unwrap_or_else site in capture_thread_at_with_tally,
and the serde(default = ...) attribute on this field
are paired specifically so the absent-value sentinel is
the same byte everywhere.
'~' (U+007E = 126) is chosen so it sorts AFTER every
real kernel state letter — R (82), S (83), D (68),
T (84), t (116), X (88), Z (90), P (80), I
(73). crate::ctprof_compare::AggRule::ModeChar
breaks count-ties toward the LEX-SMALLEST candidate, so
a sentinel smaller than the real letters would silently
elect “absent” whenever a default-built thread sat
alongside a real one in the same group. '~' being
larger than all of them lets the real letter win the
tie. The earlier '?' (U+003F = 63) sentinel was
numerically smaller than every real state letter — a
tiebreak hijacker; do not return to it.
ext_enabled: bool
true when the task is currently scheduled by sched_ext —
/proc/<tid>/sched ext.enabled line. The kernel emits
the literal key ext.enabled only when
CONFIG_SCHED_CLASS_EXT is enabled; on kernels without it
the field is absent and lands at the default false. When
false on a task expected under sched_ext, the task may
have been ejected (sched_ext fall-back to CFS on BPF error)
or never enrolled.
Stays a bare bool — not wrapped in a categorical newtype
— because it is the only bool-valued metric in the
registry. The
crate::ctprof_compare::AggRule::ModeBool dispatch
coerces it to a String via to_string()/Display at
the call site (see the
crate::metric_types::CategoricalString doc note: if a
second bool-valued metric appears, promote both to a
dedicated CategoricalBool wrapper rather than keeping
the ad-hoc coercion).
run_time_ns: MonotonicNs
Cumulative on-CPU time, ns; /proc/<tid>/schedstat
field 1. MonotonicNs per the lifetime-accumulator
contract.
wait_time_ns: MonotonicNs
Cumulative time waiting on the runqueue, ns;
/proc/<tid>/schedstat field 2. MonotonicNs.
timeslices: MonotonicCount
Number of times the task was scheduled onto a CPU;
/proc/<tid>/schedstat field 3. MonotonicCount.
voluntary_csw: MonotonicCount
Voluntary context switches — task gave up the CPU itself;
/proc/<tid>/status voluntary_ctxt_switches.
MonotonicCount.
nonvoluntary_csw: MonotonicCount
Involuntary context switches — task was preempted;
/proc/<tid>/status nonvoluntary_ctxt_switches.
MonotonicCount.
nr_wakeups: MonotonicCount
Total wakeups via try_to_wake_up(); /proc/<tid>/sched
nr_wakeups. MonotonicCount.
nr_wakeups_local: MonotonicCount
Wakeups landed on the same CPU as the waker;
/proc/<tid>/sched nr_wakeups_local. MonotonicCount.
nr_wakeups_remote: MonotonicCount
Wakeups landed on a different CPU than the waker;
/proc/<tid>/sched nr_wakeups_remote. MonotonicCount.
nr_wakeups_sync: MonotonicCount
WF_SYNC synchronous-wakeup hint count;
/proc/<tid>/sched nr_wakeups_sync. MonotonicCount.
nr_wakeups_migrate: MonotonicCount
Wakeups where the task migrated to a different CPU than
its prior one (WF_MIGRATED); /proc/<tid>/sched
nr_wakeups_migrate. Distinct from nr_wakeups_remote
(waker CPU != target CPU). MonotonicCount.
nr_wakeups_affine: MonotonicCount
Wakeups onto this CPU (cache-affine wakeup
fast-path). /proc/<tid>/sched nr_wakeups_affine,
emitted via P_SCHEDSTAT. Plain u64. Zero on kernels
without CONFIG_SCHEDSTATS. Zero under sched_ext:
wake_affine is a CFS-only path.
nr_wakeups_affine_attempts: MonotonicCount
Total invocations of the cache-affine wakeup heuristic
wake_affine() — denominator for the affine-wake success
ratio (nr_wakeups_affine / nr_wakeups_affine_attempts).
/proc/<tid>/sched nr_wakeups_affine_attempts, emitted
via P_SCHEDSTAT (plain u64). The kernel increments this
counter unconditionally on every wake_affine() call in
kernel/sched/fair.c::wake_affine, then increments
nr_wakeups_affine only when the heuristic chose this
CPU — so the ratio is the success rate of the cache-
affine fast-path. Zero on kernels without
CONFIG_SCHEDSTATS. Zero under sched_ext: wake_affine
is a CFS-only path and kernel/sched/ext.c does not
increment this counter.
nr_migrations: MonotonicCount
Total cross-CPU migrations of the task. Incremented
unconditionally in kernel/sched/core.c (p->se.nr_migrations++)
— no schedstat macro, no class gating. Always populated
regardless of CONFIG_SCHEDSTATS or scheduling class.
MonotonicCount.
nr_forced_migrations: MonotonicCount
Migrations forced by load balance (the load balancer
migrated the task even though the local heuristic would
have skipped it). /proc/<tid>/sched nr_forced_migrations,
plain u64 via P_SCHEDSTAT. Zero on kernels without
CONFIG_SCHEDSTATS.
nr_failed_migrations_affine: MonotonicCount
Failed migrations attributed to affinity mismatch — the
destination CPU was not in cpus_allowed. /proc/<tid>/sched
nr_failed_migrations_affine, plain u64 via P_SCHEDSTAT.
Zero on kernels without CONFIG_SCHEDSTATS.
nr_failed_migrations_running: MonotonicCount
Failed migrations attributed to the task being currently
running on the source CPU. /proc/<tid>/sched
nr_failed_migrations_running, plain u64 via P_SCHEDSTAT.
Zero on kernels without CONFIG_SCHEDSTATS.
nr_failed_migrations_hot: MonotonicCount
Failed migrations attributed to cache-hot heuristic — the
source CPU’s cache was too hot to leave. /proc/<tid>/sched
nr_failed_migrations_hot, plain u64 via P_SCHEDSTAT.
Zero on kernels without CONFIG_SCHEDSTATS.
wait_sum: MonotonicNs
Total nanoseconds the task spent on the runqueue waiting
to be picked. Populated from /proc/<tid>/sched’s
wait_sum key — kernel emits via PN_SCHEDSTAT as
ms.ns_remainder, reconstructed by the parser to full ns.
Zero on kernels without CONFIG_SCHEDSTATS. Zero under
sched_ext: the kernel updates this counter via
__update_stats_wait_end (kernel/sched/stats.c), called
from CFS/RT/DL paths only — kernel/sched/ext.c does not
call that helper.
wait_count: MonotonicCount
Number of runqueue-wait windows the task accumulated —
the per-event tally that pairs with Self::wait_sum.
Populated from /proc/<tid>/sched’s wait_count key
(kernel emits as P_SCHEDSTAT, plain u64). Zero on
kernels without CONFIG_SCHEDSTATS. Same write path as
wait_sum (__update_stats_wait_end in
kernel/sched/stats.c), so the same sched_ext caveat
applies: zero under sched_ext.
wait_max: PeakNs
Longest single runqueue-wait window the task ever
experienced, in nanoseconds. /proc/<tid>/sched wait_max
emitted via PN_SCHEDSTAT (ms.ns_remainder,
reconstructed to full ns by the parser). Tail-latency
signal that pairs with the wait_sum average. Zero on
kernels without CONFIG_SCHEDSTATS. Zero under sched_ext:
the kernel sets this counter via
__update_stats_wait_end from CFS/RT/DL paths only —
kernel/sched/ext.c does not call that helper, so
sched_ext-managed tasks never accumulate wait_max.
voluntary_sleep_ns: MonotonicNs
Pure voluntary sleep time, nanoseconds — TASK_INTERRUPTIBLE
off-CPU windows only, with the involuntary-block
component already subtracted at capture.
Computed at capture as sum_sleep_runtime - sum_block_runtime
(saturating; the read-skew window where block briefly
exceeds sleep collapses to zero). The kernel’s
sum_sleep_runtime key (read via PN_SCHEDSTAT in
/proc/<tid>/sched) is the FULL off-CPU total because
__update_stats_enqueue_sleeper (kernel/sched/stats.c)
charges every sleeper window regardless of which sleep
state the task was in — voluntary sleep AND involuntary
block both contribute. Subtracting sum_block_runtime
at capture leaves the voluntary-sleep residual, which
is the operationally useful signal for “how much time
did this task spend on a syscall wait that wasn’t a
kernel block.”
Capture-side normalization (rather than a derived metric at compare time) means every consumer sees the pre-normalized value without re-deriving — and the raw kernel reading is intentionally NOT preserved in the snapshot per the project’s pre-1.0 disposable-sidecar policy.
There is no voluntary_sleep_count counterpart: the
kernel does not emit one — the scheduler records the
aggregate runtime but not the sleep-event count
separately from nr_wakeups, which already covers the
wake-side tally.
Zero on kernels without CONFIG_SCHEDSTATS. Zero under
sched_ext: __update_stats_enqueue_sleeper is called
from CFS/RT/DL paths only. Also zero when either
sum_sleep_runtime or sum_block_runtime fails to parse
from /proc/<tid>/sched: the residual is uncomputable
without both halves, and falling back to the unsubtracted
sum_sleep_runtime would mislabel involuntary block as
voluntary sleep.
sleep_max: PeakNs
Longest single sleep window in nanoseconds.
/proc/<tid>/sched sleep_max emitted via PN_SCHEDSTAT
(ms.ns_remainder, reconstructed by the parser). Zero on
kernels without CONFIG_SCHEDSTATS. Zero under sched_ext:
the kernel sets this counter via
__update_stats_enqueue_sleeper from CFS/RT/DL paths
only.
block_sum: MonotonicNs
Total nanoseconds blocked in the scheduler — every path
that puts the task into TASK_UNINTERRUPTIBLE contributes:
swap-in, page-fault resolution, disk I/O, plus
mutex/rwsem/completion waits inside kernel code that
hold the task off the runqueue. Populated from
/proc/<tid>/sched’s sum_block_runtime key (kernel
emits ms.ns_remainder via PN_SCHEDSTAT; the parser
reconstructs full ns). block_sum - iowait_sum is
therefore an UPPER BOUND on non-iowait involuntary-block
time — swap/zswap decompression contributes, but so do
the lock-family waits, so the delta cannot be read as
swap latency without further attribution. There is no
block_count counterpart: the kernel does not emit one.
Zero on kernels without CONFIG_SCHEDSTATS. Zero under
sched_ext: the kernel updates this counter via
__update_stats_enqueue_sleeper (kernel/sched/stats.c),
called from CFS/RT/DL paths only.
block_max: PeakNs
Longest single block window in nanoseconds.
/proc/<tid>/sched block_max emitted via PN_SCHEDSTAT
(ms.ns_remainder, reconstructed by the parser). Tail-
latency signal that pairs with the block_sum average.
Zero on kernels without CONFIG_SCHEDSTATS. Zero under
sched_ext: the kernel sets this counter via
__update_stats_enqueue_sleeper from CFS/RT/DL paths
only.
iowait_sum: MonotonicNs
Total nanoseconds in I/O wait specifically (subset of
block_sum). Distinguishes disk-backed I/O delay from
the full involuntary-block total — callers that want
disk latency alone read this field, callers that want
every blocked window read block_sum. Populated from
/proc/<tid>/sched’s iowait_sum key (kernel emits
ms.ns_remainder via PN_SCHEDSTAT; the parser
reconstructs full ns). Zero on kernels without
CONFIG_SCHEDSTATS. Zero under sched_ext: the kernel
updates this counter via __update_stats_enqueue_sleeper
(kernel/sched/stats.c), called from CFS/RT/DL paths
only.
iowait_count: MonotonicCount
Number of I/O-wait windows the task accumulated — the
per-event tally that pairs with Self::iowait_sum.
Populated from /proc/<tid>/sched’s iowait_count key
(kernel emits as P_SCHEDSTAT, plain u64). Zero on
kernels without CONFIG_SCHEDSTATS. Same write path as
iowait_sum (__update_stats_enqueue_sleeper in
kernel/sched/stats.c), so the same sched_ext caveat
applies: zero under sched_ext.
exec_max: PeakNs
Longest single CPU-burst (run-without-preempt window) in
nanoseconds. /proc/<tid>/sched exec_max emitted via
PN_SCHEDSTAT (ms.ns_remainder, reconstructed by the
parser). Zero on kernels without CONFIG_SCHEDSTATS.
Updated for sched_ext tasks too: the kernel sets it in
update_se (kernel/sched/fair.c), which sched_ext
reaches via update_curr_scx → update_curr_common.
slice_max: PeakNs
Longest scheduling slice the task got before being
preempted, in nanoseconds. /proc/<tid>/sched slice_max
emitted via PN_SCHEDSTAT (ms.ns_remainder,
reconstructed by the parser). Zero on kernels without
CONFIG_SCHEDSTATS. Zero under sched_ext: the kernel sets
this counter only in set_next_entity
(kernel/sched/fair.c), a CFS-only path —
sched_ext-managed tasks never accumulate slice_max even
when CONFIG_SCHEDSTATS is enabled.
allocated_bytes: Bytes
Bytes allocated by this thread over its lifetime — read
directly from jemalloc’s per-thread TSD u64 counter
(tsd_s.thread_allocated) via ptrace + process_vm_readv.
Cumulative-from-thread-creation; jemalloc updates the
per-thread TSD counters unconditionally on its alloc fast
and slow paths, so attaching the probe late does not lose
data.
Distinct from crate::host_heap::HostHeapState::allocated_bytes,
which is the runner process’s own
tikv_jemalloc_ctl::stats::allocated reading — a global
arena counter for the calling process. This field is the
per-thread TSD counter for an arbitrary target thread the
probe attached to.
Zero when the capture layer could not pull the counter:
(a) the target process is not linked against jemalloc,
(b) the probe attach failed for any other reason (DWARF
missing, jemalloc in a DSO rather than the main
executable, arch mismatch),
(c) the per-thread ptrace step failed (tid exited
mid-capture, EPERM under YAMA scope=1 without
CAP_SYS_PTRACE),
or (d) the thread is in the calling process’s own tgid
(PTRACE_SEIZE rejects self-attach). All four collapse to
zero per the best-effort “absent = 0” capture contract.
Snapshot-level diagnosis lives on
CtprofProbeSummary::dominant_failure (the per-tag
plurality) and
CtprofProbeSummary::privilege_dominant (the EPERM
remediation gate, true when ptrace tags account for ≥ 50%
of failed), reachable via
CtprofSnapshot::probe_summary; the per-tag taxonomy
is documented in the ktstr ctprof capture CLI help.
deallocated_bytes: Bytes
Bytes freed by this thread over its lifetime — read from
jemalloc’s per-thread TSD u64 counter
(tsd_s.thread_deallocated) via the same probe path that
populates Self::allocated_bytes.
allocated_bytes - deallocated_bytes is a thread-local
estimate of currently-held bytes; the difference races
any in-flight allocator activity since the two counters
are sampled in one process_vm_readv over a 24-byte span
the target may continue to mutate during the read.
minflt: MonotonicCount
Minor faults (no disk I/O). /proc/<tid>/stat field 10.
majflt: MonotonicCount
Major faults (backed by disk). /proc/<tid>/stat field 12.
utime_clock_ticks: ClockTicks
User-mode CPU time in USER_HZ clock ticks since thread
start. /proc/<tid>/stat field 14
(nsec_to_clock_t(utime) in fs/proc/array.c::do_task_stat).
USER_HZ-scaled like Self::start_time_clock_ticks —
cross-host comparison between x86_64 and aarch64 is
meaningful because USER_HZ is 100 on both, independent of
CONFIG_HZ. Suffix _clock_ticks mirrors the existing
start_time_clock_ticks precedent.
stime_clock_ticks: ClockTicks
Kernel-mode CPU time in USER_HZ clock ticks since thread
start. /proc/<tid>/stat field 15
(nsec_to_clock_t(stime) in fs/proc/array.c::do_task_stat).
Same USER_HZ scaling and _clock_ticks suffix convention as
Self::utime_clock_ticks.
priority: OrdinalI32
Kernel-internal scheduler priority (signed). Distinct
from Self::nice — priority is the post-bias
scheduling priority (task_prio(task)) the scheduler
uses for ordering, while nice is the
userspace-presentable [-20, 19] preference.
/proc/<tid>/stat field 18, emitted via
seq_put_decimal_ll(m, " ", priority) (the local priority
= task_prio(task)) in do_task_stat() (fs/proc/array.c).
Range per task_prio() (kernel/sched/syscalls.c):
CFS / SCHED_OTHER tasks see [0..39] (nice [-20..19]
translated by task_prio() returning
p->prio - MAX_RT_PRIO); SCHED_FIFO / SCHED_RR tasks
see [-2..-100]; SCHED_DEADLINE tasks land at -101.
Default 0 when the stat read fails — collides with the
CFS nice-0 case, so a CFS task at default nice and an
absent stat line both render 0. Wrapped in
crate::metric_types::OrdinalI32 for the
[min, max] range reduction across a group.
rt_priority: OrdinalU32
Real-time scheduler priority. /proc/<tid>/stat field
40, emitted via seq_put_decimal_ull(m, " ", task->rt_priority)
in do_task_stat() (fs/proc/array.c). Non-zero only when the task
runs SCHED_FIFO or SCHED_RR; CFS / SCHED_OTHER tasks
land at zero. Useful as a post-hoc filter to identify
real-time threads in a snapshot. Wrapped in
crate::metric_types::OrdinalU32 for the
[min, max] range reduction across a group; the inner
u32 matches the kernel’s
unsigned int task_struct::rt_priority declaration
(include/linux/sched.h) exactly. Practical range is
bounded 0..99 regardless of the type width.
core_forceidle_sum: MonotonicNs
Cumulative time this task forced its SMT sibling idle for
core-scheduling, in nanoseconds. /proc/<tid>/sched
core_forceidle_sum, dotted ms.ns format via
PN_SCHEDSTAT in proc_sched_show_task() (kernel/sched/debug.c).
Reconstructed to full ns via the same
parsed_ns_from_dotted helper as wait_sum /
block_sum.
Increment occurs in __account_forceidle_time()
(kernel/sched/cputime.c), called from
__sched_core_account_forceidle()
(kernel/sched/core_sched.c). The increment body is a plain
__schedstat_add(p->stats.core_forceidle_sum, delta) —
it is CLASS-AGNOSTIC. The caller iterates
for_each_cpu(i, smt_mask) and picks
p = rq_i->core_pick ?: rq_i->curr on each SMT sibling,
charging whichever task is running there regardless of
scheduling class. So a SCHED_EXT / DEADLINE / RR / FIFO
task on a core-scheduled SMT cohort CAN accrue forceidle
time the same way a CFS task can.
Real gating is at the rq/build level, not per-task, and
the runtime gates apply IN SERIES rather than equating —
sched_core_enabled(rq) and core_forceidle_count are
independent conditions that BOTH have to fire:
- Build: CONFIG_SCHED_CORE (file-level #ifdef in
  kernel/sched/cputime.c and kernel/sched/core_sched.c).
- Build: CONFIG_SCHEDSTATS (the caller’s own
  #ifdef CONFIG_SCHEDSTATS in __sched_core_account_forceidle()).
- Runtime, scheduler-class entry: sched_core_enabled(rq) is the
  FIRST gate — checked at pick_next_task() entry
  (kernel/sched/core.c) with an early __pick_next_task() return
  when false. No core-wide selection runs without this.
- Runtime, transient counter: rq->core->core_forceidle_count > 0
  is a SEPARATE subsequent gate — pick_next_task() only invokes
  sched_core_account_forceidle(rq) when this counter is non-zero
  (kernel/sched/core.c); the
  WARN_ON_ONCE(!rq->core->core_forceidle_count) inside
  __sched_core_account_forceidle() (kernel/sched/core_sched.c)
  reasserts the same precondition. The early-return in the same
  function on core_forceidle_start == 0 is then a third
  transient guard against accounting before forceidle has begun.
- Runtime, occupancy: non-zero core_forceidle_occupation (the
  WARN_ON_ONCE in __sched_core_account_forceidle()).
Kernels that fail any build gate, or rqs that fail any runtime gate, see this counter at zero for every task. Hosts where no SMT cohort has ever accumulated forceidle also see zero across the board.
fair_slice_ns: GaugeNs
Per-thread se.slice in nanoseconds. For fair-class
tasks (SCHED_NORMAL / SCHED_BATCH) this is the
instantaneous slice CFS is currently running the task
with. For SCHED_EXT tasks the line is still emitted but
reflects stale p->se.slice state — ext-class
schedulers maintain slice in p->scx.slice and do not
update p->se.slice. Field name fair_slice_ns mirrors
the kernel emission gate fair_policy(p->policy), not a
guarantee about which class actually populated the value.
/proc/<tid>/sched se.slice, plain integer via
P(se.slice) in proc_sched_show_task()
(kernel/sched/debug.c), gated by fair_policy(p->policy)
in the same function. fair_policy() is defined in
kernel/sched/sched.h as
normal_policy(policy) || policy == SCHED_BATCH, and
normal_policy() (sched.h) returns true for
SCHED_NORMAL AND, when CONFIG_SCHED_CLASS_EXT is
built, for SCHED_EXT. So the line IS emitted for
SCHED_EXT tasks on a sched_ext-enabled kernel — but the
value carries the staleness caveat above. The parser
cannot distinguish “ext-class hasn’t refreshed
p->se.slice since the task left the fair class” from
“CFS task with a current slice that happens to equal the
last value”: that ambiguity is the user’s to resolve via
policy (also captured per-thread). Tasks under
SCHED_DEADLINE / SCHED_RR / SCHED_FIFO / SCHED_IDLE land
at the absent-line default of 0.
This is a GAUGE (instantaneous current value), not a
counter or high-water mark. Distinct from
Self::slice_max which IS the schedstat lifetime
high-water — a thread that hasn’t run for a long time
can have a stale fair_slice_ns value while slice_max
continues to reflect the historical worst. Aggregation
across a group uses Max so the rendered cell shows the
longest current slice any thread in the group is running
with — Sum would multiply a near-identical instantaneous
value across the group and obscure the signal (and would
also be semantically meaningless: instantaneous gauges
do not add).
nr_threads: GaugeCount
Total threads in this task’s tgid (process-wide thread
count, the signal_struct->nr_threads snapshot). Field
name mirrors the kernel struct member to avoid collision
with CtprofSnapshot::threads (the snapshot’s own
Vec<ThreadState>). /proc/<pid>/status Threads: line
emitted in task_sig() (fs/proc/array.c) via
seq_put_decimal_ull(m, "Threads:\t", num_threads).
Identical for every thread of the same tgid.
Capture-side dedup: the field is populated ONLY on the
thread leader (tid == tgid) and zero for non-leader
threads of the same process. The registry pairs this with
crate::ctprof_compare::AggRule::MaxGaugeCount (not
Sum) so the rendered cell surfaces “the largest process
represented in this bucket” regardless of grouping axis.
Sum would be wrong under --group-by comm and
--group-by cgroup because non-leader buckets get a 0
contribution from every member — a bucket whose leader
thread did NOT match the grouping
would render 0 even though processes are represented.
Wrapped in crate::metric_types::GaugeCount so the
type system rejects sum-style aggregation: if every thread
carried the value (i.e. without the leader-only dedup), a
bucket with N threads sharing a tgid would over-count the
parent process N-fold under Sum, while Max is well-defined
(largest current count any contributor reported).
smaps_rollup_kib: BTreeMap<String, u64>Per-process memory breakdown from
/proc/<tid>/smaps_rollup, parsed as a key-value map
with values in kilobytes (the kernel’s native unit on
this file — __show_smap() (fs/proc/task_mmu.c)
emits every line as Name: NN kB).
Stored as a BTreeMap for forward-compat with the
open key set: rollup mode (gated in __show_smap())
emits 22 keys on a recent kernel — Rss, Pss, Pss_Dirty,
Pss_Anon, Pss_File, Pss_Shmem, Shared_Clean,
Shared_Dirty, Private_Clean, Private_Dirty, Referenced,
Anonymous, KSM, LazyFree, AnonHugePages,
ShmemPmdMapped, FilePmdMapped, Shared_Hugetlb,
Private_Hugetlb, Swap, SwapPss, Locked, plus the
[rollup] header which the parser elides. The map
preserves any future-kernel keys without a schema bump.
Pss is the most operationally valuable: proportional
share of shared pages — distinguishes “sole owner” from
“one of N sharing”.
Per-MM, not per-thread: every thread of the same tgid
shares one mm_struct, so all threads expose identical
values. Capture-side dedup populates ONLY the thread
leader (tid == tgid) and leaves non-leader threads at
the empty map. Mirrors Self::nr_threads’s
leader-dedup discipline. The capture cost is one
read_to_string per tgid (NOT per-tid) because
non-leaders short-circuit before opening the file.
Empty when smaps_rollup is absent (older kernels
without /proc/<pid>/smaps_rollup support — added
upstream in 4.14) or unreadable (typically
permission denied, e.g. reading /proc/1/smaps_rollup without
CAP_SYS_PTRACE).
rchar: Bytes
Bytes read at the read syscall layer (incl. cached /
pagecache hits). Gated by CONFIG_TASK_IO_ACCOUNTING.
wchar: Bytes
Bytes written at the write syscall layer (incl.
pagecache / writeback). Gated by CONFIG_TASK_IO_ACCOUNTING.
syscr: MonotonicCountNumber of read syscalls. Gated by CONFIG_TASK_IO_ACCOUNTING.
syscw: MonotonicCountNumber of write syscalls. Gated by CONFIG_TASK_IO_ACCOUNTING.
read_bytes: Bytes
Bytes that hit the storage device on read (excludes
pagecache hits). Gated by CONFIG_TASK_IO_ACCOUNTING.
write_bytes: Bytes
Bytes this task caused to be sent to the storage layer on write,
accounted at page-dirtying time (before writeback). Gated by CONFIG_TASK_IO_ACCOUNTING.
cancelled_write_bytes: Bytes
Bytes the kernel deaccounted from a prior dirty-write
because the page was reclaimed without writeback (truncate,
inode invalidation). /proc/<tid>/io 7th line, gated by
CONFIG_TASK_IO_ACCOUNTING.
include/linux/task_io_accounting_ops.h
(task_io_account_cancelled_write) increments
current->ioac.cancelled_write_bytes — i.e. the value
records on the task that triggers the deaccount
(the truncating / unmapping task), NOT the original
writer. Sole call site is folio_account_cleaned
(mm/page-writeback.c), invoked when a dirty folio
is reclaimed without going through writeback.
Operationally this is a “negative write” signal — bytes
the kernel previously charged to some thread’s write_bytes
that never ended up on disk. Higher values mean
more wasted writeback intent. Per-thread interpretation
is asymmetric vs. Self::write_bytes: a thread’s
cancelled_write_bytes does NOT correspond to its own
write_bytes — the writer and the canceller may be
distinct tasks. Group-level Sum across a registry-grouped
bucket is therefore meaningful (total bytes the bucket’s
threads cancelled), but per-thread actual_write_bytes = write_bytes - cancelled_write_bytes is NOT defined for
that reason — the two counters track different parties.
cpu_delay_count: MonotonicCountNumber of off-CPU windows the task waited for the runqueue
to schedule it. Source: taskstats cpu_count, populated at
query time from tsk->sched_info.pcount (incremented by
sched_info_arrive in kernel/sched/stats.h, line 282).
delayacct_add_tsk (kernel/delayacct.c::delayacct_add_tsk,
line 175) snapshots the value into the reply via
d->cpu_count += t1 where t1 = tsk->sched_info.pcount.
cpu_delay_total_ns: MonotonicNsCumulative ns the task spent waiting on the runqueue.
Source: taskstats cpu_delay_total. RACY: count and total
are not updated atomically (sched_info path, no lock); a
concurrent reader may observe count or total advance ahead
of the other.
cpu_delay_max_ns: PeakNsLongest single CPU-wait window, ns. Source: taskstats
cpu_delay_max. Same lifetime-watermark semantics as
wait_max / block_max — MaxPeak aggregation surfaces
the worst single window any thread in the group ever
experienced.
cpu_delay_min_ns: PeakNsShortest non-zero CPU-wait window, ns. Source: taskstats
cpu_delay_min. Sentinel 0 means “no events observed”:
the kernel writes the field on every event, so 0 is
distinguishable from a genuine zero-ns event by checking
cpu_delay_count == 0. PeakNs aggregation surfaces “the
largest minimum any thread reported” across the group.
blkio_delay_count: MonotonicCountNumber of block-I/O wait windows. Source: taskstats
blkio_count. Updates from delayacct_blkio_start/end in
kernel/delayacct.c.
blkio_delay_total_ns: MonotonicNsCumulative ns the task waited on synchronous block I/O.
Source: taskstats blkio_delay_total. Distinct from
iowait_sum (schedstat) which counts a different bucket;
the delayacct path is the canonical block-I/O delay
accounting.
blkio_delay_max_ns: PeakNsLongest single block-I/O wait window, ns. Source: taskstats
blkio_delay_max.
blkio_delay_min_ns: PeakNsShortest non-zero block-I/O wait window, ns. Source:
taskstats blkio_delay_min. Sentinel-0 caveat per
cpu_delay_min_ns.
swapin_delay_count: MonotonicCountNumber of swap-in wait windows. Source: taskstats
swapin_count. NOTE: overlaps with thrashing_count —
every thrashing event is also a swapin event from the
syscall layer; do not sum.
swapin_delay_total_ns: MonotonicNsCumulative ns waiting for swap-in to complete. Source:
taskstats swapin_delay_total.
swapin_delay_max_ns: PeakNsLongest single swap-in wait, ns. Source: taskstats
swapin_delay_max.
swapin_delay_min_ns: PeakNsShortest non-zero swap-in wait, ns. Sentinel-0 caveat per
cpu_delay_min_ns.
freepages_delay_count: MonotonicCountNumber of direct-reclaim (free-pages) wait windows. Source:
taskstats freepages_count. Updates from
delayacct_freepages_start/end (mm/page_alloc.c).
freepages_delay_total_ns: MonotonicNsCumulative ns waiting in direct memory reclaim. Source:
taskstats freepages_delay_total.
freepages_delay_max_ns: PeakNsLongest single direct-reclaim wait, ns. Source: taskstats
freepages_delay_max.
freepages_delay_min_ns: PeakNsShortest non-zero direct-reclaim wait, ns. Sentinel-0 caveat
per cpu_delay_min_ns.
thrashing_delay_count: MonotonicCountNumber of thrashing wait windows. Source: taskstats
thrashing_count. OVERLAPS with swapin_*: thrashing
detection is a refinement of swapin tracking
(mm/workingset.c).
thrashing_delay_total_ns: MonotonicNsCumulative ns waiting under thrashing pressure. Source:
taskstats thrashing_delay_total.
thrashing_delay_max_ns: PeakNsLongest single thrashing wait, ns. Source: taskstats
thrashing_delay_max.
thrashing_delay_min_ns: PeakNsShortest non-zero thrashing wait, ns. Sentinel-0 caveat per
cpu_delay_min_ns.
compact_delay_count: MonotonicCountNumber of memory-compaction wait windows. Source: taskstats
compact_count. Updates from delayacct_compact_start/end
(mm/compaction.c).
compact_delay_total_ns: MonotonicNsCumulative ns waiting on memory compaction. Source:
taskstats compact_delay_total.
compact_delay_max_ns: PeakNsLongest single compaction wait, ns. Source: taskstats
compact_delay_max.
compact_delay_min_ns: PeakNsShortest non-zero compaction wait, ns. Sentinel-0 caveat
per cpu_delay_min_ns.
wpcopy_delay_count: MonotonicCountNumber of write-protect-copy (CoW) fault wait windows.
Source: taskstats wpcopy_count. Updates from
delayacct_wpcopy_start/end (mm/memory.c).
wpcopy_delay_total_ns: MonotonicNsCumulative ns waiting on write-protect-copy faults. Source:
taskstats wpcopy_delay_total.
wpcopy_delay_max_ns: PeakNsLongest single wpcopy wait, ns. Source: taskstats
wpcopy_delay_max.
wpcopy_delay_min_ns: PeakNsShortest non-zero wpcopy wait, ns. Sentinel-0 caveat per
cpu_delay_min_ns.
irq_delay_count: MonotonicCount
Number of IRQ-handler windows charged to the task. Source:
taskstats irq_count. Updates from delayacct_irq in
kernel/delayacct.c — counts kernel-IRQ time charged to
the task by the IRQ accounting subsystem.
irq_delay_total_ns: MonotonicNsCumulative ns of IRQ handling time charged to the task.
Source: taskstats irq_delay_total.
irq_delay_max_ns: PeakNsLongest single IRQ-handler window, ns. Source: taskstats
irq_delay_max.
irq_delay_min_ns: PeakNsShortest non-zero IRQ-handler window, ns. Sentinel-0 caveat
per cpu_delay_min_ns.
hiwater_rss_bytes: PeakBytesLifetime high-watermark of resident-set size, bytes. Source:
taskstats hiwater_rss (kB), converted at parse time via
saturating_mul(1024). Updates from xacct_add_tsk in
kernel/tsacct.c::xacct_add_tsk. Distinct from
smaps_rollup_kib["Rss"] which is the CURRENT RSS —
this field is the lifetime peak.
Kernel threads read zero: xacct_add_tsk
(kernel/tsacct.c) calls mm = get_task_mm(p) and the
hiwater assignments are guarded by
if (mm). Kernel threads (PF_KTHREAD, tsk->mm == NULL)
skip the assignment entirely, so the field stays at the
kernel-side zero default.
Sibling threads of the same tgid see the same value:
get_mm_hiwater_rss(mm) reads from the shared
mm_struct, so every thread of a process reports the same
hiwater value. The registry’s MaxPeakBytes aggregation
behaves as a per-process selector when buckets span
multiple tgids: cross-tgid Max picks the largest
per-process watermark in the bucket; intra-tgid Max is a
no-op (every sibling reports the same number).
hiwater_vm_bytes: PeakBytesLifetime high-watermark of virtual-memory size, bytes.
Source: taskstats hiwater_vm (kB), converted at parse
time. Same kernel write path as hiwater_rss_bytes —
inherits the same kernel-thread zero and same sibling-tid
shared-mm caveats; see Self::hiwater_rss_bytes.
taskstats_measured: boolWhether this thread’s taskstats genetlink query succeeded and populated
the payload — true iff apply_delay_stats ran on an Ok query. This
is the capture-mechanism flag for the WHOLE taskstats payload: one query
(fill_stats) fills BOTH the delay-accounting family (cpu/blkio/… delay
counters) AND the xacct memory watermarks (hiwater_rss_bytes /
hiwater_vm_bytes) together, so they share this one flag. false when
the query could not capture (CONFIG_TASKSTATS off, no CAP_NET_ADMIN, or
the query raced task exit), leaving the absent-counter zero defaults. The
group aggregation reads this to distinguish a captured (measured) zero
from a never-captured payload — without it both read as a sentinel 0
and a derived metric like total_offcpu_delay_ns renders “0” instead of
“-”. A whole group with no captured thread aggregates to
crate::ctprof_compare::Aggregated::Absent.
QUERY-level: true means THIS thread’s taskstats query succeeded. Per
sub-family ENABLEMENT is carried separately by Self::cpu_delay_active /
Self::delay_block_active / Self::xacct_active (baked at capture from
the host /proc/sys/kernel/task_delayacct + /proc/config.gz probes). The
group measured predicate ANDs this query-Ok flag with the relevant
sub-family active flag, so a sub-family disabled while the query still
succeeds (CONFIG_TASK_XACCT off, or the kernel.task_delayacct sysctl
off, with the other family on) now renders “-” not “0”. On ktstr’s own
kernel (all configs =y, delayacct booted on) every sub-family is active,
so the gating is an in-VM no-op; it only changes host-facing ctprof capture against single-family kernels.
cpu_delay_active: boolHost-wide enablement of the cpu_delay_* sub-family (sched_info-sourced,
filled unconditionally by delayacct_add_tsk): CONFIG_TASK_DELAY_ACCT is
built in — survives the runtime task_delayacct toggle. Baked at capture
from host_context::probe_taskstats_active; AND-ed with
Self::taskstats_measured by the group measured predicate. No
serde(default) (matching the sibling capture flags): a sidecar predating
this field fails to deserialize and is regenerated by re-running, per the
disposable-sidecar policy.
delay_block_active: boolHost-wide enablement of the delayacct resource-wait sub-family (blkio /
swapin / freepages / thrashing / compact / wpcopy / irq): the
runtime task_delayacct toggle is ON (these are gated by tsk->delays,
allocated at fork only when on). Baked at capture; AND-ed with
Self::taskstats_measured.
xacct_active: boolHost-wide enablement of the xacct watermark sub-family
(hiwater_rss_bytes, hiwater_vm_bytes): CONFIG_TASK_XACCT is built in (no
runtime toggle); an unknown host config (/proc/config.gz not exposed) is
treated as active to avoid a false absent. Baked at capture; AND-ed with
Self::taskstats_measured.
jemalloc_measured: boolWhether this thread’s jemalloc allocated_bytes / deallocated_bytes
were captured from a successful per-thread TSD probe read, versus left at
the absent-as-0 default (process not jemalloc-linked, the probe could
not attach, or the per-thread read failed). Set from the per-thread read
outcome — NOT the per-tgid attach — mirroring taskstats_measured’s
per-thread Ok-gating: a failed read is not a measurement. Same
measured-vs-zero discipline as Self::taskstats_measured, for the
live_heap_estimate derived metric.
Implementations§
Source§impl ThreadState
impl ThreadState
Sourcepub fn smaps_rollup_bytes(&self) -> impl Iterator<Item = (&String, Bytes)>
pub fn smaps_rollup_bytes(&self) -> impl Iterator<Item = (&String, Bytes)>
Iterate over Self::smaps_rollup_kib with values
converted from kilobytes to bytes via saturating_mul(1024).
The kernel emits smaps_rollup values in kB; the
project’s display layer auto-scales bytes via the
existing “B” → KiB → MiB → GiB ladder, so a single
helper centralizes the unit conversion at every render
site (write_show + write_diff). Saturating multiply
guards against pathological input from a malformed
snapshot file. Wrapped in
crate::metric_types::Bytes so the byte-typed value
flows through the same auto-scale path as the rest of
the byte-tagged registry metrics.
Trait Implementations§
Source§impl Clone for ThreadState
impl Clone for ThreadState
Source§fn clone(&self) -> ThreadState
fn clone(&self) -> ThreadState
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for ThreadState
impl Debug for ThreadState
Source§impl Default for ThreadState
impl Default for ThreadState
Source§fn default() -> Self
fn default() -> Self
Zero-valued sentinel — tid=0/tgid=0/empty strings are the
“no thread observed yet” placeholder that ctprof inserts
into HashMap entries before the /proc walk populates them
from the live kernel state. Default-constructed ThreadState
values are NOT visible to operator-facing output: the
capture path in capture_thread_at_with_tally
(which delegates to the per-file /proc read helpers in
parse) overwrites each field from
/proc/<pid>/task/<tid>/{stat,status,schedstat,cgroup} before
the entry is read for rendering. The state char uses the
'~' absent-value sentinel rather than the bare char
Default '\0' because '\0' would print as an empty cell in
the ctprof table and the absent-value glyph is operator-
readable.
Source§impl<'de> Deserialize<'de> for ThreadState
impl<'de> Deserialize<'de> for ThreadState
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for ThreadState
impl RefUnwindSafe for ThreadState
impl Send for ThreadState
impl Sync for ThreadState
impl Unpin for ThreadState
impl UnwindSafe for ThreadState
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more