ktstr/flock/primitives.rs
1//! Kernel-syscall wrappers for `flock(2)` acquire/release.
2//!
3//! Three entry points, each gated through
4//! [`super::fs_filter::reject_remote_fs`] so a misconfigured lockfile
5//! path on NFS / CIFS / SMB2 / CephFS / AFS / FUSE surfaces actionably
6//! at open time rather than silently returning an unserialized fd:
7//!
8//! - [`materialize`] — create the lockfile inode without acquiring
9//! a lock. Used by the DISCOVER phase of
10//! `acquire_llc_plan` so the snapshot pass has a target inode
11//! for the subsequent `/proc/locks` match without contending
12//! with live acquirers.
13//! - [`try_flock`] — non-blocking acquire. Returns `Ok(None)` on
14//! `EWOULDBLOCK` so the caller can decide whether to retry, poll,
15//! or surface contention.
16//! - [`block_flock`] — blocking acquire. Parks the calling thread
17//! in the kernel until the lock is available. Used after
18//! [`try_flock`] returns `None` for callers that want to wait
19//! indefinitely; callers with a deadline use
20//! [`super::acquire::acquire_flock_with_timeout`] instead.
21//!
//! All three open with the flags `O_CREAT | O_RDWR | O_CLOEXEC` and a
//! creation mode of `0o666` so the resulting fd matches the rest of the
//! crate's lockfile contract:
24//!
25//! - `O_CLOEXEC` keeps the lock from leaking across `exec(2)` into
26//! spawned subprocesses (cargo subcommands, build pipeline,
27//! initramfs compressor) where the parent's `OwnedFd::drop`
28//! would not release a child-held flock.
29//! - 0o666 mode matches a peer first-acquire so the file's owner
30//! and permissions don't depend on creation order.
31
32use anyhow::Result;
33use std::os::fd::OwnedFd;
34use std::path::Path;
35
36use super::FlockMode;
37use super::fs_filter::reject_remote_fs;
38
39/// Open a lockfile with the crate-wide flock contract: refuses
40/// remote filesystems via [`reject_remote_fs`], then opens with
41/// `O_CREAT | O_RDWR | O_CLOEXEC | 0o666`. The three module entry
42/// points ([`materialize`], [`try_flock`], [`block_flock`]) share
43/// this open shape; centralizing it here means a future flag change
44/// (or an addition to the remote-fs deny-list) lands in one place
45/// instead of drifting across three call sites.
46///
47/// `O_CLOEXEC` is mandatory: a leaked fd across `exec(2)` (cargo
48/// subcommand, build-pipeline subprocess, initramfs compressor)
49/// would keep the lock alive in the child after the parent's
50/// `OwnedFd::drop`, producing phantom holders the next acquirer
51/// would blame on the wrong pid.
52///
53/// 0o666 mode matches a peer first-acquire so the file's owner and
54/// permissions don't depend on creation order.
55fn open_lockfile(path: &Path) -> Result<OwnedFd> {
56 use rustix::fs::{Mode, OFlags, open};
57
58 reject_remote_fs(path)?;
59 open(
60 path,
61 OFlags::CREATE | OFlags::RDWR | OFlags::CLOEXEC,
62 Mode::from_raw_mode(0o666),
63 )
64 .map_err(|e| anyhow::anyhow!("open {}: {e}", path.display()))
65}
66
67/// Ensure the lockfile exists on disk without acquiring a lock.
68/// Used by the DISCOVER phase of `acquire_llc_plan` (see
69/// `discover_llc_snapshots` in `crate::vmm::host_topology`): the
70/// snapshot pass needs every per-LLC lockfile's inode to exist so a
71/// subsequent `/proc/locks` match has a target, but DISCOVER itself
72/// must not contend with peer acquires.
73///
74/// Opens through [`open_lockfile`] so the resulting inode and fd
75/// mode match what a first-time acquirer would create. Immediately
76/// closes the fd — `OwnedFd::drop` releases the open-file
77/// description and (since no flock was ever taken on this fd)
78/// cannot release a lock held by a peer fd.
79pub(crate) fn materialize<P: AsRef<Path>>(path: P) -> Result<()> {
80 let fd = open_lockfile(path.as_ref())?;
81 drop(fd);
82 Ok(())
83}
84
85/// Open a lock file and attempt `flock` with `LOCK_NB`.
86///
87/// Creates the file with mode 0o666 if absent. Returns
88/// `Ok(Some(fd))` on successful acquire, `Ok(None)` on
89/// `EWOULDBLOCK` (peer already holds an incompatible lock), and
90/// propagates other errors. The returned fd owns the open-file
91/// description; dropping it closes the fd AND releases the kernel
92/// flock (the kernel releases `flock(2)` only when the last fd
93/// referring to its OFD closes — `OwnedFd::drop` is what makes that
94/// work).
95///
96/// `O_CLOEXEC` is mandatory: a leaked fd across `exec(2)` (cargo
97/// subcommand, build-pipeline subprocess, initramfs compressor) would
98/// keep the lock alive in the child process after the parent's
99/// `OwnedFd::drop` runs, producing phantom holders the next acquirer
100/// would blame on the wrong pid.
101///
102/// Calls `super::fs_filter::reject_remote_fs` before the open to
103/// fail-fast on NFS / CIFS / SMB2 / CEPH / AFS / FUSE — see the
104/// module-level rationale.
105///
106/// Accepts any `AsRef<Path>` so `&str`, `&Path`, `&PathBuf`, and
107/// `String` callers all work without string-ifying round trips. LLC
108/// lockfile paths are built as `String` via `format!` and cache
109/// lockfile paths are built as `PathBuf` via `Path::join` — both
110/// pass straight through.
111pub fn try_flock<P: AsRef<Path>>(path: P, mode: FlockMode) -> Result<Option<OwnedFd>> {
112 use rustix::fs::{FlockOperation, flock};
113
114 let path = path.as_ref();
115 let fd = open_lockfile(path)?;
116 let op = match mode {
117 FlockMode::Exclusive => FlockOperation::NonBlockingLockExclusive,
118 FlockMode::Shared => FlockOperation::NonBlockingLockShared,
119 };
120 match flock(&fd, op) {
121 Ok(()) => Ok(Some(fd)),
122 Err(e) if e == rustix::io::Errno::WOULDBLOCK => Ok(None),
123 Err(e) => anyhow::bail!("flock {}: {e}", path.display()),
124 }
125}
126
127/// Blocking variant of [`try_flock`]. Opens the lockfile (creating
128/// it if absent), then issues a blocking `flock(2)` that parks the
129/// caller in the kernel until the lock is available. Use after
130/// [`try_flock`] returns `None` to wait for a live peer to finish.
131pub fn block_flock<P: AsRef<Path>>(path: P, mode: FlockMode) -> Result<OwnedFd> {
132 use rustix::fs::{FlockOperation, flock};
133
134 let path = path.as_ref();
135 let fd = open_lockfile(path)?;
136 let op = match mode {
137 FlockMode::Exclusive => FlockOperation::LockExclusive,
138 FlockMode::Shared => FlockOperation::LockShared,
139 };
140 flock(&fd, op).map_err(|e| anyhow::anyhow!("flock (blocking) {}: {e}", path.display()))?;
141 Ok(fd)
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// The fd handed back by [`try_flock`] must carry `FD_CLOEXEC`.
    ///
    /// Earlier revisions omitted the flag, so flock-holding fds leaked
    /// through `execve` into child processes: the child inherited the
    /// lock, RAII scoping assumptions broke, and `/proc/locks` showed
    /// phantom holders long after the parent had dropped its guard.
    ///
    /// The bit is read directly with `fcntl(F_GETFD)` instead of being
    /// inferred from a side effect (forking an exec'd child is noisier
    /// and harder to match). If a future refactor re-opens the fd
    /// without re-applying `O_CLOEXEC`, this test fails.
    #[test]
    fn try_flock_sets_cloexec_on_returned_fd() {
        use std::os::fd::AsRawFd;
        use tempfile::TempDir;

        let dir = TempDir::new().expect("tempdir");
        let lock_path = dir.path().join("cloexec.lock");

        let acquired = try_flock(&lock_path, FlockMode::Exclusive)
            .expect("try_flock must succeed on fresh tempfile");
        let fd = acquired.expect("EX must acquire on clean pool");
        let raw = fd.as_raw_fd();

        // SAFETY: `raw` comes from `fd`, a live `OwnedFd` that outlives
        // this call; F_GETFD only reads the descriptor flags and does
        // not transfer ownership.
        let flags = unsafe { libc::fcntl(raw, libc::F_GETFD) };
        assert!(
            flags >= 0,
            "fcntl F_GETFD must succeed on our fd; got errno={}",
            std::io::Error::last_os_error(),
        );
        assert_eq!(
            flags & libc::FD_CLOEXEC,
            libc::FD_CLOEXEC,
            "FD_CLOEXEC must be set on try_flock-returned fd; \
             flags=0x{flags:x}. Without it, exec'd children \
             inherit the flock and produce phantom holders.",
        );
        // `fd` is declared after `dir`, so it is dropped (closing the
        // fd and releasing the lock) before the temp directory is
        // removed.
    }
}