ktstr/flock/
primitives.rs

1//! Kernel-syscall wrappers for `flock(2)` acquire/release.
2//!
3//! Three entry points, each gated through
4//! [`super::fs_filter::reject_remote_fs`] so a misconfigured lockfile
5//! path on NFS / CIFS / SMB2 / CephFS / AFS / FUSE surfaces actionably
6//! at open time rather than silently returning an unserialized fd:
7//!
8//!  - [`materialize`] — create the lockfile inode without acquiring
9//!    a lock. Used by the DISCOVER phase of
10//!    `acquire_llc_plan` so the snapshot pass has a target inode
11//!    for the subsequent `/proc/locks` match without contending
12//!    with live acquirers.
13//!  - [`try_flock`] — non-blocking acquire. Returns `Ok(None)` on
14//!    `EWOULDBLOCK` so the caller can decide whether to retry, poll,
15//!    or surface contention.
16//!  - [`block_flock`] — blocking acquire. Parks the calling thread
17//!    in the kernel until the lock is available. Used after
18//!    [`try_flock`] returns `None` for callers that want to wait
19//!    indefinitely; callers with a deadline use
20//!    [`super::acquire::acquire_flock_with_timeout`] instead.
21//!
//! All three open with flags `O_CREAT | O_RDWR | O_CLOEXEC` and
//! creation mode `0o666` so the resulting fd matches the rest of the
//! crate's lockfile contract:
24//!
25//!  - `O_CLOEXEC` keeps the lock from leaking across `exec(2)` into
26//!    spawned subprocesses (cargo subcommands, build pipeline,
27//!    initramfs compressor) where the parent's `OwnedFd::drop`
28//!    would not release a child-held flock.
29//!  - 0o666 mode matches a peer first-acquire so the file's owner
30//!    and permissions don't depend on creation order.
31
32use anyhow::Result;
33use std::os::fd::OwnedFd;
34use std::path::Path;
35
36use super::FlockMode;
37use super::fs_filter::reject_remote_fs;
38
39/// Open a lockfile with the crate-wide flock contract: refuses
40/// remote filesystems via [`reject_remote_fs`], then opens with
41/// `O_CREAT | O_RDWR | O_CLOEXEC | 0o666`. The three module entry
42/// points ([`materialize`], [`try_flock`], [`block_flock`]) share
43/// this open shape; centralizing it here means a future flag change
44/// (or an addition to the remote-fs deny-list) lands in one place
45/// instead of drifting across three call sites.
46///
47/// `O_CLOEXEC` is mandatory: a leaked fd across `exec(2)` (cargo
48/// subcommand, build-pipeline subprocess, initramfs compressor)
49/// would keep the lock alive in the child after the parent's
50/// `OwnedFd::drop`, producing phantom holders the next acquirer
51/// would blame on the wrong pid.
52///
53/// 0o666 mode matches a peer first-acquire so the file's owner and
54/// permissions don't depend on creation order.
55fn open_lockfile(path: &Path) -> Result<OwnedFd> {
56    use rustix::fs::{Mode, OFlags, open};
57
58    reject_remote_fs(path)?;
59    open(
60        path,
61        OFlags::CREATE | OFlags::RDWR | OFlags::CLOEXEC,
62        Mode::from_raw_mode(0o666),
63    )
64    .map_err(|e| anyhow::anyhow!("open {}: {e}", path.display()))
65}
66
67/// Ensure the lockfile exists on disk without acquiring a lock.
68/// Used by the DISCOVER phase of `acquire_llc_plan` (see
69/// `discover_llc_snapshots` in `crate::vmm::host_topology`): the
70/// snapshot pass needs every per-LLC lockfile's inode to exist so a
71/// subsequent `/proc/locks` match has a target, but DISCOVER itself
72/// must not contend with peer acquires.
73///
74/// Opens through [`open_lockfile`] so the resulting inode and fd
75/// mode match what a first-time acquirer would create. Immediately
76/// closes the fd — `OwnedFd::drop` releases the open-file
77/// description and (since no flock was ever taken on this fd)
78/// cannot release a lock held by a peer fd.
79pub(crate) fn materialize<P: AsRef<Path>>(path: P) -> Result<()> {
80    let fd = open_lockfile(path.as_ref())?;
81    drop(fd);
82    Ok(())
83}
84
85/// Open a lock file and attempt `flock` with `LOCK_NB`.
86///
87/// Creates the file with mode 0o666 if absent. Returns
88/// `Ok(Some(fd))` on successful acquire, `Ok(None)` on
89/// `EWOULDBLOCK` (peer already holds an incompatible lock), and
90/// propagates other errors. The returned fd owns the open-file
91/// description; dropping it closes the fd AND releases the kernel
92/// flock (the kernel releases `flock(2)` only when the last fd
93/// referring to its OFD closes — `OwnedFd::drop` is what makes that
94/// work).
95///
96/// `O_CLOEXEC` is mandatory: a leaked fd across `exec(2)` (cargo
97/// subcommand, build-pipeline subprocess, initramfs compressor) would
98/// keep the lock alive in the child process after the parent's
99/// `OwnedFd::drop` runs, producing phantom holders the next acquirer
100/// would blame on the wrong pid.
101///
102/// Calls `super::fs_filter::reject_remote_fs` before the open to
103/// fail-fast on NFS / CIFS / SMB2 / CEPH / AFS / FUSE — see the
104/// module-level rationale.
105///
106/// Accepts any `AsRef<Path>` so `&str`, `&Path`, `&PathBuf`, and
107/// `String` callers all work without string-ifying round trips. LLC
108/// lockfile paths are built as `String` via `format!` and cache
109/// lockfile paths are built as `PathBuf` via `Path::join` — both
110/// pass straight through.
111pub fn try_flock<P: AsRef<Path>>(path: P, mode: FlockMode) -> Result<Option<OwnedFd>> {
112    use rustix::fs::{FlockOperation, flock};
113
114    let path = path.as_ref();
115    let fd = open_lockfile(path)?;
116    let op = match mode {
117        FlockMode::Exclusive => FlockOperation::NonBlockingLockExclusive,
118        FlockMode::Shared => FlockOperation::NonBlockingLockShared,
119    };
120    match flock(&fd, op) {
121        Ok(()) => Ok(Some(fd)),
122        Err(e) if e == rustix::io::Errno::WOULDBLOCK => Ok(None),
123        Err(e) => anyhow::bail!("flock {}: {e}", path.display()),
124    }
125}
126
127/// Blocking variant of [`try_flock`]. Opens the lockfile (creating
128/// it if absent), then issues a blocking `flock(2)` that parks the
129/// caller in the kernel until the lock is available. Use after
130/// [`try_flock`] returns `None` to wait for a live peer to finish.
131pub fn block_flock<P: AsRef<Path>>(path: P, mode: FlockMode) -> Result<OwnedFd> {
132    use rustix::fs::{FlockOperation, flock};
133
134    let path = path.as_ref();
135    let fd = open_lockfile(path)?;
136    let op = match mode {
137        FlockMode::Exclusive => FlockOperation::LockExclusive,
138        FlockMode::Shared => FlockOperation::LockShared,
139    };
140    flock(&fd, op).map_err(|e| anyhow::anyhow!("flock (blocking) {}: {e}", path.display()))?;
141    Ok(fd)
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Regression guard: the fd returned by [`try_flock`] must carry
    /// `FD_CLOEXEC`.
    ///
    /// Earlier revisions missed the flag, which leaked flock-held
    /// fds through `execve` into child processes — the child
    /// inherited the lock, broke assumptions about RAII scope, and
    /// showed up as phantom holders in `/proc/locks` long after the
    /// parent had dropped its guard.
    ///
    /// The bit is read back directly with `fcntl(F_GETFD)` instead
    /// of being inferred from a side-effect (forking an exec'd child
    /// is noisier and harder to match). If a future refactor
    /// re-opens the fd without re-applying `O_CLOEXEC`, this test
    /// fails the build.
    #[test]
    fn try_flock_sets_cloexec_on_returned_fd() {
        use std::os::fd::AsRawFd;
        use tempfile::TempDir;

        let scratch = TempDir::new().expect("tempdir");
        let lock_path = scratch.path().join("cloexec.lock");

        let outcome = try_flock(&lock_path, FlockMode::Exclusive)
            .expect("try_flock must succeed on fresh tempfile");
        let fd = outcome.expect("EX must acquire on clean pool");
        let raw = fd.as_raw_fd();

        // SAFETY: `raw` is borrowed from `fd`, a live `OwnedFd` that
        // outlives this call. `F_GETFD` only reads the descriptor
        // flags — no concurrent modification, no ownership move.
        let flags = unsafe { libc::fcntl(raw, libc::F_GETFD) };

        assert!(
            flags >= 0,
            "fcntl F_GETFD must succeed on our fd; got errno={}",
            std::io::Error::last_os_error(),
        );
        assert_eq!(
            flags & libc::FD_CLOEXEC,
            libc::FD_CLOEXEC,
            "FD_CLOEXEC must be set on try_flock-returned fd; \
             flags=0x{flags:x}. Without it, exec'd children \
             inherit the flock and produce phantom holders.",
        );
        // `fd` is declared after `scratch`, so at scope end it is
        // closed (releasing the lock) before the temp dir is removed.
    }
}