imago/
file.rs

1//! Use a plain file or host block device as storage.
2
3#[cfg(unix)]
4use crate::io_buffers::IoBuffer;
5use crate::io_buffers::{IoVector, IoVectorMut};
6#[cfg(unix)]
7use crate::misc_helpers::while_eintr;
8use crate::misc_helpers::ResultErrorContext;
9use crate::storage::drivers::CommonStorageHelper;
10use crate::storage::ext::write_full_zeroes;
11use crate::storage::PreallocateMode;
12use crate::{Storage, StorageCreateOptions, StorageOpenOptions};
13use cfg_if::cfg_if;
14use std::fmt::{self, Display, Formatter};
15use std::io::{self, Write};
16#[cfg(any(target_os = "linux", target_os = "macos"))]
17use std::os::fd::AsRawFd;
18#[cfg(unix)]
19use std::os::unix::fs::FileTypeExt;
20#[cfg(all(unix, not(target_os = "macos")))]
21use std::os::unix::fs::OpenOptionsExt;
22#[cfg(windows)]
23use std::os::windows::fs::{FileExt, OpenOptionsExt};
24#[cfg(windows)]
25use std::os::windows::io::AsRawHandle;
26use std::path::{Path, PathBuf};
27use std::sync::atomic::{AtomicU64, Ordering};
28use std::sync::RwLock;
29use std::{cmp, fs};
30#[cfg(unix)]
31use tracing::{debug, warn};
32#[cfg(windows)]
33use windows_sys::Win32::System::Ioctl::{FILE_ZERO_DATA_INFORMATION, FSCTL_SET_ZERO_DATA};
34#[cfg(windows)]
35use windows_sys::Win32::System::IO::DeviceIoControl;
36
/// Use a plain file or host block device as a storage object.
///
/// Wraps a `std::fs::File` together with the alignment requirements determined for it when the
/// object was created, and a cached copy of its length.
#[derive(Debug)]
pub struct File {
    /// The file.
    ///
    /// Wrapped in an `RwLock`; operations that change the file length (`resize()`, truncating
    /// discards) take the write lock.
    file: RwLock<fs::File>,

    /// For debug purposes, and to resolve relative filenames.
    ///
    /// `None` if the object was created from an existing `std::fs::File`.
    filename: Option<PathBuf>,

    /// Minimal I/O alignment for requests.
    req_align: usize,

    /// Minimal memory buffer alignment.
    mem_align: usize,

    /// Minimum required alignment for zero writes.
    zero_align: usize,

    /// Minimum required alignment for effective discards.
    discard_align: usize,

    /// Cached file length.
    ///
    /// Updated by writes and resizes done through this object.
    ///
    /// Third parties changing the length concurrently is pretty certain to break things anyway.
    size: AtomicU64,

    /// Storage helper.
    common_storage_helper: CommonStorageHelper,

    /// macOS-only: Use fsync() instead of F_FULLFSYNC on `sync()` method.
    #[cfg(target_os = "macos")]
    relaxed_sync: bool,
}
70
71impl TryFrom<fs::File> for File {
72    type Error = io::Error;
73
74    /// Use the given existing `std::fs::File`.
75    ///
76    /// Convert the given existing `std::fs::File` object into an imago storage object.
77    ///
78    /// When using this, the resulting object will not know its own filename.  That makes it
79    /// impossible to auto-resolve relative paths to it, e.g. qcow2 backing file names.
80    fn try_from(file: fs::File) -> io::Result<Self> {
81        Self::new(
82            file,
83            None,
84            false,
85            #[cfg(target_os = "macos")]
86            false,
87        )
88    }
89}
90
91impl Storage for File {
92    async fn open(opts: StorageOpenOptions) -> io::Result<Self> {
93        Self::do_open_sync(opts, fs::OpenOptions::new())
94    }
95
96    #[cfg(feature = "sync-wrappers")]
97    fn open_sync(opts: StorageOpenOptions) -> io::Result<Self> {
98        Self::do_open_sync(opts, fs::OpenOptions::new())
99    }
100
101    async fn create_open(opts: StorageCreateOptions) -> io::Result<Self> {
102        // Always allow writing for new files
103        let opts = opts.modify_open_opts(|o| o.write(true));
104        let size = opts.size;
105        let prealloc_mode = opts.prealloc_mode;
106
107        let mut file_opts = fs::OpenOptions::new();
108        if opts.overwrite {
109            file_opts.create(true).truncate(true);
110        } else {
111            file_opts.create_new(true);
112        };
113
114        let file = Self::do_open_sync(opts.get_open_options(), file_opts)?;
115        if size > 0 {
116            file.resize(size, prealloc_mode)
117                .await
118                .err_context(|| "Resizing file")?;
119        }
120
121        Ok(file)
122    }
123
    /// Minimal memory buffer alignment, as stored when this object was created.
    fn mem_align(&self) -> usize {
        self.mem_align
    }
127
    /// Minimal I/O request alignment, as stored when this object was created.
    fn req_align(&self) -> usize {
        self.req_align
    }
131
    /// Minimum required alignment for zero writes, as stored when this object was created.
    fn zero_align(&self) -> usize {
        self.zero_align
    }
135
    /// Minimum required alignment for effective discards, as stored when this object was
    /// created.
    fn discard_align(&self) -> usize {
        self.discard_align
    }
139
140    fn size(&self) -> io::Result<u64> {
141        Ok(self.size.load(Ordering::Relaxed))
142    }
143
144    fn resolve_relative_path<P: AsRef<Path>>(&self, relative: P) -> io::Result<PathBuf> {
145        let relative = relative.as_ref();
146
147        if relative.is_absolute() {
148            return Ok(relative.to_path_buf());
149        }
150
151        let filename = self
152            .filename
153            .as_ref()
154            .ok_or_else(|| io::Error::other("No filename set for base image"))?;
155
156        let dirname = filename
157            .parent()
158            .ok_or_else(|| io::Error::other("Invalid base image filename set"))?;
159
160        Ok(dirname.join(relative))
161    }
162
163    fn get_filename(&self) -> Option<PathBuf> {
164        self.filename.as_ref().cloned()
165    }
166
167    #[cfg(unix)]
168    async unsafe fn pure_readv(
169        &self,
170        mut bufv: IoVectorMut<'_>,
171        mut offset: u64,
172    ) -> io::Result<()> {
173        while !bufv.is_empty() {
174            let iovec = unsafe { bufv.as_iovec() };
175            let preadv_offset = offset
176                .try_into()
177                .map_err(|_| io::Error::other("Read offset overflow"))?;
178
179            let len = while_eintr(|| unsafe {
180                libc::preadv(
181                    self.file.read().unwrap().as_raw_fd(),
182                    iovec.as_ptr(),
183                    iovec.len() as libc::c_int,
184                    preadv_offset,
185                )
186            })? as u64;
187
188            if len == 0 {
189                // End of file
190                bufv.fill(0);
191                break;
192            }
193
194            bufv = bufv.split_tail_at(len);
195            offset = offset
196                .checked_add(len)
197                .ok_or_else(|| io::Error::other("Read offset overflow"))?;
198        }
199
200        Ok(())
201    }
202
203    #[cfg(windows)]
204    async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, mut offset: u64) -> io::Result<()> {
205        for mut buffer in bufv.into_inner() {
206            let mut buffer: &mut [u8] = &mut buffer;
207            while !buffer.is_empty() {
208                let len = if offset >= self.size.load(Ordering::Relaxed) {
209                    buffer.fill(0);
210                    buffer.len()
211                } else {
212                    self.file.write().unwrap().seek_read(buffer, offset)?
213                };
214                offset = offset
215                    .checked_add(len as u64)
216                    .ok_or_else(|| io::Error::other("Read offset overflow"))?;
217                buffer = buffer.split_at_mut(len).1;
218            }
219        }
220        Ok(())
221    }
222
223    #[cfg(unix)]
224    async unsafe fn pure_writev(&self, mut bufv: IoVector<'_>, mut offset: u64) -> io::Result<()> {
225        while !bufv.is_empty() {
226            let iovec = unsafe { bufv.as_iovec() };
227            let pwritev_offset = offset
228                .try_into()
229                .map_err(|_| io::Error::other("Write offset overflow"))?;
230
231            let len = while_eintr(|| unsafe {
232                libc::pwritev(
233                    self.file.read().unwrap().as_raw_fd(),
234                    iovec.as_ptr(),
235                    iovec.len() as libc::c_int,
236                    pwritev_offset,
237                )
238            })? as u64;
239
240            if len == 0 {
241                // Should not happen, i.e. is an error
242                return Err(io::ErrorKind::WriteZero.into());
243            }
244
245            bufv = bufv.split_tail_at(len);
246            offset = offset
247                .checked_add(len)
248                .ok_or_else(|| io::Error::other("Write offset overflow"))?;
249            self.size.fetch_max(offset, Ordering::Relaxed);
250        }
251
252        Ok(())
253    }
254
255    #[cfg(windows)]
256    async unsafe fn pure_writev(&self, bufv: IoVector<'_>, mut offset: u64) -> io::Result<()> {
257        for buffer in bufv.into_inner() {
258            let mut buffer: &[u8] = &buffer;
259            while !buffer.is_empty() {
260                let len = self.file.write().unwrap().seek_write(buffer, offset)?;
261                offset = offset
262                    .checked_add(len as u64)
263                    .ok_or_else(|| io::Error::other("Write offset overflow"))?;
264                self.size.fetch_max(offset, Ordering::Relaxed);
265                buffer = buffer.split_at(len).1;
266            }
267        }
268        Ok(())
269    }
270
271    #[cfg(any(target_os = "linux", windows, target_os = "macos"))]
272    async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> {
273        // All of our discard methods also ensure the range reads back as zeroes
274        unsafe { self.pure_discard(offset, length) }.await
275    }
276
277    #[cfg(target_os = "linux")]
278    async unsafe fn pure_write_allocated_zeroes(&self, offset: u64, length: u64) -> io::Result<()> {
279        let offset: libc::off_t = offset
280            .try_into()
281            .map_err(|e| io::Error::other(format!("Discard/write-zeroes offset error: {e}")))?;
282        let length: libc::off_t = length
283            .try_into()
284            .map_err(|e| io::Error::other(format!("Discard/write-zeroes length error: {e}")))?;
285
286        let file = self.file.read().unwrap();
287        // Safe: File descriptor is valid, and the rest are simple integer parameters.
288        while_eintr(|| unsafe {
289            libc::fallocate(file.as_raw_fd(), libc::FALLOC_FL_ZERO_RANGE, offset, length)
290        })?;
291
292        Ok(())
293    }
294
295    // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the
296    // current expectation is that discarded ranges will read back as zeroes.  If the new method
297    // does not guarantee that, you will need to modify `pure_write_zeroes()`.
298    #[cfg(target_os = "linux")]
299    async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> {
300        if self.try_discard_by_truncate(offset, length)? {
301            return Ok(());
302        }
303
304        let offset: libc::off_t = offset
305            .try_into()
306            .map_err(|e| io::Error::other(format!("Discard/write-zeroes offset error: {e}")))?;
307        let length: libc::off_t = length
308            .try_into()
309            .map_err(|e| io::Error::other(format!("Discard/write-zeroes length error: {e}")))?;
310
311        let file = self.file.read().unwrap();
312        // Safe: File descriptor is valid, and the rest are simple integer parameters.
313        while_eintr(|| unsafe {
314            libc::fallocate(
315                file.as_raw_fd(),
316                libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
317                offset,
318                length,
319            )
320        })?;
321
322        Ok(())
323    }
324
325    // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the
326    // current expectation is that discarded ranges will read back as zeroes.  If the new method
327    // does not guarantee that, you will need to modify `pure_write_zeroes()`.
328    #[cfg(windows)]
329    async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> {
330        if self.try_discard_by_truncate(offset, length)? {
331            return Ok(());
332        }
333
334        let offset: i64 = offset
335            .try_into()
336            .map_err(|e| io::Error::other(format!("Discard/write-zeroes offset error: {e}")))?;
337        let length: i64 = length
338            .try_into()
339            .map_err(|e| io::Error::other(format!("Discard/write-zeroes length error: {e}")))?;
340
341        let end = offset.saturating_add(length).saturating_add(1);
342        let params = FILE_ZERO_DATA_INFORMATION {
343            FileOffset: offset,
344            BeyondFinalZero: end,
345        };
346        let mut _returned = 0;
347        let file = self.file.read().unwrap();
348        // Safe: File handle is valid, mandatory pointers (input, returned length) are passed and
349        // valid, the parameter type matches the call, and the input size matches the object
350        // passed.
351        let ret = unsafe {
352            DeviceIoControl(
353                file.as_raw_handle(),
354                FSCTL_SET_ZERO_DATA,
355                (&params as *const FILE_ZERO_DATA_INFORMATION).cast::<std::ffi::c_void>(),
356                size_of_val(&params) as u32,
357                std::ptr::null_mut(),
358                0,
359                &mut _returned,
360                std::ptr::null_mut(),
361            )
362        };
363        if ret == 0 {
364            return Err(io::Error::last_os_error());
365        }
366
367        Ok(())
368    }
369
370    // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the
371    // current expectation is that discarded ranges will read back as zeroes.  If the new method
372    // does not guarantee that, you will need to modify `pure_write_zeroes()`.
373    #[cfg(target_os = "macos")]
374    async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> {
375        if self.try_discard_by_truncate(offset, length)? {
376            return Ok(());
377        }
378
379        let offset: libc::off_t = offset
380            .try_into()
381            .map_err(|e| io::Error::other(format!("Discard/write-zeroes offset error: {e}")))?;
382        let length: libc::off_t = length
383            .try_into()
384            .map_err(|e| io::Error::other(format!("Discard/write-zeroes length error: {e}")))?;
385
386        let params = libc::fpunchhole_t {
387            fp_flags: 0,
388            reserved: 0,
389            fp_offset: offset,
390            fp_length: length,
391        };
392        let file = self.file.read().unwrap();
393        // Safe: FD is valid, passed pointer is valid and its type matches the call.
394        while_eintr(|| unsafe { libc::fcntl(file.as_raw_fd(), libc::F_PUNCHHOLE, &params) })?;
395
396        Ok(())
397    }
398
399    async fn flush(&self) -> io::Result<()> {
400        self.file.write().unwrap().flush()
401    }
402
403    async fn sync(&self) -> io::Result<()> {
404        #[cfg(target_os = "macos")]
405        if self.relaxed_sync {
406            // Safe: File descriptor is valid and there aren't any other arguments.
407            while_eintr(|| unsafe { libc::fsync(self.file.write().unwrap().as_raw_fd()) })?;
408            return Ok(());
409        }
410        self.file.write().unwrap().sync_all()
411    }
412
413    async unsafe fn invalidate_cache(&self) -> io::Result<()> {
414        // TODO: Figure out what to do.  Generally, `std::fs::File` does not have internal buffers,
415        // so we don’t need to invalidate anything; we could close and reopen, but that would still
416        // flush, and is difficult to do in a platform-independent way (/proc/self/fd would allow
417        // this on Linux).  Using e.g. the filename is not safe.
418        // Right now, it’s best not to do anything.
419        Ok(())
420    }
421
    /// Return a reference to this object's `CommonStorageHelper`.
    fn get_storage_helper(&self) -> &CommonStorageHelper {
        &self.common_storage_helper
    }
425
426    async fn resize(&self, new_size: u64, prealloc_mode: PreallocateMode) -> io::Result<()> {
427        let file = self.file.write().unwrap();
428        let current_size = self.size.load(Ordering::Relaxed);
429
430        match new_size.cmp(&current_size) {
431            std::cmp::Ordering::Equal => return Ok(()),
432            std::cmp::Ordering::Less => {
433                file.set_len(new_size)?;
434                self.size.fetch_min(new_size, Ordering::Relaxed);
435                return Ok(());
436            }
437            std::cmp::Ordering::Greater => (), // handled below
438        }
439
440        match prealloc_mode {
441            PreallocateMode::None | PreallocateMode::Zero => file.set_len(new_size)?,
442            PreallocateMode::Allocate => {
443                #[cfg(not(unix))]
444                return Err(io::ErrorKind::Unsupported.into());
445
446                #[cfg(all(unix, not(target_os = "macos")))]
447                {
448                    let ofs = current_size.try_into().map_err(io::Error::other)?;
449                    let len = (new_size - current_size)
450                        .try_into()
451                        .map_err(io::Error::other)?;
452                    while_eintr(|| unsafe { libc::fallocate(file.as_raw_fd(), 0, ofs, len) })?;
453                }
454
455                #[cfg(target_os = "macos")]
456                {
457                    // Best-effort.  PEOFPOSMODE allocates from the “physical” EOF, wherever that
458                    // may be, but the only alternative would be VOLPOSMODE, which nobody knows the
459                    // meaning of.  Also doesn’t change the file length, we need to truncate
460                    // afterwards still.
461                    let mut params = libc::fstore_t {
462                        fst_flags: libc::F_ALLOCATEALL,
463                        fst_posmode: libc::F_PEOFPOSMODE,
464                        fst_offset: 0,
465                        fst_length: (new_size - current_size)
466                            .try_into()
467                            .map_err(io::Error::other)?,
468                        fst_bytesalloc: 0, // output
469                    };
470                    while_eintr(|| unsafe {
471                        libc::fcntl(file.as_raw_fd(), libc::F_PREALLOCATE, &mut params)
472                    })?;
473
474                    file.set_len(new_size)?;
475                }
476            }
477            PreallocateMode::WriteData => {
478                // FIXME: Keeping the lock would be nice, but resizing concurrently with I/O is
479                // pretty risky anyway.
480                drop(file);
481                write_full_zeroes(self, current_size, new_size - current_size).await?;
482            }
483        }
484
485        self.size.fetch_max(new_size, Ordering::Relaxed);
486        Ok(())
487    }
488}
489
490impl File {
491    /// Central internal function to create a `File` object.
492    ///
493    /// `direct_io` should be `true` if direct I/O was requested, and can be `false` if that status
494    /// is unknown.
495    fn new(
496        mut file: fs::File,
497        filename: Option<PathBuf>,
498        direct_io: bool,
499        #[cfg(target_os = "macos")] relaxed_sync: bool,
500    ) -> io::Result<Self> {
501        let size = get_file_size(&file).err_context(|| "Failed to determine file size")?;
502
503        #[cfg(all(unix, not(target_os = "macos")))]
504        let direct_io = direct_io || {
505            // Safe: No argument, returns result.
506            let res = unsafe { libc::fcntl(file.as_raw_fd(), libc::F_GETFL) };
507            res > 0 && (res & libc::O_DIRECT) != 0
508        };
509
510        let (min_req_align, min_mem_align) = if direct_io {
511            #[cfg(unix)]
512            {
513                (
514                    Self::get_min_dio_req_align(&file),
515                    Self::get_min_dio_mem_align(&file),
516                )
517            }
518
519            #[cfg(not(unix))]
520            {
521                (1, 1)
522            } // probe it then
523        } else {
524            (1, 1)
525        };
526
527        let (req_align, mem_align, zero_align, discard_align) =
528            Self::probe_alignments(&mut file, min_req_align, min_mem_align);
529        assert!(req_align.is_power_of_two());
530        assert!(mem_align.is_power_of_two());
531
532        Ok(File {
533            file: RwLock::new(file),
534            filename,
535            req_align,
536            mem_align,
537            zero_align,
538            discard_align,
539            size: size.into(),
540            common_storage_helper: Default::default(),
541            #[cfg(target_os = "macos")]
542            relaxed_sync,
543        })
544    }
545
    /// Probe minimal request, memory, zero and discard alignments.
    ///
    /// Start at `min_req_align` and `min_mem_align`.
    ///
    /// Returns `(req_align, mem_align, zero_align, discard_align)`.
    ///
    /// Request and memory alignment are probed by issuing test accesses via `probe_access()`
    /// with increasing alignment until one is no longer reported as misaligned.  If probing
    /// fails, a warning is logged and a presumably safe fallback value is used instead.
    #[cfg(unix)]
    fn probe_alignments(
        file: &mut fs::File,
        min_req_align: usize,
        min_mem_align: usize,
    ) -> (usize, usize, usize, usize) {
        // Page sizes that are not a power of two are rounded up (to at least 4096)
        let mut page_size = page_size::get();
        if !page_size.is_power_of_two() {
            let assume = page_size.checked_next_power_of_two().unwrap_or(4096);
            let assume = cmp::max(4096, assume);
            warn!("Reported page size of {page_size} is not a power of two, assuming {assume}");
            page_size = assume;
        }

        // Zero/discard alignment: 1 everywhere but on macOS, where the block size reported by
        // `fstatfs()` is used (or the page size if that call fails)
        #[cfg(not(target_os = "macos"))]
        let (zero_align, discard_align) = (1, 1);
        #[cfg(target_os = "macos")]
        let (zero_align, discard_align) = {
            let mut statfs: libc::statfs = unsafe { std::mem::zeroed() };
            // Safe: FD is valid, passed pointer is valid and its type matches the call.
            match while_eintr(|| unsafe { libc::fstatfs(file.as_raw_fd(), &mut statfs) }) {
                Ok(_) => (statfs.f_bsize as usize, statfs.f_bsize as usize),
                Err(_) => (page_size, page_size),
            }
        };

        // Passed to `probe_access()`, which clears it once it finds writing is not possible
        let mut writable = true;

        // Upper limits for probing
        let max_req_align = 65536;
        let max_mem_align = cmp::max(page_size, max_req_align);

        // Minimum fallbacks in case something goes wrong.
        let safe_req_align = 4096;
        let safe_mem_align = cmp::max(page_size, safe_req_align);

        // Buffer for the test accesses (presumably `max_mem_align` bytes in size, aligned to
        // `max_mem_align` -- verify against `IoBuffer::new()`)
        let mut test_buf = match IoBuffer::new(max_mem_align, max_mem_align) {
            Ok(buf) => buf,
            Err(err) => {
                warn!(
                    "Failed to allocate memory to probe request alignment ({err}), \
                    falling back to {safe_req_align}/{safe_mem_align}"
                );
                return (safe_req_align, safe_mem_align, zero_align, discard_align);
            }
        };

        // Probe the request alignment: Access `req_align` bytes at file offset `req_align`,
        // using the start of the test buffer
        let mut req_align: usize = min_req_align;
        let result = loop {
            assert!(req_align <= max_mem_align);
            match Self::probe_access(
                file,
                test_buf.as_mut_range(0..req_align).into_slice(),
                req_align.try_into().unwrap(),
                &mut writable,
            ) {
                Ok(true) => break Ok(req_align),
                Ok(false) => {
                    // Reported as misaligned
                    if req_align >= max_req_align {
                        break Err(io::Error::other(format!(
                            "Maximum I/O alignment ({max_req_align}) exceeded"
                        )));
                    }
                    // No reason to probe anything between 1 and 512
                    if req_align == min_req_align {
                        req_align = cmp::max(min_req_align << 1, 512);
                    } else {
                        req_align <<= 1;
                    }
                }
                Err(err) => break Err(err),
            }
        };

        let req_align = match result {
            Ok(align) => {
                debug!("Probed request alignment: {align}");
                align
            }
            Err(err) => {
                // Failed to determine request alignment, use a presumably safe value
                let align = cmp::max(req_align, safe_req_align);
                warn!(
                    "Failed to probe request alignment ({err}; {}), falling back to {align} bytes",
                    err.kind(),
                );
                align
            }
        };

        // Probe the memory alignment: Access file offset 0, using the last `mem_align` bytes
        // of the test buffer
        let mut mem_align: usize = min_mem_align;
        let result = loop {
            assert!(mem_align <= max_mem_align);
            let range = (max_mem_align - mem_align)..max_mem_align;
            match Self::probe_access(
                file,
                test_buf.as_mut_range(range).into_slice(),
                0,
                &mut writable,
            ) {
                Ok(true) => break Ok(mem_align),
                Ok(false) => {
                    // Not aligned
                    if mem_align >= max_mem_align {
                        break Err(io::Error::other(format!(
                            "Maximum memory alignment ({max_mem_align}) exceeded"
                        )));
                    }
                    // No reason to probe anything between 1 and the page size (or 4096 at least)
                    if mem_align == min_mem_align {
                        mem_align = cmp::max(min_mem_align << 1, cmp::min(page_size, 4096));
                    } else {
                        mem_align <<= 1;
                    }
                }
                Err(err) => break Err(err),
            }
        };

        let mem_align = match result {
            Ok(align) => {
                debug!("Probed memory alignment: {align}");
                align
            }
            Err(err) => {
                // Failed to determine memory alignment, use a presumably safe value
                let align = cmp::max(mem_align, safe_mem_align);
                warn!(
                    "Failed to probe memory alignment ({err}; {}), falling back to {align} bytes",
                    err.kind(),
                );
                align
            }
        };

        (req_align, mem_align, zero_align, discard_align)
    }
685
686    /// Do an alignment-probing I/O access.
687    ///
688    /// Return `Ok(true)` if everything was OK, and `Ok(false)` if the request was reported to be
689    /// misaligned.
690    ///
691    /// `may_write` is a boolean that controls whether this is allowed to write (the same data read
692    /// before) to improve reliability.  Is automatically set to `false` if writing is found to not
693    /// be possible.
694    #[cfg(unix)]
695    fn probe_access(
696        file: &mut fs::File,
697        slice: &mut [u8],
698        offset: libc::off_t,
699        may_write: &mut bool,
700    ) -> io::Result<bool> {
701        // Use `libc::pread` so we get well-defined errors.
702        // Safe: Passing the slice as the buffer it is.
703        let ret = while_eintr(|| unsafe {
704            libc::pread(
705                file.as_raw_fd(),
706                slice.as_mut_ptr() as *mut libc::c_void,
707                slice.len(),
708                offset,
709            )
710        });
711
712        if let Err(err) = ret {
713            if err.raw_os_error() == Some(libc::EINVAL) {
714                return Ok(false);
715            } else {
716                return Err(err);
717            }
718        }
719
720        if !*may_write {
721            return Ok(true);
722        }
723
724        // Safe: Passing the slice as the buffer it is.
725        let ret = while_eintr(|| unsafe {
726            libc::pwrite(
727                file.as_raw_fd(),
728                slice.as_ptr() as *const libc::c_void,
729                slice.len(),
730                offset,
731            )
732        });
733
734        if let Err(err) = ret {
735            if err.raw_os_error() == Some(libc::EINVAL) {
736                Ok(false)
737            } else if err.raw_os_error() == Some(libc::EBADF) {
738                *may_write = false;
739                Ok(true)
740            } else {
741                Err(err)
742            }
743        } else {
744            Ok(true)
745        }
746    }
747
748    /// Get system-reported minimum request alignment for direct I/O.
749    #[cfg(unix)]
750    fn get_min_dio_req_align(file: &fs::File) -> usize {
751        #[cfg(target_os = "linux")]
752        {
753            let mut alignment = 0;
754            let res = unsafe { ioctl::blksszget(file.as_raw_fd(), &mut alignment) };
755            if res.is_ok() && alignment > 0 {
756                let alignment = alignment as usize;
757                if alignment.is_power_of_two() {
758                    return alignment;
759                }
760            }
761        }
762
763        #[cfg(target_os = "macos")]
764        {
765            let mut alignment = 0;
766            let res = unsafe { ioctl::dkiocgetblocksize(file.as_raw_fd(), &mut alignment) };
767            if res.is_ok() && alignment.is_power_of_two() {
768                return alignment as usize;
769            }
770        }
771
772        #[cfg(target_os = "freebsd")]
773        {
774            let mut alignment = 0;
775            let res = unsafe { ioctl::diocgsectorsize(file.as_raw_fd(), &mut alignment) };
776            if res.is_ok() && alignment.is_power_of_two() {
777                return alignment as usize;
778            }
779        }
780
781        // Then we’ll probe.
782        1
783    }
784
    /// Get system-reported minimum memory alignment for direct I/O.
    ///
    /// Currently always returns 1, which leaves finding the actual alignment to probing.
    #[cfg(unix)]
    fn get_min_dio_mem_align(_file: &fs::File) -> usize {
        // I don’t think there’s a reliable way to get this.
        1
    }
791
792    /// Probe minimal request and memory alignments.
793    ///
794    /// Start at `min_req_align` and `min_mem_align`.
795    #[cfg(windows)]
796    fn probe_alignments(
797        _file: &mut fs::File,
798        min_req_align: usize,
799        min_mem_align: usize,
800    ) -> (usize, usize, usize, usize) {
801        // TODO: Need to find out how Windows indicates unaligned I/O
802        (
803            cmp::max(min_req_align, 4096),
804            cmp::max(min_mem_align, 4096),
805            1,
806            1,
807        )
808    }
809
810    /// Implementation for anything that opens a file.
811    fn do_open_sync(opts: StorageOpenOptions, base_fs_opts: fs::OpenOptions) -> io::Result<Self> {
812        let Some(filename) = opts.filename else {
813            return Err(io::Error::new(
814                io::ErrorKind::InvalidInput,
815                "Filename required",
816            ));
817        };
818
819        let mut file_opts = base_fs_opts;
820        file_opts.read(true).write(opts.writable);
821        #[cfg(not(target_os = "macos"))]
822        if opts.direct {
823            file_opts.custom_flags(
824                #[cfg(unix)]
825                libc::O_DIRECT,
826                #[cfg(windows)]
827                windows_sys::Win32::Storage::FileSystem::FILE_FLAG_NO_BUFFERING,
828            );
829        }
830
831        let filename_owned = filename.to_owned();
832        let file = file_opts.open(filename)?;
833
834        #[cfg(target_os = "macos")]
835        if opts.direct {
836            // Safe: We check the return value.
837            while_eintr(|| unsafe { libc::fcntl(file.as_raw_fd(), libc::F_NOCACHE, 1) })
838                .err_context(|| "Failed to disable host cache")?;
839        }
840
841        Self::new(
842            file,
843            Some(filename_owned),
844            opts.direct,
845            #[cfg(target_os = "macos")]
846            opts.relaxed_sync,
847        )
848    }
849
850    /// Attempt to discard range by truncating the file.
851    ///
852    /// If the given range is at the end of the file, discard it by simply truncating the file.
853    /// Return `true` on success.
854    ///
855    /// If the range is not at the end of the file, i.e. another method of discarding is needed,
856    /// return `false`.
857    fn try_discard_by_truncate(&self, offset: u64, length: u64) -> io::Result<bool> {
858        // Prevent modifications to the file length
859        #[allow(clippy::readonly_write_lock)]
860        let file = self.file.write().unwrap();
861
862        let size = self.size.load(Ordering::Relaxed);
863        if offset >= size {
864            // Nothing to do
865            return Ok(true);
866        }
867
868        // If `offset + length` overflows, we can just assume it ends at `size`.  (Anything past
869        // `size is irrelevant anyway.)
870        let end = offset.checked_add(length).unwrap_or(size);
871        if end < size {
872            return Ok(false);
873        }
874
875        file.set_len(offset)?;
876        Ok(true)
877    }
878}
879
880impl Display for File {
881    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
882        if let Some(filename) = self.filename.as_ref() {
883            write!(f, "file:{filename:?}")
884        } else {
885            write!(f, "file:<unknown path>")
886        }
887    }
888}
889
890/// Get total size in bytes of the given file.
891///
892/// If the file is a block or character device, use get_device_size() instead of
893/// reading len from metadata which doesn't work on some platforms like macOS.
894fn get_file_size(file: &fs::File) -> io::Result<u64> {
895    #[allow(clippy::bind_instead_of_map)]
896    file.metadata().and_then(|m| {
897        #[cfg(unix)]
898        if m.file_type().is_block_device() || m.file_type().is_char_device() {
899            return get_device_size(file);
900        }
901        Ok(m.len())
902    })
903}
904
905cfg_if! {
906    if #[cfg(target_os = "linux")] {
907        /// Get total size in bytes of the given block or character device.
908        fn get_device_size(file: &fs::File) -> io::Result<u64> {
909            let mut size = 0;
910            unsafe { ioctl::blkgetsize64(file.as_raw_fd(), &mut size) }?;
911            Ok(size)
912        }
913    } else if #[cfg(target_os = "macos")] {
914        /// Get total size in bytes of the given block or character device.
915        fn get_device_size(file: &fs::File) -> io::Result<u64> {
916            let mut block_size = 0;
917            unsafe { ioctl::dkiocgetblocksize(file.as_raw_fd(), &mut block_size) }?;
918            let mut block_count = 0;
919            unsafe { ioctl::dkiocgetblockcount(file.as_raw_fd(), &mut block_count) }?;
920            Ok(u64::from(block_size) * block_count)
921        }
922    } else if #[cfg(target_os = "freebsd")] {
923        /// Get total size in bytes of the given block or character device.
924        fn get_device_size(file: &fs::File) -> io::Result<u64> {
925            let mut size = 0;
926            unsafe { ioctl::diocgmediasize(file.as_raw_fd(), &mut size) }?;
927            Ok(size as u64)
928        }
929    } else if #[cfg(unix)] {
930        /// Get total size in bytes of the given block or character device - unsupported platform.
931        fn get_device_size(_file: &fs::File) -> io::Result<u64> {
932            Err(io::ErrorKind::Unsupported.into())
933        }
934    }
935}
936
937/// This module generates type-safe wrappers for chosen ioctls
938mod ioctl {
939    #[cfg(unix)]
940    use nix::ioctl_read;
941    #[cfg(target_os = "linux")]
942    use nix::ioctl_read_bad;
943
944    // https://github.com/torvalds/linux/blob/master/include/uapi/linux/fs.h#L200
945
946    #[cfg(target_os = "linux")]
947    ioctl_read!(blkgetsize64, 0x12, 114, u64);
948
949    #[cfg(target_os = "linux")]
950    ioctl_read_bad!(blksszget, libc::BLKSSZGET, libc::c_int);
951
952    // https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/disk.h#L198-L199
953
954    #[cfg(target_os = "macos")]
955    ioctl_read!(dkiocgetblocksize, 'd', 24, u32);
956
957    #[cfg(target_os = "macos")]
958    ioctl_read!(dkiocgetblockcount, 'd', 25, u64);
959
960    // https://web.mit.edu/freebsd/head/sys/sys/disk.h
961
962    #[cfg(target_os = "freebsd")]
963    ioctl_read!(diocgsectorsize, 'd', 128, libc::c_uint);
964
965    #[cfg(target_os = "freebsd")]
966    ioctl_read!(diocgmediasize, 'd', 129, libc::off_t);
967}