imago/
file.rs

1//! Use a plain file or host block device as storage.
2
3#[cfg(unix)]
4use crate::io_buffers::IoBuffer;
5use crate::io_buffers::{IoVector, IoVectorMut};
6#[cfg(unix)]
7use crate::misc_helpers::while_eintr;
8use crate::misc_helpers::ResultErrorContext;
9use crate::storage::drivers::CommonStorageHelper;
10use crate::storage::ext::write_full_zeroes;
11use crate::storage::PreallocateMode;
12use crate::{Storage, StorageCreateOptions, StorageOpenOptions};
13use cfg_if::cfg_if;
14use std::fmt::{self, Display, Formatter};
15use std::io::{self, Write};
16#[cfg(any(target_os = "linux", target_os = "macos"))]
17use std::os::fd::AsRawFd;
18#[cfg(unix)]
19use std::os::unix::fs::FileTypeExt;
20#[cfg(all(unix, not(target_os = "macos")))]
21use std::os::unix::fs::OpenOptionsExt;
22#[cfg(windows)]
23use std::os::windows::fs::{FileExt, OpenOptionsExt};
24#[cfg(windows)]
25use std::os::windows::io::AsRawHandle;
26use std::path::{Path, PathBuf};
27use std::sync::atomic::{AtomicU64, Ordering};
28use std::sync::RwLock;
29use std::{cmp, fs};
30#[cfg(unix)]
31use tracing::{debug, warn};
32#[cfg(windows)]
33use windows_sys::Win32::System::Ioctl::{FILE_ZERO_DATA_INFORMATION, FSCTL_SET_ZERO_DATA};
34#[cfg(windows)]
35use windows_sys::Win32::System::IO::DeviceIoControl;
36
37/// Use a plain file or host block device as a storage object.
38#[derive(Debug)]
39pub struct File {
40    /// The file.
41    file: RwLock<fs::File>,
42
43    /// For debug purposes, and to resolve relative filenames.
44    filename: Option<PathBuf>,
45
46    /// Minimal I/O alignment for requests.
47    req_align: usize,
48
49    /// Minimal memory buffer alignment.
50    mem_align: usize,
51
52    /// Cached file length.
53    ///
54    /// Third parties changing the length concurrently is pretty certain to break things anyway.
55    size: AtomicU64,
56
57    /// Storage helper.
58    common_storage_helper: CommonStorageHelper,
59}
60
61impl TryFrom<fs::File> for File {
62    type Error = io::Error;
63
64    /// Use the given existing `std::fs::File`.
65    ///
66    /// Convert the given existing `std::fs::File` object into an imago storage object.
67    ///
68    /// When using this, the resulting object will not know its own filename.  That makes it
69    /// impossible to auto-resolve relative paths to it, e.g. qcow2 backing file names.
70    fn try_from(file: fs::File) -> io::Result<Self> {
71        Self::new(file, None, false)
72    }
73}
74
75impl Storage for File {
76    async fn open(opts: StorageOpenOptions) -> io::Result<Self> {
77        Self::do_open_sync(opts, fs::OpenOptions::new())
78    }
79
80    #[cfg(feature = "sync-wrappers")]
81    fn open_sync(opts: StorageOpenOptions) -> io::Result<Self> {
82        Self::do_open_sync(opts, fs::OpenOptions::new())
83    }
84
85    async fn create_open(opts: StorageCreateOptions) -> io::Result<Self> {
86        // Always allow writing for new files
87        let opts = opts.modify_open_opts(|o| o.write(true));
88        let size = opts.size;
89        let prealloc_mode = opts.prealloc_mode;
90
91        let mut file_opts = fs::OpenOptions::new();
92        if opts.overwrite {
93            file_opts.create(true).truncate(true);
94        } else {
95            file_opts.create_new(true);
96        };
97
98        let file = Self::do_open_sync(opts.get_open_options(), file_opts)?;
99        if size > 0 {
100            file.resize(size, prealloc_mode)
101                .await
102                .err_context(|| "Resizing file")?;
103        }
104
105        Ok(file)
106    }
107
108    fn mem_align(&self) -> usize {
109        self.mem_align
110    }
111
112    fn req_align(&self) -> usize {
113        self.req_align
114    }
115
116    fn size(&self) -> io::Result<u64> {
117        Ok(self.size.load(Ordering::Relaxed))
118    }
119
120    fn resolve_relative_path<P: AsRef<Path>>(&self, relative: P) -> io::Result<PathBuf> {
121        let relative = relative.as_ref();
122
123        if relative.is_absolute() {
124            return Ok(relative.to_path_buf());
125        }
126
127        let filename = self
128            .filename
129            .as_ref()
130            .ok_or_else(|| io::Error::other("No filename set for base image"))?;
131
132        let dirname = filename
133            .parent()
134            .ok_or_else(|| io::Error::other("Invalid base image filename set"))?;
135
136        Ok(dirname.join(relative))
137    }
138
139    fn get_filename(&self) -> Option<PathBuf> {
140        self.filename.as_ref().cloned()
141    }
142
143    #[cfg(unix)]
144    async unsafe fn pure_readv(
145        &self,
146        mut bufv: IoVectorMut<'_>,
147        mut offset: u64,
148    ) -> io::Result<()> {
149        while !bufv.is_empty() {
150            let iovec = unsafe { bufv.as_iovec() };
151            let preadv_offset = offset
152                .try_into()
153                .map_err(|_| io::Error::other("Read offset overflow"))?;
154
155            let len = while_eintr(|| unsafe {
156                libc::preadv(
157                    self.file.read().unwrap().as_raw_fd(),
158                    iovec.as_ptr(),
159                    iovec.len() as libc::c_int,
160                    preadv_offset,
161                )
162            })? as u64;
163
164            if len == 0 {
165                // End of file
166                bufv.fill(0);
167                break;
168            }
169
170            bufv = bufv.split_tail_at(len);
171            offset = offset
172                .checked_add(len)
173                .ok_or_else(|| io::Error::other("Read offset overflow"))?;
174        }
175
176        Ok(())
177    }
178
179    #[cfg(windows)]
180    async unsafe fn pure_readv(&self, bufv: IoVectorMut<'_>, mut offset: u64) -> io::Result<()> {
181        for mut buffer in bufv.into_inner() {
182            let mut buffer: &mut [u8] = &mut buffer;
183            while !buffer.is_empty() {
184                let len = if offset >= self.size.load(Ordering::Relaxed) {
185                    buffer.fill(0);
186                    buffer.len()
187                } else {
188                    self.file.write().unwrap().seek_read(buffer, offset)?
189                };
190                offset = offset
191                    .checked_add(len as u64)
192                    .ok_or_else(|| io::Error::other("Read offset overflow"))?;
193                buffer = buffer.split_at_mut(len).1;
194            }
195        }
196        Ok(())
197    }
198
199    #[cfg(unix)]
200    async unsafe fn pure_writev(&self, mut bufv: IoVector<'_>, mut offset: u64) -> io::Result<()> {
201        while !bufv.is_empty() {
202            let iovec = unsafe { bufv.as_iovec() };
203            let pwritev_offset = offset
204                .try_into()
205                .map_err(|_| io::Error::other("Write offset overflow"))?;
206
207            let len = while_eintr(|| unsafe {
208                libc::pwritev(
209                    self.file.read().unwrap().as_raw_fd(),
210                    iovec.as_ptr(),
211                    iovec.len() as libc::c_int,
212                    pwritev_offset,
213                )
214            })? as u64;
215
216            if len == 0 {
217                // Should not happen, i.e. is an error
218                return Err(io::ErrorKind::WriteZero.into());
219            }
220
221            bufv = bufv.split_tail_at(len);
222            offset = offset
223                .checked_add(len)
224                .ok_or_else(|| io::Error::other("Write offset overflow"))?;
225            self.size.fetch_max(offset, Ordering::Relaxed);
226        }
227
228        Ok(())
229    }
230
231    #[cfg(windows)]
232    async unsafe fn pure_writev(&self, bufv: IoVector<'_>, mut offset: u64) -> io::Result<()> {
233        for buffer in bufv.into_inner() {
234            let mut buffer: &[u8] = &buffer;
235            while !buffer.is_empty() {
236                let len = self.file.write().unwrap().seek_write(buffer, offset)?;
237                offset = offset
238                    .checked_add(len as u64)
239                    .ok_or_else(|| io::Error::other("Write offset overflow"))?;
240                self.size.fetch_max(offset, Ordering::Relaxed);
241                buffer = buffer.split_at(len).1;
242            }
243        }
244        Ok(())
245    }
246
247    #[cfg(any(target_os = "linux", windows, target_os = "macos"))]
248    async unsafe fn pure_write_zeroes(&self, offset: u64, length: u64) -> io::Result<()> {
249        // All of our discard methods also ensure the range reads back as zeroes
250        unsafe { self.pure_discard(offset, length) }.await
251    }
252
253    // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the
254    // current expectation is that discarded ranges will read back as zeroes.  If the new method
255    // does not guarantee that, you will need to modify `pure_write_zeroes()`.
256    #[cfg(target_os = "linux")]
257    async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> {
258        if self.try_discard_by_truncate(offset, length)? {
259            return Ok(());
260        }
261
262        // If offset or length are too big, just skip discarding.
263        let Ok(offset) = libc::off_t::try_from(offset) else {
264            return Ok(());
265        };
266        let Ok(length) = libc::off_t::try_from(length) else {
267            return Ok(());
268        };
269
270        let file = self.file.read().unwrap();
271        // Safe: File descriptor is valid, and the rest are simple integer parameters.
272        while_eintr(|| unsafe {
273            libc::fallocate(
274                file.as_raw_fd(),
275                libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
276                offset,
277                length,
278            )
279        })?;
280
281        Ok(())
282    }
283
284    // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the
285    // current expectation is that discarded ranges will read back as zeroes.  If the new method
286    // does not guarantee that, you will need to modify `pure_write_zeroes()`.
287    #[cfg(windows)]
288    async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> {
289        if self.try_discard_by_truncate(offset, length)? {
290            return Ok(());
291        }
292
293        // If offset or length are too big, just skip discarding.
294        let Ok(offset) = i64::try_from(offset) else {
295            return Ok(());
296        };
297        let Ok(length) = i64::try_from(length) else {
298            return Ok(());
299        };
300
301        let end = offset.saturating_add(length).saturating_add(1);
302        let params = FILE_ZERO_DATA_INFORMATION {
303            FileOffset: offset,
304            BeyondFinalZero: end,
305        };
306        let mut _returned = 0;
307        let file = self.file.read().unwrap();
308        // Safe: File handle is valid, mandatory pointers (input, returned length) are passed and
309        // valid, the parameter type matches the call, and the input size matches the object
310        // passed.
311        let ret = unsafe {
312            DeviceIoControl(
313                file.as_raw_handle(),
314                FSCTL_SET_ZERO_DATA,
315                (&params as *const FILE_ZERO_DATA_INFORMATION).cast::<std::ffi::c_void>(),
316                size_of_val(&params) as u32,
317                std::ptr::null_mut(),
318                0,
319                &mut _returned,
320                std::ptr::null_mut(),
321            )
322        };
323        if ret == 0 {
324            return Err(io::Error::last_os_error());
325        }
326
327        Ok(())
328    }
329
330    // Beware when adding new discard methods: This is called by `pure_write_zeroes()`, so the
331    // current expectation is that discarded ranges will read back as zeroes.  If the new method
332    // does not guarantee that, you will need to modify `pure_write_zeroes()`.
333    #[cfg(target_os = "macos")]
334    async unsafe fn pure_discard(&self, offset: u64, length: u64) -> io::Result<()> {
335        if self.try_discard_by_truncate(offset, length)? {
336            return Ok(());
337        }
338
339        // If offset or length are too big, just skip discarding.
340        let Ok(offset) = libc::off_t::try_from(offset) else {
341            return Ok(());
342        };
343        let Ok(length) = libc::off_t::try_from(length) else {
344            return Ok(());
345        };
346
347        let params = libc::fpunchhole_t {
348            fp_flags: 0,
349            reserved: 0,
350            fp_offset: offset,
351            fp_length: length,
352        };
353        let file = self.file.read().unwrap();
354        // Safe: FD is valid, passed pointer is valid and its type matches the call.
355        while_eintr(|| unsafe { libc::fcntl(file.as_raw_fd(), libc::F_PUNCHHOLE, &params) })?;
356
357        Ok(())
358    }
359
360    async fn flush(&self) -> io::Result<()> {
361        self.file.write().unwrap().flush()
362    }
363
364    async fn sync(&self) -> io::Result<()> {
365        self.file.write().unwrap().sync_all()
366    }
367
368    async unsafe fn invalidate_cache(&self) -> io::Result<()> {
369        // TODO: Figure out what to do.  Generally, `std::fs::File` does not have internal buffers,
370        // so we don’t need to invalidate anything; we could close and reopen, but that would still
371        // flush, and is difficult to do in a platform-independent way (/proc/self/fd would allow
372        // this on Linux).  Using e.g. the filename is not safe.
373        // Right now, it’s best not to do anything.
374        Ok(())
375    }
376
377    fn get_storage_helper(&self) -> &CommonStorageHelper {
378        &self.common_storage_helper
379    }
380
381    async fn resize(&self, new_size: u64, prealloc_mode: PreallocateMode) -> io::Result<()> {
382        let file = self.file.write().unwrap();
383        let current_size = self.size.load(Ordering::Relaxed);
384
385        match new_size.cmp(&current_size) {
386            std::cmp::Ordering::Equal => return Ok(()),
387            std::cmp::Ordering::Less => {
388                file.set_len(new_size)?;
389                self.size.fetch_min(new_size, Ordering::Relaxed);
390                return Ok(());
391            }
392            std::cmp::Ordering::Greater => (), // handled below
393        }
394
395        match prealloc_mode {
396            PreallocateMode::None | PreallocateMode::Zero => file.set_len(new_size)?,
397            PreallocateMode::Allocate => {
398                #[cfg(not(unix))]
399                return Err(io::ErrorKind::Unsupported.into());
400
401                #[cfg(all(unix, not(target_os = "macos")))]
402                {
403                    let ofs = current_size.try_into().map_err(io::Error::other)?;
404                    let len = (new_size - current_size)
405                        .try_into()
406                        .map_err(io::Error::other)?;
407                    while_eintr(|| unsafe { libc::fallocate(file.as_raw_fd(), 0, ofs, len) })?;
408                }
409
410                #[cfg(target_os = "macos")]
411                {
412                    // Best-effort.  PEOFPOSMODE allocates from the “physical” EOF, wherever that
413                    // may be, but the only alternative would be VOLPOSMODE, which nobody knows the
414                    // meaning of.  Also doesn’t change the file length, we need to truncate
415                    // afterwards still.
416                    let mut params = libc::fstore_t {
417                        fst_flags: libc::F_ALLOCATEALL,
418                        fst_posmode: libc::F_PEOFPOSMODE,
419                        fst_offset: 0,
420                        fst_length: (new_size - current_size)
421                            .try_into()
422                            .map_err(io::Error::other)?,
423                        fst_bytesalloc: 0, // output
424                    };
425                    while_eintr(|| unsafe {
426                        libc::fcntl(file.as_raw_fd(), libc::F_PREALLOCATE, &mut params)
427                    })?;
428
429                    file.set_len(new_size)?;
430                }
431            }
432            PreallocateMode::WriteData => {
433                // FIXME: Keeping the lock would be nice, but resizing concurrently with I/O is
434                // pretty risky anyway.
435                drop(file);
436                write_full_zeroes(self, current_size, new_size - current_size).await?;
437            }
438        }
439
440        self.size.fetch_max(new_size, Ordering::Relaxed);
441        Ok(())
442    }
443}
444
445impl File {
446    /// Central internal function to create a `File` object.
447    ///
448    /// `direct_io` should be `true` if direct I/O was requested, and can be `false` if that status
449    /// is unknown.
450    fn new(mut file: fs::File, filename: Option<PathBuf>, direct_io: bool) -> io::Result<Self> {
451        let size = get_file_size(&file).err_context(|| "Failed to determine file size")?;
452
453        #[cfg(all(unix, not(target_os = "macos")))]
454        let direct_io = direct_io || {
455            // Safe: No argument, returns result.
456            let res = unsafe { libc::fcntl(file.as_raw_fd(), libc::F_GETFL) };
457            res > 0 && (res & libc::O_DIRECT) != 0
458        };
459
460        let (min_req_align, min_mem_align) = if direct_io {
461            #[cfg(unix)]
462            {
463                (
464                    Self::get_min_dio_req_align(&file),
465                    Self::get_min_dio_mem_align(&file),
466                )
467            }
468
469            #[cfg(not(unix))]
470            {
471                (1, 1)
472            } // probe it then
473        } else {
474            (1, 1)
475        };
476
477        let (req_align, mem_align) =
478            Self::probe_alignments(&mut file, min_req_align, min_mem_align);
479        assert!(req_align.is_power_of_two());
480        assert!(mem_align.is_power_of_two());
481
482        Ok(File {
483            file: RwLock::new(file),
484            filename,
485            req_align,
486            mem_align,
487            size: size.into(),
488            common_storage_helper: Default::default(),
489        })
490    }
491
492    /// Probe minimal request and memory alignments.
493    ///
494    /// Start at `min_req_align` and `min_mem_align`.
495    #[cfg(unix)]
496    fn probe_alignments(
497        file: &mut fs::File,
498        min_req_align: usize,
499        min_mem_align: usize,
500    ) -> (usize, usize) {
501        let mut page_size = page_size::get();
502        if !page_size.is_power_of_two() {
503            let assume = page_size.checked_next_power_of_two().unwrap_or(4096);
504            let assume = cmp::max(4096, assume);
505            warn!("Reported page size of {page_size} is not a power of two, assuming {assume}");
506            page_size = assume;
507        }
508
509        let mut writable = true;
510
511        let max_req_align = 65536;
512        let max_mem_align = cmp::max(page_size, max_req_align);
513
514        // Minimum fallbacks in case something goes wrong.
515        let safe_req_align = 4096;
516        let safe_mem_align = cmp::max(page_size, safe_req_align);
517
518        let mut test_buf = match IoBuffer::new(max_mem_align, max_mem_align) {
519            Ok(buf) => buf,
520            Err(err) => {
521                warn!(
522                    "Failed to allocate memory to probe request alignment ({err}), \
523                    falling back to {safe_req_align}/{safe_mem_align}"
524                );
525                return (safe_req_align, safe_mem_align);
526            }
527        };
528
529        let mut req_align: usize = min_req_align;
530        let result = loop {
531            assert!(req_align <= max_mem_align);
532            match Self::probe_access(
533                file,
534                test_buf.as_mut_range(0..req_align).into_slice(),
535                req_align.try_into().unwrap(),
536                &mut writable,
537            ) {
538                Ok(true) => break Ok(req_align),
539                Ok(false) => {
540                    if req_align >= max_req_align {
541                        break Err(io::Error::other(format!(
542                            "Maximum I/O alignment ({max_req_align}) exceeded"
543                        )));
544                    }
545                    // No reason to probe anything between 1 and 512
546                    if req_align == min_req_align {
547                        req_align = cmp::max(min_req_align << 1, 512);
548                    } else {
549                        req_align <<= 1;
550                    }
551                }
552                Err(err) => break Err(err),
553            }
554        };
555
556        let req_align = match result {
557            Ok(align) => {
558                debug!("Probed request alignment: {align}");
559                align
560            }
561            Err(err) => {
562                // Failed to determine request alignment, use a presumably safe value
563                let align = cmp::max(req_align, safe_req_align);
564                warn!(
565                    "Failed to probe request alignment ({err}; {}), falling back to {align} bytes",
566                    err.kind(),
567                );
568                align
569            }
570        };
571
572        let mut mem_align: usize = min_mem_align;
573        let result = loop {
574            assert!(mem_align <= max_mem_align);
575            let range = (max_mem_align - mem_align)..max_mem_align;
576            match Self::probe_access(
577                file,
578                test_buf.as_mut_range(range).into_slice(),
579                0,
580                &mut writable,
581            ) {
582                Ok(true) => break Ok(mem_align),
583                Ok(false) => {
584                    // Not aligned
585                    if mem_align >= max_mem_align {
586                        break Err(io::Error::other(format!(
587                            "Maximum memory alignment ({max_mem_align}) exceeded"
588                        )));
589                    }
590                    // No reason to probe anything between 1 and the page size (or 4096 at least)
591                    if mem_align == min_mem_align {
592                        mem_align = cmp::max(min_mem_align << 1, cmp::min(page_size, 4096));
593                    } else {
594                        mem_align <<= 1;
595                    }
596                }
597                Err(err) => break Err(err),
598            }
599        };
600
601        let mem_align = match result {
602            Ok(align) => {
603                debug!("Probed memory alignment: {align}");
604                align
605            }
606            Err(err) => {
607                // Failed to determine memory alignment, use a presumably safe value
608                let align = cmp::max(mem_align, safe_mem_align);
609                warn!(
610                    "Failed to probe memory alignment ({err}; {}), falling back to {align} bytes",
611                    err.kind(),
612                );
613                align
614            }
615        };
616
617        (req_align, mem_align)
618    }
619
620    /// Do an alignment-probing I/O access.
621    ///
622    /// Return `Ok(true)` if everything was OK, and `Ok(false)` if the request was reported to be
623    /// misaligned.
624    ///
625    /// `may_write` is a boolean that controls whether this is allowed to write (the same data read
626    /// before) to improve reliability.  Is automatically set to `false` if writing is found to not
627    /// be possible.
628    #[cfg(unix)]
629    fn probe_access(
630        file: &mut fs::File,
631        slice: &mut [u8],
632        offset: libc::off_t,
633        may_write: &mut bool,
634    ) -> io::Result<bool> {
635        // Use `libc::pread` so we get well-defined errors.
636        // Safe: Passing the slice as the buffer it is.
637        let ret = while_eintr(|| unsafe {
638            libc::pread(
639                file.as_raw_fd(),
640                slice.as_mut_ptr() as *mut libc::c_void,
641                slice.len(),
642                offset,
643            )
644        });
645
646        if let Err(err) = ret {
647            if err.raw_os_error() == Some(libc::EINVAL) {
648                return Ok(false);
649            } else {
650                return Err(err);
651            }
652        }
653
654        if !*may_write {
655            return Ok(true);
656        }
657
658        // Safe: Passing the slice as the buffer it is.
659        let ret = while_eintr(|| unsafe {
660            libc::pwrite(
661                file.as_raw_fd(),
662                slice.as_ptr() as *const libc::c_void,
663                slice.len(),
664                offset,
665            )
666        });
667
668        if let Err(err) = ret {
669            if err.raw_os_error() == Some(libc::EINVAL) {
670                Ok(false)
671            } else if err.raw_os_error() == Some(libc::EBADF) {
672                *may_write = false;
673                Ok(true)
674            } else {
675                Err(err)
676            }
677        } else {
678            Ok(true)
679        }
680    }
681
682    /// Get system-reported minimum request alignment for direct I/O.
683    #[cfg(unix)]
684    fn get_min_dio_req_align(file: &fs::File) -> usize {
685        #[cfg(target_os = "linux")]
686        {
687            let mut alignment = 0;
688            let res = unsafe { ioctl::blksszget(file.as_raw_fd(), &mut alignment) };
689            if res.is_ok() && alignment > 0 {
690                let alignment = alignment as usize;
691                if alignment.is_power_of_two() {
692                    return alignment;
693                }
694            }
695        }
696
697        #[cfg(target_os = "macos")]
698        {
699            let mut alignment = 0;
700            let res = unsafe { ioctl::dkiocgetblocksize(file.as_raw_fd(), &mut alignment) };
701            if res.is_ok() && alignment.is_power_of_two() {
702                return alignment as usize;
703            }
704        }
705
706        #[cfg(target_os = "freebsd")]
707        {
708            let mut alignment = 0;
709            let res = unsafe { ioctl::diocgsectorsize(file.as_raw_fd(), &mut alignment) };
710            if res.is_ok() && alignment.is_power_of_two() {
711                return alignment as usize;
712            }
713        }
714
715        // Then we’ll probe.
716        1
717    }
718
719    /// Get system-reported minimum memory alignment for direct I/O.
720    #[cfg(unix)]
721    fn get_min_dio_mem_align(_file: &fs::File) -> usize {
722        // I don’t think there’s a reliable way to get this.
723        1
724    }
725
726    /// Probe minimal request and memory alignments.
727    ///
728    /// Start at `min_req_align` and `min_mem_align`.
729    #[cfg(windows)]
730    fn probe_alignments(
731        _file: &mut fs::File,
732        min_req_align: usize,
733        min_mem_align: usize,
734    ) -> (usize, usize) {
735        // TODO: Need to find out how Windows indicates unaligned I/O
736        (cmp::max(min_req_align, 4096), cmp::max(min_mem_align, 4096))
737    }
738
739    /// Implementation for anything that opens a file.
740    fn do_open_sync(opts: StorageOpenOptions, base_fs_opts: fs::OpenOptions) -> io::Result<Self> {
741        let Some(filename) = opts.filename else {
742            return Err(io::Error::new(
743                io::ErrorKind::InvalidInput,
744                "Filename required",
745            ));
746        };
747
748        let mut file_opts = base_fs_opts;
749        file_opts.read(true).write(opts.writable);
750        #[cfg(not(target_os = "macos"))]
751        if opts.direct {
752            file_opts.custom_flags(
753                #[cfg(unix)]
754                libc::O_DIRECT,
755                #[cfg(windows)]
756                windows_sys::Win32::Storage::FileSystem::FILE_FLAG_NO_BUFFERING,
757            );
758        }
759
760        let filename_owned = filename.to_owned();
761        let file = file_opts.open(filename)?;
762
763        #[cfg(target_os = "macos")]
764        if opts.direct {
765            // Safe: We check the return value.
766            while_eintr(|| unsafe { libc::fcntl(file.as_raw_fd(), libc::F_NOCACHE, 1) })
767                .err_context(|| "Failed to disable host cache")?;
768        }
769
770        Self::new(file, Some(filename_owned), opts.direct)
771    }
772
773    /// Attempt to discard range by truncating the file.
774    ///
775    /// If the given range is at the end of the file, discard it by simply truncating the file.
776    /// Return `true` on success.
777    ///
778    /// If the range is not at the end of the file, i.e. another method of discarding is needed,
779    /// return `false`.
780    fn try_discard_by_truncate(&self, offset: u64, length: u64) -> io::Result<bool> {
781        // Prevent modifications to the file length
782        #[allow(clippy::readonly_write_lock)]
783        let file = self.file.write().unwrap();
784
785        let size = self.size.load(Ordering::Relaxed);
786        if offset >= size {
787            // Nothing to do
788            return Ok(true);
789        }
790
791        // If `offset + length` overflows, we can just assume it ends at `size`.  (Anything past
792        // `size is irrelevant anyway.)
793        let end = offset.checked_add(length).unwrap_or(size);
794        if end < size {
795            return Ok(false);
796        }
797
798        file.set_len(offset)?;
799        Ok(true)
800    }
801}
802
803impl Display for File {
804    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
805        if let Some(filename) = self.filename.as_ref() {
806            write!(f, "file:{filename:?}")
807        } else {
808            write!(f, "file:<unknown path>")
809        }
810    }
811}
812
813/// Get total size in bytes of the given file.
814///
815/// If the file is a block or character device, use get_device_size() instead of
816/// reading len from metadata which doesn't work on some platforms like macOS.
817fn get_file_size(file: &fs::File) -> io::Result<u64> {
818    #[allow(clippy::bind_instead_of_map)]
819    file.metadata().and_then(|m| {
820        #[cfg(unix)]
821        if m.file_type().is_block_device() || m.file_type().is_char_device() {
822            return get_device_size(file);
823        }
824        Ok(m.len())
825    })
826}
827
828cfg_if! {
829    if #[cfg(target_os = "linux")] {
830        /// Get total size in bytes of the given block or character device.
831        fn get_device_size(file: &fs::File) -> io::Result<u64> {
832            let mut size = 0;
833            unsafe { ioctl::blkgetsize64(file.as_raw_fd(), &mut size) }?;
834            Ok(size)
835        }
836    } else if #[cfg(target_os = "macos")] {
837        /// Get total size in bytes of the given block or character device.
838        fn get_device_size(file: &fs::File) -> io::Result<u64> {
839            let mut block_size = 0;
840            unsafe { ioctl::dkiocgetblocksize(file.as_raw_fd(), &mut block_size) }?;
841            let mut block_count = 0;
842            unsafe { ioctl::dkiocgetblockcount(file.as_raw_fd(), &mut block_count) }?;
843            Ok(u64::from(block_size) * block_count)
844        }
845    } else if #[cfg(target_os = "freebsd")] {
846        /// Get total size in bytes of the given block or character device.
847        fn get_device_size(file: &fs::File) -> io::Result<u64> {
848            let mut size = 0;
849            unsafe { ioctl::diocgmediasize(file.as_raw_fd(), &mut size) }?;
850            Ok(size as u64)
851        }
852    } else if #[cfg(unix)] {
853        /// Get total size in bytes of the given block or character device - unsupported platform.
854        fn get_device_size(_file: &fs::File) -> io::Result<u64> {
855            Err(io::ErrorKind::Unsupported.into())
856        }
857    }
858}
859
860/// This module generates type-safe wrappers for chosen ioctls
861mod ioctl {
862    #[cfg(unix)]
863    use nix::ioctl_read;
864    #[cfg(target_os = "linux")]
865    use nix::ioctl_read_bad;
866
867    // https://github.com/torvalds/linux/blob/master/include/uapi/linux/fs.h#L200
868
869    #[cfg(target_os = "linux")]
870    ioctl_read!(blkgetsize64, 0x12, 114, u64);
871
872    #[cfg(target_os = "linux")]
873    ioctl_read_bad!(blksszget, libc::BLKSSZGET, libc::c_int);
874
875    // https://github.com/apple-oss-distributions/xnu/blob/main/bsd/sys/disk.h#L198-L199
876
877    #[cfg(target_os = "macos")]
878    ioctl_read!(dkiocgetblocksize, 'd', 24, u32);
879
880    #[cfg(target_os = "macos")]
881    ioctl_read!(dkiocgetblockcount, 'd', 25, u64);
882
883    // https://web.mit.edu/freebsd/head/sys/sys/disk.h
884
885    #[cfg(target_os = "freebsd")]
886    ioctl_read!(diocgsectorsize, 'd', 128, libc::c_uint);
887
888    #[cfg(target_os = "freebsd")]
889    ioctl_read!(diocgmediasize, 'd', 129, libc::off_t);
890}