imago/qcow2/mappings.rs

//! Get and establish cluster mappings.

use super::*;
use tokio::sync::RwLockWriteGuard;

impl<S: Storage, F: WrappedFormat<S>> Qcow2<S, F> {
    /// Get the given range’s mapping information.
    ///
    /// Underlying implementation for [`Qcow2::get_mapping()`].
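    ///
    /// A minimal usage sketch (hypothetical values; the kind of mapping returned depends on
    /// the image contents):
    ///
    /// ```ignore
    /// let (mapping, length) = qcow2.do_get_mapping(GuestOffset(0), 65536).await?;
    /// if let ShallowMapping::Raw { offset, .. } = mapping {
    ///     // The first `length` bytes are stored contiguously at `offset` in the data file
    /// }
    /// ```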
    pub(super) async fn do_get_mapping(
        &self,
        offset: GuestOffset,
        max_length: u64,
    ) -> io::Result<(ShallowMapping<'_, S>, u64)> {
        let Some(l2_table) = self.get_l2(offset, false).await? else {
            let cb = self.header.cluster_bits();
            let len = cmp::min(offset.remaining_in_l2_table(cb), max_length);
            let mapping = if let Some(backing) = self.backing.as_ref() {
                ShallowMapping::Indirect {
                    layer: backing.inner(),
                    offset: offset.0,
                    writable: false,
                }
            } else {
                ShallowMapping::Zero { explicit: false }
            };
            return Ok((mapping, len));
        };

        self.do_get_mapping_with_l2(offset, max_length, &l2_table)
            .await
    }

    /// Get the given range’s mapping information, when we already have the L2 table.
    pub(super) async fn do_get_mapping_with_l2(
        &self,
        offset: GuestOffset,
        max_length: u64,
        l2_table: &L2Table,
    ) -> io::Result<(ShallowMapping<'_, S>, u64)> {
        let cb = self.header.cluster_bits();

        // Get mapping at `offset`
        let mut current_guest_cluster = offset.cluster(cb);
        let first_mapping = l2_table.get_mapping(current_guest_cluster)?;
        let return_mapping = match first_mapping {
            L2Mapping::DataFile {
                host_cluster,
                copied,
            } => ShallowMapping::Raw {
                storage: self.storage(),
                offset: host_cluster.relative_offset(offset, cb).0,
                writable: copied,
            },

            L2Mapping::Backing { backing_offset } => {
                if let Some(backing) = self.backing.as_ref() {
                    ShallowMapping::Indirect {
                        layer: backing.inner(),
                        offset: backing_offset + offset.in_cluster_offset(cb) as u64,
                        writable: false,
                    }
                } else {
                    ShallowMapping::Zero { explicit: false }
                }
            }

            L2Mapping::Zero {
                host_cluster: _,
                copied: _,
            } => ShallowMapping::Zero { explicit: true },

            L2Mapping::Compressed {
                host_offset: _,
                length: _,
            } => ShallowMapping::Special { offset: offset.0 },
        };

        // Find out how long this consecutive mapping is, but only within the current L2 table
        let mut consecutive_length = offset.remaining_in_cluster(cb);
        let mut preceding_mapping = first_mapping;
        while consecutive_length < max_length {
            let Some(next) = current_guest_cluster.next_in_l2(cb) else {
                break;
            };
            current_guest_cluster = next;

            let mapping = l2_table.get_mapping(current_guest_cluster)?;
            if !mapping.is_consecutive(&preceding_mapping, cb) {
                break;
            }

            preceding_mapping = mapping;
            consecutive_length += self.header.cluster_size() as u64;
        }

        consecutive_length = cmp::min(consecutive_length, max_length);
        Ok((return_mapping, consecutive_length))
    }

    /// Make the given range be mapped by data clusters.
    ///
    /// Underlying implementation for [`Qcow2::ensure_data_mapping()`].
    ///
    /// `skip_cow` is equivalent to [`Qcow2::ensure_data_mapping()`]’s `overwrite`: It indicates
    /// the area is to be overwritten, so COW can be skipped on it.  `skip_cow_to_eof` indicates
    /// that the mapping will go until the EOF, so no COW needs to be performed at all past
    /// `offset`.  Only use this for preallocation on resize or create.
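    ///
    /// A minimal sketch of the common write path (hypothetical values):
    ///
    /// ```ignore
    /// // Map 128 KiB at guest offset 0; the caller will overwrite it anyway, so skip COW
    /// let (storage, host_offset, length) = qcow2
    ///     .do_ensure_data_mapping(GuestOffset(0), 131072, true, false)
    ///     .await?;
    /// // `length` may be less than requested if no continuous mapping was possible;
    /// // callers then need to repeat the call for the remainder
    /// ```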
    pub(super) async fn do_ensure_data_mapping(
        &self,
        offset: GuestOffset,
        length: u64,
        skip_cow: bool,
        skip_cow_to_eof: bool,
    ) -> io::Result<(&S, u64, u64)> {
        let l2_table = self.ensure_l2(offset).await?;

        // Fast path for when everything is already allocated, which should be the common case
        // at runtime.
        // It really must be everything, though: We know our caller will eventually want the
        // whole range allocated, so if anything is missing, go down the allocation path, which
        // tries to allocate clusters without fragmentation (if possible), allowing us to return
        // as big a single mapping as possible.
        let existing = self
            .do_get_mapping_with_l2(offset, length, &l2_table)
            .await?;
        if let ShallowMapping::Raw {
            storage,
            offset,
            writable: true,
        } = existing.0
        {
            if existing.1 >= length {
                return Ok((storage, offset, existing.1));
            }
        }

        let l2_table = l2_table.lock_write().await;
        let mut leaked_allocations = Vec::<(HostCluster, ClusterCount)>::new();

        let res = self
            .ensure_data_mapping_no_cleanup(
                offset,
                length,
                skip_cow,
                skip_cow_to_eof,
                l2_table,
                &mut leaked_allocations,
            )
            .await;

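        // Free whatever the inner function has leaked, regardless of whether it succeeded;
        // only then propagate its result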
        for alloc in leaked_allocations {
            self.free_data_clusters(alloc.0, alloc.1).await;
        }
        let (host_offset, length) = res?;

        Ok((self.storage(), host_offset, length))
    }

    /// Make the given range be mapped by a fixed kind of clusters.
    ///
    /// Allows zeroing or discarding clusters.  `mapping` says which kind of mapping to create.
    ///
    /// Return the offset of the first affected cluster, and the byte length affected (may be 0).
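    ///
    /// A minimal sketch (hypothetical values):
    ///
    /// ```ignore
    /// // Turn 1 MiB at guest offset 0 into explicit zero clusters, dropping allocations
    /// let (start, length) = qcow2
    ///     .ensure_fixed_mapping(GuestOffset(0), 1 << 20, FixedMapping::ZeroDiscard)
    ///     .await?;
    /// // Only whole clusters are touched, so `length` may be less than requested
    /// ```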
    pub(super) async fn ensure_fixed_mapping(
        &self,
        offset: GuestOffset,
        length: u64,
        mapping: FixedMapping,
    ) -> io::Result<(GuestOffset, u64)> {
        match mapping {
            FixedMapping::ZeroDiscard | FixedMapping::ZeroRetainAllocation => {
                self.header.require_version(3)?;
            }
            FixedMapping::FullDiscard => (),
        }

        let cb = self.header.cluster_bits();

        // We can only touch full clusters
        let cluster_align_mask = self.header.cluster_size() as u64 - 1;
        let end = (offset + length).0;
        let aligned_end = if end == self.header.size() {
            // Up-align operations until the image end to a full cluster (the remainder of this
            // cluster is not used for anything)
            (end + cluster_align_mask) & !cluster_align_mask
        } else {
            // Otherwise, align down (only full clusters)
            end & !cluster_align_mask
        };
        let aligned_offset = (offset + cluster_align_mask).0 & !cluster_align_mask;
        let aligned_length = aligned_end.saturating_sub(aligned_offset);
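        // Example: With 64 KiB clusters (mask 0xffff), offset 0x1_2345 and length 0x3_0000
        // yield end 0x4_2345, hence aligned_offset 0x2_0000 and aligned_end 0x4_0000 (two
        // full clusters), unless the end coincides with the image end and is rounded up
        // instead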

        // Both values are cluster-aligned now, so these conversions cannot fail
        let first_cluster = GuestOffset(aligned_offset).checked_cluster(cb).unwrap();
        let cluster_count = ClusterCount::checked_from_byte_size(aligned_length, cb).unwrap();

        if cluster_count.0 == 0 {
            return Ok((GuestOffset(aligned_offset), 0));
        }

        let l2_table = self.ensure_l2(first_cluster.offset(cb)).await?;
        let l2_table = l2_table.lock_write().await;
        let mut leaked_allocations = Vec::<(HostCluster, ClusterCount)>::new();

        let res = self
            .ensure_fixed_mapping_no_cleanup(
                first_cluster,
                cluster_count,
                mapping,
                l2_table,
                &mut leaked_allocations,
            )
            .await;

        for alloc in leaked_allocations {
            self.free_data_clusters(alloc.0, alloc.1).await;
        }

        let count = res?;

        let affected_offset = first_cluster.offset(cb);
        let affected_length = count.byte_size(cb);

        let head = affected_offset - offset;
        // We may overshoot for the last cluster in the image, limit the returned value to the
        // range given by the caller
        let affected_length = cmp::min(affected_length, length.saturating_sub(head));

        Ok((affected_offset, affected_length))
    }

    /// Get the L2 table referenced by the given L1 table index, if any.
    ///
    /// `writable` says whether the L2 table should be modifiable.
    ///
    /// If the L1 table index does not point to any L2 table, or the existing entry is not
    /// modifiable but `writable` is true, return `Ok(None)`.
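    ///
    /// A minimal sketch of a read-only lookup (hypothetical values):
    ///
    /// ```ignore
    /// if let Some(l2_table) = qcow2.get_l2(offset, false).await? {
    ///     let mapping = l2_table.get_mapping(offset.cluster(cb))?;
    /// }
    /// ```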
    pub(super) async fn get_l2(
        &self,
        offset: GuestOffset,
        writable: bool,
    ) -> io::Result<Option<Arc<L2Table>>> {
        let cb = self.header.cluster_bits();

        let l1_entry = self.l1_table.read().await.get(offset.l1_index(cb));
        if let Some(l2_offset) = l1_entry.l2_offset() {
            if writable && !l1_entry.is_copied() {
                return Ok(None);
            }
            let l2_cluster = l2_offset.checked_cluster(cb).ok_or_else(|| {
                invalid_data(format!(
                    "Unaligned L2 table for {offset:?}; L1 entry: {l1_entry:?}"
                ))
            })?;

            self.l2_cache.get_or_insert(l2_cluster).await.map(Some)
        } else {
            Ok(None)
        }
    }

    /// Get an L2 table for the given L1 table index.
    ///
    /// If there already is an L2 table at that index, return it.  Otherwise, create one and hook
    /// it up.
    pub(super) async fn ensure_l2(&self, offset: GuestOffset) -> io::Result<Arc<L2Table>> {
        let cb = self.header.cluster_bits();

        if let Some(l2) = self.get_l2(offset, true).await? {
            return Ok(l2);
        }

        self.need_writable()?;

        let mut l1_locked = self.l1_table.write().await;
        let l1_index = offset.l1_index(cb);
        if !l1_locked.in_bounds(l1_index) {
            l1_locked = self.grow_l1_table(l1_locked, l1_index).await?;
        }

        let l1_entry = l1_locked.get(l1_index);
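        // If the existing L2 table is not marked COPIED, it may be shared (its refcount may
        // exceed 1), so it must be copied into a freshly allocated cluster before we may
        // modify it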
        let mut l2_table = if let Some(l2_offset) = l1_entry.l2_offset() {
            let l2_cluster = l2_offset.checked_cluster(cb).ok_or_else(|| {
                invalid_data(format!(
                    "Unaligned L2 table for {offset:?}; L1 entry: {l1_entry:?}"
                ))
            })?;

            let l2 = self.l2_cache.get_or_insert(l2_cluster).await?;
            if l1_entry.is_copied() {
                return Ok(l2);
            }

            L2Table::clone(&l2)
        } else {
            L2Table::new_cleared(&self.header)
        };

        let l2_cluster = self.allocate_meta_cluster().await?;
        l2_table.set_cluster(l2_cluster);
        l2_table.write(self.metadata.as_ref()).await?;

        l1_locked.enter_l2_table(l1_index, &l2_table)?;
        l1_locked
            .write_entry(self.metadata.as_ref(), l1_index)
            .await?;

        // Free old L2 table, if any
        if let Some(l2_offset) = l1_entry.l2_offset() {
            self.free_meta_clusters(l2_offset.cluster(cb), ClusterCount(1))
                .await;
        }

        let l2_table = Arc::new(l2_table);
        self.l2_cache
            .insert(l2_cluster, Arc::clone(&l2_table))
            .await?;
        Ok(l2_table)
    }

    /// Create a new L1 table covering at least `at_least_index`.
    ///
    /// The new table has the required size and contains all entries of the previous L1 table.
    pub(super) async fn grow_l1_table<'a>(
        &self,
        mut l1_locked: RwLockWriteGuard<'a, L1Table>,
        at_least_index: usize,
    ) -> io::Result<RwLockWriteGuard<'a, L1Table>> {
        let mut new_l1 = l1_locked.clone_and_grow(at_least_index, &self.header)?;

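        // Write the new table in full before flipping the header pointer to it, and free the
        // old table only afterwards; if this sequence is interrupted, either the old or the
        // new L1 table remains intact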
        let l1_start = self.allocate_meta_clusters(new_l1.cluster_count()).await?;

        new_l1.set_cluster(l1_start);
        new_l1.write(self.metadata.as_ref()).await?;

        self.header.set_l1_table(&new_l1)?;
        self.header
            .write_l1_table_pointer(self.metadata.as_ref())
            .await?;

        if let Some(old_l1_cluster) = l1_locked.get_cluster() {
            let old_l1_size = l1_locked.cluster_count();
            l1_locked.unset_cluster();
            self.free_meta_clusters(old_l1_cluster, old_l1_size).await;
        }

        *l1_locked = new_l1;

        Ok(l1_locked)
    }

    /// Inner implementation for [`Qcow2::do_ensure_data_mapping()`].
    ///
    /// Does not do any clean-up: The L2 table will probably be modified, but not written to disk.
    /// Any existing allocations that have been removed from it (and are thus leaked) are entered
    /// into `leaked_allocations`, but not freed.
    ///
    /// The caller must do both (write the L2 table and free the leaked allocations), in case of
    /// success as well as in case of error.
    async fn ensure_data_mapping_no_cleanup(
        &self,
        offset: GuestOffset,
        full_length: u64,
        skip_cow: bool,
        skip_cow_to_eof: bool,
        mut l2_table: L2TableWriteGuard<'_>,
        leaked_allocations: &mut Vec<(HostCluster, ClusterCount)>,
    ) -> io::Result<(u64, u64)> {
        let cb = self.header.cluster_bits();

        let partial_skip_cow = skip_cow.then(|| {
            let start = offset.in_cluster_offset(cb);
            let end = if skip_cow_to_eof {
                1 << cb
            } else {
                cmp::min(start as u64 + full_length, 1 << cb) as usize
            };
            start..end
        });
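        // Example: A 512-byte write at in-cluster offset 1024 (64 KiB clusters, no
        // `skip_cow_to_eof`) yields `partial_skip_cow = Some(1024..1536)`: only that range
        // may skip COW; the rest of the first cluster must still be copied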

        let mut current_guest_cluster = offset.cluster(cb);

        // Without a mandatory host offset, this should never return `Ok(None)`
        let host_cluster = self
            .cow_cluster(
                current_guest_cluster,
                None,
                partial_skip_cow,
                &mut l2_table,
                leaked_allocations,
            )
            .await?
            .ok_or_else(|| io::Error::other("Internal allocation error"))?;

        let host_offset_start = host_cluster.relative_offset(offset, cb);
        let mut allocated_length = offset.remaining_in_cluster(cb);
        let mut current_host_cluster = host_cluster;

        while allocated_length < full_length {
            let Some(next) = current_guest_cluster.next_in_l2(cb) else {
                break;
            };
            current_guest_cluster = next;

            let chunk_length = cmp::min(full_length - allocated_length, 1 << cb) as usize;
            let partial_skip_cow = match (skip_cow, skip_cow_to_eof) {
                (false, _) => None,
                (true, false) => Some(0..chunk_length),
                (true, true) => Some(0..(1 << cb)),
            };

            let next_host_cluster = current_host_cluster + ClusterCount(1);
            let host_cluster = self
                .cow_cluster(
                    current_guest_cluster,
                    Some(next_host_cluster),
                    partial_skip_cow,
                    &mut l2_table,
                    leaked_allocations,
                )
                .await?;

            let Some(host_cluster) = host_cluster else {
                // Cannot extend the continuous mapping range any further; stop here
                break;
            };
            assert_eq!(host_cluster, next_host_cluster);
            current_host_cluster = host_cluster;

            allocated_length += chunk_length as u64;
        }

        Ok((host_offset_start.0, allocated_length))
    }

    /// Inner implementation for [`Qcow2::ensure_fixed_mapping()`].
    ///
    /// Does not do any clean-up: The L2 table will probably be modified, but not written to disk.
    /// Any existing allocations that have been removed from it (and are thus leaked) are entered
    /// into `leaked_allocations`, but not freed.
    ///
    /// The caller must do both (write the L2 table and free the leaked allocations), in case of
    /// success as well as in case of error.
    ///
    /// Allows zeroing or discarding clusters.  `mapping` says which kind of mapping to create.
    async fn ensure_fixed_mapping_no_cleanup(
        &self,
        first_cluster: GuestCluster,
        count: ClusterCount,
        mapping: FixedMapping,
        mut l2_table: L2TableWriteGuard<'_>,
        leaked_allocations: &mut Vec<(HostCluster, ClusterCount)>,
    ) -> io::Result<ClusterCount> {
        // Zero clusters only exist since qcow2 v3; plain discards are fine on v2, too.  (The
        // caller checks this as well, but be defensive in case of other callers.)
        match mapping {
            FixedMapping::ZeroDiscard | FixedMapping::ZeroRetainAllocation => {
                self.header.require_version(3)?;
            }
            FixedMapping::FullDiscard => (),
        }

        let cb = self.header.cluster_bits();
        let mut cluster = first_cluster;
        let end_cluster = first_cluster + count;
        let mut done = ClusterCount(0);

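        // Stay within the current L2 table: if the range extends beyond it, stop there and
        // just report how many clusters were actually processed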
        while cluster < end_cluster {
            let l2i = cluster.l2_index(cb);
            let leaked = match mapping {
                FixedMapping::ZeroDiscard => l2_table.zero_cluster(l2i, false)?,
                FixedMapping::ZeroRetainAllocation => l2_table.zero_cluster(l2i, true)?,
                FixedMapping::FullDiscard => l2_table.discard_cluster(l2i),
            };
            if let Some(leaked) = leaked {
                leaked_allocations.push(leaked);
            }

            done += ClusterCount(1);
            let Some(next) = cluster.next_in_l2(cb) else {
                break;
            };
            cluster = next;
        }

        Ok(done)
    }
}

/// Possible mapping types for [`Qcow2::ensure_fixed_mapping()`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum FixedMapping {
    /// Make all clusters zero clusters, discarding previous allocations.
    ///
    /// Note this breaks existing mapping information, which must be communicated somehow, for
    /// example by requiring mutable access to the `Qcow2` object.
    ZeroDiscard,

    /// Make all clusters zero clusters, retaining previous allocations.
    ///
    /// Retains previous data cluster allocations in the form of preallocated zero clusters, but
    /// cannot retain previously existing compressed cluster allocations.  Because those mappings
    /// are not returned through the mapping interface, however, concurrent accesses should be
    /// reasonably safe.
    ///
    /// (Writing to zeroed data cluster mappings will just have no effect.)
    ZeroRetainAllocation,

    /// Fully remove clusters’ mappings, allowing backing data to appear.
    ///
    /// Note this breaks existing mapping information, which must be communicated somehow, for
    /// example by requiring mutable access to the `Qcow2` object.
    FullDiscard,
}