imago/qcow2/mappings.rs

//! Get and establish cluster mappings.

use super::*;
use tokio::sync::RwLockWriteGuard;

impl<S: Storage, F: WrappedFormat<S>> Qcow2<S, F> {
    /// Get the given range’s mapping information.
    ///
    /// Underlying implementation for [`Qcow2::get_mapping()`].
    pub(super) async fn do_get_mapping(
        &self,
        offset: GuestOffset,
        max_length: u64,
    ) -> io::Result<(ShallowMapping<'_, S>, u64)> {
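        // If no L2 table is allocated for this range yet, the whole remainder of the range it
        // would cover is unallocated: defer to the backing file if there is one, otherwise the
        // data reads as zeros.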
        let Some(l2_table) = self.get_l2(offset, false).await? else {
            let cb = self.header.cluster_bits();
            let len = cmp::min(offset.remaining_in_l2_table(cb), max_length);
            let mapping = if let Some(backing) = self.backing.as_ref() {
                ShallowMapping::Indirect {
                    layer: backing.inner(),
                    offset: offset.0,
                    writable: false,
                }
            } else {
                ShallowMapping::Zero { explicit: false }
            };
            return Ok((mapping, len));
        };

        self.do_get_mapping_with_l2(offset, max_length, &l2_table)
            .await
    }

    /// Get the given range’s mapping information, when we already have the L2 table.
    pub(super) async fn do_get_mapping_with_l2(
        &self,
        offset: GuestOffset,
        max_length: u64,
        l2_table: &L2Table,
    ) -> io::Result<(ShallowMapping<'_, S>, u64)> {
        let cb = self.header.cluster_bits();

        // Get mapping at `offset`
        let mut current_guest_cluster = offset.cluster(cb);
        let first_mapping = l2_table.get_mapping(current_guest_cluster)?;
        let return_mapping = match first_mapping {
            L2Mapping::DataFile {
                host_cluster,
                copied,
            } => ShallowMapping::Raw {
                storage: self.storage(),
                offset: host_cluster.relative_offset(offset, cb).0,
                writable: copied,
            },

            L2Mapping::Backing { backing_offset } => {
                if let Some(backing) = self.backing.as_ref() {
                    ShallowMapping::Indirect {
                        layer: backing.inner(),
                        offset: backing_offset + offset.in_cluster_offset(cb) as u64,
                        writable: false,
                    }
                } else {
                    ShallowMapping::Zero { explicit: false }
                }
            }

            L2Mapping::Zero {
                host_cluster: _,
                copied: _,
            } => ShallowMapping::Zero { explicit: true },

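            // Compressed clusters cannot be mapped to a plain byte range in the data file, so
            // they are reported as a special mapping rather than a raw storage range.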
            L2Mapping::Compressed {
                host_offset: _,
                length: _,
            } => ShallowMapping::Special { offset: offset.0 },
        };

        // Find out how long this consecutive mapping is, but only within the current L2 table
        let mut consecutive_length = offset.remaining_in_cluster(cb);
        let mut preceding_mapping = first_mapping;
        while consecutive_length < max_length {
            let Some(next) = current_guest_cluster.next_in_l2(cb) else {
                break;
            };
            current_guest_cluster = next;

            let mapping = l2_table.get_mapping(current_guest_cluster)?;
            if !mapping.is_consecutive(&preceding_mapping, cb) {
                break;
            }

            preceding_mapping = mapping;
            consecutive_length += self.header.cluster_size() as u64;
        }

        consecutive_length = cmp::min(consecutive_length, max_length);
        Ok((return_mapping, consecutive_length))
    }

    /// Make the given range be mapped by data clusters.
    ///
    /// Underlying implementation for [`Qcow2::ensure_data_mapping()`].
    pub(super) async fn do_ensure_data_mapping(
        &self,
        offset: GuestOffset,
        length: u64,
        overwrite: bool,
    ) -> io::Result<(&S, u64, u64)> {
        let l2_table = self.ensure_l2(offset).await?;

        // Fast path for when everything is already allocated, which should be the common case
        // at runtime.
        // It must really be everything, though; we know our caller will want to have everything
        // allocated eventually, so if anything is missing, go down to the allocation path so we
        // try to allocate clusters such that they are not fragmented (if possible) and we can
        // return as large a single mapping as possible.
        let existing = self
            .do_get_mapping_with_l2(offset, length, &l2_table)
            .await?;
        if let ShallowMapping::Raw {
            storage,
            offset,
            writable: true,
        } = existing.0
        {
            if existing.1 >= length {
                return Ok((storage, offset, existing.1));
            }
        }

        let l2_table = l2_table.lock_write().await;
        let mut leaked_allocations = Vec::<(HostCluster, ClusterCount)>::new();

        let res = self
            .ensure_data_mapping_no_cleanup(
                offset,
                length,
                overwrite,
                l2_table,
                &mut leaked_allocations,
            )
            .await;

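        // Free any clusters that were unmapped along the way regardless of whether the
        // allocation itself succeeded, so they are not leaked.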
        for alloc in leaked_allocations {
            self.free_data_clusters(alloc.0, alloc.1).await;
        }
        let (host_offset, length) = res?;

        Ok((self.storage(), host_offset, length))
    }

    /// Make the given range be mapped by a fixed kind of clusters.
    ///
    /// Allows zeroing or discarding clusters.  `mapping` says which kind of mapping to create.
    ///
    /// Return the offset of the first affected cluster, and the byte length affected (may be 0).
    pub(super) async fn ensure_fixed_mapping(
        &self,
        offset: GuestOffset,
        length: u64,
        mapping: FixedMapping,
    ) -> io::Result<(GuestOffset, u64)> {
        match mapping {
            FixedMapping::ZeroDiscard | FixedMapping::ZeroRetainAllocation => {
                self.header.require_version(3)?;
            }
            FixedMapping::FullDiscard => (),
        }

        let cb = self.header.cluster_bits();

        // We can only touch full clusters
        let cluster_align_mask = self.header.cluster_size() as u64 - 1;
        let end = (offset + length).0;
        let aligned_end = if end == self.header.size() {
            // Up-align operations that reach the image end to a full cluster (the remainder of
            // this cluster is not used for anything)
            (end + cluster_align_mask) & !cluster_align_mask
        } else {
            // Otherwise, align down (only full clusters)
            end & !cluster_align_mask
        };
        let aligned_offset = (offset + cluster_align_mask).0 & !cluster_align_mask;
        let aligned_length = aligned_end.saturating_sub(aligned_offset);
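        // For example, with 64 KiB clusters (cluster_align_mask == 0xffff), a request for
        // [0x1_8000, 0x4_8000) shrinks to the full clusters [0x2_0000, 0x4_0000); only when the
        // requested range ends exactly at the image end is its tail rounded up instead of down.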

        // We have aligned this, so we can unwrap
        let first_cluster = GuestOffset(aligned_offset).checked_cluster(cb).unwrap();
        let cluster_count = ClusterCount::checked_from_byte_size(aligned_length, cb).unwrap();

        if cluster_count.0 == 0 {
            return Ok((GuestOffset(aligned_offset), 0));
        }

        let l2_table = self.ensure_l2(first_cluster.offset(cb)).await?;
        let l2_table = l2_table.lock_write().await;
        let mut leaked_allocations = Vec::<(HostCluster, ClusterCount)>::new();

        let res = self
            .ensure_fixed_mapping_no_cleanup(
                first_cluster,
                cluster_count,
                mapping,
                l2_table,
                &mut leaked_allocations,
            )
            .await;

        for alloc in leaked_allocations {
            self.free_data_clusters(alloc.0, alloc.1).await;
        }

        let count = res?;

        let affected_offset = first_cluster.offset(cb);
        let affected_length = count.byte_size(cb);

        let head = affected_offset - offset;
        // We may overshoot for the last cluster in the image; limit the returned value to the
        // range given by the caller
        let affected_length = cmp::min(affected_length, length.saturating_sub(head));

        Ok((affected_offset, affected_length))
    }

    /// Get the L2 table referenced by the given offset’s L1 table entry, if any.
    ///
    /// `writable` says whether the L2 table should be modifiable.
    ///
    /// If the L1 table entry does not point to any L2 table, or the existing entry is not
    /// modifiable but `writable` is true, return `Ok(None)`.
    pub(super) async fn get_l2(
        &self,
        offset: GuestOffset,
        writable: bool,
    ) -> io::Result<Option<Arc<L2Table>>> {
        let cb = self.header.cluster_bits();

        let l1_entry = self.l1_table.read().await.get(offset.l1_index(cb));
        if let Some(l2_offset) = l1_entry.l2_offset() {
            if writable && !l1_entry.is_copied() {
                return Ok(None);
            }
            let l2_cluster = l2_offset.checked_cluster(cb).ok_or_else(|| {
                invalid_data(format!(
                    "Unaligned L2 table for {offset:?}; L1 entry: {l1_entry:?}"
                ))
            })?;

            self.l2_cache.get_or_insert(l2_cluster).await.map(Some)
        } else {
            Ok(None)
        }
    }

    /// Get an L2 table for the given L1 table index.
    ///
    /// If there already is an L2 table at that index, return it.  Otherwise, create one and hook
    /// it up.
    pub(super) async fn ensure_l2(&self, offset: GuestOffset) -> io::Result<Arc<L2Table>> {
        let cb = self.header.cluster_bits();

        if let Some(l2) = self.get_l2(offset, true).await? {
            return Ok(l2);
        }

        self.need_writable()?;

        let mut l1_locked = self.l1_table.write().await;
        let l1_index = offset.l1_index(cb);
        if !l1_locked.in_bounds(l1_index) {
            l1_locked = self.grow_l1_table(l1_locked, l1_index).await?;
        }

        let l1_entry = l1_locked.get(l1_index);
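        // If an L2 table exists but its COPIED flag is unset (its refcount is not 1), it must
        // not be modified in place: clone it so the copy can be written to a newly allocated
        // cluster.  If there is no L2 table yet, start from an empty one.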
        let mut l2_table = if let Some(l2_offset) = l1_entry.l2_offset() {
            let l2_cluster = l2_offset.checked_cluster(cb).ok_or_else(|| {
                invalid_data(format!(
                    "Unaligned L2 table for {offset:?}; L1 entry: {l1_entry:?}"
                ))
            })?;

            let l2 = self.l2_cache.get_or_insert(l2_cluster).await?;
            if l1_entry.is_copied() {
                return Ok(l2);
            }

            L2Table::clone(&l2)
        } else {
            L2Table::new_cleared(&self.header)
        };

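        // Allocate a cluster for the new (copy of the) L2 table and write the table out before
        // entering it into the L1 table, so the L1 table never references unwritten metadata.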
        let l2_cluster = self.allocate_meta_cluster().await?;
        l2_table.set_cluster(l2_cluster);
        l2_table.write(self.metadata.as_ref()).await?;

        l1_locked.enter_l2_table(l1_index, &l2_table)?;
        l1_locked
            .write_entry(self.metadata.as_ref(), l1_index)
            .await?;

        // Free old L2 table, if any
        if let Some(l2_offset) = l1_entry.l2_offset() {
            self.free_meta_clusters(l2_offset.cluster(cb), ClusterCount(1))
                .await;
        }

        let l2_table = Arc::new(l2_table);
        self.l2_cache
            .insert(l2_cluster, Arc::clone(&l2_table))
            .await?;
        Ok(l2_table)
    }

    /// Create a new L1 table covering at least `at_least_index`.
    ///
    /// The new L1 table has the required size and carries over all entries of the previous L1
    /// table.
    pub(super) async fn grow_l1_table<'a>(
        &self,
        mut l1_locked: RwLockWriteGuard<'a, L1Table>,
        at_least_index: usize,
    ) -> io::Result<RwLockWriteGuard<'a, L1Table>> {
        let mut new_l1 = l1_locked.clone_and_grow(at_least_index, &self.header)?;

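        // Write the grown table and point the image header at it before freeing the old table,
        // so the on-disk header always references a valid, fully written L1 table.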
        let l1_start = self.allocate_meta_clusters(new_l1.cluster_count()).await?;

        new_l1.set_cluster(l1_start);
        new_l1.write(self.metadata.as_ref()).await?;

        self.header.set_l1_table(&new_l1)?;
        self.header
            .write_l1_table_pointer(self.metadata.as_ref())
            .await?;

        if let Some(old_l1_cluster) = l1_locked.get_cluster() {
            let old_l1_size = l1_locked.cluster_count();
            l1_locked.unset_cluster();
            self.free_meta_clusters(old_l1_cluster, old_l1_size).await;
        }

        *l1_locked = new_l1;

        Ok(l1_locked)
    }

    /// Inner implementation for [`Qcow2::do_ensure_data_mapping()`].
    ///
    /// Does not do any clean-up: The L2 table will probably be modified, but not written to disk.
    /// Any existing allocations that have been removed from it (and are thus leaked) are entered
    /// into `leaked_allocations`, but not freed.
    ///
    /// The caller must take care of both (writing the modified L2 table and freeing the leaked
    /// clusters), in case of success and in case of error alike.
    async fn ensure_data_mapping_no_cleanup(
        &self,
        offset: GuestOffset,
        full_length: u64,
        overwrite: bool,
        mut l2_table: L2TableWriteGuard<'_>,
        leaked_allocations: &mut Vec<(HostCluster, ClusterCount)>,
    ) -> io::Result<(u64, u64)> {
        let cb = self.header.cluster_bits();

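        // If the caller is going to overwrite this data anyway, the to-be-written part of the
        // first cluster need not be copied from its previous contents (COW can skip it).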
        let partial_skip_cow = overwrite.then(|| {
            let start = offset.in_cluster_offset(cb);
            let end = cmp::min(start as u64 + full_length, 1 << cb) as usize;
            start..end
        });

        let mut current_guest_cluster = offset.cluster(cb);

        // Without a mandatory host offset, this should never return `Ok(None)`
        let host_cluster = self
            .cow_cluster(
                current_guest_cluster,
                None,
                partial_skip_cow,
                &mut l2_table,
                leaked_allocations,
            )
            .await?
            .ok_or_else(|| io::Error::other("Internal allocation error"))?;

        let host_offset_start = host_cluster.relative_offset(offset, cb);
        let mut allocated_length = offset.remaining_in_cluster(cb);
        let mut current_host_cluster = host_cluster;

        while allocated_length < full_length {
            let Some(next) = current_guest_cluster.next_in_l2(cb) else {
                break;
            };
            current_guest_cluster = next;

            let chunk_length = cmp::min(full_length - allocated_length, 1 << cb) as usize;
            let partial_skip_cow = overwrite.then(|| 0..chunk_length);

            let next_host_cluster = current_host_cluster + ClusterCount(1);
            let host_cluster = self
                .cow_cluster(
                    current_guest_cluster,
                    Some(next_host_cluster),
                    partial_skip_cow,
                    &mut l2_table,
                    leaked_allocations,
                )
                .await?;

            let Some(host_cluster) = host_cluster else {
                // Cannot continue the contiguous mapping range
                break;
            };
            assert!(host_cluster == next_host_cluster);
            current_host_cluster = host_cluster;

            allocated_length += chunk_length as u64;
        }

        Ok((host_offset_start.0, allocated_length))
    }

    /// Inner implementation for [`Qcow2::ensure_fixed_mapping()`].
    ///
    /// Does not do any clean-up: The L2 table will probably be modified, but not written to disk.
    /// Any existing allocations that have been removed from it (and are thus leaked) are entered
    /// into `leaked_allocations`, but not freed.
    ///
    /// The caller must take care of both (writing the modified L2 table and freeing the leaked
    /// clusters), in case of success and in case of error alike.
    ///
    /// Allows zeroing or discarding clusters.  `mapping` says which kind of mapping to create.
    async fn ensure_fixed_mapping_no_cleanup(
        &self,
        first_cluster: GuestCluster,
        count: ClusterCount,
        mapping: FixedMapping,
        mut l2_table: L2TableWriteGuard<'_>,
        leaked_allocations: &mut Vec<(HostCluster, ClusterCount)>,
    ) -> io::Result<ClusterCount> {
        self.header.require_version(3)?;

        let cb = self.header.cluster_bits();
        let mut cluster = first_cluster;
        let end_cluster = first_cluster + count;
        let mut done = ClusterCount(0);

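        // Process the requested clusters one by one, but never cross into the next L2 table;
        // the returned count tells the caller how far we got.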
        while cluster < end_cluster {
            let l2i = cluster.l2_index(cb);
            let leaked = match mapping {
                FixedMapping::ZeroDiscard => l2_table.zero_cluster(l2i, false)?,
                FixedMapping::ZeroRetainAllocation => l2_table.zero_cluster(l2i, true)?,
                FixedMapping::FullDiscard => l2_table.discard_cluster(l2i),
            };
            if let Some(leaked) = leaked {
                leaked_allocations.push(leaked);
            }

            done += ClusterCount(1);
            let Some(next) = cluster.next_in_l2(cb) else {
                break;
            };
            cluster = next;
        }

        Ok(done)
    }
}

/// Possible mapping types for [`Qcow2::ensure_fixed_mapping()`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum FixedMapping {
    /// Make all clusters zero clusters, discarding previous allocations.
    ///
    /// Note this breaks existing mapping information, which must be communicated somehow, for
    /// example by requiring mutable access to the `Qcow2` object.
    ZeroDiscard,

    /// Make all clusters zero clusters, retaining previous allocations.
    ///
    /// Retains previous data cluster allocations in the form of preallocated zero clusters, but
    /// cannot retain previously existing compressed cluster allocations.  Because those mappings
    /// are not returned through the mapping interface, however, concurrent accesses should be
    /// reasonably safe.
    ///
    /// (Writing to zeroed data cluster mappings will just have no effect.)
    ZeroRetainAllocation,

    /// Fully remove clusters’ mappings, allowing backing data to appear.
    ///
    /// Note this breaks existing mapping information, which must be communicated somehow, for
    /// example by requiring mutable access to the `Qcow2` object.
    FullDiscard,
}