imago/qcow2/
allocation.rs1use super::*;
7use std::mem;
8use tokio::sync::MutexGuard;
9use tracing::{event, warn, Level};
10
11pub(super) struct Allocator<S: Storage> {
13 file: Arc<S>,
15
16 reftable: RefTable,
18
19 first_free_cluster: HostCluster,
21
22 header: Arc<Header>,
24
25 caches: Arc<MetadataCaches<S>>,
27}
28
29impl<S: Storage + 'static, F: WrappedFormat<S> + 'static> Qcow2<S, F> {
30 async fn allocator(&self) -> io::Result<MutexGuard<'_, Allocator<S>>> {
34 Ok(self
35 .allocator
36 .as_ref()
37 .ok_or_else(|| io::Error::other("Image is read-only"))?
38 .lock()
39 .await)
40 }
41
42 pub(super) async fn allocate_meta_cluster(&self) -> io::Result<HostCluster> {
46 self.allocate_meta_clusters(ClusterCount(1)).await
47 }
48
49 pub(super) async fn allocate_meta_clusters(
53 &self,
54 count: ClusterCount,
55 ) -> io::Result<HostCluster> {
56 self.allocator().await?.allocate_clusters(count, None).await
57 }
58
59 pub(super) async fn allocate_data_cluster(
68 &self,
69 guest_cluster: GuestCluster,
70 ) -> io::Result<HostCluster> {
71 if self.header.external_data_file() {
72 Ok(HostCluster(guest_cluster.0))
73 } else {
74 let mut allocator = self.allocator().await?;
75
76 self.caches.l2_depends_on_rb().await?;
78
79 allocator.allocate_clusters(ClusterCount(1), None).await
80 }
81 }
82
83 pub(super) async fn allocate_data_cluster_at(
91 &self,
92 guest_cluster: GuestCluster,
93 mandatory_host_cluster: Option<HostCluster>,
94 ) -> io::Result<Option<HostCluster>> {
95 let Some(mandatory_host_cluster) = mandatory_host_cluster else {
96 return self.allocate_data_cluster(guest_cluster).await.map(Some);
97 };
98
99 if self.header.external_data_file() {
100 let cluster = HostCluster(guest_cluster.0);
101 Ok((cluster == mandatory_host_cluster).then_some(cluster))
102 } else {
103 let mut allocator = self.allocator().await?;
104
105 self.caches.l2_depends_on_rb().await?;
107
108 let cluster = allocator
109 .allocate_cluster_at(mandatory_host_cluster)
110 .await?
111 .then_some(mandatory_host_cluster);
112 Ok(cluster)
113 }
114 }
115
116 pub(super) async fn free_meta_clusters(&self, cluster: HostCluster, count: ClusterCount) {
121 if let Ok(mut allocator) = self.allocator().await {
122 allocator.free_clusters(cluster, count).await
123 }
124 }
125
126 pub(super) async fn free_data_clusters(&self, cluster: HostCluster, count: ClusterCount) {
131 if !self.header.external_data_file() {
132 if let Ok(mut allocator) = self.allocator().await {
133 if let Err(err) = self.caches.rb_depends_on_l2().await {
135 warn!("Leaking clusters; cannot set up cache dependency: {err}");
136 return;
137 }
138
139 allocator.free_clusters(cluster, count).await;
140 }
141 }
142 }
143}
144
145impl<S: Storage> Allocator<S> {
146 pub async fn new(
148 image: Arc<S>,
149 header: Arc<Header>,
150 caches: Arc<MetadataCaches<S>>,
151 ) -> io::Result<Self> {
152 let cb = header.cluster_bits();
153 let rt_offset = header.reftable_offset();
154 let rt_cluster = rt_offset
155 .checked_cluster(cb)
156 .ok_or_else(|| invalid_data(format!("Unaligned refcount table: {rt_offset}")))?;
157
158 let reftable = RefTable::load(
159 image.as_ref(),
160 &header,
161 rt_cluster,
162 header.reftable_entries(),
163 )
164 .await?;
165
166 Ok(Allocator {
167 file: image,
168 reftable,
169 first_free_cluster: HostCluster(0),
170 header,
171 caches,
172 })
173 }
174
175 pub async unsafe fn invalidate_rb_cache(&self) -> io::Result<()> {
180 unsafe { self.caches.invalidate_rb() }.await
182 }
183
184 async fn allocate_clusters(
190 &mut self,
191 count: ClusterCount,
192 end_cluster: Option<HostCluster>,
193 ) -> io::Result<HostCluster> {
194 let mut index = self.first_free_cluster;
195 loop {
196 if end_cluster == Some(index) {
197 return Err(io::Error::other("Maximum cluster index reached"));
198 }
199
200 let alloc_count = self.allocate_clusters_at(index, count).await?;
201 if alloc_count == count {
202 return Ok(index);
203 }
204
205 index += alloc_count + ClusterCount(1);
206 if index.offset(self.header.cluster_bits()) > MAX_OFFSET {
207 return Err(io::Error::other("Cannot grow qcow2 file any further"));
208 }
209 }
210 }
211
212 async fn allocate_clusters_at(
223 &mut self,
224 mut index: HostCluster,
225 mut count: ClusterCount,
226 ) -> io::Result<ClusterCount> {
227 let start_index = index;
228
229 while count > ClusterCount(0) {
230 let result = self.allocate_cluster_at(index).await;
258 if !matches!(result, Ok(true)) {
259 self.free_clusters(start_index, index - start_index).await;
261 return result.map(|_| index - start_index);
262 }
263
264 count -= ClusterCount(1);
265 index += ClusterCount(1);
266 }
267
268 Ok(index - start_index)
269 }
270
271 async fn allocate_cluster_at(&mut self, index: HostCluster) -> io::Result<bool> {
276 let rb_bits = self.header.rb_bits();
277 let (rt_index, rb_index) = index.rt_rb_indices(rb_bits);
278
279 let rb = self.ensure_rb(rt_index).await?;
280 let mut rb = rb.lock_write().await;
281 let can_allocate = rb.is_zero(rb_index);
282 if can_allocate {
283 rb.increment(rb_index)?;
284 }
285
286 if index == self.first_free_cluster {
288 self.first_free_cluster = index + ClusterCount(1);
289 }
290
291 Ok(can_allocate)
292 }
293
294 async fn get_rb(&mut self, rt_index: usize) -> io::Result<Option<Arc<RefBlock>>> {
298 let rt_entry = self.reftable.get(rt_index);
299 if let Some(rb_offset) = rt_entry.refblock_offset() {
300 let cb = self.header.cluster_bits();
301 let rb_cluster = rb_offset.checked_cluster(cb).ok_or_else(|| {
302 invalid_data(format!("Unaligned refcount block with index {rt_index}; refcount table entry: {rt_entry:?}"))
303 })?;
304
305 self.caches.rb_get_or_insert(rb_cluster).await.map(Some)
306 } else {
307 Ok(None)
308 }
309 }
310
311 async fn ensure_rb(&mut self, rt_index: usize) -> io::Result<Arc<RefBlock>> {
316 if let Some(rb) = self.get_rb(rt_index).await? {
317 return Ok(rb);
318 }
319
320 if !self.reftable.in_bounds(rt_index) {
321 self.grow_reftable(rt_index).await?;
322 if let Some(rb) = self.get_rb(rt_index).await? {
324 return Ok(rb);
325 }
326 }
327
328 let mut new_rb = RefBlock::new_cleared(self.file.as_ref(), &self.header)?;
329
330 let rb_cluster = HostCluster::from_ref_indices(rt_index, 0, self.header.rb_bits());
332
333 if let Ok(new_rb_cluster) =
341 Box::pin(self.allocate_clusters(ClusterCount(1), Some(rb_cluster))).await
342 {
343 new_rb.set_cluster(new_rb_cluster);
344 } else {
345 new_rb.set_cluster(rb_cluster);
347 new_rb.lock_write().await.increment(0)?;
348 }
349 new_rb.write(self.file.as_ref()).await?;
350
351 self.reftable.enter_refblock(rt_index, &new_rb)?;
352 self.reftable
353 .write_entry(self.file.as_ref(), rt_index)
354 .await?;
355
356 let new_rb = Arc::new(new_rb);
357 self.caches
358 .rb_insert(new_rb.get_cluster().unwrap(), Arc::clone(&new_rb))
359 .await?;
360 Ok(new_rb)
361 }
362
363 async fn grow_reftable(&mut self, at_least_index: usize) -> io::Result<()> {
369 let cb = self.header.cluster_bits();
370 let rb_bits = self.header.rb_bits();
371 let rb_entries = 1 << rb_bits;
372
373 let mut new_rt = self.reftable.clone_and_grow(&self.header, at_least_index)?;
374 let rt_clusters = ClusterCount::from_byte_size(new_rt.byte_size() as u64, cb);
375
376 let (mut rt_index, mut rb_index) = self.first_free_cluster.rt_rb_indices(rb_bits);
378 let mut free_cluster_index: Option<HostCluster> = None;
379 let mut free_cluster_count = ClusterCount(0);
380
381 let mut required_clusters = rt_clusters;
384
385 while free_cluster_count < required_clusters {
386 assert!(new_rt.in_bounds(rt_index));
388
389 let rt_entry = new_rt.get(rt_index);
390 let Some(rb_offset) = rt_entry.refblock_offset() else {
391 let start_index = HostCluster::from_ref_indices(rt_index, 0, rb_bits);
392 free_cluster_index.get_or_insert(start_index);
393 free_cluster_count += ClusterCount(rb_entries as u64);
394 required_clusters += ClusterCount(1);
396 continue;
397 };
398
399 let rb_cluster = rb_offset.checked_cluster(cb).ok_or_else(|| {
400 invalid_data(format!("Unaligned refcount block with index {rt_index}; refcount table entry: {rt_entry:?}"))
401 })?;
402
403 let rb = self.caches.rb_get_or_insert(rb_cluster).await?;
404 for i in rb_index..rb_entries {
405 if rb.is_zero(i) {
406 let index = HostCluster::from_ref_indices(rt_index, i, rb_bits);
407 free_cluster_index.get_or_insert(index);
408 free_cluster_count += ClusterCount(1);
409
410 if free_cluster_count >= required_clusters {
411 break;
412 }
413 } else if free_cluster_index.is_some() {
414 free_cluster_index.take();
415 free_cluster_count = ClusterCount(0);
416 required_clusters = rt_clusters; }
418 }
419
420 rb_index = 0;
421 rt_index += 1;
422 }
423
424 let mut index = free_cluster_index.unwrap();
425 let mut count = required_clusters;
426
427 let rt_index_start = index.rt_index(rb_bits);
429 let rt_index_end = (index + count).0.div_ceil(rb_entries as u64) as usize;
430
431 let mut refblocks = Vec::<Arc<RefBlock>>::new();
432 for rt_i in rt_index_start..rt_index_end {
433 if let Some(rb_offset) = new_rt.get(rt_i).refblock_offset() {
434 let rb_cluster = rb_offset.checked_cluster(cb).unwrap();
436 let rb = self.caches.rb_get_or_insert(rb_cluster).await?;
437 refblocks.push(rb);
438 continue;
439 }
440
441 let mut rb = RefBlock::new_cleared(self.file.as_ref(), &self.header)?;
442 rb.set_cluster(index);
443 new_rt.enter_refblock(rt_i, &rb)?;
444 let rb = Arc::new(rb);
445 self.caches.rb_insert(index, Arc::clone(&rb)).await?;
446 refblocks.push(rb);
447 index += ClusterCount(1);
448 count -= ClusterCount(1);
449 }
450
451 assert!(count >= rt_clusters);
452 new_rt.set_cluster(index);
453
454 let start_index = free_cluster_index.unwrap();
456 let end_index = index + rt_clusters;
457
458 for index in start_index.0..end_index.0 {
459 let index = HostCluster(index);
460 let (rt_i, rb_i) = index.rt_rb_indices(rb_bits);
461
462 let rb_vec_i = rt_i - rt_index_start;
464 refblocks[rb_vec_i]
466 .lock_write()
467 .await
468 .increment(rb_i)
469 .unwrap();
470 }
471
472 self.caches.flush_rb().await?;
476 new_rt.write(self.file.as_ref()).await?;
477
478 self.header.set_reftable(&new_rt)?;
479 self.header
480 .write_reftable_pointer(self.file.as_ref())
481 .await?;
482
483 let mut old_reftable = mem::replace(&mut self.reftable, new_rt);
485 if let Some(old_rt_cluster) = old_reftable.get_cluster() {
486 let old_rt_size = old_reftable.cluster_count();
487 old_reftable.unset_cluster();
488 self.free_clusters(old_rt_cluster, old_rt_size).await;
489 }
490
491 Ok(())
492 }
493
494 async fn free_clusters(&mut self, start: HostCluster, mut count: ClusterCount) {
499 if count.0 == 0 {
500 return;
501 }
502
503 if start < self.first_free_cluster {
504 self.first_free_cluster = start;
505 }
506
507 let rb_bits = self.header.rb_bits();
508 let rb_entries = 1 << rb_bits;
509 let (mut rt_index, mut rb_index) = start.rt_rb_indices(rb_bits);
510
511 while count > ClusterCount(0) {
512 let in_rb_count = cmp::min((rb_entries - rb_index) as u64, count.0) as usize;
513
514 match self.get_rb(rt_index).await {
515 Ok(Some(rb)) => {
516 let mut rb = rb.lock_write().await;
517 for i in rb_index..(rb_index + in_rb_count) {
518 if let Err(err) = rb.decrement(i) {
519 event!(Level::WARN, "Failed to free cluster: {err}");
520 }
521 }
522 }
523
524 Ok(None) => {
525 event!(
526 Level::WARN,
527 "Failed to free {in_rb_count} clusters: Not allocated"
528 )
529 }
530 Err(err) => event!(Level::WARN, "Failed to free {in_rb_count} clusters: {err}"),
531 }
532
533 count -= ClusterCount(in_rb_count as u64);
534 rb_index = 0;
535 rt_index += 1;
536 }
537 }
538}