imago/qcow2/allocation.rs

use super::cache::RefBlockCacheBackend;
use super::*;
use std::mem;
use tokio::sync::MutexGuard;
use tracing::{event, warn, Level};
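/// Cluster allocator for a qcow2 image.
///
/// Manages the refcount structures (refcount table and refcount blocks) to find, reserve, and
/// free clusters in the image file.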
pub(super) struct Allocator<S: Storage> {
    /// The qcow2 image file.
    file: Arc<S>,

    /// Qcow2 refcount table.
    reftable: RefTable,

    /// Lowest cluster index from which to start searching for free clusters.
    first_free_cluster: HostCluster,

    /// Qcow2 image header.
    header: Arc<Header>,

    /// Cache for refcount blocks.
    rb_cache: AsyncLruCache<HostCluster, RefBlock, RefBlockCacheBackend<S>>,
}

impl<S: Storage + 'static, F: WrappedFormat<S> + 'static> Qcow2<S, F> {
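    /// Lock and return the image's allocator; fail if the image is read-only (and thus has no
    /// allocator).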
    async fn allocator(&self) -> io::Result<MutexGuard<'_, Allocator<S>>> {
        Ok(self
            .allocator
            .as_ref()
            .ok_or_else(|| io::Error::other("Image is read-only"))?
            .lock()
            .await)
    }

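    /// Allocate one metadata cluster.
    ///
    /// Metadata clusters are always allocated in the qcow2 (metadata) file, even when an
    /// external data file is in use.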
    pub(super) async fn allocate_meta_cluster(&self) -> io::Result<HostCluster> {
        self.allocate_meta_clusters(ClusterCount(1)).await
    }

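    /// Allocate `count` contiguous metadata clusters in the image file.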
    pub(super) async fn allocate_meta_clusters(
        &self,
        count: ClusterCount,
    ) -> io::Result<HostCluster> {
        self.allocator().await?.allocate_clusters(count, None).await
    }

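    /// Allocate one data cluster for `guest_cluster`.
    ///
    /// With an external data file, data clusters are not refcounted; the host cluster index is
    /// simply the same as the guest cluster index.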
    pub(super) async fn allocate_data_cluster(
        &self,
        guest_cluster: GuestCluster,
    ) -> io::Result<HostCluster> {
        if self.header.external_data_file() {
            Ok(HostCluster(guest_cluster.0))
        } else {
            let mut allocator = self.allocator().await?;

            // The new cluster's refcount must be written before an L2 table can reference the
            // cluster, so the L2 table cache must flush the refblock cache first.
            self.l2_cache.depend_on(&allocator.rb_cache).await?;

            allocator.allocate_clusters(ClusterCount(1), None).await
        }
    }

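    /// Allocate the data cluster for `guest_cluster`, at `mandatory_host_cluster` if given.
    ///
    /// Return `Ok(None)` if the requested host cluster cannot be allocated.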
    pub(super) async fn allocate_data_cluster_at(
        &self,
        guest_cluster: GuestCluster,
        mandatory_host_cluster: Option<HostCluster>,
    ) -> io::Result<Option<HostCluster>> {
        let Some(mandatory_host_cluster) = mandatory_host_cluster else {
            return self.allocate_data_cluster(guest_cluster).await.map(Some);
        };

        if self.header.external_data_file() {
            let cluster = HostCluster(guest_cluster.0);
            Ok((cluster == mandatory_host_cluster).then_some(cluster))
        } else {
            let mut allocator = self.allocator().await?;

            // Same ordering requirement as in `allocate_data_cluster()`: the refcount must hit
            // the disk before any L2 table references the new cluster.
            self.l2_cache.depend_on(&allocator.rb_cache).await?;

            let cluster = allocator
                .allocate_cluster_at(mandatory_host_cluster)
                .await?
                .then_some(mandatory_host_cluster);
            Ok(cluster)
        }
    }

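    /// Free metadata clusters (i.e. clusters in the image file).
    ///
    /// Errors are not reported; at worst, clusters are leaked.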
    pub(super) async fn free_meta_clusters(&self, cluster: HostCluster, count: ClusterCount) {
        if let Ok(mut allocator) = self.allocator().await {
            allocator.free_clusters(cluster, count).await
        }
    }

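    /// Free data clusters.
    ///
    /// With an external data file, data clusters are not refcounted, so there is nothing to do.
    /// Errors are not reported; at worst, clusters are leaked.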
    pub(super) async fn free_data_clusters(&self, cluster: HostCluster, count: ClusterCount) {
        if !self.header.external_data_file() {
            if let Ok(mut allocator) = self.allocator().await {
                // Freed clusters must be unreferenced from the L2 tables before their refcount
                // is dropped, so the refblock cache must flush the L2 table cache first.
                if let Err(err) = allocator.rb_cache.depend_on(&self.l2_cache).await {
                    warn!("Leaking clusters; cannot set up cache inter-dependency with L2 cache: {err}");
                    return;
                }

                allocator.free_clusters(cluster, count).await;
            }
        }
    }
}

impl<S: Storage> Allocator<S> {
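    /// Create a new allocator for the given image, loading its refcount table.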
    pub async fn new(image: Arc<S>, header: Arc<Header>) -> io::Result<Self> {
        let cb = header.cluster_bits();
        let rt_offset = header.reftable_offset();
        let rt_cluster = rt_offset
            .checked_cluster(cb)
            .ok_or_else(|| invalid_data(format!("Unaligned refcount table: {rt_offset}")))?;

        let reftable = RefTable::load(
            image.as_ref(),
            &header,
            rt_cluster,
            header.reftable_entries(),
        )
        .await?;

        let rb_cache_backend = RefBlockCacheBackend::new(Arc::clone(&image), Arc::clone(&header));
        let rb_cache = AsyncLruCache::new(rb_cache_backend, 32);

        Ok(Allocator {
            file: image,
            reftable,
            first_free_cluster: HostCluster(0),
            header,
            rb_cache,
        })
    }

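    /// Flush the refcount block cache.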
    pub async fn flush_rb_cache(&self) -> io::Result<()> {
        self.rb_cache.flush().await
    }

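    /// Drop the refcount block cache without writing back dirty entries.
    ///
    /// # Safety
    /// Cached modifications are lost; the caller must ensure that this cannot corrupt the image.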
    pub async unsafe fn invalidate_rb_cache(&self) -> io::Result<()> {
        // Safety: ensured by the caller.
        unsafe { self.rb_cache.invalidate() }.await
    }

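    /// Allocate `count` contiguous free clusters, searching upwards from `first_free_cluster`.
    ///
    /// If `end_cluster` is given, fail once the search reaches that cluster index.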
    async fn allocate_clusters(
        &mut self,
        count: ClusterCount,
        end_cluster: Option<HostCluster>,
    ) -> io::Result<HostCluster> {
        let mut index = self.first_free_cluster;
        loop {
            if end_cluster == Some(index) {
                return Err(io::Error::other("Maximum cluster index reached"));
            }

            let alloc_count = self.allocate_clusters_at(index, count).await?;
            if alloc_count == count {
                return Ok(index);
            }

            index += alloc_count + ClusterCount(1);
            if index.offset(self.header.cluster_bits()) > MAX_OFFSET {
                return Err(io::Error::other("Cannot grow qcow2 file any further"));
            }
        }
    }

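    /// Try to allocate `count` contiguous clusters starting at `index`.
    ///
    /// Return the number of clusters that could be allocated; on a partial allocation, the
    /// already-allocated clusters are freed again before returning.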
    async fn allocate_clusters_at(
        &mut self,
        mut index: HostCluster,
        mut count: ClusterCount,
    ) -> io::Result<ClusterCount> {
        let start_index = index;

        while count > ClusterCount(0) {
            let result = self.allocate_cluster_at(index).await;
            if !matches!(result, Ok(true)) {
                // Not everything could be allocated; roll back what was allocated so far and
                // report how far we got
                self.free_clusters(start_index, index - start_index).await;
                return result.map(|_| index - start_index);
            }

            count -= ClusterCount(1);
            index += ClusterCount(1);
        }

        Ok(index - start_index)
    }

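    /// Try to allocate the single cluster `index`.
    ///
    /// Return `Ok(true)` if the cluster was free and has been allocated, `Ok(false)` if it is
    /// already in use.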
    async fn allocate_cluster_at(&mut self, index: HostCluster) -> io::Result<bool> {
        let rb_bits = self.header.rb_bits();
        let (rt_index, rb_index) = index.rt_rb_indices(rb_bits);

        let rb = self.ensure_rb(rt_index).await?;
        let mut rb = rb.lock_write().await;
        let can_allocate = rb.is_zero(rb_index);
        if can_allocate {
            rb.increment(rb_index)?;
        }

        // Whether we just allocated this cluster or it was already taken, the next free cluster
        // can only come after it
        if index == self.first_free_cluster {
            self.first_free_cluster = index + ClusterCount(1);
        }

        Ok(can_allocate)
    }

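    /// Get the refcount block referenced by the given refcount table index, if any.
    ///
    /// Return `Ok(None)` if the refcount table entry is empty (no refblock allocated there yet).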
    async fn get_rb(&mut self, rt_index: usize) -> io::Result<Option<Arc<RefBlock>>> {
        let rt_entry = self.reftable.get(rt_index);
        if let Some(rb_offset) = rt_entry.refblock_offset() {
            let cb = self.header.cluster_bits();
            let rb_cluster = rb_offset.checked_cluster(cb).ok_or_else(|| {
                invalid_data(format!("Unaligned refcount block with index {rt_index}; refcount table entry: {rt_entry:?}"))
            })?;

            self.rb_cache.get_or_insert(rb_cluster).await.map(Some)
        } else {
            Ok(None)
        }
    }

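    /// Get the refcount block for the given refcount table index, creating it if necessary.
    ///
    /// If `rt_index` is out of bounds, the refcount table is grown transparently.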
    async fn ensure_rb(&mut self, rt_index: usize) -> io::Result<Arc<RefBlock>> {
        if let Some(rb) = self.get_rb(rt_index).await? {
            return Ok(rb);
        }

        if !self.reftable.in_bounds(rt_index) {
            self.grow_reftable(rt_index).await?;
            // The grown reftable may already contain a refblock covering `rt_index`
            if let Some(rb) = self.get_rb(rt_index).await? {
                return Ok(rb);
            }
        }

        // No refblock for `rt_index` yet; create a new one
        let mut new_rb = RefBlock::new_cleared(self.file.as_ref(), &self.header)?;

        // First cluster covered by the new refblock
        let rb_cluster = HostCluster::from_ref_indices(rt_index, 0, self.header.rb_bits());

        // Try to allocate a cluster before `rb_cluster` for the new refblock; if there is none,
        // make the refblock self-describing by placing it in the first cluster it covers
        if let Ok(new_rb_cluster) =
            Box::pin(self.allocate_clusters(ClusterCount(1), Some(rb_cluster))).await
        {
            new_rb.set_cluster(new_rb_cluster);
        } else {
            new_rb.set_cluster(rb_cluster);
            new_rb.lock_write().await.increment(0)?;
        }
        new_rb.write(self.file.as_ref()).await?;

        self.reftable.enter_refblock(rt_index, &new_rb)?;
        self.reftable
            .write_entry(self.file.as_ref(), rt_index)
            .await?;

        let new_rb = Arc::new(new_rb);
        self.rb_cache
            .insert(new_rb.get_cluster().unwrap(), Arc::clone(&new_rb))
            .await?;
        Ok(new_rb)
    }

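    /// Grow the refcount table so that it covers at least `at_least_index`.
    ///
    /// This requires finding a contiguous free range of clusters large enough for the new
    /// refcount table and any new refblocks needed to cover that range itself.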
    async fn grow_reftable(&mut self, at_least_index: usize) -> io::Result<()> {
        let cb = self.header.cluster_bits();
        let rb_bits = self.header.rb_bits();
        let rb_entries = 1 << rb_bits;

        let mut new_rt = self.reftable.clone_and_grow(&self.header, at_least_index)?;
        let rt_clusters = ClusterCount::from_byte_size(new_rt.byte_size() as u64, cb);

        // Find a contiguous free range for the new reftable (and the refblocks covering it)
        let (mut rt_index, mut rb_index) = self.first_free_cluster.rt_rb_indices(rb_bits);
        let mut free_cluster_index: Option<HostCluster> = None;
        let mut free_cluster_count = ClusterCount(0);

        // Clusters needed: the new reftable itself, plus one new refblock per fully free range
        // encountered below
        let mut required_clusters = rt_clusters;

        while free_cluster_count < required_clusters {
            assert!(new_rt.in_bounds(rt_index));

            let rt_entry = new_rt.get(rt_index);
            let Some(rb_offset) = rt_entry.refblock_offset() else {
                // No refblock here, so the whole range it would cover is free; it will need a
                // new refblock of its own, though
                let start_index = HostCluster::from_ref_indices(rt_index, 0, rb_bits);
                free_cluster_index.get_or_insert(start_index);
                free_cluster_count += ClusterCount(rb_entries as u64);
                required_clusters += ClusterCount(1);
                // Advance to the next refcount table entry
                rt_index += 1;
                continue;
            };

            let rb_cluster = rb_offset.checked_cluster(cb).ok_or_else(|| {
                invalid_data(format!("Unaligned refcount block with index {rt_index}; refcount table entry: {rt_entry:?}"))
            })?;

            let rb = self.rb_cache.get_or_insert(rb_cluster).await?;
            for i in rb_index..rb_entries {
                if rb.is_zero(i) {
                    let index = HostCluster::from_ref_indices(rt_index, i, rb_bits);
                    free_cluster_index.get_or_insert(index);
                    free_cluster_count += ClusterCount(1);

                    if free_cluster_count >= required_clusters {
                        break;
                    }
                } else if free_cluster_index.is_some() {
                    // The free range is interrupted; start over
                    free_cluster_index.take();
                    free_cluster_count = ClusterCount(0);
                    required_clusters = rt_clusters;
                }
            }

            rb_index = 0;
            rt_index += 1;
        }

        let mut index = free_cluster_index.unwrap();
        let mut count = required_clusters;

        let rt_index_start = index.rt_index(rb_bits);
        // Round up so the last cluster of the range is covered, too
        let rt_index_end = (index + count).0.div_ceil(rb_entries as u64) as usize;

        // Collect the refblocks covering the whole range, creating new ones (placed at the start
        // of the free range) where necessary
        let mut refblocks = Vec::<Arc<RefBlock>>::new();
        for rt_i in rt_index_start..rt_index_end {
            if let Some(rb_offset) = new_rt.get(rt_i).refblock_offset() {
                // Alignment was already checked in the loop above
                let rb_cluster = rb_offset.checked_cluster(cb).unwrap();
                let rb = self.rb_cache.get_or_insert(rb_cluster).await?;
                refblocks.push(rb);
                continue;
            }

            let mut rb = RefBlock::new_cleared(self.file.as_ref(), &self.header)?;
            rb.set_cluster(index);
            new_rt.enter_refblock(rt_i, &rb)?;
            let rb = Arc::new(rb);
            self.rb_cache.insert(index, Arc::clone(&rb)).await?;
            refblocks.push(rb);
            index += ClusterCount(1);
            count -= ClusterCount(1);
        }

        // What remains of the free range is used for the new reftable itself
        assert!(count >= rt_clusters);
        new_rt.set_cluster(index);

        let start_index = free_cluster_index.unwrap();
        let end_index = index + rt_clusters;

        // Mark all clusters of the new refblocks and the new reftable as allocated
        for index in start_index.0..end_index.0 {
            let index = HostCluster(index);
            let (rt_i, rb_i) = index.rt_rb_indices(rb_bits);

            // `refblocks` is indexed relative to `rt_index_start`
            let rb_vec_i = rt_i - rt_index_start;
            refblocks[rb_vec_i]
                .lock_write()
                .await
                .increment(rb_i)
                .unwrap();
        }

        // Write the refcounts before the new reftable, and the new reftable before the header
        // points to it
        self.rb_cache.flush().await?;
        new_rt.write(self.file.as_ref()).await?;

        self.header.set_reftable(&new_rt)?;
        self.header
            .write_reftable_pointer(self.file.as_ref())
            .await?;

        // Now the old reftable can be freed
        let mut old_reftable = mem::replace(&mut self.reftable, new_rt);
        if let Some(old_rt_cluster) = old_reftable.get_cluster() {
            let old_rt_size = old_reftable.cluster_count();
            old_reftable.unset_cluster();
            self.free_clusters(old_rt_cluster, old_rt_size).await;
        }

        Ok(())
    }

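    /// Free `count` clusters starting at `start` by decrementing their refcounts.
    ///
    /// Errors are only logged; at worst, clusters are leaked.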
    async fn free_clusters(&mut self, start: HostCluster, mut count: ClusterCount) {
        if count.0 == 0 {
            return;
        }

        if start < self.first_free_cluster {
            self.first_free_cluster = start;
        }

        let rb_bits = self.header.rb_bits();
        let rb_entries = 1 << rb_bits;
        let (mut rt_index, mut rb_index) = start.rt_rb_indices(rb_bits);

        while count > ClusterCount(0) {
            let in_rb_count = cmp::min((rb_entries - rb_index) as u64, count.0) as usize;

            match self.get_rb(rt_index).await {
                Ok(Some(rb)) => {
                    let mut rb = rb.lock_write().await;
                    for i in rb_index..(rb_index + in_rb_count) {
                        if let Err(err) = rb.decrement(i) {
                            event!(Level::WARN, "Failed to free cluster: {err}");
                        }
                    }
                }

                Ok(None) => {
                    event!(
                        Level::WARN,
                        "Failed to free {in_rb_count} clusters: Not allocated"
                    )
                }
                Err(err) => event!(Level::WARN, "Failed to free {in_rb_count} clusters: {err}"),
            }

            count -= ClusterCount(in_rb_count as u64);
            rb_index = 0;
            rt_index += 1;
        }
    }
}