xref: /haiku/src/add-ons/kernel/file_systems/ntfs/libntfs/lcnalloc.c (revision 582da17386c4a192ca30270d6b0b95f561cf5843)
1 /**
2  * lcnalloc.c - Cluster (de)allocation code. Originated from the Linux-NTFS project.
3  *
4  * Copyright (c) 2002-2004 Anton Altaparmakov
5  * Copyright (c) 2004 Yura Pakhuchiy
6  * Copyright (c) 2004-2007 Szabolcs Szakacsits
7  *
8  * This program/include file is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as published
10  * by the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program/include file is distributed in the hope that it will be
14  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program (in the main directory of the NTFS-3G
20  * distribution in the file COPYING); if not, write to the Free Software
21  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #ifdef HAVE_STDLIB_H
29 #include <stdlib.h>
30 #endif
31 #ifdef HAVE_STDIO_H
32 #include <stdio.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 
38 #include "types.h"
39 #include "attrib.h"
40 #include "bitmap.h"
41 #include "debug.h"
42 #include "runlist.h"
43 #include "volume.h"
44 #include "lcnalloc.h"
45 #include "logging.h"
46 #include "misc.h"
47 
48 /*
49  * Plenty possibilities for big optimizations all over in the cluster
50  * allocation, however at the moment the dominant bottleneck (~ 90%) is
51  * the update of the mapping pairs which converges to the cubic Faulhaber's
52  * formula as the function of the number of extents (fragments, runs).
53  */
54 #define NTFS_LCNALLOC_BSIZE 1024
55 #define NTFS_LCNALLOC_SKIP  NTFS_LCNALLOC_BSIZE
56 
57 static void ntfs_cluster_set_zone_pos(LCN start, LCN end, LCN *pos, LCN tc)
58 {
59 	ntfs_log_trace("pos: %lld  tc: %lld\n", (long long)*pos, tc);
60 
61 	if (tc >= end)
62 		*pos = start;
63 	else if (tc >= start)
64 		*pos = tc;
65 }
66 
67 static void ntfs_cluster_update_zone_pos(ntfs_volume *vol, u8 zone, LCN tc)
68 {
69 	ntfs_log_trace("tc = %lld, zone = %d\n", (long long)tc, zone);
70 
71 	if (zone == 1)
72 		ntfs_cluster_set_zone_pos(vol->mft_lcn, vol->mft_zone_end,
73 					  &vol->mft_zone_pos, tc);
74 	else if (zone == 2)
75 		ntfs_cluster_set_zone_pos(vol->mft_zone_end, vol->nr_clusters,
76 					  &vol->data1_zone_pos, tc);
77 	else /* zone == 4 */
78 		ntfs_cluster_set_zone_pos(0, vol->mft_zone_start,
79 					  &vol->data2_zone_pos, tc);
80 }
81 
82 static s64 max_empty_bit_range(unsigned char *buf, int size)
83 {
84 	int i, j, run = 0;
85 	int max_range = 0;
86 	s64 start_pos = -1;
87 
88 	ntfs_log_trace("Entering");
89 
90 	for (i = 0; i < size; i++, buf++) {
91 
92 		for (j = 0; j < 8; j++) {
93 
94 			int bit = *buf & (1 << j);
95 
96 			if (bit) {
97 				if (run > max_range) {
98 					max_range = run;
99 					start_pos = i * 8 + j - run;
100 				}
101 				run = 0;
102 			} else
103 				run++;
104 		}
105 	}
106 
107 	if (run > max_range)
108 		start_pos = i * 8 - run;
109 
110 	return start_pos;
111 }
112 
113 static int bitmap_writeback(ntfs_volume *vol, s64 pos, s64 size, void *b,
114 			    u8 *writeback)
115 {
116 	s64 written;
117 
118 	ntfs_log_trace("Entering");
119 
120 	if (!*writeback)
121 		return 0;
122 
123 	*writeback = 0;
124 
125 	written = ntfs_attr_pwrite(vol->lcnbmp_na, pos, size, b);
126 	if (written != size) {
127 		if (!written)
128 			errno = EIO;
129 		ntfs_log_perror("Bitmap write error (%lld, %lld)", pos, size);
130 		return -1;
131 	}
132 
133 	return 0;
134 }
135 
136 /**
137  * ntfs_cluster_alloc - allocate clusters on an ntfs volume
138  * @vol:	mounted ntfs volume on which to allocate the clusters
139  * @start_vcn:	vcn to use for the first allocated cluster
140  * @count:	number of clusters to allocate
141  * @start_lcn:	starting lcn at which to allocate the clusters (or -1 if none)
142  * @zone:	zone from which to allocate the clusters
143  *
144  * Allocate @count clusters preferably starting at cluster @start_lcn or at the
145  * current allocator position if @start_lcn is -1, on the mounted ntfs volume
146  * @vol. @zone is either DATA_ZONE for allocation of normal clusters and
147  * MFT_ZONE for allocation of clusters for the master file table, i.e. the
148  * $MFT/$DATA attribute.
149  *
150  * On success return a runlist describing the allocated cluster(s).
151  *
152  * On error return NULL with errno set to the error code.
153  *
154  * Notes on the allocation algorithm
155  * =================================
156  *
157  * There are two data zones. First is the area between the end of the mft zone
158  * and the end of the volume, and second is the area between the start of the
159  * volume and the start of the mft zone. On unmodified/standard NTFS 1.x
160  * volumes, the second data zone doesn't exist due to the mft zone being
161  * expanded to cover the start of the volume in order to reserve space for the
162  * mft bitmap attribute.
163  *
164  * The complexity stems from the need of implementing the mft vs data zoned
165  * approach and from the fact that we have access to the lcn bitmap via up to
166  * NTFS_LCNALLOC_BSIZE bytes at a time, so we need to cope with crossing over
167  * boundaries of two buffers. Further, the fact that the allocator allows for
168  * caller supplied hints as to the location of where allocation should begin
169  * and the fact that the allocator keeps track of where in the data zones the
170  * next natural allocation should occur, contribute to the complexity of the
171  * function. But it should all be worthwhile, because this allocator:
172  *   1) implements MFT zone reservation
173  *   2) causes reduction in fragmentation.
174  * The code is not optimized for speed.
175  */
176 runlist *ntfs_cluster_alloc(ntfs_volume *vol, VCN start_vcn, s64 count,
177 		LCN start_lcn, const NTFS_CLUSTER_ALLOCATION_ZONES zone)
178 {
179 	LCN zone_start, zone_end;  /* current search range */
180 	LCN last_read_pos, lcn;
181 	LCN bmp_pos;		/* current bit position inside the bitmap */
182 	LCN prev_lcn = 0, prev_run_len = 0;
183 	s64 clusters, br;
184 	runlist *rl = NULL, *trl;
185 	u8 *buf, *byte, bit, writeback;
186 	u8 pass = 1; 	/* 1: inside zone;  2: start of zone */
187 	u8 search_zone; /* 4: data2 (start) 1: mft (middle) 2: data1 (end) */
188 	u8 done_zones = 0;
189 	u8 has_guess, used_zone_pos;
190 	int err = 0, rlpos, rlsize, buf_size;
191 
192 	ntfs_log_trace("Entering with count = 0x%llx, start_lcn = 0x%llx, "
193 		       "zone = %s_ZONE.\n", (long long)count, (long long)
194 		       start_lcn, zone == MFT_ZONE ? "MFT" : "DATA");
195 	if (!vol || count < 0 || start_lcn < -1 || !vol->lcnbmp_na ||
196 			(s8)zone < FIRST_ZONE || zone > LAST_ZONE) {
197 		errno = EINVAL;
198 		ntfs_log_perror("%s: vcn: %lld, count: %lld, lcn: %lld",
199 				__FUNCTION__, (long long)start_vcn,
200 				(long long)count, (long long)start_lcn);
201 		return NULL;
202 	}
203 
204 	/* Return empty runlist if @count == 0 */
205 	if (!count) {
206 		rl = ntfs_malloc(0x1000);
207 		if (rl) {
208 			rl[0].vcn = start_vcn;
209 			rl[0].lcn = LCN_RL_NOT_MAPPED;
210 			rl[0].length = 0;
211 		}
212 		return rl;
213 	}
214 
215 	buf = ntfs_malloc(NTFS_LCNALLOC_BSIZE);
216 	if (!buf)
217 		return NULL;
218 	/*
219 	 * If no @start_lcn was requested, use the current zone
220 	 * position otherwise use the requested @start_lcn.
221 	 */
222 	has_guess = 1;
223 	zone_start = start_lcn;
224 
225 	if (zone_start < 0) {
226 		if (zone == DATA_ZONE)
227 			zone_start = vol->data1_zone_pos;
228 		else
229 			zone_start = vol->mft_zone_pos;
230 		has_guess = 0;
231 	}
232 
233 	used_zone_pos = has_guess ? 0 : 1;
234 
235 	if (!zone_start || zone_start == vol->mft_zone_start ||
236 			zone_start == vol->mft_zone_end)
237 		pass = 2;
238 
239 	if (zone_start < vol->mft_zone_start) {
240 		zone_end = vol->mft_zone_start;
241 		search_zone = 4;
242 	} else if (zone_start < vol->mft_zone_end) {
243 		zone_end = vol->mft_zone_end;
244 		search_zone = 1;
245 	} else {
246 		zone_end = vol->nr_clusters;
247 		search_zone = 2;
248 	}
249 
250 	bmp_pos = zone_start;
251 
252 	/* Loop until all clusters are allocated. */
253 	clusters = count;
254 	rlpos = rlsize = 0;
255 	while (1) {
256 		last_read_pos = bmp_pos >> 3;
257 		br = ntfs_attr_pread(vol->lcnbmp_na, last_read_pos,
258 				     NTFS_LCNALLOC_BSIZE, buf);
259 		if (br <= 0) {
260 			if (!br)
261 				goto zone_pass_done;
262 			err = errno;
263 			ntfs_log_perror("Reading $BITMAP failed");
264 			goto err_ret;
265 		}
266 		/*
267 		 * We might have read less than NTFS_LCNALLOC_BSIZE bytes
268 		 * if we are close to the end of the attribute.
269 		 */
270 		buf_size = (int)br << 3;
271 		lcn = bmp_pos & 7;
272 		bmp_pos &= ~7;
273 		writeback = 0;
274 
275 		while (1) {
276 			byte = buf + (lcn >> 3);
277 			bit = 1 << (lcn & 7);
278 			if (has_guess) {
279 				if (*byte & bit) {
280 					has_guess = 0;
281 					break;
282 				}
283 			} else {
284 				lcn = max_empty_bit_range(buf, br);
285 				if (lcn < 0)
286 					break;
287 				has_guess = 1;
288 				continue;
289 			}
290 
291 			/* First free bit is at lcn + bmp_pos. */
292 
293 			/* Reallocate memory if necessary. */
294 			if ((rlpos + 2) * (int)sizeof(runlist) >= rlsize) {
295 				rlsize += 4096;
296 				trl = realloc(rl, rlsize);
297 				if (!trl) {
298 					err = ENOMEM;
299 					ntfs_log_perror("realloc() failed");
300 					goto wb_err_ret;
301 				}
302 				rl = trl;
303 			}
304 
305 			/* Allocate the bitmap bit. */
306 			*byte |= bit;
307 			writeback = 1;
308 
309 			/*
310 			 * Coalesce with previous run if adjacent LCNs.
311 			 * Otherwise, append a new run.
312 			 */
313 			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos)
314 				rl[rlpos - 1].length = ++prev_run_len;
315 			else {
316 				if (rlpos)
317 					rl[rlpos].vcn = rl[rlpos - 1].vcn +
318 							prev_run_len;
319 				else
320 					rl[rlpos].vcn = start_vcn;
321 
322 				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
323 				rl[rlpos].length = prev_run_len = 1;
324 				rlpos++;
325 			}
326 
327 			/* Done? */
328 			if (!--clusters) {
329 				if (used_zone_pos)
330 					ntfs_cluster_update_zone_pos(vol,
331 						search_zone, lcn + bmp_pos + 1 +
332 							NTFS_LCNALLOC_SKIP);
333 				goto done_ret;
334 			}
335 
336 			lcn++;
337 		}
338 
339 		if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback)) {
340 			err = errno;
341 			goto err_ret;
342 		}
343 
344 		if (!used_zone_pos) {
345 
346 			used_zone_pos = 1;
347 
348 			if (search_zone == 1)
349 				zone_start = vol->mft_zone_pos;
350 			else if (search_zone == 2)
351 				zone_start = vol->data1_zone_pos;
352 			else
353 				zone_start = vol->data2_zone_pos;
354 
355 			if (!zone_start || zone_start == vol->mft_zone_start ||
356 					zone_start == vol->mft_zone_end)
357 				pass = 2;
358 			bmp_pos = zone_start;
359 		} else
360 			bmp_pos += buf_size;
361 
362 		if (bmp_pos < zone_end)
363 			continue;
364 
365 zone_pass_done:
366 		ntfs_log_trace("Finished current zone pass(%i).\n", pass);
367 		if (pass == 1) {
368 
369 			pass = 2;
370 			zone_end = zone_start;
371 
372 			if (search_zone == 1)
373 				zone_start = vol->mft_zone_start;
374 			else if (search_zone == 2)
375 				zone_start = vol->mft_zone_end;
376 			else
377 				zone_start = 0;
378 
379 			/* Sanity check. */
380 			if (zone_end < zone_start)
381 				zone_end = zone_start;
382 
383 			bmp_pos = zone_start;
384 
385 			continue;
386 		}
387 		/* pass == 2 */
388 done_zones_check:
389 		done_zones |= search_zone;
390 		if (done_zones < 7) {
391 			ntfs_log_trace("Switching zone.\n");
392 			pass = 1;
393 			if (rlpos) {
394 				LCN tc = tc = rl[rlpos - 1].lcn +
395 				      rl[rlpos - 1].length + NTFS_LCNALLOC_SKIP;
396 
397 				if (used_zone_pos)
398 					ntfs_cluster_update_zone_pos(vol,
399 						search_zone, tc);
400 			}
401 
402 			switch (search_zone) {
403 			case 1:
404 				ntfs_log_trace("Zone switch: mft -> data1\n");
405 switch_to_data1_zone:		search_zone = 2;
406 				zone_start = vol->data1_zone_pos;
407 				zone_end = vol->nr_clusters;
408 				if (zone_start == vol->mft_zone_end)
409 					pass = 2;
410 				break;
411 			case 2:
412 				ntfs_log_trace("Zone switch: data1 -> data2\n");
413 				search_zone = 4;
414 				zone_start = vol->data2_zone_pos;
415 				zone_end = vol->mft_zone_start;
416 				if (!zone_start)
417 					pass = 2;
418 				break;
419 			case 4:
420 				if (!(done_zones & 2)) {
421 					ntfs_log_trace("data2 -> data1\n");
422 					goto switch_to_data1_zone;
423 				}
424 				ntfs_log_trace("Zone switch: data2 -> mft\n");
425 				search_zone = 1;
426 				zone_start = vol->mft_zone_pos;
427 				zone_end = vol->mft_zone_end;
428 				if (!zone_start == vol->mft_zone_start)
429 					pass = 2;
430 				break;
431 			}
432 
433 			bmp_pos = zone_start;
434 
435 			if (zone_start == zone_end) {
436 				ntfs_log_trace("Empty zone, skipped.\n");
437 				goto done_zones_check;
438 			}
439 
440 			continue;
441 		}
442 
443 		ntfs_log_trace("All zones are finished, no space on device.\n");
444 		err = ENOSPC;
445 		goto err_ret;
446 	}
447 done_ret:
448 	ntfs_log_debug("At done_ret.\n");
449 	/* Add runlist terminator element. */
450 	rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
451 	rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
452 	rl[rlpos].length = 0;
453 	if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback)) {
454 		err = errno;
455 		goto err_ret;
456 	}
457 done_err_ret:
458 	ntfs_log_debug("At done_err_ret (follows done_ret).\n");
459 	free(buf);
460 	if (!err)
461 		return rl;
462 	ntfs_log_trace("Failed to allocate clusters (%d)", errno);
463 	errno = err;
464 	return NULL;
465 
466 wb_err_ret:
467 	ntfs_log_trace("At wb_err_ret.\n");
468 	if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback))
469 		err = errno;
470 err_ret:
471 	ntfs_log_trace("At err_ret.\n");
472 	if (rl) {
473 		/* Add runlist terminator element. */
474 		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
475 		rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
476 		rl[rlpos].length = 0;
477 		ntfs_cluster_free_from_rl(vol, rl);
478 		free(rl);
479 		rl = NULL;
480 	}
481 	goto done_err_ret;
482 }
483 
484 /**
485  * ntfs_cluster_free_from_rl - free clusters from runlist
486  * @vol:	mounted ntfs volume on which to free the clusters
487  * @rl:		runlist from which deallocate clusters
488  *
489  * On success return 0 and on error return -1 with errno set to the error code.
490  */
491 int ntfs_cluster_free_from_rl(ntfs_volume *vol, runlist *rl)
492 {
493 	ntfs_log_trace("Entering.\n");
494 
495 	for (; rl->length; rl++) {
496 
497 		ntfs_log_trace("Dealloc lcn 0x%llx, len 0x%llx.\n",
498 			       (long long)rl->lcn, (long long)rl->length);
499 
500 		if (rl->lcn >= 0 && ntfs_bitmap_clear_run(vol->lcnbmp_na,
501 				rl->lcn, rl->length)) {
502 			int eo = errno;
503 			ntfs_log_trace("Eeek! Deallocation of clusters failed.\n");
504 			errno = eo;
505 			return -1;
506 		}
507 	}
508 	return 0;
509 }
510 
511 /**
512  * ntfs_cluster_free - free clusters on an ntfs volume
513  * @vol:	mounted ntfs volume on which to free the clusters
514  * @na:		attribute whose runlist describes the clusters to free
515  * @start_vcn:	vcn in @rl at which to start freeing clusters
516  * @count:	number of clusters to free or -1 for all clusters
517  *
518  * Free @count clusters starting at the cluster @start_vcn in the runlist
519  * described by the attribute @na from the mounted ntfs volume @vol.
520  *
521  * If @count is -1, all clusters from @start_vcn to the end of the runlist
522  * are deallocated.
523  *
524  * On success return the number of deallocated clusters (not counting sparse
525  * clusters) and on error return -1 with errno set to the error code.
526  */
527 int ntfs_cluster_free(ntfs_volume *vol, ntfs_attr *na, VCN start_vcn, s64 count)
528 {
529 	runlist *rl;
530 	s64 nr_freed, delta, to_free;
531 
532 	if (!vol || !vol->lcnbmp_na || !na || start_vcn < 0 ||
533 			(count < 0 && count != -1)) {
534 		ntfs_log_trace("Invalid arguments!\n");
535 		errno = EINVAL;
536 		return -1;
537 	}
538 	ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, count 0x%llx, "
539 		       "vcn 0x%llx.\n", (unsigned long long)na->ni->mft_no,
540 		       na->type, (long long)count, (long long)start_vcn);
541 
542 	rl = ntfs_attr_find_vcn(na, start_vcn);
543 	if (!rl) {
544 		if (errno == ENOENT)
545 			return 0;
546 		else
547 			return -1;
548 	}
549 
550 	if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
551 		errno = EIO;
552 		return -1;
553 	}
554 
555 	/* Find the starting cluster inside the run that needs freeing. */
556 	delta = start_vcn - rl->vcn;
557 
558 	/* The number of clusters in this run that need freeing. */
559 	to_free = rl->length - delta;
560 	if (count >= 0 && to_free > count)
561 		to_free = count;
562 
563 	if (rl->lcn != LCN_HOLE) {
564 		/* Do the actual freeing of the clusters in this run. */
565 		if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn + delta,
566 				to_free))
567 			return -1;
568 		/* We have freed @to_free real clusters. */
569 		nr_freed = to_free;
570 	} else {
571 		/* No real clusters were freed. */
572 		nr_freed = 0;
573 	}
574 
575 	/* Go to the next run and adjust the number of clusters left to free. */
576 	++rl;
577 	if (count >= 0)
578 		count -= to_free;
579 
580 	/*
581 	 * Loop over the remaining runs, using @count as a capping value, and
582 	 * free them.
583 	 */
584 	for (; rl->length && count != 0; ++rl) {
585 		// FIXME: Need to try ntfs_attr_map_runlist() for attribute
586 		//	  list support! (AIA)
587 		if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
588 			// FIXME: Eeek! We need rollback! (AIA)
589 			ntfs_log_trace("Eeek! invalid lcn (= %lli).  Should attempt "
590 					"to map runlist!  Leaving inconsistent "
591 					"metadata!\n", (long long)rl->lcn);
592 			errno = EIO;
593 			return -1;
594 		}
595 
596 		/* The number of clusters in this run that need freeing. */
597 		to_free = rl->length;
598 		if (count >= 0 && to_free > count)
599 			to_free = count;
600 
601 		if (rl->lcn != LCN_HOLE) {
602 			/* Do the actual freeing of the clusters in the run. */
603 			if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn,
604 					to_free)) {
605 				int eo = errno;
606 
607 				// FIXME: Eeek! We need rollback! (AIA)
608 				ntfs_log_trace("Eeek!  bitmap clear run failed.  "
609 						"Leaving inconsistent metadata!\n");
610 				errno = eo;
611 				return -1;
612 			}
613 			/* We have freed @to_free real clusters. */
614 			nr_freed += to_free;
615 		}
616 
617 		if (count >= 0)
618 			count -= to_free;
619 	}
620 
621 	if (count != -1 && count != 0) {
622 		// FIXME: Eeek! BUG()
623 		ntfs_log_trace("Eeek!  count still not zero (= %lli).  Leaving "
624 				"inconsistent metadata!\n", (long long)count);
625 		errno = EIO;
626 		return -1;
627 	}
628 
629 	/* Done. Return the number of actual clusters that were freed. */
630 	return nr_freed;
631 }
632