xref: /haiku/src/add-ons/kernel/file_systems/ntfs/libntfs/lcnalloc.c (revision 93a78ecaa45114d68952d08c4778f073515102f2)
1 /**
2  * lcnalloc.c - Cluster (de)allocation code. Originated from the Linux-NTFS project.
3  *
4  * Copyright (c) 2002-2004 Anton Altaparmakov
5  * Copyright (c) 2004-2007 Szabolcs Szakacsits
6  * Copyright (c) 2004 Yura Pakhuchiy
7  *
8  * This program/include file is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as published
10  * by the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program/include file is distributed in the hope that it will be
14  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program (in the main directory of the NTFS-3G
20  * distribution in the file COPYING); if not, write to the Free Software
21  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27 
28 #ifdef HAVE_STDLIB_H
29 #include <stdlib.h>
30 #endif
31 #ifdef HAVE_STDIO_H
32 #include <stdio.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 
38 #include "types.h"
39 #include "attrib.h"
40 #include "bitmap.h"
41 #include "debug.h"
42 #include "runlist.h"
43 #include "volume.h"
44 #include "lcnalloc.h"
45 #include "logging.h"
46 #include "misc.h"
47 
48 #define NTFS_LCNALLOC_BSIZE  512
49 
50 #define NTFS_LCNALLOC_SKIP  4096
51 
52 static void ntfs_cluster_set_zone_pos(LCN zone_start, LCN zone_end,
53 			LCN *zone_pos, LCN tc, LCN bmp_initial_pos)
54 {
55 	ntfs_log_trace("Before: zone_pos: %lld\n", (long long)*zone_pos);
56 
57 	if (tc >= zone_end) {
58 		*zone_pos = zone_start;
59 		// FIXME: seems to be bogus and only MFT zone used it
60 		if (!zone_end)
61 			*zone_pos = 0;
62 	} else if ((bmp_initial_pos >= *zone_pos || tc > *zone_pos) &&
63 		   tc >= zone_start)
64 		*zone_pos = tc;
65 
66 	ntfs_log_trace("After: zone_pos: %lld\n", (long long)*zone_pos);
67 }
68 
69 static int ntfs_cluster_update_zone_pos(ntfs_volume *vol, u8 zone, LCN tc,
70 					LCN bmp_initial_pos)
71 {
72 	ntfs_log_trace("tc = %lld, zone = %d\n", (long long)tc, zone);
73 
74 	switch (zone) {
75 	case 1:
76 		ntfs_cluster_set_zone_pos(vol->mft_lcn,
77 					  vol->mft_zone_end,
78 					  &vol->mft_zone_pos,
79 					  tc, bmp_initial_pos);
80 		break;
81 	case 2:
82 		ntfs_cluster_set_zone_pos(vol->mft_zone_end,
83 					  vol->nr_clusters,
84 					  &vol->data1_zone_pos,
85 					  tc, bmp_initial_pos);
86 		break;
87 	case 4:
88 		ntfs_cluster_set_zone_pos(0,
89 					  vol->mft_zone_start,
90 					  &vol->data2_zone_pos,
91 					  tc, bmp_initial_pos);
92 		break;
93 	default:
94 		ntfs_log_error("Invalid zone: %d\n", zone);
95 		return -1;
96 	}
97 
98 	return 0;
99 }
100 
101 /**
102  * ntfs_cluster_alloc - allocate clusters on an ntfs volume
103  * @vol:	mounted ntfs volume on which to allocate the clusters
104  * @start_vcn:	vcn to use for the first allocated cluster
105  * @count:	number of clusters to allocate
106  * @start_lcn:	starting lcn at which to allocate the clusters (or -1 if none)
107  * @zone:	zone from which to allocate the clusters
108  *
109  * Allocate @count clusters preferably starting at cluster @start_lcn or at the
110  * current allocator position if @start_lcn is -1, on the mounted ntfs volume
111  * @vol. @zone is either DATA_ZONE for allocation of normal clusters and
112  * MFT_ZONE for allocation of clusters for the master file table, i.e. the
113  * $MFT/$DATA attribute.
114  *
115  * On success return a runlist describing the allocated cluster(s).
116  *
117  * On error return NULL with errno set to the error code.
118  *
119  * Notes on the allocation algorithm
120  * =================================
121  *
122  * There are two data zones. First is the area between the end of the mft zone
123  * and the end of the volume, and second is the area between the start of the
124  * volume and the start of the mft zone. On unmodified/standard NTFS 1.x
125  * volumes, the second data zone doesn't exist due to the mft zone being
126  * expanded to cover the start of the volume in order to reserve space for the
127  * mft bitmap attribute.
128  *
129  * This is not the prettiest function but the complexity stems from the need of
130  * implementing the mft vs data zoned approach and from the fact that we have
131  * access to the lcn bitmap via up to NTFS_LCNALLOC_BSIZE bytes at a time, so we
132  * need to cope with crossing over boundaries of two buffers. Further, the fact
133  * that the allocator allows for caller supplied hints as to the location of
134  * where allocation should begin and the fact that the allocator keeps track of
135  * where in the data zones the next natural allocation should occur, contribute
136  * to the complexity of the function. But it should all be worthwhile, because
137  * this allocator should: 1) be a full implementation of the MFT zone approach
138  * used by Windows, 2) cause reduction in fragmentation as much as possible,
139  * and 3) be speedy in allocations (the code is not optimized for speed, but
140  * the algorithm is, so further speed improvements are probably possible).
141  *
142  * FIXME: We should be monitoring cluster allocation and increment the MFT zone
143  * size dynamically but this is something for the future. We will just cause
144  * heavier fragmentation by not doing it and I am not even sure Windows would
145  * grow the MFT zone dynamically, so it might even be correct not to do this.
146  * The overhead in doing dynamic MFT zone expansion would be very large and
147  * unlikely worth the effort. (AIA)
148  *
149  * TODO: I have added in double the required zone position pointer wrap around
150  * logic which can be optimized to having only one of the two logic sets.
151  * However, having the double logic will work fine, but if we have only one of
152  * the sets and we get it wrong somewhere, then we get into trouble, so
153  * removing the duplicate logic requires _very_ careful consideration of _all_
154  * possible code paths. So at least for now, I am leaving the double logic -
155  * better safe than sorry... (AIA)
156  */
157 runlist *ntfs_cluster_alloc(ntfs_volume *vol, VCN start_vcn, s64 count,
158 		LCN start_lcn, const NTFS_CLUSTER_ALLOCATION_ZONES zone)
159 {
160 	LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn;
161 	LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size;
162 	s64 clusters, br;
163 	runlist *rl = NULL, *trl;
164 	u8 *buf, *byte;
165 	int err = 0, rlpos, rlsize, buf_size;
166 	u8 pass, done_zones, search_zone, need_writeback, bit;
167 	u8 first_try = 1;
168 
169 	ntfs_log_trace("Entering with count = 0x%llx, start_lcn = 0x%llx, "
170 		       "zone = %s_ZONE.\n", (long long)count, (long long)
171 		       start_lcn, zone == MFT_ZONE ? "MFT" : "DATA");
172 	if (!vol || count < 0 || start_lcn < -1 || !vol->lcnbmp_na ||
173 			(s8)zone < FIRST_ZONE || zone > LAST_ZONE) {
174 		errno = EINVAL;
175 		ntfs_log_perror("%s: vcn: %lld, count: %lld, lcn: %lld",
176 				__FUNCTION__, (long long)start_vcn,
177 				(long long)count, (long long)start_lcn);
178 		return NULL;
179 	}
180 
181 	/* Return empty runlist if @count == 0 */
182 	if (!count) {
183 		rl = ntfs_malloc(0x1000);
184 		if (!rl)
185 			return NULL;
186 		rl[0].vcn = start_vcn;
187 		rl[0].lcn = LCN_RL_NOT_MAPPED;
188 		rl[0].length = 0;
189 		return rl;
190 	}
191 
192 	/* Allocate memory. */
193 	buf = ntfs_malloc(NTFS_LCNALLOC_BSIZE);
194 	if (!buf)
195 		return NULL;
196 	/*
197 	 * If no specific @start_lcn was requested, use the current data zone
198 	 * position, otherwise use the requested @start_lcn but make sure it
199 	 * lies outside the mft zone. Also set done_zones to 0 (no zones done)
200 	 * and pass depending on whether we are starting inside a zone (1) or
201 	 * at the beginning of a zone (2). If requesting from the MFT_ZONE,
202 	 * we either start at the current position within the mft zone or at
203 	 * the specified position. If the latter is out of bounds then we start
204 	 * at the beginning of the MFT_ZONE.
205 	 */
206 	done_zones = 0;
207 	pass = 1;
208 	/*
209 	 * zone_start and zone_end are the current search range. search_zone
210 	 * is 1 for mft zone, 2 for data zone 1 (end of mft zone till end of
211 	 * volume) and 4 for data zone 2 (start of volume till start of mft
212 	 * zone).
213 	 */
214 	zone_start = start_lcn;
215 	if (zone_start < 0) {
216 		if (zone == DATA_ZONE)
217 			zone_start = vol->data1_zone_pos;
218 		else
219 			zone_start = vol->mft_zone_pos;
220 		if (!zone_start) {
221 			/*
222 			 * Zone starts at beginning of volume which means a
223 			 * single pass is sufficient.
224 			 */
225 			pass = 2;
226 		}
227 	} else if (zone == DATA_ZONE && zone_start >= vol->mft_zone_start &&
228 			zone_start < vol->mft_zone_end) {
229 		zone_start = vol->mft_zone_end;
230 		/*
231 		 * Starting at beginning of data1_zone which means a single
232 		 * pass in this zone is sufficient.
233 		 */
234 		pass = 2;
235 	} else if (zone == MFT_ZONE && (zone_start < vol->mft_zone_start ||
236 			zone_start >= vol->mft_zone_end)) {
237 		zone_start = vol->mft_lcn;
238 		if (!vol->mft_zone_end)
239 			zone_start = 0;
240 		/*
241 		 * Starting at beginning of volume which means a single pass
242 		 * is sufficient.
243 		 */
244 		pass = 2;
245 	}
246 	if (zone == MFT_ZONE) {
247 		zone_end = vol->mft_zone_end;
248 		search_zone = 1;
249 	} else /* if (zone == DATA_ZONE) */ {
250 		/* Skip searching the mft zone. */
251 		done_zones |= 1;
252 		if (zone_start >= vol->mft_zone_end) {
253 			zone_end = vol->nr_clusters;
254 			search_zone = 2;
255 		} else {
256 			zone_end = vol->mft_zone_start;
257 			search_zone = 4;
258 		}
259 	}
260 	/*
261 	 * bmp_pos is the current bit position inside the bitmap. We use
262 	 * bmp_initial_pos to determine whether or not to do a zone switch.
263 	 */
264 	bmp_pos = bmp_initial_pos = zone_start;
265 
266 	/* Loop until all clusters are allocated, i.e. clusters == 0. */
267 	clusters = count;
268 	rlpos = rlsize = 0;
269 	while (1) {
270 		ntfs_log_trace("Start of outer while loop: done_zones = 0x%x, "
271 				"search_zone = %i, pass = %i, zone_start = "
272 				"0x%llx, zone_end = 0x%llx, bmp_initial_pos = "
273 				"0x%llx, bmp_pos = 0x%llx, rlpos = %i, rlsize = "
274 				"%i.\n", done_zones, search_zone, pass,
275 				(long long)zone_start, (long long)zone_end,
276 				(long long)bmp_initial_pos, (long long)bmp_pos,
277 				rlpos, rlsize);
278 		/* Loop until we run out of free clusters. */
279 		last_read_pos = bmp_pos >> 3;
280 		ntfs_log_trace("last_read_pos = 0x%llx.\n", (long long)last_read_pos);
281 		br = ntfs_attr_pread(vol->lcnbmp_na, last_read_pos, NTFS_LCNALLOC_BSIZE, buf);
282 		if (br <= 0) {
283 			if (!br) {
284 				/* Reached end of attribute. */
285 				ntfs_log_trace("End of attribute reached. Skipping "
286 						"to zone_pass_done.\n");
287 				goto zone_pass_done;
288 			}
289 			err = errno;
290 			ntfs_log_perror("ntfs_attr_pread() failed");
291 			goto err_ret;
292 		}
293 		/*
294 		 * We might have read less than NTFS_LCNALLOC_BSIZE bytes if we are close to
295 		 * the end of the attribute.
296 		 */
297 		buf_size = (int)br << 3;
298 		lcn = bmp_pos & 7;
299 		bmp_pos &= ~7;
300 		need_writeback = 0;
301 		ntfs_log_trace("Before inner while loop: buf_size = %i, lcn = "
302 				"0x%llx, bmp_pos = 0x%llx, need_writeback = %i.\n",
303 				buf_size, (long long)lcn, (long long)bmp_pos,
304 				need_writeback);
305 		while (lcn < buf_size && lcn + bmp_pos < zone_end) {
306 			byte = buf + (lcn >> 3);
307 			ntfs_log_trace("In inner while loop: buf_size = %i, lcn = "
308 					"0x%llx, bmp_pos = 0x%llx, "
309 					"need_writeback = %i, byte ofs = 0x%x, "
310 					"*byte = 0x%x.\n", buf_size,
311 					(long long)lcn, (long long)bmp_pos,
312 					need_writeback, (unsigned int)(lcn >> 3),
313 					(unsigned int)*byte);
314 			/* Skip full bytes. */
315 			if (*byte == 0xff) {
316 				lcn = (lcn + 8) & ~7;
317 				ntfs_log_trace("continuing while loop 1.\n");
318 				if (first_try) {
319 					first_try = 0;
320 					lcn += NTFS_LCNALLOC_SKIP;
321 				}
322 				continue;
323 			}
324 			bit = 1 << (lcn & 7);
325 			ntfs_log_trace("bit = %i.\n", bit);
326 			/* If the bit is already set, go onto the next one. */
327 			if (*byte & bit) {
328 				lcn++;
329 				ntfs_log_trace("continuing while loop 2.\n");
330 				if (first_try) {
331 					first_try = 0;
332 					lcn += NTFS_LCNALLOC_SKIP;
333 				}
334 				continue;
335 			}
336 			/* Reallocate memory if necessary. */
337 			if ((rlpos + 2) * (int)sizeof(runlist) >= rlsize) {
338 				ntfs_log_trace("Reallocating space.\n");
339 				if (!rl)
340 					ntfs_log_trace("First free bit is at LCN = "
341 						"0x%llx.\n", (long long)(lcn + bmp_pos));
342 				rlsize += 4096;
343 				trl = (runlist*)realloc(rl, rlsize);
344 				if (!trl) {
345 					err = ENOMEM;
346 					ntfs_log_perror("Failed to allocate memory");
347 					goto wb_err_ret;
348 				}
349 				rl = trl;
350 				ntfs_log_trace("Reallocated memory, rlsize = "
351 						"0x%x.\n", rlsize);
352 			}
353 			/* Allocate the bitmap bit. */
354 			*byte |= bit;
355 			/* We need to write this bitmap buffer back to disk! */
356 			need_writeback = 1;
357 			ntfs_log_trace("*byte = 0x%x, need_writeback is set.\n",
358 					(unsigned int)*byte);
359 			/*
360 			 * Coalesce with previous run if adjacent LCNs.
361 			 * Otherwise, append a new run.
362 			 */
363 			ntfs_log_trace("Adding run (lcn 0x%llx, len 0x%llx), "
364 					"prev_lcn = 0x%llx, lcn = 0x%llx, "
365 					"bmp_pos = 0x%llx, prev_run_len = "
366 					"0x%llx, rlpos = %i.\n",
367 					(long long)(lcn + bmp_pos), 1LL,
368 					(long long)prev_lcn, (long long)lcn,
369 					(long long)bmp_pos,
370 					(long long)prev_run_len, rlpos);
371 			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
372 				ntfs_log_trace("Coalescing to run (lcn 0x%llx, len "
373 						"0x%llx).\n",
374 						(long long)rl[rlpos - 1].lcn,
375 						(long long) rl[rlpos - 1].length);
376 				rl[rlpos - 1].length = ++prev_run_len;
377 				ntfs_log_trace("Run now (lcn 0x%llx, len 0x%llx), "
378 						"prev_run_len = 0x%llx.\n",
379 						(long long)rl[rlpos - 1].lcn,
380 						(long long)rl[rlpos - 1].length,
381 						(long long)prev_run_len);
382 			} else {
383 				if (rlpos) {
384 					ntfs_log_trace("Adding new run, (previous "
385 						"run lcn 0x%llx, len 0x%llx).\n",
386 						(long long) rl[rlpos - 1].lcn,
387 						(long long) rl[rlpos - 1].length);
388 					rl[rlpos].vcn = rl[rlpos - 1].vcn +
389 							prev_run_len;
390 				} else {
391 					ntfs_log_trace("Adding new run, is first run.\n");
392 					rl[rlpos].vcn = start_vcn;
393 				}
394 				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
395 				rl[rlpos].length = prev_run_len = 1;
396 				rlpos++;
397 			}
398 			/* Done? */
399 			if (!--clusters) {
400 				if (ntfs_cluster_update_zone_pos(vol,
401 						search_zone, lcn + bmp_pos + 1
402 							+ NTFS_LCNALLOC_SKIP,
403 						bmp_initial_pos)) {
404 					free(rl);
405 					free(buf);
406 					return NULL;
407 				}
408 				goto done_ret;
409 			}
410 			lcn++;
411 		}
412 		bmp_pos += buf_size;
413 		ntfs_log_trace("After inner while loop: buf_size = 0x%x, lcn = "
414 				"0x%llx, bmp_pos = 0x%llx, need_writeback = %i.\n",
415 				buf_size, (long long)lcn,
416 				(long long)bmp_pos, need_writeback);
417 		if (need_writeback) {
418 			s64 bw;
419 			ntfs_log_trace("Writing back.\n");
420 			need_writeback = 0;
421 			bw = ntfs_attr_pwrite(vol->lcnbmp_na, last_read_pos,
422 					br, buf);
423 			if (bw != br) {
424 				if (bw == -1)
425 					err = errno;
426 				else
427 					err = EIO;
428 				ntfs_log_perror("Bitmap writeback failed in "
429 						"read next buffer code path");
430 				goto err_ret;
431 			}
432 		}
433 		if (bmp_pos < zone_end) {
434 			ntfs_log_trace("Continuing outer while loop, bmp_pos = "
435 					"0x%llx, zone_end = 0x%llx.\n",
436 					(long long)bmp_pos,
437 					(long long)zone_end);
438 			continue;
439 		}
440 zone_pass_done:	/* Finished with the current zone pass. */
441 		ntfs_log_trace("At zone_pass_done, pass = %i.\n", pass);
442 		if (pass == 1) {
443 			/*
444 			 * Now do pass 2, scanning the first part of the zone
445 			 * we omitted in pass 1.
446 			 */
447 			pass = 2;
448 			zone_end = zone_start;
449 			switch (search_zone) {
450 			case 1: /* mft_zone */
451 				zone_start = vol->mft_zone_start;
452 				break;
453 			case 2: /* data1_zone */
454 				zone_start = vol->mft_zone_end;
455 				break;
456 			case 4: /* data2_zone */
457 				zone_start = 0;
458 				break;
459 			default:
460 				NTFS_BUG("switch (search_zone) 2");
461 			}
462 			/* Sanity check. */
463 			if (zone_end < zone_start)
464 				zone_end = zone_start;
465 			bmp_pos = zone_start;
466 			ntfs_log_trace("Continuing outer while loop, pass = 2, "
467 					"zone_start = 0x%llx, zone_end = "
468 					"0x%llx, bmp_pos = 0x%llx.\n",
469 					zone_start, zone_end, bmp_pos);
470 			continue;
471 		} /* pass == 2 */
472 done_zones_check:
473 		ntfs_log_trace("At done_zones_check, search_zone = %i, done_zones "
474 				"before = 0x%x, done_zones after = 0x%x.\n",
475 				search_zone, done_zones, done_zones | search_zone);
476 		done_zones |= search_zone;
477 		if (done_zones < 7) {
478 			ntfs_log_trace("Switching zone.\n");
479 			/* Now switch to the next zone we haven't done yet. */
480 			pass = 1;
481 			if (rlpos) {
482 				LCN tc;
483 
484 				tc = rl[rlpos - 1].lcn + rl[rlpos - 1].length
485 					+ NTFS_LCNALLOC_SKIP;
486 
487 				if (ntfs_cluster_update_zone_pos(vol,
488 						search_zone, tc, bmp_initial_pos))
489 					return NULL;
490 			}
491 
492 			switch (search_zone) {
493 			case 1:
494 				ntfs_log_trace("Zone switch: mft -> data1\n");
495 switch_to_data1_zone:		search_zone = 2;
496 				zone_start = bmp_initial_pos =
497 						vol->data1_zone_pos;
498 				zone_end = vol->nr_clusters;
499 				if (zone_start == vol->mft_zone_end)
500 					pass = 2;
501 				if (zone_start >= zone_end) {
502 					vol->data1_zone_pos = zone_start =
503 							vol->mft_zone_end;
504 					pass = 2;
505 				}
506 				break;
507 			case 2:
508 				ntfs_log_trace("Zone switch: data1 -> data2\n");
509 				search_zone = 4;
510 				zone_start = bmp_initial_pos =
511 						vol->data2_zone_pos;
512 				zone_end = vol->mft_zone_start;
513 				if (!zone_start)
514 					pass = 2;
515 				if (zone_start >= zone_end) {
516 					vol->data2_zone_pos = zone_start =
517 							bmp_initial_pos = 0;
518 					pass = 2;
519 				}
520 				break;
521 			case 4:
522 				ntfs_log_trace("Zone switch: data2 -> data1\n");
523 				goto switch_to_data1_zone; /* See above. */
524 			default:
525 				NTFS_BUG("switch (search_zone) 3");
526 			}
527 			ntfs_log_trace("After zone switch, search_zone = %i, pass = "
528 					"%i, bmp_initial_pos = 0x%llx, "
529 					"zone_start = 0x%llx, zone_end = "
530 					"0x%llx.\n", search_zone, pass,
531 					(long long)bmp_initial_pos,
532 					(long long)zone_start,
533 					(long long)zone_end);
534 			bmp_pos = zone_start;
535 			if (zone_start == zone_end) {
536 				ntfs_log_trace("Empty zone, going to "
537 						"done_zones_check.\n");
538 				/* Empty zone. Don't bother searching it. */
539 				goto done_zones_check;
540 			}
541 			ntfs_log_trace("Continuing outer while loop.\n");
542 			continue;
543 		} /* done_zones == 7 */
544 		ntfs_log_trace("All zones are finished.\n");
545 		/*
546 		 * All zones are finished! If DATA_ZONE, shrink mft zone. If
547 		 * MFT_ZONE, we have really run out of space.
548 		 */
549 		mft_zone_size = vol->mft_zone_end - vol->mft_zone_start;
550 		ntfs_log_trace("vol->mft_zone_start = 0x%llx, vol->mft_zone_end = "
551 				"0x%llx, mft_zone_size = 0x%llx.\n",
552 				(long long)vol->mft_zone_start,
553 				(long long)vol->mft_zone_end,
554 				(long long)mft_zone_size);
555 		if (zone == MFT_ZONE || mft_zone_size <= 0) {
556 			ntfs_log_trace("No free clusters left, going to err_ret.\n");
557 			/* Really no more space left on device. */
558 			err = ENOSPC;
559 			goto err_ret;
560 		} /* zone == DATA_ZONE && mft_zone_size > 0 */
561 		ntfs_log_trace("Shrinking mft zone.\n");
562 		zone_end = vol->mft_zone_end;
563 		mft_zone_size >>= 1;
564 		if (mft_zone_size > 0)
565 			vol->mft_zone_end = vol->mft_zone_start + mft_zone_size;
566 		else /* mft zone and data2 zone no longer exist. */
567 			vol->data2_zone_pos = vol->mft_zone_start =
568 					vol->mft_zone_end = 0;
569 		if (vol->mft_zone_pos >= vol->mft_zone_end) {
570 			vol->mft_zone_pos = vol->mft_lcn;
571 			if (!vol->mft_zone_end)
572 				vol->mft_zone_pos = 0;
573 		}
574 		bmp_pos = zone_start = bmp_initial_pos =
575 				vol->data1_zone_pos = vol->mft_zone_end;
576 		search_zone = 2;
577 		pass = 2;
578 		done_zones &= ~2;
579 		ntfs_log_trace("After shrinking mft zone, mft_zone_size = 0x%llx, "
580 				"vol->mft_zone_start = 0x%llx, "
581 				"vol->mft_zone_end = 0x%llx, vol->mft_zone_pos "
582 				"= 0x%llx, search_zone = 2, pass = 2, "
583 				"dones_zones = 0x%x, zone_start = 0x%llx, "
584 				"zone_end = 0x%llx, vol->data1_zone_pos = "
585 				"0x%llx, continuing outer while loop.\n",
586 				(long long)mft_zone_size,
587 				(long long)vol->mft_zone_start,
588 				(long long)vol->mft_zone_end,
589 				(long long)vol->mft_zone_pos,
590 				done_zones,
591 				(long long)zone_start,
592 				(long long)zone_end,
593 				(long long)vol->data1_zone_pos);
594 	}
595 	ntfs_log_debug("After outer while loop.\n");
596 done_ret:
597 	ntfs_log_debug("At done_ret.\n");
598 	/* Add runlist terminator element. */
599 	rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
600 	rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
601 	rl[rlpos].length = 0;
602 	if (need_writeback) {
603 		s64 bw;
604 		ntfs_log_trace("Writing back.\n");
605 		need_writeback = 0;
606 		bw = ntfs_attr_pwrite(vol->lcnbmp_na, last_read_pos, br, buf);
607 		if (bw != br) {
608 			if (bw < 0)
609 				err = errno;
610 			else
611 				err = EIO;
612 			ntfs_log_perror("Bitmap writeback failed");
613 			goto err_ret;
614 		}
615 	}
616 done_err_ret:
617 	ntfs_log_debug("At done_err_ret (follows done_ret).\n");
618 	free(buf);
619 	/* Done! */
620 	if (!err)
621 		return rl;
622 	ntfs_log_perror("Failed to allocate clusters");
623 	errno = err;
624 	return NULL;
625 wb_err_ret:
626 	ntfs_log_trace("At wb_err_ret.\n");
627 	if (need_writeback) {
628 		s64 bw;
629 		ntfs_log_trace("Writing back.\n");
630 		need_writeback = 0;
631 		bw = ntfs_attr_pwrite(vol->lcnbmp_na, last_read_pos, br, buf);
632 		if (bw != br) {
633 			if (bw < 0)
634 				err = errno;
635 			else
636 				err = EIO;
637 			ntfs_log_trace("Bitmap writeback failed in error code path "
638 					"with error code %i.\n", err);
639 		}
640 	}
641 err_ret:
642 	ntfs_log_trace("At err_ret.\n");
643 	if (rl) {
644 		if (err == ENOSPC) {
645 			ntfs_log_trace("err = ENOSPC, first free lcn = 0x%llx, could "
646 					"allocate up to = 0x%llx clusters.\n",
647 					(long long)rl[0].lcn,
648 					(long long)count - clusters);
649 		}
650 		/* Add runlist terminator element. */
651 		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
652 		rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
653 		rl[rlpos].length = 0;
654 		/* Deallocate all allocated clusters. */
655 		ntfs_log_trace("Deallocating allocated clusters.\n");
656 		ntfs_cluster_free_from_rl(vol, rl);
657 		/* Free the runlist. */
658 		free(rl);
659 		rl = NULL;
660 	} else {
661 		if (err == ENOSPC) {
662 			ntfs_log_trace("No space left at all, err = ENOSPC, first "
663 					"free lcn = 0x%llx.\n",
664 					(long long)vol->data1_zone_pos);
665 		}
666 	}
667 	ntfs_log_trace("rl = NULL, going to done_err_ret.\n");
668 	goto done_err_ret;
669 }
670 
671 /**
672  * ntfs_cluster_free_from_rl - free clusters from runlist
673  * @vol:	mounted ntfs volume on which to free the clusters
674  * @rl:		runlist from which deallocate clusters
675  *
676  * On success return 0 and on error return -1 with errno set to the error code.
677  */
678 int ntfs_cluster_free_from_rl(ntfs_volume *vol, runlist *rl)
679 {
680 	ntfs_log_trace("Entering.\n");
681 
682 	for (; rl->length; rl++) {
683 
684 		ntfs_log_trace("Dealloc lcn 0x%llx, len 0x%llx.\n",
685 			       (long long)rl->lcn, (long long)rl->length);
686 
687 		if (rl->lcn >= 0 && ntfs_bitmap_clear_run(vol->lcnbmp_na,
688 				rl->lcn, rl->length)) {
689 			int eo = errno;
690 			ntfs_log_trace("Eeek! Deallocation of clusters failed.\n");
691 			errno = eo;
692 			return -1;
693 		}
694 	}
695 	return 0;
696 }
697 
698 /**
699  * ntfs_cluster_free - free clusters on an ntfs volume
700  * @vol:	mounted ntfs volume on which to free the clusters
701  * @na:		attribute whose runlist describes the clusters to free
702  * @start_vcn:	vcn in @rl at which to start freeing clusters
703  * @count:	number of clusters to free or -1 for all clusters
704  *
705  * Free @count clusters starting at the cluster @start_vcn in the runlist
706  * described by the attribute @na from the mounted ntfs volume @vol.
707  *
708  * If @count is -1, all clusters from @start_vcn to the end of the runlist
709  * are deallocated.
710  *
711  * On success return the number of deallocated clusters (not counting sparse
712  * clusters) and on error return -1 with errno set to the error code.
713  */
714 int ntfs_cluster_free(ntfs_volume *vol, ntfs_attr *na, VCN start_vcn, s64 count)
715 {
716 	runlist *rl;
717 	s64 nr_freed, delta, to_free;
718 
719 	if (!vol || !vol->lcnbmp_na || !na || start_vcn < 0 ||
720 			(count < 0 && count != -1)) {
721 		ntfs_log_trace("Invalid arguments!\n");
722 		errno = EINVAL;
723 		return -1;
724 	}
725 	ntfs_log_trace("Entering for inode 0x%llx, attr 0x%x, count 0x%llx, "
726 		       "vcn 0x%llx.\n", (unsigned long long)na->ni->mft_no,
727 		       na->type, (long long)count, (long long)start_vcn);
728 
729 	rl = ntfs_attr_find_vcn(na, start_vcn);
730 	if (!rl) {
731 		if (errno == ENOENT)
732 			return 0;
733 		else
734 			return -1;
735 	}
736 
737 	if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
738 		errno = EIO;
739 		return -1;
740 	}
741 
742 	/* Find the starting cluster inside the run that needs freeing. */
743 	delta = start_vcn - rl->vcn;
744 
745 	/* The number of clusters in this run that need freeing. */
746 	to_free = rl->length - delta;
747 	if (count >= 0 && to_free > count)
748 		to_free = count;
749 
750 	if (rl->lcn != LCN_HOLE) {
751 		/* Do the actual freeing of the clusters in this run. */
752 		if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn + delta,
753 				to_free))
754 			return -1;
755 		/* We have freed @to_free real clusters. */
756 		nr_freed = to_free;
757 	} else {
758 		/* No real clusters were freed. */
759 		nr_freed = 0;
760 	}
761 
762 	/* Go to the next run and adjust the number of clusters left to free. */
763 	++rl;
764 	if (count >= 0)
765 		count -= to_free;
766 
767 	/*
768 	 * Loop over the remaining runs, using @count as a capping value, and
769 	 * free them.
770 	 */
771 	for (; rl->length && count != 0; ++rl) {
772 		// FIXME: Need to try ntfs_attr_map_runlist() for attribute
773 		//	  list support! (AIA)
774 		if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
775 			// FIXME: Eeek! We need rollback! (AIA)
776 			ntfs_log_trace("Eeek! invalid lcn (= %lli).  Should attempt "
777 					"to map runlist!  Leaving inconsistent "
778 					"metadata!\n", (long long)rl->lcn);
779 			errno = EIO;
780 			return -1;
781 		}
782 
783 		/* The number of clusters in this run that need freeing. */
784 		to_free = rl->length;
785 		if (count >= 0 && to_free > count)
786 			to_free = count;
787 
788 		if (rl->lcn != LCN_HOLE) {
789 			/* Do the actual freeing of the clusters in the run. */
790 			if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn,
791 					to_free)) {
792 				int eo = errno;
793 
794 				// FIXME: Eeek! We need rollback! (AIA)
795 				ntfs_log_trace("Eeek!  bitmap clear run failed.  "
796 						"Leaving inconsistent metadata!\n");
797 				errno = eo;
798 				return -1;
799 			}
800 			/* We have freed @to_free real clusters. */
801 			nr_freed += to_free;
802 		}
803 
804 		if (count >= 0)
805 			count -= to_free;
806 	}
807 
808 	if (count != -1 && count != 0) {
809 		// FIXME: Eeek! BUG()
810 		ntfs_log_trace("Eeek!  count still not zero (= %lli).  Leaving "
811 				"inconsistent metadata!\n", (long long)count);
812 		errno = EIO;
813 		return -1;
814 	}
815 
816 	/* Done. Return the number of actual clusters that were freed. */
817 	return nr_freed;
818 }
819