xref: /haiku/src/add-ons/kernel/file_systems/ntfs/libntfs/mft.c (revision ed24eb5ff12640d052171c6a7feba37fab8a75d1)
1 /**
2  * mft.c - Mft record handling code. Originated from the Linux-NTFS project.
3  *
4  * Copyright (c) 2000-2004 Anton Altaparmakov
5  * Copyright (c) 2004-2005 Richard Russon
6  * Copyright (c) 2004-2008 Szabolcs Szakacsits
7  * Copyright (c)      2005 Yura Pakhuchiy
8  * Copyright (c) 2014-2021 Jean-Pierre Andre
9  *
10  * This program/include file is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License as published
12  * by the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program/include file is distributed in the hope that it will be
16  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
17  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program (in the main directory of the NTFS-3G
22  * distribution in the file COPYING); if not, write to the Free Software
23  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25 
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif
29 
30 #ifdef HAVE_STDLIB_H
31 #include <stdlib.h>
32 #endif
33 #ifdef HAVE_STDIO_H
34 #include <stdio.h>
35 #endif
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #ifdef HAVE_STRING_H
40 #include <string.h>
41 #endif
42 #ifdef HAVE_LIMITS_H
43 #include <limits.h>
44 #endif
45 #include <time.h>
46 
47 #include "compat.h"
48 #include "types.h"
49 #include "device.h"
50 #include "debug.h"
51 #include "bitmap.h"
52 #include "attrib.h"
53 #include "inode.h"
54 #include "volume.h"
55 #include "layout.h"
56 #include "lcnalloc.h"
57 #include "mft.h"
58 #include "logging.h"
59 #include "misc.h"
60 
61 /**
62  * ntfs_mft_records_read - read records from the mft from disk
63  * @vol:	volume to read from
64  * @mref:	starting mft record number to read
65  * @count:	number of mft records to read
66  * @b:		output data buffer
67  *
68  * Read @count mft records starting at @mref from volume @vol into buffer
69  * @b. Return 0 on success or -1 on error, with errno set to the error
70  * code.
71  *
72  * If any of the records exceed the initialized size of the $MFT/$DATA
73  * attribute, i.e. they cannot possibly be allocated mft records, assume this
74  * is a bug and return error code ESPIPE.
75  *
76  * The read mft records are mst deprotected and are hence ready to use. The
77  * caller should check each record with is_baad_record() in case mst
78  * deprotection failed.
79  *
80  * NOTE: @b has to be at least of size @count * vol->mft_record_size.
81  */
82 int ntfs_mft_records_read(const ntfs_volume *vol, const MFT_REF mref,
83 		const s64 count, MFT_RECORD *b)
84 {
85 	s64 br;
86 	VCN m;
87 
88 	ntfs_log_trace("inode %llu\n", (unsigned long long)MREF(mref));
89 
90 	if (!vol || !vol->mft_na || !b || count < 0) {
91 		errno = EINVAL;
92 		ntfs_log_perror("%s: b=%p  count=%lld  mft=%llu", __FUNCTION__,
93 			b, (long long)count, (unsigned long long)MREF(mref));
94 		return -1;
95 	}
96 	m = MREF(mref);
97 	/* Refuse to read non-allocated mft records. */
98 	if (m + count > vol->mft_na->initialized_size >>
99 			vol->mft_record_size_bits) {
100 		errno = ESPIPE;
101 		ntfs_log_perror("Trying to read non-allocated mft records "
102 				"(%lld > %lld)", (long long)m + count,
103 				(long long)vol->mft_na->initialized_size >>
104 				vol->mft_record_size_bits);
105 		return -1;
106 	}
107 	br = ntfs_attr_mst_pread(vol->mft_na, m << vol->mft_record_size_bits,
108 			count, vol->mft_record_size, b);
109 	if (br != count) {
110 		if (br != -1)
111 			errno = EIO;
112 		ntfs_log_perror("Failed to read of MFT, mft=%llu count=%lld "
113 				"br=%lld", (long long)m, (long long)count,
114 				(long long)br);
115 		return -1;
116 	}
117 	return 0;
118 }
119 
120 /**
121  * ntfs_mft_records_write - write mft records to disk
122  * @vol:	volume to write to
123  * @mref:	starting mft record number to write
124  * @count:	number of mft records to write
125  * @b:		data buffer containing the mft records to write
126  *
127  * Write @count mft records starting at @mref from data buffer @b to volume
128  * @vol. Return 0 on success or -1 on error, with errno set to the error code.
129  *
130  * If any of the records exceed the initialized size of the $MFT/$DATA
131  * attribute, i.e. they cannot possibly be allocated mft records, assume this
132  * is a bug and return error code ESPIPE.
133  *
134  * Before the mft records are written, they are mst protected. After the write,
135  * they are deprotected again, thus resulting in an increase in the update
136  * sequence number inside the data buffer @b.
137  *
138  * If any mft records are written which are also represented in the mft mirror
139  * $MFTMirr, we make a copy of the relevant parts of the data buffer @b into a
140  * temporary buffer before we do the actual write. Then if at least one mft
141  * record was successfully written, we write the appropriate mft records from
142  * the copied buffer to the mft mirror, too.
143  */
144 int ntfs_mft_records_write(const ntfs_volume *vol, const MFT_REF mref,
145 		const s64 count, MFT_RECORD *b)
146 {
147 	s64 bw;
148 	VCN m;
149 	void *bmirr = NULL;
150 	int cnt = 0, res = 0;
151 
152 	if (!vol || !vol->mft_na || vol->mftmirr_size <= 0 || !b || count < 0) {
153 		errno = EINVAL;
154 		return -1;
155 	}
156 	m = MREF(mref);
157 	/* Refuse to write non-allocated mft records. */
158 	if (m + count > vol->mft_na->initialized_size >>
159 			vol->mft_record_size_bits) {
160 		errno = ESPIPE;
161 		ntfs_log_perror("Trying to write non-allocated mft records "
162 				"(%lld > %lld)", (long long)m + count,
163 				(long long)vol->mft_na->initialized_size >>
164 				vol->mft_record_size_bits);
165 		return -1;
166 	}
167 	if (m < vol->mftmirr_size) {
168 		if (!vol->mftmirr_na) {
169 			errno = EINVAL;
170 			return -1;
171 		}
172 		cnt = vol->mftmirr_size - m;
173 		if (cnt > count)
174 			cnt = count;
175 		if ((m + cnt) > vol->mftmirr_na->initialized_size >>
176 				vol->mft_record_size_bits) {
177 			errno = ESPIPE;
178 			ntfs_log_perror("Trying to write non-allocated mftmirr"
179 				" records (%lld > %lld)", (long long)m + cnt,
180 				(long long)vol->mftmirr_na->initialized_size >>
181 				vol->mft_record_size_bits);
182 			return -1;
183 		}
184 		bmirr = ntfs_malloc(cnt * vol->mft_record_size);
185 		if (!bmirr)
186 			return -1;
187 		memcpy(bmirr, b, cnt * vol->mft_record_size);
188 	}
189 	bw = ntfs_attr_mst_pwrite(vol->mft_na, m << vol->mft_record_size_bits,
190 			count, vol->mft_record_size, b);
191 	if (bw != count) {
192 		if (bw != -1)
193 			errno = EIO;
194 		if (bw >= 0)
195 			ntfs_log_debug("Error: partial write while writing $Mft "
196 					"record(s)!\n");
197 		else
198 			ntfs_log_perror("Error writing $Mft record(s)");
199 		res = errno;
200 	}
201 	if (bmirr && bw > 0) {
202 		if (bw < cnt)
203 			cnt = bw;
204 		bw = ntfs_attr_mst_pwrite(vol->mftmirr_na,
205 				m << vol->mft_record_size_bits, cnt,
206 				vol->mft_record_size, bmirr);
207 		if (bw != cnt) {
208 			if (bw != -1)
209 				errno = EIO;
210 			ntfs_log_debug("Error: failed to sync $MFTMirr! Run "
211 					"chkdsk.\n");
212 			res = errno;
213 		}
214 	}
215 	free(bmirr);
216 	if (!res)
217 		return res;
218 	errno = res;
219 	return -1;
220 }
221 
222 /*
223  *		Check the consistency of an MFT record
224  *
225  *	Make sure its general fields are safe, then examine all its
226  *	attributes and apply generic checks to them.
227  *	The attribute checks are skipped when a record is being read in
228  *	order to collect its sequence number for creating a new record.
229  *
230  *	Returns 0 if the checks are successful
231  *		-1 with errno = EIO otherwise
232  */
233 
234 int ntfs_mft_record_check(const ntfs_volume *vol, const MFT_REF mref,
235 			  MFT_RECORD *m)
236 {
237 	ATTR_RECORD *a;
238 	ATTR_TYPES previous_type;
239 	int ret = -1;
240 	u32 offset;
241 	s32 space;
242 
243 	if (!ntfs_is_file_record(m->magic)) {
244 		if (!NVolNoFixupWarn(vol))
245 			ntfs_log_error("Record %llu has no FILE magic (0x%x)\n",
246 				(unsigned long long)MREF(mref),
247 				(int)le32_to_cpu(*(le32*)m));
248 		goto err_out;
249 	}
250 
251 	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
252 		ntfs_log_error("Record %llu has corrupt allocation size "
253 			       "(%u <> %u)\n", (unsigned long long)MREF(mref),
254 			       vol->mft_record_size,
255 			       le32_to_cpu(m->bytes_allocated));
256 		goto err_out;
257 	}
258 	if (!NVolNoFixupWarn(vol)
259 	    && (le32_to_cpu(m->bytes_in_use) > vol->mft_record_size)) {
260 		ntfs_log_error("Record %llu has corrupt in-use size "
261 			       "(%u > %u)\n", (unsigned long long)MREF(mref),
262 			       (int)le32_to_cpu(m->bytes_in_use),
263 			       (int)vol->mft_record_size);
264 		goto err_out;
265 	}
266 	if (le16_to_cpu(m->attrs_offset) & 7) {
267 		ntfs_log_error("Attributes badly aligned in record %llu\n",
268 			       (unsigned long long)MREF(mref));
269 		goto err_out;
270 	}
271 
272 	a = (ATTR_RECORD *)((char *)m + le16_to_cpu(m->attrs_offset));
273 	if (p2n(a) < p2n(m) || (char *)a > (char *)m + vol->mft_record_size) {
274 		ntfs_log_error("Record %llu is corrupt\n",
275 			       (unsigned long long)MREF(mref));
276 		goto err_out;
277 	}
278 
279 	if (!NVolNoFixupWarn(vol)) {
280 		offset = le16_to_cpu(m->attrs_offset);
281 		space = le32_to_cpu(m->bytes_in_use) - offset;
282 		a = (ATTR_RECORD*)((char*)m + offset);
283 		previous_type = AT_STANDARD_INFORMATION;
284 		while ((space >= (s32)offsetof(ATTR_RECORD, resident_end))
285 		    && (a->type != AT_END)
286 		    && (le32_to_cpu(a->type) >= le32_to_cpu(previous_type))) {
287 			if ((le32_to_cpu(a->length) <= (u32)space)
288 			    && !(le32_to_cpu(a->length) & 7)) {
289 				if (!ntfs_attr_inconsistent(a, mref)) {
290 					previous_type = a->type;
291 					offset += le32_to_cpu(a->length);
292 					space -= le32_to_cpu(a->length);
293 					a = (ATTR_RECORD*)((char*)m + offset);
294 				} else
295 					goto err_out;
296 			} else {
297 				ntfs_log_error("Corrupted MFT record %llu\n",
298 				       (unsigned long long)MREF(mref));
299 				goto err_out;
300 			}
301 		}
302 			/* We are supposed to reach an AT_END */
303 		if ((space < 4) || (a->type != AT_END)) {
304 			ntfs_log_error("Bad end of MFT record %llu\n",
305 				       (unsigned long long)MREF(mref));
306 			goto err_out;
307 		}
308 	}
309 
310 	ret = 0;
311 err_out:
312 	if (ret)
313 		errno = EIO;
314 	return ret;
315 }
316 
317 /**
318  * ntfs_file_record_read - read a FILE record from the mft from disk
319  * @vol:	volume to read from
320  * @mref:	mft reference specifying mft record to read
321  * @mrec:	address of pointer in which to return the mft record
322  * @attr:	address of pointer in which to return the first attribute
323  *
324  * Read a FILE record from the mft of @vol from the storage medium. @mref
325  * specifies the mft record to read, including the sequence number, which can
326  * be 0 if no sequence number checking is to be performed.
327  *
328  * The function allocates a buffer large enough to hold the mft record and
329  * reads the record into the buffer (mst deprotecting it in the process).
330  * *@mrec is then set to point to the buffer.
331  *
332  * If @attr is not NULL, *@attr is set to point to the first attribute in the
333  * mft record, i.e. *@attr is a pointer into *@mrec.
334  *
335  * Return 0 on success, or -1 on error, with errno set to the error code.
336  *
337  * The read mft record is checked for having the magic FILE,
338  * and for having a matching sequence number (if MSEQNO(*@mref) != 0).
339  * If either of these fails, -1 is returned and errno is set to EIO. If you get
340  * this, but you still want to read the mft record (e.g. in order to correct
341  * it), use ntfs_mft_record_read() directly.
342  *
343  * Note: Caller has to free *@mrec when finished.
344  *
345  * Note: We do not check if the mft record is flagged in use. The caller can
346  *	 check if desired.
347  */
348 int ntfs_file_record_read(const ntfs_volume *vol, const MFT_REF mref,
349 		MFT_RECORD **mrec, ATTR_RECORD **attr)
350 {
351 	MFT_RECORD *m;
352 
353 	if (!vol || !mrec) {
354 		errno = EINVAL;
355 		ntfs_log_perror("%s: mrec=%p", __FUNCTION__, mrec);
356 		return -1;
357 	}
358 
359 	m = *mrec;
360 	if (!m) {
361 		m = ntfs_malloc(vol->mft_record_size);
362 		if (!m)
363 			return -1;
364 	}
365 	if (ntfs_mft_record_read(vol, mref, m))
366 		goto err_out;
367 
368 	if (ntfs_mft_record_check(vol, mref, m))
369 		goto err_out;
370 
371 	if (MSEQNO(mref) && MSEQNO(mref) != le16_to_cpu(m->sequence_number)) {
372 		ntfs_log_error("Record %llu has wrong SeqNo (%d <> %d)\n",
373 			       (unsigned long long)MREF(mref), MSEQNO(mref),
374 			       le16_to_cpu(m->sequence_number));
375 		errno = EIO;
376 		goto err_out;
377 	}
378 	*mrec = m;
379 	if (attr)
380 		*attr = (ATTR_RECORD*)((char*)m + le16_to_cpu(m->attrs_offset));
381 	return 0;
382 err_out:
383 	if (m != *mrec)
384 		free(m);
385 	return -1;
386 }
387 
388 /**
389  * ntfs_mft_record_layout - layout an mft record into a memory buffer
390  * @vol:	volume to which the mft record will belong
391  * @mref:	mft reference specifying the mft record number
392  * @mrec:	destination buffer of size >= @vol->mft_record_size bytes
393  *
394  * Layout an empty, unused mft record with the mft reference @mref into the
395  * buffer @m.  The volume @vol is needed because the mft record structure was
396  * modified in NTFS 3.1 so we need to know which volume version this mft record
397  * will be used on.
398  *
399  * On success return 0 and on error return -1 with errno set to the error code.
400  */
401 int ntfs_mft_record_layout(const ntfs_volume *vol, const MFT_REF mref,
402 		MFT_RECORD *mrec)
403 {
404 	ATTR_RECORD *a;
405 
406 	if (!vol || !mrec) {
407 		errno = EINVAL;
408 		ntfs_log_perror("%s: mrec=%p", __FUNCTION__, mrec);
409 		return -1;
410 	}
411 	/* Aligned to 2-byte boundary. */
412 	if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
413 		mrec->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1);
414 	else {
415 		/* Abort if mref is > 32 bits. */
416 		if (MREF(mref) & 0x0000ffff00000000ull) {
417 			errno = ERANGE;
418 			ntfs_log_perror("Mft reference exceeds 32 bits");
419 			return -1;
420 		}
421 		mrec->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1);
422 		/*
423 		 * Set the NTFS 3.1+ specific fields while we know that the
424 		 * volume version is 3.1+.
425 		 */
426 		mrec->reserved = const_cpu_to_le16(0);
427 		mrec->mft_record_number = cpu_to_le32(MREF(mref));
428 	}
429 	mrec->magic = magic_FILE;
430 	if (vol->mft_record_size >= NTFS_BLOCK_SIZE)
431 		mrec->usa_count = cpu_to_le16(vol->mft_record_size /
432 				NTFS_BLOCK_SIZE + 1);
433 	else {
434 		mrec->usa_count = const_cpu_to_le16(1);
435 		ntfs_log_error("Sector size is bigger than MFT record size.  "
436 				"Setting usa_count to 1.  If Windows chkdsk "
437 				"reports this as corruption, please email %s "
438 				"stating that you saw this message and that "
439 				"the file system created was corrupt.  "
440 				"Thank you.\n", NTFS_DEV_LIST);
441 	}
442 	/* Set the update sequence number to 1. */
443 	*(le16*)((u8*)mrec + le16_to_cpu(mrec->usa_ofs)) = const_cpu_to_le16(1);
444 	mrec->lsn = const_cpu_to_sle64(0ll);
445 	mrec->sequence_number = const_cpu_to_le16(1);
446 	mrec->link_count = const_cpu_to_le16(0);
447 	/* Aligned to 8-byte boundary. */
448 	mrec->attrs_offset = cpu_to_le16((le16_to_cpu(mrec->usa_ofs) +
449 			(le16_to_cpu(mrec->usa_count) << 1) + 7) & ~7);
450 	mrec->flags = const_cpu_to_le16(0);
451 	/*
452 	 * Using attrs_offset plus eight bytes (for the termination attribute),
453 	 * aligned to 8-byte boundary.
454 	 */
455 	mrec->bytes_in_use = cpu_to_le32((le16_to_cpu(mrec->attrs_offset) + 8 +
456 			7) & ~7);
457 	mrec->bytes_allocated = cpu_to_le32(vol->mft_record_size);
458 	mrec->base_mft_record = const_cpu_to_le64((MFT_REF)0);
459 	mrec->next_attr_instance = const_cpu_to_le16(0);
460 	a = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset));
461 	a->type = AT_END;
462 	a->length = const_cpu_to_le32(0);
463 	/* Finally, clear the unused part of the mft record. */
464 	memset((u8*)a + 8, 0, vol->mft_record_size - ((u8*)a + 8 - (u8*)mrec));
465 	return 0;
466 }
467 
468 /**
469  * ntfs_mft_record_format - format an mft record on an ntfs volume
470  * @vol:	volume on which to format the mft record
471  * @mref:	mft reference specifying mft record to format
472  *
473  * Format the mft record with the mft reference @mref in $MFT/$DATA, i.e. lay
474  * out an empty, unused mft record in memory and write it to the volume @vol.
475  *
476  * On success return 0 and on error return -1 with errno set to the error code.
477  */
478 int ntfs_mft_record_format(const ntfs_volume *vol, const MFT_REF mref)
479 {
480 	MFT_RECORD *m;
481 	int ret = -1;
482 
483 	ntfs_log_enter("Entering\n");
484 
485 	m = ntfs_calloc(vol->mft_record_size);
486 	if (!m)
487 		goto out;
488 
489 	if (ntfs_mft_record_layout(vol, mref, m))
490 		goto free_m;
491 
492 	if (ntfs_mft_record_write(vol, mref, m))
493 		goto free_m;
494 
495 	ret = 0;
496 free_m:
497 	free(m);
498 out:
499 	ntfs_log_leave("\n");
500 	return ret;
501 }
502 
/*
 * Suffix appended (via "%s") to error messages emitted when an undo step
 * itself fails.
 */
static const char *es =
	"  Leaving inconsistent metadata.  Run chkdsk.";
504 
/**
 * ntfs_ffz - Find the first unset (zero) bit in a word
 * @word:	value to scan
 *
 * The search starts at the least significant bit.
 *
 * Returns: the zero-based index of the lowest clear bit of @word.  If @word
 * has no clear bit, ffs() reports 0 and the result wraps to UINT_MAX, so
 * callers must range-check the value.
 */
static inline unsigned int ntfs_ffz(unsigned int word)
{
	/* A zero bit in @word is a set bit in its complement. */
	const unsigned int inverted = ~word;
	const int one_based = ffs(inverted);

	return one_based - 1;
}
517 
518 static int ntfs_is_mft(ntfs_inode *ni)
519 {
520 	if (ni && ni->mft_no == FILE_MFT)
521 		return 1;
522 	return 0;
523 }
524 
/* Fallback for platforms whose headers do not define PAGE_SIZE. */
#if !defined(PAGE_SIZE)
#define PAGE_SIZE 4096
#endif

/* Lowest record number at which the free-record search normally starts. */
#define RESERVED_MFT_RECORDS   64
530 
531 /**
532  * ntfs_mft_bitmap_find_free_rec - find a free mft record in the mft bitmap
533  * @vol:	volume on which to search for a free mft record
534  * @base_ni:	open base inode if allocating an extent mft record or NULL
535  *
536  * Search for a free mft record in the mft bitmap attribute on the ntfs volume
537  * @vol.
538  *
539  * If @base_ni is NULL start the search at the default allocator position.
540  *
541  * If @base_ni is not NULL start the search at the mft record after the base
542  * mft record @base_ni.
543  *
544  * Return the free mft record on success and -1 on error with errno set to the
545  * error code.  An error code of ENOSPC means that there are no free mft
546  * records in the currently initialized mft bitmap.
547  */
548 static int ntfs_mft_bitmap_find_free_rec(ntfs_volume *vol, ntfs_inode *base_ni)
549 {
550 	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
551 	ntfs_attr *mftbmp_na;
552 	u8 *buf, *byte;
553 	unsigned int size;
554 	u8 pass, b;
	/* Stays -1 unless a free bit is found below. */
555 	int ret = -1;
556 
557 	ntfs_log_enter("Entering\n");
558 
559 	mftbmp_na = vol->mftbmp_na;
560 	/*
561 	 * Set the end of the pass making sure we do not overflow the mft
562 	 * bitmap.
563 	 */
564 	size = PAGE_SIZE;
	/* Number of mft records that fit in the allocated $MFT data ... */
565 	pass_end = vol->mft_na->allocated_size >> vol->mft_record_size_bits;
	/* ... capped at the number of bits in the initialized bitmap. */
566 	ll = mftbmp_na->initialized_size << 3;
567 	if (pass_end > ll)
568 		pass_end = ll;
569 	pass = 1;
570 	if (!base_ni)
571 		data_pos = vol->mft_data_pos;
572 	else
573 		data_pos = base_ni->mft_no + 1;
	/* Never start the search below RESERVED_MFT_RECORDS. */
574 	if (data_pos < RESERVED_MFT_RECORDS)
575 		data_pos = RESERVED_MFT_RECORDS;
	/*
	 * A start position at or past the end leaves nothing for pass 1:
	 * restart at RESERVED_MFT_RECORDS and do a single pass only.
	 */
576 	if (data_pos >= pass_end) {
577 		data_pos = RESERVED_MFT_RECORDS;
578 		pass = 2;
579 		/* This happens on a freshly formatted volume. */
580 		if (data_pos >= pass_end) {
581 			errno = ENOSPC;
582 			goto leave;
583 		}
584 	}
	/*
	 * When the base inode is $MFT itself, search from bit 0 in a single
	 * pass (pass == 2 means no further pass follows).
	 */
585 	if (ntfs_is_mft(base_ni)) {
586 		data_pos = 0;
587 		pass = 2;
588 	}
589 	pass_start = data_pos;
	/* Scratch buffer holding one PAGE_SIZE chunk of the bitmap. */
590 	buf = ntfs_malloc(PAGE_SIZE);
591 	if (!buf)
592 		goto leave;
593 
594 	ntfs_log_debug("Starting bitmap search: pass %u, pass_start 0x%llx, "
595 			"pass_end 0x%llx, data_pos 0x%llx.\n", pass,
596 			(long long)pass_start, (long long)pass_end,
597 			(long long)data_pos);
	/* byte and b are only pre-set for the debug messages below. */
598 #ifdef DEBUG
599 	byte = NULL;
600 	b = 0;
601 #endif
602 	/* Loop until a free mft record is found. */
	/* The chunk size is reset to PAGE_SIZE after every iteration. */
603 	for (; pass <= 2; size = PAGE_SIZE) {
604 		/* Cap size to pass_end. */
		/* ofs: byte offset in the bitmap; ll: bytes left in this pass. */
605 		ofs = data_pos >> 3;
606 		ll = ((pass_end + 7) >> 3) - ofs;
607 		if (size > ll)
608 			size = ll;
609 		ll = ntfs_attr_pread(mftbmp_na, ofs, size, buf);
610 		if (ll < 0) {
611 			ntfs_log_perror("Failed to read $MFT bitmap");
612 			free(buf);
613 			goto leave;
614 		}
615 		ntfs_log_debug("Read 0x%llx bytes.\n", (long long)ll);
616 		/* If we read at least one byte, search @buf for a zero bit. */
617 		if (ll) {
			/*
			 * From here on size counts bits, bit is the offset
			 * inside the chunk and data_pos is rounded down to
			 * the first bit of the chunk.
			 */
618 			size = ll << 3;
619 			bit = data_pos & 7;
620 			data_pos &= ~7ull;
621 			ntfs_log_debug("Before inner for loop: size 0x%x, "
622 					"data_pos 0x%llx, bit 0x%llx, "
623 					"*byte 0x%hhx, b %u.\n", size,
624 					(long long)data_pos, (long long)bit,
625 					(u8) (byte ? *byte : -1), b);
			/* Examine one byte per iteration: step to the next byte boundary. */
626 			for (; bit < size && data_pos + bit < pass_end;
627 					bit &= ~7ull, bit += 8) {
628 				/*
629 				 * If we're extending $MFT and running out of the first
630 				 * mft record (base record) then give up searching since
631 				 * no guarantee that the found record will be accessible.
632 				 */
633 				if (ntfs_is_mft(base_ni) && bit > 400)
634 					goto out;
635 
636 				byte = buf + (bit >> 3);
				/* All eight bits of this byte are set. */
637 				if (*byte == 0xff)
638 					continue;
639 
640 				/* Note: ffz() result must be zero based. */
641 				b = ntfs_ffz((unsigned long)*byte);
				/*
				 * Accept the zero bit only if it is not below
				 * the starting bit within this byte.
				 */
642 				if (b < 8 && b >= (bit & 7)) {
643 					free(buf);
644 					ret = data_pos + (bit & ~7ull) + b;
645 					goto leave;
646 				}
647 			}
648 			ntfs_log_debug("After inner for loop: size 0x%x, "
649 					"data_pos 0x%llx, bit 0x%llx, "
650 					"*byte 0x%hhx, b %u.\n", size,
651 					(long long)data_pos, (long long)bit,
652 					(u8) (byte ? *byte : -1), b);
			/* Advance past the chunk just scanned. */
653 			data_pos += size;
654 			/*
655 			 * If the end of the pass has not been reached yet,
656 			 * continue searching the mft bitmap for a zero bit.
657 			 */
658 			if (data_pos < pass_end)
659 				continue;
660 		}
661 		/* Do the next pass. */
662 		pass++;
663 		if (pass == 2) {
664 			/*
665 			 * Starting the second pass, in which we scan the first
666 			 * part of the zone which we omitted earlier.
667 			 */
668 			pass_end = pass_start;
669 			data_pos = pass_start = RESERVED_MFT_RECORDS;
670 			ntfs_log_debug("pass %i, pass_start 0x%llx, pass_end "
671 					"0x%llx.\n", pass, (long long)pass_start,
672 					(long long)pass_end);
			/* Pass 1 already started at the lowest position: nothing left. */
673 			if (data_pos >= pass_end)
674 				break;
675 		}
676 	}
677 	/* No free mft records in currently initialized mft bitmap. */
678 out:
679 	free(buf);
680 	errno = ENOSPC;
	/* Common exit; every path jumping here has already released buf. */
681 leave:
682 	ntfs_log_leave("\n");
683 	return ret;
684 }
685 
686 static int ntfs_mft_attr_extend(ntfs_attr *na)
687 {
688 	int ret = STATUS_ERROR;
689 	ntfs_log_enter("Entering\n");
690 
691 	if (!NInoAttrList(na->ni)) {
692 		if (ntfs_inode_add_attrlist(na->ni)) {
693 			ntfs_log_perror("%s: Can not add attrlist #3", __FUNCTION__);
694 			goto out;
695 		}
696 		/* We can't sync the $MFT inode since its runlist is bogus. */
697 		ret = STATUS_KEEP_SEARCHING;
698 		goto out;
699 	}
700 
701 	if (ntfs_attr_update_mapping_pairs(na, 0)) {
702 		ntfs_log_perror("%s: MP update failed", __FUNCTION__);
703 		goto out;
704 	}
705 
706 	ret = STATUS_OK;
707 out:
708 	ntfs_log_leave("\n");
709 	return ret;
710 }
711 
712 /**
 * ntfs_mft_bitmap_extend_allocation_i - see ntfs_mft_bitmap_extend_allocation
 * @vol:	volume whose mft bitmap attribute is extended by one cluster
 *
 * Allocate one cluster (preferably right after the last cluster of the mft
 * bitmap), merge it into the in-memory runlist, then rebuild the mapping
 * pairs array of the last attribute extent and update highest_vcn and
 * allocated_size.  If the attribute record cannot be resized in place,
 * ntfs_mft_attr_extend() is tried instead.
 *
 * Returns STATUS_OK on success.  On failure the allocation is undone as far
 * as possible, errno holds the original error and the return value is
 * STATUS_ERROR or the non-OK status reported by ntfs_mft_attr_extend().
 */
715 static int ntfs_mft_bitmap_extend_allocation_i(ntfs_volume *vol)
716 {
717 	LCN lcn;
718 	s64 ll = 0; /* silence compiler warning */
719 	ntfs_attr *mftbmp_na;
720 	runlist_element *rl, *rl2 = NULL; /* silence compiler warning */
721 	ntfs_attr_search_ctx *ctx;
722 	MFT_RECORD *m = NULL; /* silence compiler warning */
723 	ATTR_RECORD *a = NULL; /* silence compiler warning */
724 	int err, mp_size;
725 	int ret = STATUS_ERROR;
726 	u32 old_alen = 0; /* silence compiler warning */
	/* Set once the attribute record was resized, so the undo path restores it. */
727 	BOOL mp_rebuilt = FALSE;
	/* Set when ntfs_mft_attr_extend() failed, so the undo path rewrites the mapping pairs. */
728 	BOOL update_mp = FALSE;
729 
730 	mftbmp_na = vol->mftbmp_na;
731 	/*
732 	 * Determine the last lcn of the mft bitmap.  The allocated size of the
733 	 * mft bitmap cannot be zero so we are ok to do this.
734 	 */
735 	rl = ntfs_attr_find_vcn(mftbmp_na, (mftbmp_na->allocated_size - 1) >>
736 			vol->cluster_size_bits);
737 	if (!rl || !rl->length || rl->lcn < 0) {
738 		ntfs_log_error("Failed to determine last allocated "
739 				"cluster of mft bitmap attribute.\n");
		/* errno is only overridden when the lookup itself returned a run. */
740 		if (rl)
741 			errno = EIO;
742 		return STATUS_ERROR;
743 	}
	/* First cluster after the last run: preferred location of the new one. */
744 	lcn = rl->lcn + rl->length;
745 
746 	rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE);
747 	if (!rl2) {
748 		ntfs_log_error("Failed to allocate a cluster for "
749 				"the mft bitmap.\n");
750 		return STATUS_ERROR;
751 	}
752 	rl = ntfs_runlists_merge(mftbmp_na->rl, rl2);
753 	if (!rl) {
		/* Save errno: the cleanup calls below may change it. */
754 		err = errno;
755 		ntfs_log_error("Failed to merge runlists for mft "
756 				"bitmap.\n");
757 		if (ntfs_cluster_free_from_rl(vol, rl2))
758 			ntfs_log_error("Failed to deallocate "
759 					"cluster.%s\n", es);
760 		free(rl2);
761 		errno = err;
762 		return STATUS_ERROR;
763 	}
764 	mftbmp_na->rl = rl;
765 	ntfs_log_debug("Adding one run to mft bitmap.\n");
766 	/* Find the last run in the new runlist. */
767 	for (; rl[1].length; rl++)
768 		;
769 	/*
770 	 * Update the attribute record as well.  Note: @rl is the last
771 	 * (non-terminator) runlist element of mft bitmap.
772 	 */
773 	ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
774 	if (!ctx)
775 		goto undo_alloc;
776 
777 	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
778 			mftbmp_na->name_len, 0, rl[1].vcn, NULL, 0, ctx)) {
779 		ntfs_log_error("Failed to find last attribute extent of "
780 				"mft bitmap attribute.\n");
781 		goto undo_alloc;
782 	}
783 	m = ctx->mrec;
784 	a = ctx->attr;
	/* From here on rl2 is the first run of the extent that was found. */
785 	ll = sle64_to_cpu(a->lowest_vcn);
786 	rl2 = ntfs_attr_find_vcn(mftbmp_na, ll);
787 	if (!rl2 || !rl2->length) {
788 		ntfs_log_error("Failed to determine previous last "
789 				"allocated cluster of mft bitmap attribute.\n");
790 		if (rl2)
791 			errno = EIO;
792 		goto undo_alloc;
793 	}
794 	/* Get the size for the new mapping pairs array for this extent. */
795 	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, INT_MAX);
796 	if (mp_size <= 0) {
797 		ntfs_log_error("Get size for mapping pairs failed for "
798 				"mft bitmap attribute extent.\n");
799 		goto undo_alloc;
800 	}
801 	/* Expand the attribute record if necessary. */
	/* Remember the old record length so the undo path can restore it. */
802 	old_alen = le32_to_cpu(a->length);
803 	if (ntfs_attr_record_resize(m, a, mp_size +
804 			le16_to_cpu(a->mapping_pairs_offset))) {
		/* In-place resize failed: fall back to ntfs_mft_attr_extend(). */
805 		ntfs_log_info("extending $MFT bitmap\n");
806 		ret = ntfs_mft_attr_extend(vol->mftbmp_na);
807 		if (ret == STATUS_OK)
808 			goto ok;
809 		if (ret == STATUS_ERROR) {
810 			ntfs_log_perror("%s: ntfs_mft_attr_extend failed", __FUNCTION__);
811 			update_mp = TRUE;
812 		}
		/* Any other status is handed back to the caller after the undo. */
813 		goto undo_alloc;
814 	}
815 	mp_rebuilt = TRUE;
816 	/* Generate the mapping pairs array directly into the attr record. */
817 	if (ntfs_mapping_pairs_build(vol, (u8*)a +
818 			le16_to_cpu(a->mapping_pairs_offset), mp_size, rl2, ll,
819 			NULL)) {
820 		ntfs_log_error("Failed to build mapping pairs array for "
821 				"mft bitmap attribute.\n");
822 		errno = EIO;
823 		goto undo_alloc;
824 	}
825 	/* Update the highest_vcn. */
826 	a->highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
827 	/*
828 	 * We now have extended the mft bitmap allocated_size by one cluster.
829 	 * Reflect this in the ntfs_attr structure and the attribute record.
830 	 */
831 	if (a->lowest_vcn) {
832 		/*
833 		 * We are not in the first attribute extent, switch to it, but
834 		 * first ensure the changes will make it to disk later.
835 		 */
836 		ntfs_inode_mark_dirty(ctx->ntfs_ino);
837 		ntfs_attr_reinit_search_ctx(ctx);
838 		if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
839 				mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
840 			ntfs_log_error("Failed to find first attribute "
841 					"extent of mft bitmap attribute.\n");
842 			goto restore_undo_alloc;
843 		}
844 		a = ctx->attr;
845 	}
	/* Reached by fall-through, or by jump when ntfs_mft_attr_extend() returned STATUS_OK. */
846 ok:
847 	mftbmp_na->allocated_size += vol->cluster_size;
848 	a->allocated_size = cpu_to_sle64(mftbmp_na->allocated_size);
849 	/* Ensure the changes make it to disk. */
850 	ntfs_inode_mark_dirty(ctx->ntfs_ino);
851 	ntfs_attr_put_search_ctx(ctx);
852 	return STATUS_OK;
853 
	/*
	 * The last extent was already updated but the first extent could not
	 * be found: look the last extent up again and put its highest_vcn
	 * back before running the common undo.
	 */
854 restore_undo_alloc:
855 	err = errno;
856 	ntfs_attr_reinit_search_ctx(ctx);
857 	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
858 			mftbmp_na->name_len, 0, rl[1].vcn, NULL, 0, ctx)) {
859 		ntfs_log_error("Failed to find last attribute extent of "
860 				"mft bitmap attribute.%s\n", es);
861 		ntfs_attr_put_search_ctx(ctx);
862 		mftbmp_na->allocated_size += vol->cluster_size;
863 		/*
864 		 * The only thing that is now wrong is ->allocated_size of the
865 		 * base attribute extent which chkdsk should be able to fix.
866 		 */
867 		errno = err;
868 		return STATUS_ERROR;
869 	}
870 	m = ctx->mrec;
871 	a = ctx->attr;
	/* highest_vcn was set to rl[1].vcn - 1 above; go back by the one added cluster. */
872 	a->highest_vcn = cpu_to_sle64(rl[1].vcn - 2);
873 	errno = err;
	/* Common undo path; rl still points at the last (non-terminator) run. */
874 undo_alloc:
875 	err = errno;
876 
877 	/* Remove the last run from the runlist. */
	/* The run is overwritten with the lcn of its successor and a zero length. */
878 	lcn = rl->lcn;
879 	rl->lcn = rl[1].lcn;
880 	rl->length = 0;
881 
882 	/* FIXME: use an ntfs_cluster_free_* function */
883 	if (ntfs_bitmap_clear_bit(vol->lcnbmp_na, lcn))
884 		ntfs_log_error("Failed to free cluster.%s\n", es);
885 	else
886 		vol->free_clusters++;
887 	if (mp_rebuilt) {
		/* Rebuild the mapping pairs array into the space of the old record length. */
888 		if (ntfs_mapping_pairs_build(vol, (u8*)a +
889 				le16_to_cpu(a->mapping_pairs_offset),
890 				old_alen - le16_to_cpu(a->mapping_pairs_offset),
891 				rl2, ll, NULL))
892 			ntfs_log_error("Failed to restore mapping "
893 					"pairs array.%s\n", es);
894 		if (ntfs_attr_record_resize(m, a, old_alen))
895 			ntfs_log_error("Failed to restore attribute "
896 					"record.%s\n", es);
897 		ntfs_inode_mark_dirty(ctx->ntfs_ino);
898 	}
899 	if (update_mp) {
900 		if (ntfs_attr_update_mapping_pairs(vol->mftbmp_na, 0))
901 			ntfs_log_perror("%s: MP update failed", __FUNCTION__);
902 	}
	/* ctx is NULL when the search context could not be obtained. */
903 	if (ctx)
904 		ntfs_attr_put_search_ctx(ctx);
905 	errno = err;
906 	return ret;
907 }
908 
909 /**
910  * ntfs_mft_bitmap_extend_allocation - extend mft bitmap attribute by a cluster
911  * @vol:	volume on which to extend the mft bitmap attribute
912  *
913  * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
914  *
915  * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
916  * data_size.
917  *
918  * Return 0 on success and -1 on error with errno set to the error code.
919  */
920 static int ntfs_mft_bitmap_extend_allocation(ntfs_volume *vol)
921 {
922 	int ret;
923 
924 	ntfs_log_enter("Entering\n");
925 	ret = ntfs_mft_bitmap_extend_allocation_i(vol);
926 	ntfs_log_leave("\n");
927 	return ret;
928 }
929 /**
930  * ntfs_mft_bitmap_extend_initialized - extend mft bitmap initialized data
931  * @vol:	volume on which to extend the mft bitmap attribute
932  *
933  * Extend the initialized portion of the mft bitmap attribute on the ntfs
934  * volume @vol by 8 bytes.
935  *
936  * Note:  Only changes initialized_size and data_size, i.e. requires that
937  * allocated_size is big enough to fit the new initialized_size.
938  *
939  * Return 0 on success and -1 on error with errno set to the error code.
940  */
941 static int ntfs_mft_bitmap_extend_initialized(ntfs_volume *vol)
942 {
943 	s64 old_data_size, old_initialized_size, ll;
944 	ntfs_attr *mftbmp_na;
945 	ntfs_attr_search_ctx *ctx;
946 	ATTR_RECORD *a;
947 	int err;
948 	int ret = -1;
949 
950 	ntfs_log_enter("Entering\n");
951 
952 	mftbmp_na = vol->mftbmp_na;
953 	ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
954 	if (!ctx)
955 		goto out;
956 
957 	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
958 			mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
959 		ntfs_log_error("Failed to find first attribute extent of "
960 				"mft bitmap attribute.\n");
961 		err = errno;
962 		goto put_err_out;
963 	}
964 	a = ctx->attr;
965 	old_data_size = mftbmp_na->data_size;
966 	old_initialized_size = mftbmp_na->initialized_size;
967 	mftbmp_na->initialized_size += 8;
968 	a->initialized_size = cpu_to_sle64(mftbmp_na->initialized_size);
969 	if (mftbmp_na->initialized_size > mftbmp_na->data_size) {
970 		mftbmp_na->data_size = mftbmp_na->initialized_size;
971 		a->data_size = cpu_to_sle64(mftbmp_na->data_size);
972 	}
973 	/* Ensure the changes make it to disk. */
974 	ntfs_inode_mark_dirty(ctx->ntfs_ino);
975 	ntfs_attr_put_search_ctx(ctx);
976 	/* Initialize the mft bitmap attribute value with zeroes. */
977 	ll = 0;
978 	ll = ntfs_attr_pwrite(mftbmp_na, old_initialized_size, 8, &ll);
979 	if (ll == 8) {
980 		ntfs_log_debug("Wrote eight initialized bytes to mft bitmap.\n");
981 		vol->free_mft_records += (8 * 8);
982 		ret = 0;
983 		goto out;
984 	}
985 	ntfs_log_error("Failed to write to mft bitmap.\n");
986 	err = errno;
987 	if (ll >= 0)
988 		err = EIO;
989 	/* Try to recover from the error. */
990 	ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
991 	if (!ctx)
992 		goto err_out;
993 
994 	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
995 			mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
996 		ntfs_log_error("Failed to find first attribute extent of "
997 				"mft bitmap attribute.%s\n", es);
998 put_err_out:
999 		ntfs_attr_put_search_ctx(ctx);
1000 		goto err_out;
1001 	}
1002 	a = ctx->attr;
1003 	mftbmp_na->initialized_size = old_initialized_size;
1004 	a->initialized_size = cpu_to_sle64(old_initialized_size);
1005 	if (mftbmp_na->data_size != old_data_size) {
1006 		mftbmp_na->data_size = old_data_size;
1007 		a->data_size = cpu_to_sle64(old_data_size);
1008 	}
1009 	ntfs_inode_mark_dirty(ctx->ntfs_ino);
1010 	ntfs_attr_put_search_ctx(ctx);
1011 	ntfs_log_debug("Restored status of mftbmp: allocated_size 0x%llx, "
1012 			"data_size 0x%llx, initialized_size 0x%llx.\n",
1013 			(long long)mftbmp_na->allocated_size,
1014 			(long long)mftbmp_na->data_size,
1015 			(long long)mftbmp_na->initialized_size);
1016 err_out:
1017 	errno = err;
1018 out:
1019 	ntfs_log_leave("\n");
1020 	return ret;
1021 }
1022 
1023 /**
1024  * ntfs_mft_data_extend_allocation - extend mft data attribute
1025  * @vol:	volume on which to extend the mft data attribute
1026  *
1027  * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
1028  * worth of clusters or if not enough space for this by one mft record worth
1029  * of clusters.
1030  *
1031  * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
1032  * data_size.
1033  *
1034  * Return 0 on success and -1 on error with errno set to the error code.
1035  */
1036 static int ntfs_mft_data_extend_allocation(ntfs_volume *vol)
1037 {
1038 	LCN lcn;
1039 	VCN old_last_vcn;
1040 	s64 min_nr, nr, ll = 0; /* silence compiler warning */
1041 	ntfs_attr *mft_na;
1042 	runlist_element *rl, *rl2;
1043 	ntfs_attr_search_ctx *ctx;
1044 	MFT_RECORD *m = NULL; /* silence compiler warning */
1045 	ATTR_RECORD *a = NULL; /* silence compiler warning */
1046 	int err, mp_size;
1047 	int ret = STATUS_ERROR;
1048 	u32 old_alen = 0; /* silence compiler warning */
1049 	BOOL mp_rebuilt = FALSE;
1050 	BOOL update_mp = FALSE;
1051 
1052 	ntfs_log_enter("Extending mft data allocation.\n");
1053 
1054 	mft_na = vol->mft_na;
1055 	/*
1056 	 * Determine the preferred allocation location, i.e. the last lcn of
1057 	 * the mft data attribute.  The allocated size of the mft data
1058 	 * attribute cannot be zero so we are ok to do this.
1059 	 */
1060 	rl = ntfs_attr_find_vcn(mft_na,
1061 			(mft_na->allocated_size - 1) >> vol->cluster_size_bits);
1062 
1063 	if (!rl || !rl->length || rl->lcn < 0) {
1064 		ntfs_log_error("Failed to determine last allocated "
1065 				"cluster of mft data attribute.\n");
1066 		if (rl)
1067 			errno = EIO;
1068 		goto out;
1069 	}
1070 
1071 	lcn = rl->lcn + rl->length;
1072 	ntfs_log_debug("Last lcn of mft data attribute is 0x%llx.\n", (long long)lcn);
1073 	/* Minimum allocation is one mft record worth of clusters. */
1074 	min_nr = vol->mft_record_size >> vol->cluster_size_bits;
1075 	if (!min_nr)
1076 		min_nr = 1;
1077 	/* Want to allocate 16 mft records worth of clusters. */
1078 	nr = vol->mft_record_size << 4 >> vol->cluster_size_bits;
1079 	if (!nr)
1080 		nr = min_nr;
1081 
1082 	old_last_vcn = rl[1].vcn;
1083 	do {
1084 		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE);
1085 		if (rl2)
1086 			break;
1087 		if (errno != ENOSPC || nr == min_nr) {
1088 			ntfs_log_perror("Failed to allocate (%lld) clusters "
1089 					"for $MFT", (long long)nr);
1090 			goto out;
1091 		}
1092 		/*
1093 		 * There is not enough space to do the allocation, but there
1094 		 * might be enough space to do a minimal allocation so try that
1095 		 * before failing.
1096 		 */
1097 		nr = min_nr;
1098 		ntfs_log_debug("Retrying mft data allocation with minimal cluster "
1099 				"count %lli.\n", (long long)nr);
1100 	} while (1);
1101 
1102 	ntfs_log_debug("Allocated %lld clusters.\n", (long long)nr);
1103 
1104 	rl = ntfs_runlists_merge(mft_na->rl, rl2);
1105 	if (!rl) {
1106 		err = errno;
1107 		ntfs_log_error("Failed to merge runlists for mft data "
1108 				"attribute.\n");
1109 		if (ntfs_cluster_free_from_rl(vol, rl2))
1110 			ntfs_log_error("Failed to deallocate clusters "
1111 					"from the mft data attribute.%s\n", es);
1112 		free(rl2);
1113 		errno = err;
1114 		goto out;
1115 	}
1116 	mft_na->rl = rl;
1117 
1118 	/* Find the last run in the new runlist. */
1119 	for (; rl[1].length; rl++)
1120 		;
1121 	/* Update the attribute record as well. */
1122 	ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
1123 	if (!ctx)
1124 		goto undo_alloc;
1125 
1126 	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1127 			rl[1].vcn, NULL, 0, ctx)) {
1128 		ntfs_log_error("Failed to find last attribute extent of "
1129 				"mft data attribute.\n");
1130 		goto undo_alloc;
1131 	}
1132 	m = ctx->mrec;
1133 	a = ctx->attr;
1134 	ll = sle64_to_cpu(a->lowest_vcn);
1135 	rl2 = ntfs_attr_find_vcn(mft_na, ll);
1136 	if (!rl2 || !rl2->length) {
1137 		ntfs_log_error("Failed to determine previous last "
1138 				"allocated cluster of mft data attribute.\n");
1139 		if (rl2)
1140 			errno = EIO;
1141 		goto undo_alloc;
1142 	}
1143 	/* Get the size for the new mapping pairs array for this extent. */
1144 	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, INT_MAX);
1145 	if (mp_size <= 0) {
1146 		ntfs_log_error("Get size for mapping pairs failed for "
1147 				"mft data attribute extent.\n");
1148 		goto undo_alloc;
1149 	}
1150 	/* Expand the attribute record if necessary. */
1151 	old_alen = le32_to_cpu(a->length);
1152 	if (ntfs_attr_record_resize(m, a,
1153 			mp_size + le16_to_cpu(a->mapping_pairs_offset))) {
1154 		ret = ntfs_mft_attr_extend(vol->mft_na);
1155 		if (ret == STATUS_OK)
1156 			goto ok;
1157 		if (ret == STATUS_ERROR) {
1158 			ntfs_log_perror("%s: ntfs_mft_attr_extend failed", __FUNCTION__);
1159 			update_mp = TRUE;
1160 		}
1161 		goto undo_alloc;
1162 	}
1163 	mp_rebuilt = TRUE;
1164 	/*
1165 	 * Generate the mapping pairs array directly into the attribute record.
1166 	 */
1167 	if (ntfs_mapping_pairs_build(vol,
1168 			(u8*)a + le16_to_cpu(a->mapping_pairs_offset), mp_size,
1169 			rl2, ll, NULL)) {
1170 		ntfs_log_error("Failed to build mapping pairs array of "
1171 				"mft data attribute.\n");
1172 		errno = EIO;
1173 		goto undo_alloc;
1174 	}
1175 	/* Update the highest_vcn. */
1176 	a->highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
1177 	/*
1178 	 * We now have extended the mft data allocated_size by nr clusters.
1179 	 * Reflect this in the ntfs_attr structure and the attribute record.
1180 	 * @rl is the last (non-terminator) runlist element of mft data
1181 	 * attribute.
1182 	 */
1183 	if (a->lowest_vcn) {
1184 		/*
1185 		 * We are not in the first attribute extent, switch to it, but
1186 		 * first ensure the changes will make it to disk later.
1187 		 */
1188 		ntfs_inode_mark_dirty(ctx->ntfs_ino);
1189 		ntfs_attr_reinit_search_ctx(ctx);
1190 		if (ntfs_attr_lookup(mft_na->type, mft_na->name,
1191 				mft_na->name_len, 0, 0, NULL, 0, ctx)) {
1192 			ntfs_log_error("Failed to find first attribute "
1193 					"extent of mft data attribute.\n");
1194 			goto restore_undo_alloc;
1195 		}
1196 		a = ctx->attr;
1197 	}
1198 ok:
1199 	mft_na->allocated_size += nr << vol->cluster_size_bits;
1200 	a->allocated_size = cpu_to_sle64(mft_na->allocated_size);
1201 	/* Ensure the changes make it to disk. */
1202 	ntfs_inode_mark_dirty(ctx->ntfs_ino);
1203 	ntfs_attr_put_search_ctx(ctx);
1204 	ret = STATUS_OK;
1205 out:
1206 	ntfs_log_leave("\n");
1207 	return ret;
1208 
1209 restore_undo_alloc:
1210 	err = errno;
1211 	ntfs_attr_reinit_search_ctx(ctx);
1212 	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1213 			rl[1].vcn, NULL, 0, ctx)) {
1214 		ntfs_log_error("Failed to find last attribute extent of "
1215 				"mft data attribute.%s\n", es);
1216 		ntfs_attr_put_search_ctx(ctx);
1217 		mft_na->allocated_size += nr << vol->cluster_size_bits;
1218 		/*
1219 		 * The only thing that is now wrong is ->allocated_size of the
1220 		 * base attribute extent which chkdsk should be able to fix.
1221 		 */
1222 		errno = err;
1223 		ret = STATUS_ERROR;
1224 		goto out;
1225 	}
1226 	m = ctx->mrec;
1227 	a = ctx->attr;
1228 	a->highest_vcn = cpu_to_sle64(old_last_vcn - 1);
1229 	errno = err;
1230 undo_alloc:
1231 	err = errno;
1232 	if (ntfs_cluster_free(vol, mft_na, old_last_vcn, -1) < 0)
1233 		ntfs_log_error("Failed to free clusters from mft data "
1234 				"attribute.%s\n", es);
1235 	if (ntfs_rl_truncate(&mft_na->rl, old_last_vcn))
1236 		ntfs_log_error("Failed to truncate mft data attribute "
1237 				"runlist.%s\n", es);
1238 	if (mp_rebuilt) {
1239 		if (ntfs_mapping_pairs_build(vol, (u8*)a +
1240 				le16_to_cpu(a->mapping_pairs_offset),
1241 				old_alen - le16_to_cpu(a->mapping_pairs_offset),
1242 				rl2, ll, NULL))
1243 			ntfs_log_error("Failed to restore mapping pairs "
1244 					"array.%s\n", es);
1245 		if (ntfs_attr_record_resize(m, a, old_alen))
1246 			ntfs_log_error("Failed to restore attribute "
1247 					"record.%s\n", es);
1248 		ntfs_inode_mark_dirty(ctx->ntfs_ino);
1249 	}
1250 	if (update_mp) {
1251 		if (ntfs_attr_update_mapping_pairs(vol->mft_na, 0))
1252 			ntfs_log_perror("%s: MP update failed", __FUNCTION__);
1253 	}
1254 	if (ctx)
1255 		ntfs_attr_put_search_ctx(ctx);
1256 	errno = err;
1257 	goto out;
1258 }
1259 
1260 
1261 static int ntfs_mft_record_init(ntfs_volume *vol, s64 size)
1262 {
1263 	int ret = -1;
1264 	ntfs_attr *mft_na;
1265 	s64 old_data_initialized, old_data_size;
1266 	ntfs_attr_search_ctx *ctx;
1267 
1268 	ntfs_log_enter("Entering\n");
1269 
1270 	/* NOTE: Caller must sanity check vol, vol->mft_na and vol->mftbmp_na */
1271 
1272 	mft_na = vol->mft_na;
1273 
1274 	/*
1275 	 * The mft record is outside the initialized data. Extend the mft data
1276 	 * attribute until it covers the allocated record. The loop is only
1277 	 * actually traversed more than once when a freshly formatted volume
1278 	 * is first written to so it optimizes away nicely in the common case.
1279 	 */
1280 	ntfs_log_debug("Status of mft data before extension: "
1281 			"allocated_size 0x%llx, data_size 0x%llx, "
1282 			"initialized_size 0x%llx.\n",
1283 			(long long)mft_na->allocated_size,
1284 			(long long)mft_na->data_size,
1285 			(long long)mft_na->initialized_size);
1286 	while (size > mft_na->allocated_size) {
1287 		if (ntfs_mft_data_extend_allocation(vol) == STATUS_ERROR)
1288 			goto out;
1289 		ntfs_log_debug("Status of mft data after allocation extension: "
1290 				"allocated_size 0x%llx, data_size 0x%llx, "
1291 				"initialized_size 0x%llx.\n",
1292 				(long long)mft_na->allocated_size,
1293 				(long long)mft_na->data_size,
1294 				(long long)mft_na->initialized_size);
1295 	}
1296 
1297 	old_data_initialized = mft_na->initialized_size;
1298 	old_data_size = mft_na->data_size;
1299 
1300 	/*
1301 	 * Extend mft data initialized size (and data size of course) to reach
1302 	 * the allocated mft record, formatting the mft records along the way.
1303 	 * Note: We only modify the ntfs_attr structure as that is all that is
1304 	 * needed by ntfs_mft_record_format().  We will update the attribute
1305 	 * record itself in one fell swoop later on.
1306 	 */
1307 	while (size > mft_na->initialized_size) {
1308 		s64 ll2 = mft_na->initialized_size >> vol->mft_record_size_bits;
1309 		mft_na->initialized_size += vol->mft_record_size;
1310 		if (mft_na->initialized_size > mft_na->data_size)
1311 			mft_na->data_size = mft_na->initialized_size;
1312 		ntfs_log_debug("Initializing mft record 0x%llx.\n", (long long)ll2);
1313 		if (ntfs_mft_record_format(vol, ll2) < 0) {
1314 			ntfs_log_perror("Failed to format mft record");
1315 			goto undo_data_init;
1316 		}
1317 	}
1318 
1319 	/* Update the mft data attribute record to reflect the new sizes. */
1320 	ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
1321 	if (!ctx)
1322 		goto undo_data_init;
1323 
1324 	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1325 			0, NULL, 0, ctx)) {
1326 		ntfs_log_error("Failed to find first attribute extent of "
1327 				"mft data attribute.\n");
1328 		ntfs_attr_put_search_ctx(ctx);
1329 		goto undo_data_init;
1330 	}
1331 	ctx->attr->initialized_size = cpu_to_sle64(mft_na->initialized_size);
1332 	ctx->attr->data_size = cpu_to_sle64(mft_na->data_size);
1333 	ctx->attr->allocated_size = cpu_to_sle64(mft_na->allocated_size);
1334 
1335 	/* Ensure the changes make it to disk. */
1336 	ntfs_inode_mark_dirty(ctx->ntfs_ino);
1337 	ntfs_attr_put_search_ctx(ctx);
1338 	ntfs_log_debug("Status of mft data after mft record initialization: "
1339 			"allocated_size 0x%llx, data_size 0x%llx, "
1340 			"initialized_size 0x%llx.\n",
1341 			(long long)mft_na->allocated_size,
1342 			(long long)mft_na->data_size,
1343 			(long long)mft_na->initialized_size);
1344 
1345 	/* Sanity checks. */
1346 	if (mft_na->data_size > mft_na->allocated_size ||
1347 	    mft_na->initialized_size > mft_na->data_size)
1348 		NTFS_BUG("mft_na sanity checks failed");
1349 
1350 	/* Sync MFT to minimize data loss if there won't be clean unmount. */
1351 	if (ntfs_inode_sync(mft_na->ni))
1352 		goto undo_data_init;
1353 
1354 	ret = 0;
1355 out:
1356 	ntfs_log_leave("\n");
1357 	return ret;
1358 
1359 undo_data_init:
1360 	mft_na->initialized_size = old_data_initialized;
1361 	mft_na->data_size = old_data_size;
1362 	goto out;
1363 }
1364 
1365 static int ntfs_mft_rec_init(ntfs_volume *vol, s64 size)
1366 {
1367 	int ret = -1;
1368 	ntfs_attr *mft_na;
1369 	s64 old_data_initialized, old_data_size;
1370 	ntfs_attr_search_ctx *ctx;
1371 
1372 	ntfs_log_enter("Entering\n");
1373 
1374 	mft_na = vol->mft_na;
1375 
1376 	if (size > mft_na->allocated_size || size > mft_na->initialized_size) {
1377 		errno = EIO;
1378 		ntfs_log_perror("%s: unexpected $MFT sizes, see below", __FUNCTION__);
1379 		ntfs_log_error("$MFT: size=%lld  allocated_size=%lld  "
1380 			       "data_size=%lld  initialized_size=%lld\n",
1381 			       (long long)size,
1382 			       (long long)mft_na->allocated_size,
1383 			       (long long)mft_na->data_size,
1384 			       (long long)mft_na->initialized_size);
1385 		goto out;
1386 	}
1387 
1388 	old_data_initialized = mft_na->initialized_size;
1389 	old_data_size = mft_na->data_size;
1390 
1391 	/* Update the mft data attribute record to reflect the new sizes. */
1392 	ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
1393 	if (!ctx)
1394 		goto undo_data_init;
1395 
1396 	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1397 			0, NULL, 0, ctx)) {
1398 		ntfs_log_error("Failed to find first attribute extent of "
1399 				"mft data attribute.\n");
1400 		ntfs_attr_put_search_ctx(ctx);
1401 		goto undo_data_init;
1402 	}
1403 	ctx->attr->initialized_size = cpu_to_sle64(mft_na->initialized_size);
1404 	ctx->attr->data_size = cpu_to_sle64(mft_na->data_size);
1405 
1406 	/* CHECKME: ctx->attr->allocation_size is already ok? */
1407 
1408 	/* Ensure the changes make it to disk. */
1409 	ntfs_inode_mark_dirty(ctx->ntfs_ino);
1410 	ntfs_attr_put_search_ctx(ctx);
1411 
1412 	/* Sanity checks. */
1413 	if (mft_na->data_size > mft_na->allocated_size ||
1414 	    mft_na->initialized_size > mft_na->data_size)
1415 		NTFS_BUG("mft_na sanity checks failed");
1416 out:
1417 	ntfs_log_leave("\n");
1418 	return ret;
1419 
1420 undo_data_init:
1421 	mft_na->initialized_size = old_data_initialized;
1422 	mft_na->data_size = old_data_size;
1423 	goto out;
1424 }
1425 
1426 ntfs_inode *ntfs_mft_rec_alloc(ntfs_volume *vol, BOOL mft_data)
1427 {
1428 	s64 ll, bit;
1429 	ntfs_attr *mft_na, *mftbmp_na;
1430 	MFT_RECORD *m;
1431 	ntfs_inode *ni = NULL;
1432 	ntfs_inode *base_ni;
1433 	int err;
1434 	le16 seq_no, usn;
1435 	BOOL forced_mft_data;
1436 
1437 	ntfs_log_enter("Entering\n");
1438 
1439 	mft_na = vol->mft_na;
1440 	mftbmp_na = vol->mftbmp_na;
1441 
1442 	base_ni = mft_na->ni;
1443 
1444 	/*
1445 	 * The first extent containing $MFT:$AT_DATA is better located
1446 	 * in record 15 to make sure it can be read at mount time.
1447 	 * The record 15 is prereserved as a base inode with no
1448 	 * extents and no name, and it is marked in use.
1449 	 */
1450 	forced_mft_data = FALSE;
1451 	if (mft_data) {
1452 		ntfs_inode *ext_ni = ntfs_inode_open(vol, FILE_mft_data);
1453 			/*
1454 			 * If record 15 cannot be opened, it is probably in
1455 			 * use as an extent. Apply standard procedure for
1456 			 * further extents.
1457 			 */
1458 		if (ext_ni) {
1459 			/*
1460 			 * Make sure record 15 is a base extent and it has
1461 			 * no name. A base inode with no name cannot be in use.
1462 			 * The test based on base_mft_record fails for
1463 			 * extents of MFT, so we need a special check.
1464 			 * If already used, apply standard procedure.
1465 			 */
1466    			if (!ext_ni->mrec->base_mft_record
1467 			    && !ext_ni->mrec->link_count)
1468 				forced_mft_data = TRUE;
1469 			ntfs_inode_close(ext_ni);
1470 			/* Double-check, in case it is used for MFT */
1471 			if (forced_mft_data && base_ni->nr_extents) {
1472 				int i;
1473 
1474 				for (i=0; i<base_ni->nr_extents; i++) {
1475 					if (base_ni->extent_nis[i]
1476 					    && (base_ni->extent_nis[i]->mft_no
1477 							== FILE_mft_data))
1478 						forced_mft_data = FALSE;
1479    				}
1480 			}
1481 		}
1482 	}
1483 	if (forced_mft_data)
1484 		bit = FILE_mft_data;
1485 	else
1486 		bit = ntfs_mft_bitmap_find_free_rec(vol, base_ni);
1487 	if (bit >= 0)
1488 		goto found_free_rec;
1489 
1490 	if (errno != ENOSPC)
1491 		goto out;
1492 
1493 	errno = ENOSPC;
1494 	/* strerror() is intentionally used below, we want to log this error. */
1495 	ntfs_log_error("No free mft record for $MFT: %s\n", strerror(errno));
1496 	goto err_out;
1497 
1498 found_free_rec:
1499 	if (ntfs_bitmap_set_bit(mftbmp_na, bit)) {
1500 		ntfs_log_error("Failed to allocate bit in mft bitmap #2\n");
1501 		goto err_out;
1502 	}
1503 
1504 	ll = (bit + 1) << vol->mft_record_size_bits;
1505 	if (ll > mft_na->initialized_size)
1506 		if (ntfs_mft_rec_init(vol, ll) < 0)
1507 			goto undo_mftbmp_alloc;
1508 	/*
1509 	 * We now have allocated and initialized the mft record.  Need to read
1510 	 * it from disk and re-format it, preserving the sequence number if it
1511 	 * is not zero as well as the update sequence number if it is not zero
1512 	 * or -1 (0xffff).
1513 	 */
1514 	m = ntfs_malloc(vol->mft_record_size);
1515 	if (!m)
1516 		goto undo_mftbmp_alloc;
1517 
1518 	if (ntfs_mft_record_read(vol, bit, m)) {
1519 		free(m);
1520 		goto undo_mftbmp_alloc;
1521 	}
1522 	/* Sanity check that the mft record is really not in use. */
1523 	if (!forced_mft_data
1524 	    && (ntfs_is_file_record(m->magic)
1525 	    && (m->flags & MFT_RECORD_IN_USE))) {
1526 		ntfs_log_error("Inode %lld is used but it wasn't marked in "
1527 			       "$MFT bitmap. Fixed.\n", (long long)bit);
1528 		free(m);
1529 		goto undo_mftbmp_alloc;
1530 	}
1531 
1532 		/*
1533 		 * Retrieve the former seq_no and usn so that the new record
1534 		 * cannot be mistaken for the former one.
1535 		 * However the original record may just be garbage, so
1536 		 * use some sensible value when they cannot be retrieved.
1537 		 */
1538 	seq_no = m->sequence_number;
1539 	if (le16_to_cpu(m->usa_ofs) <= (NTFS_BLOCK_SIZE - 2))
1540 		usn = *(le16*)((u8*)m + (le16_to_cpu(m->usa_ofs) & -2));
1541 	else
1542 		usn = const_cpu_to_le16(1);
1543 	if (ntfs_mft_record_layout(vol, bit, m)) {
1544 		ntfs_log_error("Failed to re-format mft record.\n");
1545 		free(m);
1546 		goto undo_mftbmp_alloc;
1547 	}
1548 	if (seq_no)
1549 		m->sequence_number = seq_no;
1550 	seq_no = usn;
1551 	if (seq_no && seq_no != const_cpu_to_le16(0xffff))
1552 		*(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn;
1553 	/* Set the mft record itself in use. */
1554 	m->flags |= MFT_RECORD_IN_USE;
1555 	/* Now need to open an ntfs inode for the mft record. */
1556 	ni = ntfs_inode_allocate(vol);
1557 	if (!ni) {
1558 		ntfs_log_error("Failed to allocate buffer for inode.\n");
1559 		free(m);
1560 		goto undo_mftbmp_alloc;
1561 	}
1562 	ni->mft_no = bit;
1563 	ni->mrec = m;
1564 	/*
1565 	 * If we are allocating an extent mft record, make the opened inode an
1566 	 * extent inode and attach it to the base inode.  Also, set the base
1567 	 * mft record reference in the extent inode.
1568 	 */
1569 	ni->nr_extents = -1;
1570 	ni->base_ni = base_ni;
1571 	m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
1572 					le16_to_cpu(base_ni->mrec->sequence_number));
1573 	/*
1574 	 * Attach the extent inode to the base inode, reallocating
1575 	 * memory if needed.
1576 	 */
1577 	if (!(base_ni->nr_extents & 3)) {
1578 		ntfs_inode **extent_nis;
1579 		int i;
1580 
1581 		i = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
1582 		extent_nis = ntfs_malloc(i);
1583 		if (!extent_nis) {
1584 			free(m);
1585 			free(ni);
1586 			goto undo_mftbmp_alloc;
1587 		}
1588 		if (base_ni->nr_extents) {
1589 			memcpy(extent_nis, base_ni->extent_nis,
1590 					i - 4 * sizeof(ntfs_inode *));
1591 			free(base_ni->extent_nis);
1592 		}
1593 		base_ni->extent_nis = extent_nis;
1594 	}
1595 	base_ni->extent_nis[base_ni->nr_extents++] = ni;
1596 
1597 	/* Make sure the allocated inode is written out to disk later. */
1598 	ntfs_inode_mark_dirty(ni);
1599 	/* Initialize time, allocated and data size in ntfs_inode struct. */
1600 	ni->data_size = ni->allocated_size = 0;
1601 	ni->flags = const_cpu_to_le32(0);
1602 	ni->creation_time = ni->last_data_change_time =
1603 			ni->last_mft_change_time =
1604 			ni->last_access_time = ntfs_current_time();
1605 	/* Update the default mft allocation position if it was used. */
1606 	if (!base_ni)
1607 		vol->mft_data_pos = bit + 1;
1608 	/* Return the opened, allocated inode of the allocated mft record. */
1609 	ntfs_log_error("allocated %sinode %lld\n",
1610 			base_ni ? "extent " : "", (long long)bit);
1611 out:
1612 	ntfs_log_leave("\n");
1613 	return ni;
1614 
1615 undo_mftbmp_alloc:
1616 	err = errno;
1617 	if (ntfs_bitmap_clear_bit(mftbmp_na, bit))
1618 		ntfs_log_error("Failed to clear bit in mft bitmap.%s\n", es);
1619 	errno = err;
1620 err_out:
1621 	if (!errno)
1622 		errno = EIO;
1623 	ni = NULL;
1624 	goto out;
1625 }
1626 
1627 /**
1628  * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
1629  * @vol:	volume on which to allocate the mft record
1630  * @base_ni:	open base inode if allocating an extent mft record or NULL
1631  *
1632  * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol.
1633  *
1634  * If @base_ni is NULL make the mft record a base mft record and allocate it at
1635  * the default allocator position.
1636  *
1637  * If @base_ni is not NULL make the allocated mft record an extent record,
1638  * allocate it starting at the mft record after the base mft record and attach
1639  * the allocated and opened ntfs inode to the base inode @base_ni.
1640  *
1641  * On success return the now opened ntfs (extent) inode of the mft record.
1642  *
1643  * On error return NULL with errno set to the error code.
1644  *
1645  * To find a free mft record, we scan the mft bitmap for a zero bit.  To
1646  * optimize this we start scanning at the place specified by @base_ni or if
1647  * @base_ni is NULL we start where we last stopped and we perform wrap around
1648  * when we reach the end.  Note, we do not try to allocate mft records below
1649  * number 24 because numbers 0 to 15 are the defined system files anyway and 16
1650  * to 24 are used for storing extension mft records or used by chkdsk to store
1651  * its log. However the record number 15 is dedicated to the first extent to
1652  * the $DATA attribute of $MFT.  This is required to avoid the possibility
1653  * of creating a run list with a circular dependence which once written to disk
1654  * can never be read in again.  Windows will only use records 16 to 24 for
1655  * normal files if the volume is completely out of space.  We never use them
1656  * which means that when the volume is really out of space we cannot create any
1657  * more files while Windows can still create up to 8 small files.  We can start
1658  * doing this at some later time, it does not matter much for now.
1659  *
1660  * When scanning the mft bitmap, we only search up to the last allocated mft
1661  * record.  If there are no free records left in the range 24 to number of
1662  * allocated mft records, then we extend the $MFT/$DATA attribute in order to
1663  * create free mft records.  We extend the allocated size of $MFT/$DATA by 16
1664  * records at a time or one cluster, if cluster size is above 16kiB.  If there
1665  * is not sufficient space to do this, we try to extend by a single mft record
1666  * or one cluster, if cluster size is above the mft record size, but we only do
1667  * this if there is enough free space, which we know from the values returned
1668  * by the failed cluster allocation function when we tried to do the first
1669  * allocation.
1670  *
1671  * No matter how many mft records we allocate, we initialize only the first
1672  * allocated mft record, incrementing mft data size and initialized size
1673  * accordingly, open an ntfs_inode for it and return it to the caller, unless
1674  * there are less than 24 mft records, in which case we allocate and initialize
1675  * mft records until we reach record 24 which we consider as the first free mft
1676  * record for use by normal files.
1677  *
1678  * If during any stage we overflow the initialized data in the mft bitmap, we
1679  * extend the initialized size (and data size) by 8 bytes, allocating another
1680  * cluster if required.  The bitmap data size has to be at least equal to the
1681  * number of mft records in the mft, but it can be bigger, in which case the
1682  * superfluous bits are padded with zeroes.
1683  *
1684  * Thus, when we return successfully (return value non-zero), we will have:
1685  *	- initialized / extended the mft bitmap if necessary,
1686  *	- initialized / extended the mft data if necessary,
1687  *	- set the bit corresponding to the mft record being allocated in the
1688  *	  mft bitmap,
1689  *	- open an ntfs_inode for the allocated mft record, and we will
1690  *	- return the ntfs_inode.
1691  *
1692  * On error (return value zero), nothing will have changed.  If we had changed
1693  * anything before the error occurred, we will have reverted back to the
1694  * starting state before returning to the caller.  Thus, except for bugs, we
1695  * should always leave the volume in a consistent state when returning from
1696  * this function.
1697  *
1698  * Note, this function cannot make use of most of the normal functions, like
1699  * for example for attribute resizing, etc, because when the run list overflows
1700  * the base mft record and an attribute list is used, it is very important that
1701  * the extension mft records used to store the $DATA attribute of $MFT can be
1702  * reached without having to read the information contained inside them, as
1703  * this would make it impossible to find them in the first place after the
1704  * volume is dismounted.  $MFT/$BITMAP probably does not need to follow this
1705  * rule because the bitmap is not essential for finding the mft records, but on
1706  * the other hand, handling the bitmap in this special way would make life
1707  * easier because otherwise there might be circular invocations of functions
1708  * when reading the bitmap but if we are careful, we should be able to avoid
1709  * all problems.
1710  */
1711 ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, ntfs_inode *base_ni)
1712 {
1713 	s64 ll, bit;
1714 	ntfs_attr *mft_na, *mftbmp_na;
1715 	MFT_RECORD *m;
1716 	ntfs_inode *ni = NULL;
1717 	int err;
1718 	u32 usa_ofs;
1719 	le16 seq_no, usn;
1720 	BOOL oldwarn;
1721 
1722 	if (base_ni)
1723 		ntfs_log_enter("Entering (allocating an extent mft record for "
1724 			       "base mft record %lld).\n",
1725 			       (long long)base_ni->mft_no);
1726 	else
1727 		ntfs_log_enter("Entering (allocating a base mft record)\n");
1728 	if (!vol || !vol->mft_na || !vol->mftbmp_na) {
1729 		errno = EINVAL;
1730 		goto out;
1731 	}
1732 
1733 	if (ntfs_is_mft(base_ni)) {
1734 		ni = ntfs_mft_rec_alloc(vol, FALSE);
1735 		goto out;
1736 	}
1737 
1738 	mft_na = vol->mft_na;
1739 	mftbmp_na = vol->mftbmp_na;
1740 retry:
1741 	bit = ntfs_mft_bitmap_find_free_rec(vol, base_ni);
1742 	if (bit >= 0) {
1743 		ntfs_log_debug("found free record (#1) at %lld\n",
1744 				(long long)bit);
1745 		goto found_free_rec;
1746 	}
1747 	if (errno != ENOSPC)
1748 		goto out;
1749 	/*
1750 	 * No free mft records left.  If the mft bitmap already covers more
1751 	 * than the currently used mft records, the next records are all free,
1752 	 * so we can simply allocate the first unused mft record.
1753 	 * Note: We also have to make sure that the mft bitmap at least covers
1754 	 * the first 24 mft records as they are special and whilst they may not
1755 	 * be in use, we do not allocate from them.
1756 	 */
1757 	ll = mft_na->initialized_size >> vol->mft_record_size_bits;
1758 	if (mftbmp_na->initialized_size << 3 > ll &&
1759 			mftbmp_na->initialized_size > RESERVED_MFT_RECORDS / 8) {
1760 		bit = ll;
1761 		if (bit < RESERVED_MFT_RECORDS)
1762 			bit = RESERVED_MFT_RECORDS;
1763 		ntfs_log_debug("found free record (#2) at %lld\n",
1764 				(long long)bit);
1765 		goto found_free_rec;
1766 	}
1767 	/*
1768 	 * The mft bitmap needs to be expanded until it covers the first unused
1769 	 * mft record that we can allocate.
1770 	 * Note: The smallest mft record we allocate is mft record 24.
1771 	 */
1772 	ntfs_log_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
1773 			"data_size 0x%llx, initialized_size 0x%llx.\n",
1774 			(long long)mftbmp_na->allocated_size,
1775 			(long long)mftbmp_na->data_size,
1776 			(long long)mftbmp_na->initialized_size);
1777 	if (mftbmp_na->initialized_size + 8 > mftbmp_na->allocated_size) {
1778 
1779 		int ret = ntfs_mft_bitmap_extend_allocation(vol);
1780 
1781 		if (ret == STATUS_ERROR)
1782 			goto err_out;
1783 		if (ret == STATUS_KEEP_SEARCHING) {
1784 			ret = ntfs_mft_bitmap_extend_allocation(vol);
1785 			if (ret != STATUS_OK)
1786 				goto err_out;
1787 		}
1788 
1789 		ntfs_log_debug("Status of mftbmp after allocation extension: "
1790 				"allocated_size 0x%llx, data_size 0x%llx, "
1791 				"initialized_size 0x%llx.\n",
1792 				(long long)mftbmp_na->allocated_size,
1793 				(long long)mftbmp_na->data_size,
1794 				(long long)mftbmp_na->initialized_size);
1795 	}
1796 	/*
1797 	 * We now have sufficient allocated space, extend the initialized_size
1798 	 * as well as the data_size if necessary and fill the new space with
1799 	 * zeroes.
1800 	 */
1801 	bit = mftbmp_na->initialized_size << 3;
1802 	if (ntfs_mft_bitmap_extend_initialized(vol))
1803 		goto err_out;
1804 	ntfs_log_debug("Status of mftbmp after initialized extension: "
1805 			"allocated_size 0x%llx, data_size 0x%llx, "
1806 			"initialized_size 0x%llx.\n",
1807 			(long long)mftbmp_na->allocated_size,
1808 			(long long)mftbmp_na->data_size,
1809 			(long long)mftbmp_na->initialized_size);
1810 	ntfs_log_debug("found free record (#3) at %lld\n", (long long)bit);
1811 found_free_rec:
1812 	/* @bit is the found free mft record, allocate it in the mft bitmap. */
1813 	if (ntfs_bitmap_set_bit(mftbmp_na, bit)) {
1814 		ntfs_log_error("Failed to allocate bit in mft bitmap.\n");
1815 		goto err_out;
1816 	}
1817 
1818 	/* The mft bitmap is now uptodate.  Deal with mft data attribute now. */
1819 	ll = (bit + 1) << vol->mft_record_size_bits;
1820 	if (ll > mft_na->initialized_size)
1821 		if (ntfs_mft_record_init(vol, ll) < 0)
1822 			goto undo_mftbmp_alloc;
1823 
1824 	/*
1825 	 * We now have allocated and initialized the mft record.  Need to read
1826 	 * it from disk and re-format it, preserving the sequence number if it
1827 	 * is not zero as well as the update sequence number if it is not zero
1828 	 * or -1 (0xffff).
1829 	 */
1830 	m = ntfs_malloc(vol->mft_record_size);
1831 	if (!m)
1832 		goto undo_mftbmp_alloc;
1833 
1834 	/*
1835 	 * As this is allocating a new record, do not expect it to have
1836 	 * been initialized previously, so do not warn over bad fixups
1837 	 * (hence avoid warn flooding when an NTFS partition has been wiped).
1838 	 */
1839 	oldwarn = !NVolNoFixupWarn(vol);
1840 	NVolSetNoFixupWarn(vol);
1841 	if (ntfs_mft_record_read(vol, bit, m)) {
1842 		if (oldwarn)
1843 			NVolClearNoFixupWarn(vol);
1844 		free(m);
1845 		goto undo_mftbmp_alloc;
1846 	}
1847 	if (oldwarn)
1848 		NVolClearNoFixupWarn(vol);
1849 
1850 	/* Sanity check that the mft record is really not in use. */
1851 	if (ntfs_is_file_record(m->magic) && (m->flags & MFT_RECORD_IN_USE)) {
1852 		ntfs_log_error("Inode %lld is used but it wasn't marked in "
1853 			       "$MFT bitmap. Fixed.\n", (long long)bit);
1854 		free(m);
1855 		goto retry;
1856 	}
1857 	seq_no = m->sequence_number;
1858 		/*
1859 		 * As ntfs_mft_record_read() returns what has been read
1860 		 * even when the fixups have been found bad, we have to
1861 		 * check where we fetch the initial usn from.
1862 		 */
1863 	usa_ofs = le16_to_cpu(m->usa_ofs);
1864 	if (!(usa_ofs & 1) && (usa_ofs < NTFS_BLOCK_SIZE)) {
1865 		usn = *(le16*)((u8*)m + usa_ofs);
1866 	} else
1867 		usn = const_cpu_to_le16(1);
1868 	if (ntfs_mft_record_layout(vol, bit, m)) {
1869 		ntfs_log_error("Failed to re-format mft record.\n");
1870 		free(m);
1871 		goto undo_mftbmp_alloc;
1872 	}
1873 	if (seq_no)
1874 		m->sequence_number = seq_no;
1875 	seq_no = usn;
1876 	if (seq_no && seq_no != const_cpu_to_le16(0xffff))
1877 		*(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn;
1878 	/* Set the mft record itself in use. */
1879 	m->flags |= MFT_RECORD_IN_USE;
1880 	/* Now need to open an ntfs inode for the mft record. */
1881 	ni = ntfs_inode_allocate(vol);
1882 	if (!ni) {
1883 		ntfs_log_error("Failed to allocate buffer for inode.\n");
1884 		free(m);
1885 		goto undo_mftbmp_alloc;
1886 	}
1887 	ni->mft_no = bit;
1888 	ni->mrec = m;
1889 	/*
1890 	 * If we are allocating an extent mft record, make the opened inode an
1891 	 * extent inode and attach it to the base inode.  Also, set the base
1892 	 * mft record reference in the extent inode.
1893 	 */
1894 	if (base_ni) {
1895 		ni->nr_extents = -1;
1896 		ni->base_ni = base_ni;
1897 		m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
1898 				le16_to_cpu(base_ni->mrec->sequence_number));
1899 		/*
1900 		 * Attach the extent inode to the base inode, reallocating
1901 		 * memory if needed.
1902 		 */
1903 		if (!(base_ni->nr_extents & 3)) {
1904 			ntfs_inode **extent_nis;
1905 			int i;
1906 
1907 			i = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
1908 			extent_nis = ntfs_malloc(i);
1909 			if (!extent_nis) {
1910 				free(m);
1911 				free(ni);
1912 				goto undo_mftbmp_alloc;
1913 			}
1914 			if (base_ni->nr_extents) {
1915 				memcpy(extent_nis, base_ni->extent_nis,
1916 						i - 4 * sizeof(ntfs_inode *));
1917 				free(base_ni->extent_nis);
1918 			}
1919 			base_ni->extent_nis = extent_nis;
1920 		}
1921 		base_ni->extent_nis[base_ni->nr_extents++] = ni;
1922 	}
1923 	/* Make sure the allocated inode is written out to disk later. */
1924 	ntfs_inode_mark_dirty(ni);
1925 	/* Initialize time, allocated and data size in ntfs_inode struct. */
1926 	ni->data_size = ni->allocated_size = 0;
1927 	ni->flags = const_cpu_to_le32(0);
1928 	ni->creation_time = ni->last_data_change_time =
1929 			ni->last_mft_change_time =
1930 			ni->last_access_time = ntfs_current_time();
1931 	/* Update the default mft allocation position if it was used. */
1932 	if (!base_ni)
1933 		vol->mft_data_pos = bit + 1;
1934 	/* Return the opened, allocated inode of the allocated mft record. */
1935 	ntfs_log_debug("allocated %sinode 0x%llx.\n",
1936 			base_ni ? "extent " : "", (long long)bit);
1937 	vol->free_mft_records--;
1938 out:
1939 	ntfs_log_leave("\n");
1940 	return ni;
1941 
1942 undo_mftbmp_alloc:
1943 	err = errno;
1944 	if (ntfs_bitmap_clear_bit(mftbmp_na, bit))
1945 		ntfs_log_error("Failed to clear bit in mft bitmap.%s\n", es);
1946 	errno = err;
1947 err_out:
1948 	if (!errno)
1949 		errno = EIO;
1950 	ni = NULL;
1951 	goto out;
1952 }
1953 
1954 /**
1955  * ntfs_mft_record_free - free an mft record on an ntfs volume
1956  * @vol:	volume on which to free the mft record
1957  * @ni:		open ntfs inode of the mft record to free
1958  *
1959  * Free the mft record of the open inode @ni on the mounted ntfs volume @vol.
1960  * Note that this function calls ntfs_inode_close() internally and hence you
1961  * cannot use the pointer @ni any more after this function returns success.
1962  *
1963  * On success return 0 and on error return -1 with errno set to the error code.
1964  */
1965 int ntfs_mft_record_free(ntfs_volume *vol, ntfs_inode *ni)
1966 {
1967 	u64 mft_no;
1968 	int err;
1969 	u16 seq_no;
1970 	le16 old_seq_no;
1971 
1972 	ntfs_log_trace("Entering for inode 0x%llx.\n", (long long) ni->mft_no);
1973 
1974 	if (!vol || !vol->mftbmp_na || !ni) {
1975 		errno = EINVAL;
1976 		return -1;
1977 	}
1978 
1979 	/* Cache the mft reference for later. */
1980 	mft_no = ni->mft_no;
1981 
1982 	/* Mark the mft record as not in use. */
1983 	ni->mrec->flags &= ~MFT_RECORD_IN_USE;
1984 
1985 	/* Increment the sequence number, skipping zero, if it is not zero. */
1986 	old_seq_no = ni->mrec->sequence_number;
1987 	seq_no = le16_to_cpu(old_seq_no);
1988 	if (seq_no == 0xffff)
1989 		seq_no = 1;
1990 	else if (seq_no)
1991 		seq_no++;
1992 	ni->mrec->sequence_number = cpu_to_le16(seq_no);
1993 
1994 	/* Set the inode dirty and write it out. */
1995 	ntfs_inode_mark_dirty(ni);
1996 	if (ntfs_inode_sync(ni)) {
1997 		err = errno;
1998 		goto sync_rollback;
1999 	}
2000 
2001 	/* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
2002 	if (ntfs_bitmap_clear_bit(vol->mftbmp_na, mft_no)) {
2003 		err = errno;
2004 		// FIXME: If ntfs_bitmap_clear_run() guarantees rollback on
2005 		//	  error, this could be changed to goto sync_rollback;
2006 		goto bitmap_rollback;
2007 	}
2008 
2009 	/* Throw away the now freed inode. */
2010 #if CACHE_NIDATA_SIZE
2011 	if (!ntfs_inode_real_close(ni)) {
2012 #else
2013 	if (!ntfs_inode_close(ni)) {
2014 #endif
2015 		vol->free_mft_records++;
2016 		return 0;
2017 	}
2018 	err = errno;
2019 
2020 	/* Rollback what we did... */
2021 bitmap_rollback:
2022 	if (ntfs_bitmap_set_bit(vol->mftbmp_na, mft_no))
2023 		ntfs_log_debug("Eeek! Rollback failed in ntfs_mft_record_free().  "
2024 				"Leaving inconsistent metadata!\n");
2025 sync_rollback:
2026 	ni->mrec->flags |= MFT_RECORD_IN_USE;
2027 	ni->mrec->sequence_number = old_seq_no;
2028 	ntfs_inode_mark_dirty(ni);
2029 	errno = err;
2030 	return -1;
2031 }
2032 
2033 /**
2034  * ntfs_mft_usn_dec - Decrement USN by one
2035  * @mrec:	pointer to an mft record
2036  *
2037  * On success return 0 and on error return -1 with errno set.
2038  */
2039 int ntfs_mft_usn_dec(MFT_RECORD *mrec)
2040 {
2041 	u16 usn;
2042 	le16 *usnp;
2043 
2044 	if (!mrec) {
2045 		errno = EINVAL;
2046 		return -1;
2047 	}
2048 	usnp = (le16*)((char*)mrec + le16_to_cpu(mrec->usa_ofs));
2049 	usn = le16_to_cpup(usnp);
2050 	if (usn-- <= 1)
2051 		usn = 0xfffe;
2052 	*usnp = cpu_to_le16(usn);
2053 
2054 	return 0;
2055 }
2056 
2057