xref: /haiku/src/add-ons/kernel/file_systems/fat/bsd/kern/vfs_bio.c (revision 9a6a20d4689307142a7ed26a1437ba47e244e73f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004 Poul-Henning Kamp
5  * Copyright (c) 1994,1997 John S. Dyson
6  * Copyright (c) 2013 The FreeBSD Foundation
7  * All rights reserved.
8  *
9  * Portions of this software were developed by Konstantin Belousov
10  * under sponsorship from the FreeBSD Foundation.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 
35 // Modified to support the Haiku FAT driver. These functions, as implemented here, assume
36 // that the volume's block cache was created with a blockSize equal to DEV_BSIZE. We also
37 // support access to the file cache via these functions, even though the driver doesn't
38 // need that capability in its current form.
39 
40 #include "sys/param.h"
41 #include "sys/systm.h"
42 #include "sys/buf.h"
43 #include "sys/malloc.h"
44 #include "sys/vnode.h"
45 #include "sys/conf.h"
46 
47 #include "fs/msdosfs/bpb.h"
48 #include "fs/msdosfs/denode.h"
49 #include "fs/msdosfs/direntry.h"
50 #include "fs/msdosfs/msdosfsmount.h"
51 
52 #include "dosfs.h"
53 
54 
55 #ifdef USER
56 #define dprintf printf
57 #endif
58 
59 
60 int msdosfs_bmap(struct vnode* a_vp, daddr_t a_bn, struct bufobj** a_bop, daddr_t* a_bnp,
61 	int* a_runp, int* a_runb);
62 int getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo,
63 	int flags, struct buf** bpp);
64 
65 static status_t allocate_data(struct buf* buf, int size);
66 static status_t put_buf(struct buf* buf);
67 
68 
/*! Stub for the FreeBSD low-resource check. The FAT driver uses this in combination with
	vm_page_count_severe to detect low system resources. There is no analogous Haiku function
	to map it to, so the port always reports that the dirty buffer count is not severe.
	@return Always 0.
*/
int
buf_dirty_count_severe(void)
{
	const int notSevere = 0;

	return notSevere;
}
77 
78 
79 /*!	Get a buffer with the specified data.
80 	@param blkno The logical block being requested. If the vnode type is VREG (blkno is relative
81 	to the start of the file), msdosfs_bmap will be called to convert blkno into a block number
82 	relative to the start of the volume. If the vnode type is VBLK, blkno is already relative to
83 	the start of the volume.
84 	@param cred Ignored in the port.
85 	@post bpp Points to the requested struct buf*, if successful. If an error is returned, *bpp is
86 	NULL.
87 */
88 int
89 bread(struct vnode* vp, daddr_t blkno, int size, struct ucred* cred, struct buf** bpp)
90 {
91 	struct buf* buf = NULL;
92 	int error;
93 
94 	error = getblkx(vp, blkno, blkno, size, 0, 0, 0, &buf);
95 
96 	if (error == 0)
97 		*bpp = buf;
98 
99 	return error;
100 }
101 
102 
103 /*! Added for the Haiku port:  common initial steps for bdwrite, bawrite, and bwrite.
104 
105 */
106 static status_t
107 _bwrite(struct buf* buf)
108 {
109 	struct vnode* deviceNode = buf->b_vp;
110 	struct mount* bsdVolume = deviceNode->v_rdev->si_mountpt;
111 	void* blockCache = bsdVolume->mnt_cache;
112 	struct msdosfsmount* fatVolume = (struct msdosfsmount*)bsdVolume->mnt_data;
113 	status_t status = B_OK;
114 
115 	ASSERT(MOUNTED_READ_ONLY(fatVolume) == 0);
116 		// we should not have gotten this far if this is a read-only volume
117 
118 	if (buf->b_vreg != NULL) {
119 		// copy b_data to the file cache
120 		struct vnode* bsdNode = buf->b_vreg;
121 		struct denode* fatNode = (struct denode*)bsdNode->v_data;
122 		off_t fileOffset = 0;
123 		size_t bytesWritten = 0;
124 
125 		if (bsdNode->v_resizing == true)
126 			return status;
127 
128 		ASSERT((fatNode->de_Attributes & ATTR_READONLY) == 0);
129 
130 		fileOffset = de_cn2off(fatVolume, buf->b_lblkno);
131 		ASSERT_ALWAYS((u_long)(fileOffset + buf->b_bufsize) <= fatNode->de_FileSize);
132 
133 		bytesWritten = (size_t)buf->b_bufsize;
134 		status = file_cache_write(bsdNode->v_cache, NULL, fileOffset, buf->b_data, &bytesWritten);
135 		if (bytesWritten != (size_t)buf->b_bufsize)
136 			return EIO;
137 	} else if (buf->b_owned == false) {
138 		// put the single block cache block that was modified
139 		block_cache_put(blockCache, buf->b_blkno);
140 	} else {
141 		// copy b_data into mutiple block cache blocks and put them
142 		uint32 cBlockCount = buf->b_bufsize / CACHED_BLOCK_SIZE;
143 		uint32 i;
144 		for (i = 0; i < cBlockCount && buf->b_bcpointers[i] != NULL; ++i) {
145 			memcpy((caddr_t)buf->b_bcpointers[i], buf->b_data + (i * CACHED_BLOCK_SIZE),
146 				CACHED_BLOCK_SIZE);
147 			block_cache_put(blockCache, buf->b_blkno + i);
148 			buf->b_bcpointers[i] = NULL;
149 		}
150 	}
151 
152 	return status;
153 }
154 
155 
/*! The block_cache block(s) corresponding to bp are put or, if a regular file is being
	written, the file cache is updated. Nothing is flushed to disk at this time.
	The buf is always released, whether or not the write step succeeded; the caller must not
	use bp after this call.
	@param bp The buf to write and release.
*/
void
bdwrite(struct buf* bp)
{
	// _bwrite only reports failure from its regular-file (file cache) branch, where no block
	// cache blocks are held by bp. The buf still has to be handed back in that case, otherwise
	// it (and its data buffer) would be leaked. This function has no way to report the error.
	(void)_bwrite(bp);

	put_buf(bp);
}
169 
170 
171 /*! In FreeBSD, this flushes bp to disk asynchronously. However, Haiku's block cache
172 	has no asynchronous flush option, so the operation is only asynchronous if we are
173 	working with the file cache (i.e. when writing to a regular file). The driver
174 	only uses bawrite if it detects low system resources.
175 */
176 void
177 bawrite(struct buf* bp)
178 {
179 	_bwrite(bp);
180 
181 	if (bp->b_vreg != NULL) {
182 		if (bp->b_vreg->v_resizing == false)
183 			file_cache_sync(bp->b_vreg->v_cache);
184 	} else {
185 		void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache;
186 
187 		if (bp->b_owned == false) {
188 			block_cache_sync_etc(blockCache, bp->b_blkno, 1);
189 		} else {
190 			block_cache_sync_etc(blockCache, bp->b_blkno,
191 				howmany(bp->b_bufsize, CACHED_BLOCK_SIZE));
192 		}
193 	}
194 
195 	put_buf(bp);
196 
197 	return;
198 }
199 
200 
201 /*! Each bread call must be balanced with either a b(d/a)write (to write changes) or a brelse.
202 
203 */
204 void
205 brelse(struct buf* bp)
206 {
207 	if (bp->b_vreg != NULL) {
208 		put_buf(bp);
209 		return;
210 	}
211 
212 	struct mount* bsdVolume = bp->b_vp->v_rdev->si_mountpt;
213 	void* blockCache = bsdVolume->mnt_cache;
214 	bool readOnly = MOUNTED_READ_ONLY(VFSTOMSDOSFS(bsdVolume));
215 
216 	if (bp->b_owned == false) {
217 		if (readOnly == true)
218 			block_cache_set_dirty(blockCache, bp->b_blkno, false, -1);
219 		block_cache_put(blockCache, bp->b_blkno);
220 		put_buf(bp);
221 	} else {
222 		uint32 cBlockCount = bp->b_bufsize / CACHED_BLOCK_SIZE;
223 		uint32 i;
224 		for (i = 0; i < cBlockCount && bp->b_bcpointers[i] != NULL; ++i) {
225 			if (readOnly == true)
226 				block_cache_set_dirty(blockCache, bp->b_blkno + i, false, -1);
227 			block_cache_put(blockCache, bp->b_blkno + i);
228 			bp->b_bcpointers[i] = NULL;
229 		}
230 
231 		put_buf(bp);
232 	}
233 
234 	return;
235 }
236 
237 
238 /*! Similar to bread, but can be used when it's not necessary to read the existing contents of
239 	the block. As currently implemented, it is not any faster than bread. The last 3 parameters
240 	are ignored; the driver always passes 0 for each of them.
241 	@param size The number of blocks to get.
242 */
243 struct buf*
244 getblk(struct vnode* vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags)
245 {
246 	struct buf* buf = NULL;
247 	int error = 0;
248 
249 	error = getblkx(vp, blkno, blkno, size, slpflag, slptimeo, flags, &buf);
250 	if (error != 0)
251 		return NULL;
252 
253 	return buf;
254 }
255 
256 
257 /*!	Return a specified block in a BSD-style struct buf.
258 	@param blkno If vp is the device node, a disk block number in units of DEV_BSIZE; otherwise, a
259 	file-relative block number in units of cluster size.
260 	@param dblkno Disk block number, if known by the client. If vp is not the device node, getblkx
261 	will calculate the disk block number from blkno and ignore this parameter.
262 	@param splflag Ignored in the port.
263 	@param slptimeo Ignored in the port.
264 	@param flags Ignored in the port.
265 */
266 int
267 getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo,
268 	int flags, struct buf** bpp)
269 {
270 	struct msdosfsmount* fatVolume;
271 	struct vnode* deviceNode;
272 	status_t status = B_OK;
273 
274 	bool readOnly = true;
275 	bool foundExisting = false;
276 
277 	uint32 i;
278 	void* blockCache = NULL;
279 	uint32 cBlockCount;
280 		// the number of block cache blocks spanned by the client's request
281 	struct buf* newBuf = NULL;
282 		// the buf to be returned
283 
284 	if (vp->v_type == VREG) {
285 		fatVolume = vp->v_mount->mnt_data;
286 		// convert blkno from file-relative to volume-relative
287 		msdosfs_bmap(vp, blkno, NULL, &dblkno, NULL, NULL);
288 			// output (dblkno) is always in units of DEV_BSIZE, even if blkno is in clusters
289 		blockCache = vp->v_mount->mnt_cache;
290 		readOnly
291 			= MOUNTED_READ_ONLY(fatVolume) || ((VTODE(vp))->de_Attributes & ATTR_READONLY) != 0;
292 		deviceNode = fatVolume->pm_devvp;
293 	} else if (vp->v_type == VBLK) {
294 		fatVolume = vp->v_rdev->si_mountpt->mnt_data;
295 		blockCache = vp->v_rdev->si_mountpt->mnt_cache;
296 		readOnly = MOUNTED_READ_ONLY(fatVolume);
297 		deviceNode = vp;
298 	} else {
299 		return ENOTSUP;
300 	}
301 
302 	// Before allocating memory for a new struct buf, try to reuse an existing one
303 	// in the device vnode's lists.
304 	rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW);
305 	if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG
306 		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_emptybufs) == false) {
307 		// Get a buf with no data space. It will just point to a block cache block.
308 		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_emptybufs);
309 		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_emptybufs, link);
310 		--deviceNode->v_bufobj.bo_empties;
311 		foundExisting = true;
312 	} else if (size == (int)fatVolume->pm_bpcluster
313 		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_clusterbufs) == false) {
314 		// Get a buf with cluster-size data storage from the free list.
315 		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_clusterbufs);
316 		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, link);
317 		--deviceNode->v_bufobj.bo_clusters;
318 		foundExisting = true;
319 	} else if (size == (int)fatVolume->pm_fatblocksize
320 		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_fatbufs) == false) {
321 		// This branch will never be reached in FAT16 or FAT32 so long as pm_fatblocksize and
322 		// CACHED_BLOCK_SIZE are both 512.
323 		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_fatbufs);
324 		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_fatbufs, link);
325 		--deviceNode->v_bufobj.bo_fatblocks;
326 		foundExisting = true;
327 	}
328 	rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW);
329 
330 	if (foundExisting == false) {
331 		newBuf = malloc(sizeof(struct buf), 0, 0);
332 		newBuf->b_data = NULL;
333 		newBuf->b_bufsize = 0;
334 		for (i = 0; i < 128; ++i)
335 			newBuf->b_bcpointers[i] = NULL;
336 	}
337 
338 	// set up / reset the buf
339 	newBuf->b_bcount = size;
340 	newBuf->b_resid = size;
341 	newBuf->b_blkno = dblkno;
342 		// units of DEV_BSIZE, always
343 	newBuf->b_flags = 0;
344 	newBuf->b_lblkno = blkno;
345 		// units depend on vnode type
346 	newBuf->b_vp = deviceNode;
347 		// note that b_vp does not point to the node passed as vp, unless vp is the deviceNode
348 	newBuf->b_owned = false;
349 	newBuf->b_vreg = vp->v_type == VREG ? vp : NULL;
350 
351 	ASSERT(size == newBuf->b_resid);
352 	cBlockCount = howmany(size, CACHED_BLOCK_SIZE);
353 
354 	// Three branches:
355 	// For regular files, copy from file cache into b_data.
356 	// Otherwise, if the requested size equals the cached block size, use the block cache directly.
357 	// Otherwise, copy from the block cache into b_data.
358 	if (vp->v_type == VREG) {
359 		// The occasions when regular file data is accessed through the ported BSD code
360 		// are limited (e.g. deextend) and occur when the node is locked. If we go down this
361 		// branch, we tend to return early because vp->v_resizing is true.
362 
363 		off_t fileOffset;
364 		size_t bytesRead;
365 
366 		newBuf->b_owned = true;
367 		status = allocate_data(newBuf, size);
368 		if (status != B_OK)
369 			return B_TO_POSIX_ERROR(status);
370 
371 		// Don't use the file cache while resizing; wait until node lock is released to avoid
372 		// deadlocks.
373 		if (vp->v_resizing == true) {
374 			(*bpp) = newBuf;
375 				// we need to return a buffer with b_data allocated even in this case,
376 				// because detrunc may zero out the unused space at the end of the last cluster
377 			return B_TO_POSIX_ERROR(status);
378 		}
379 
380 		fileOffset = de_cn2off(fatVolume, blkno);
381 
382 		ASSERT(size <= (int)newBuf->b_bufsize);
383 		bytesRead = (size_t)size;
384 		status = file_cache_read(vp->v_cache, NULL, fileOffset, newBuf->b_data, &bytesRead);
385 		if (status != B_OK) {
386 			put_buf(newBuf);
387 			return B_TO_POSIX_ERROR(status);
388 		}
389 		if (bytesRead != (size_t)size) {
390 			put_buf(newBuf);
391 			return EIO;
392 		}
393 	} else if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG) {
394 		if (readOnly == true)
395 			newBuf->b_data = (void*)block_cache_get(blockCache, dblkno);
396 		else
397 			newBuf->b_data = block_cache_get_writable(blockCache, dblkno, -1);
398 		if (newBuf->b_data == NULL) {
399 			put_buf(newBuf);
400 			return EIO;
401 		}
402 		newBuf->b_bufsize = CACHED_BLOCK_SIZE;
403 	} else {
404 		// need to get more than one cached block and copy them to make a continuous buffer
405 		newBuf->b_owned = true;
406 		status = allocate_data(newBuf, size);
407 		if (status != 0)
408 			return B_TO_POSIX_ERROR(status);
409 
410 		for (i = 0; i < cBlockCount && status == B_OK; i++) {
411 			if (readOnly == true)
412 				newBuf->b_bcpointers[i] = (void*)block_cache_get(blockCache, dblkno + i);
413 			else
414 				newBuf->b_bcpointers[i] = block_cache_get_writable(blockCache, dblkno + i, -1);
415 			if (newBuf->b_bcpointers[i] == NULL) {
416 				put_buf(newBuf);
417 				return EIO;
418 			}
419 		}
420 
421 		ASSERT(cBlockCount * CACHED_BLOCK_SIZE == (u_long)newBuf->b_bufsize);
422 		for (i = 0; i < cBlockCount; i++) {
423 			memcpy(newBuf->b_data + (i * CACHED_BLOCK_SIZE), (caddr_t)newBuf->b_bcpointers[i],
424 				CACHED_BLOCK_SIZE);
425 		}
426 	}
427 
428 	newBuf->b_resid -= size;
429 	ASSERT(newBuf->b_resid == 0);
430 
431 	*bpp = newBuf;
432 
433 	return B_TO_POSIX_ERROR(status);
434 }
435 
436 
/*! Used by deextend to update metadata of pages in the last added cluster.
	Not applicable in Haiku, so this is intentionally a no-op.
	@param bp Unused.
*/
void
vfs_bio_clrbuf(struct buf* bp)
{
	(void)bp;
}
445 
446 
/*! Used by deextend to zero out the remainder of a cluster beyond EOF. In the Haiku port we avoid
	file cache writes when the node is locked (as it is when deextend is called) to prevent
	deadlocks. This data must therefore be zero'd after return from deextend; this function
	itself does nothing.
	@param bp Unused.
	@param base Unused.
	@param size Unused.
*/
void
vfs_bio_bzero_buf(struct buf* bp, int base, int size)
{
	(void)bp;
	(void)base;
	(void)size;
}
456 
457 
458 /*! Flush buffer to disk synchronously.
459 
460 */
461 int
462 bwrite(struct buf* bp)
463 {
464 	status_t status = _bwrite(bp);
465 	if (status != B_OK) {
466 		put_buf(bp);
467 		return B_TO_POSIX_ERROR(status);
468 	}
469 
470 	if (bp->b_vreg != NULL) {
471 		// file cache
472 		if (bp->b_vreg->v_resizing == false) {
473 			bp->b_vreg->v_sync = true;
474 			status = file_cache_sync(bp->b_vreg->v_cache);
475 			bp->b_vreg->v_sync = false;
476 		}
477 	} else {
478 		// block cache
479 		void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache;
480 
481 		if (bp->b_owned == false) {
482 			// single block
483 			status = block_cache_sync_etc(blockCache, bp->b_blkno, 1);
484 		} else {
485 			// multiple blocks
486 			status = block_cache_sync_etc(blockCache, bp->b_blkno,
487 				howmany(bp->b_bufsize, CACHED_BLOCK_SIZE));
488 		}
489 	}
490 
491 	put_buf(bp);
492 
493 	return B_TO_POSIX_ERROR(status);
494 }
495 
496 
497 /*! Added for the Haiku port. Ensure that buf->b_data points to 'size' bytes of zero'd memory.
498 
499 */
500 static status_t
501 allocate_data(struct buf* buf, int size)
502 {
503 	if (buf->b_data == NULL) {
504 		// Either this is a newly created buf, or we are recycling a buf that
505 		// has no memory allocated for b_data.
506 		buf->b_data = (caddr_t)calloc(size, sizeof(char));
507 		if (buf->b_data == NULL)
508 			return B_NO_MEMORY;
509 		buf->b_bufsize = size;
510 	} else {
511 		// This is an existing buf with space allocated for b_data; maybe we can reuse it.
512 		if (buf->b_bufsize == size) {
513 			bzero(buf->b_data, buf->b_bufsize);
514 		} else {
515 			free(buf->b_data, 0);
516 			buf->b_data = (caddr_t)calloc(size, sizeof(char));
517 			if (buf->b_data == NULL)
518 				return B_NO_MEMORY;
519 			buf->b_bufsize = size;
520 		}
521 	}
522 
523 	return B_OK;
524 }
525 
526 
527 /*! Added for the Haiku port. Either add buf to a list of unused bufs, or free it (and b_data, if
528 	necessary).
529 */
530 static status_t
531 put_buf(struct buf* buf)
532 {
533 	struct vnode* deviceNode = buf->b_vp;
534 	struct msdosfsmount* fatVolume = (struct msdosfsmount*)deviceNode->v_rdev->si_mountpt->mnt_data;
535 
536 	rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW);
537 	if (buf->b_owned != 0) {
538 		if ((u_long)buf->b_bufsize == fatVolume->pm_bpcluster
539 			&& deviceNode->v_bufobj.bo_clusters < BUF_CACHE_SIZE) {
540 			SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, buf, link);
541 			++deviceNode->v_bufobj.bo_clusters;
542 		} else if ((u_long)buf->b_bufsize == fatVolume->pm_fatblocksize
543 			&& deviceNode->v_bufobj.bo_fatblocks < BUF_CACHE_SIZE) {
544 			SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_fatbufs, buf, link);
545 			++deviceNode->v_bufobj.bo_fatblocks;
546 		} else {
547 			free(buf->b_data, 0);
548 			free(buf, 0);
549 		}
550 	} else if (deviceNode->v_bufobj.bo_empties < BUF_CACHE_SIZE) {
551 		buf->b_data = NULL;
552 		buf->b_bufsize = 0;
553 		SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_emptybufs, buf, link);
554 		++deviceNode->v_bufobj.bo_empties;
555 	} else {
556 		free(buf, 0);
557 	}
558 	rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW);
559 
560 	return B_OK;
561 }
562