xref: /haiku/src/add-ons/kernel/file_systems/fat/bsd/kern/vfs_bio.c (revision 342a1b221b5bb385410f758df2c625b70cafdd03)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004 Poul-Henning Kamp
5  * Copyright (c) 1994,1997 John S. Dyson
6  * Copyright (c) 2013 The FreeBSD Foundation
7  * All rights reserved.
8  *
9  * Portions of this software were developed by Konstantin Belousov
10  * under sponsorship from the FreeBSD Foundation.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 
35 // Modified to support the Haiku FAT driver. These functions, as implemented here, assume
36 // that the volume's block cache was created with a blockSize equal to DEV_BSIZE. We also
37 // support access to the file cache via these functions, even though the driver doesn't
38 // need that capability in its current form.
39 
40 #include "sys/param.h"
41 #include "sys/systm.h"
42 #include "sys/buf.h"
43 #include "sys/malloc.h"
44 #include "sys/vnode.h"
45 #include "sys/conf.h"
46 
47 #include "fs/msdosfs/bpb.h"
48 #include "fs/msdosfs/denode.h"
49 #include "fs/msdosfs/direntry.h"
50 #include "fs/msdosfs/msdosfsmount.h"
51 
52 #include "dosfs.h"
53 
54 
55 #ifdef USER
56 #define dprintf printf
57 #endif
58 
59 
60 int msdosfs_bmap(struct vnode* a_vp, daddr_t a_bn, struct bufobj** a_bop, daddr_t* a_bnp,
61 	int* a_runp, int* a_runb);
62 int getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo,
63 	int flags, struct buf** bpp);
64 
65 static status_t allocate_data(struct buf* buf, int size);
66 static status_t put_buf(struct buf* buf);
67 
68 
/*! Stub required by the ported FAT code, which pairs this with vm_page_count_severe to detect
	low system resources. Haiku has no analogous function to map it to, so the port always
	reports that the dirty buffer count is not severe.
	@return Always 0.
*/
int
buf_dirty_count_severe(void)
{
	const int severe = 0;

	return severe;
}
77 
78 
79 /*!	Get a buffer with the specified data.
80 	@param blkno The logical block being requested. If the vnode type is VREG (blkno is relative
81 	to the start of the file), msdosfs_bmap will be called to convert blkno into a block number
82 	relative to the start of the volume. If the vnode type is VBLK, blkno is already relative to
83 	the start of the volume.
84 	@param cred Ignored in the port.
85 	@post bpp Points to the requested struct buf*, if successful. If an error is returned, *bpp is
86 	NULL.
87 */
88 int
89 bread(struct vnode* vp, daddr_t blkno, int size, struct ucred* cred, struct buf** bpp)
90 {
91 	struct buf* buf = NULL;
92 	int error;
93 
94 	error = getblkx(vp, blkno, blkno, size, 0, 0, 0, &buf);
95 
96 	if (error == 0)
97 		*bpp = buf;
98 
99 	return error;
100 }
101 
102 
103 /*! Added for the Haiku port:  common initial steps for bdwrite, bawrite, and bwrite.
104 
105 */
106 static status_t
107 _bwrite(struct buf* buf)
108 {
109 	struct vnode* deviceNode = buf->b_vp;
110 	struct mount* bsdVolume = deviceNode->v_rdev->si_mountpt;
111 	void* blockCache = bsdVolume->mnt_cache;
112 	struct msdosfsmount* fatVolume = (struct msdosfsmount*)bsdVolume->mnt_data;
113 	status_t status = B_OK;
114 
115 	ASSERT(MOUNTED_READ_ONLY(fatVolume) == 0);
116 		// we should not have gotten this far if this is a read-only volume
117 
118 	if (buf->b_vreg != NULL) {
119 		// copy b_data to the file cache
120 		struct vnode* bsdNode = buf->b_vreg;
121 		struct denode* fatNode = (struct denode*)bsdNode->v_data;
122 		off_t fileOffset = 0;
123 		size_t bytesWritten = 0;
124 
125 		if (bsdNode->v_resizing == true)
126 			return status;
127 
128 		ASSERT((fatNode->de_Attributes & ATTR_READONLY) == 0);
129 
130 		fileOffset = de_cn2off(fatVolume, buf->b_lblkno);
131 		ASSERT((u_long)(fileOffset + buf->b_bufsize) <= fatNode->de_FileSize);
132 
133 		bytesWritten = (size_t)buf->b_bufsize;
134 		status = file_cache_write(bsdNode->v_cache, NULL, fileOffset, buf->b_data, &bytesWritten);
135 		if (bytesWritten != (size_t)buf->b_bufsize)
136 			return EIO;
137 	} else if (buf->b_owned == false) {
138 		// put the single block cache block that was modified
139 		block_cache_put(blockCache, buf->b_blkno);
140 	} else {
141 		// copy b_data into mutiple block cache blocks and put them
142 		uint32 cBlockCount = buf->b_bufsize / CACHED_BLOCK_SIZE;
143 		uint32 i;
144 		for (i = 0; i < cBlockCount && buf->b_bcpointers[i] != NULL; ++i) {
145 			memcpy((caddr_t)buf->b_bcpointers[i], buf->b_data + (i * CACHED_BLOCK_SIZE),
146 				CACHED_BLOCK_SIZE);
147 			block_cache_put(blockCache, buf->b_blkno + i);
148 			buf->b_bcpointers[i] = NULL;
149 		}
150 	}
151 
152 	return status;
153 }
154 
155 
/*! The block_cache block(s) corresponding to bp are put or, if a regular file is being
	written, the file cache is updated. Nothing is flushed to disk at this time.
	The buf is always released, even if the write to the cache failed; this function has no
	way to report the failure to its caller.
*/
void
bdwrite(struct buf* bp)
{
	// The status is not propagated because the BSD interface is void. What matters is that
	// bp is released on every path (as bwrite does), otherwise the buf and its b_data leak.
	(void)_bwrite(bp);

	put_buf(bp);
}
169 
170 
171 /*! In FreeBSD, this flushes bp to disk asynchronously. However, Haiku's block cache
172 	has no asynchronous flush option, so the operation is only asynchronous if we are
173 	working with the file cache (i.e. when writing to a regular file). The driver
174 	only uses bawrite if it detects low system resources.
175 */
176 void
177 bawrite(struct buf* bp)
178 {
179 	_bwrite(bp);
180 
181 	if (bp->b_vreg != NULL) {
182 		if (bp->b_vreg->v_resizing == false)
183 			file_cache_sync(bp->b_vreg->v_cache);
184 	} else {
185 		void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache;
186 
187 		if (bp->b_owned == false) {
188 			block_cache_sync_etc(blockCache, bp->b_blkno, 1);
189 		} else {
190 			block_cache_sync_etc(blockCache, bp->b_blkno,
191 				howmany(bp->b_bufsize, CACHED_BLOCK_SIZE));
192 		}
193 	}
194 
195 	put_buf(bp);
196 
197 	return;
198 }
199 
200 
201 /*! Each bread call must be balanced with either a b(d/a)write (to write changes) or a brelse.
202 
203 */
204 void
205 brelse(struct buf* bp)
206 {
207 	struct mount* bsdVolume = bp->b_vp->v_rdev->si_mountpt;
208 	void* blockCache = bsdVolume->mnt_cache;
209 	bool readOnly = MOUNTED_READ_ONLY(VFSTOMSDOSFS(bsdVolume));
210 
211 	if (bp->b_vreg != NULL) {
212 		put_buf(bp);
213 	} else if (bp->b_owned == false) {
214 		if (readOnly == true)
215 			block_cache_set_dirty(blockCache, bp->b_blkno, false, -1);
216 		block_cache_put(blockCache, bp->b_blkno);
217 		put_buf(bp);
218 	} else {
219 		uint32 cBlockCount = bp->b_bufsize / CACHED_BLOCK_SIZE;
220 		uint32 i;
221 		for (i = 0; i < cBlockCount && bp->b_bcpointers[i] != NULL; ++i) {
222 			if (readOnly == true)
223 				block_cache_set_dirty(blockCache, bp->b_blkno + i, false, -1);
224 			block_cache_put(blockCache, bp->b_blkno + i);
225 			bp->b_bcpointers[i] = NULL;
226 		}
227 
228 		put_buf(bp);
229 	}
230 
231 	return;
232 }
233 
234 
235 /*! Similar to bread, but can be used when it's not necessary to read the existing contents of
236 	the block. As currently implemented, it is not any faster than bread. The last 3 parameters
237 	are ignored; the driver always passes 0 for each of them.
238 	@param size The number of blocks to get.
239 */
240 struct buf*
241 getblk(struct vnode* vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags)
242 {
243 	struct buf* buf = NULL;
244 	int error = 0;
245 
246 	error = getblkx(vp, blkno, blkno, size, slpflag, slptimeo, flags, &buf);
247 	if (error != 0)
248 		return NULL;
249 
250 	return buf;
251 }
252 
253 
254 /*!	Return a specified block in a BSD-style struct buf.
255 	@param blkno If vp is the device node, a disk block number in units of DEV_BSIZE; otherwise, a
256 	file-relative block number in units of cluster size.
257 	@param dblkno Disk block number, if known by the client. If vp is not the device node, getblkx
258 	will calculate the disk block number from blkno and ignore this parameter.
259 	@param splflag Ignored in the port.
260 	@param slptimeo Ignored in the port.
261 	@param flags Ignored in the port.
262 */
263 int
264 getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo,
265 	int flags, struct buf** bpp)
266 {
267 	struct msdosfsmount* fatVolume;
268 	struct vnode* deviceNode;
269 	status_t status = B_OK;
270 
271 	bool readOnly = true;
272 	bool foundExisting = false;
273 
274 	uint32 i;
275 	void* blockCache = NULL;
276 	uint32 cBlockCount;
277 		// the number of block cache blocks spanned by the client's request
278 	struct buf* newBuf = NULL;
279 		// the buf to be returned
280 
281 	if (vp->v_type == VREG) {
282 		fatVolume = vp->v_mount->mnt_data;
283 		// convert blkno from file-relative to volume-relative
284 		msdosfs_bmap(vp, blkno, NULL, &dblkno, NULL, NULL);
285 			// output (dblkno) is always in units of DEV_BSIZE, even if blkno is in clusters
286 		blockCache = vp->v_mount->mnt_cache;
287 		readOnly
288 			= MOUNTED_READ_ONLY(fatVolume) || ((VTODE(vp))->de_Attributes & ATTR_READONLY) != 0;
289 		deviceNode = fatVolume->pm_devvp;
290 	} else if (vp->v_type == VBLK) {
291 		fatVolume = vp->v_rdev->si_mountpt->mnt_data;
292 		blockCache = vp->v_rdev->si_mountpt->mnt_cache;
293 		readOnly = MOUNTED_READ_ONLY(fatVolume);
294 		deviceNode = vp;
295 	} else {
296 		return ENOTSUP;
297 	}
298 
299 	// Before allocating memory for a new struct buf, try to reuse an existing one
300 	// in the device vnode's lists.
301 	rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW);
302 	if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG
303 		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_emptybufs) == false) {
304 		// Get a buf with no data space. It will just point to a block cache block.
305 		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_emptybufs);
306 		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_emptybufs, link);
307 		--deviceNode->v_bufobj.bo_empties;
308 		foundExisting = true;
309 	} else if (size == (int)fatVolume->pm_bpcluster
310 		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_clusterbufs) == false) {
311 		// Get a buf with cluster-size data storage from the free list.
312 		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_clusterbufs);
313 		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, link);
314 		--deviceNode->v_bufobj.bo_clusters;
315 		foundExisting = true;
316 	} else if (size == (int)fatVolume->pm_fatblocksize
317 		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_fatbufs) == false) {
318 		// This branch will never be reached in FAT16 or FAT32 so long as pm_fatblocksize and
319 		// CACHED_BLOCK_SIZE are both 512.
320 		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_fatbufs);
321 		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_fatbufs, link);
322 		--deviceNode->v_bufobj.bo_fatblocks;
323 		foundExisting = true;
324 	}
325 	rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW);
326 
327 	if (foundExisting == false) {
328 		newBuf = malloc(sizeof(struct buf), 0, 0);
329 		newBuf->b_data = NULL;
330 		newBuf->b_bufsize = 0;
331 		for (i = 0; i < 128; ++i)
332 			newBuf->b_bcpointers[i] = NULL;
333 	}
334 
335 	// set up / reset the buf
336 	newBuf->b_bcount = size;
337 	newBuf->b_resid = size;
338 	newBuf->b_blkno = dblkno;
339 		// units of DEV_BSIZE, always
340 	newBuf->b_flags = 0;
341 	newBuf->b_lblkno = blkno;
342 		// units depend on vnode type
343 	newBuf->b_vp = deviceNode;
344 		// note that b_vp does not point to the node passed as vp, unless vp is the deviceNode
345 	newBuf->b_owned = false;
346 	newBuf->b_vreg = vp->v_type == VREG ? vp : NULL;
347 
348 	ASSERT(size == newBuf->b_resid);
349 	cBlockCount = howmany(size, CACHED_BLOCK_SIZE);
350 
351 	// Three branches:
352 	// For regular files, copy from file cache into b_data.
353 	// Otherwise, if the requested size equals the cached block size, use the block cache directly.
354 	// Otherwise, copy from the block cache into b_data.
355 	if (vp->v_type == VREG) {
356 		// The occasions when regular file data is accessed through the ported BSD code
357 		// are limited (e.g. deextend) and occur when the node is locked. If we go down this
358 		// branch, we tend to return early because vp->v_resizing is true.
359 
360 		off_t fileOffset;
361 		size_t bytesRead;
362 
363 		newBuf->b_owned = true;
364 		status = allocate_data(newBuf, size);
365 		if (status != B_OK)
366 			return B_TO_POSIX_ERROR(status);
367 
368 		// Don't use the file cache while resizing; wait until node lock is released to avoid
369 		// deadlocks.
370 		if (vp->v_resizing == true) {
371 			(*bpp) = newBuf;
372 				// we need to return a buffer with b_data allocated even in this case,
373 				// because detrunc may zero out the unused space at the end of the last cluster
374 			return B_TO_POSIX_ERROR(status);
375 		}
376 
377 		fileOffset = de_cn2off(fatVolume, blkno);
378 
379 		ASSERT(size <= (int)newBuf->b_bufsize);
380 		bytesRead = (size_t)size;
381 		status = file_cache_read(vp->v_cache, NULL, fileOffset, newBuf->b_data, &bytesRead);
382 		if (status != B_OK) {
383 			put_buf(newBuf);
384 			return B_TO_POSIX_ERROR(status);
385 		}
386 		if (bytesRead != (size_t)size) {
387 			put_buf(newBuf);
388 			return EIO;
389 		}
390 	} else if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG) {
391 		if (readOnly == true)
392 			newBuf->b_data = (void*)block_cache_get(blockCache, dblkno);
393 		else
394 			newBuf->b_data = block_cache_get_writable(blockCache, dblkno, -1);
395 		if (newBuf->b_data == NULL) {
396 			put_buf(newBuf);
397 			return EIO;
398 		}
399 		newBuf->b_bufsize = CACHED_BLOCK_SIZE;
400 	} else {
401 		// need to get more than one cached block and copy them to make a continuous buffer
402 		newBuf->b_owned = true;
403 		status = allocate_data(newBuf, size);
404 		if (status != 0)
405 			return B_TO_POSIX_ERROR(status);
406 
407 		for (i = 0; i < cBlockCount && status == B_OK; i++) {
408 			if (readOnly == true)
409 				newBuf->b_bcpointers[i] = (void*)block_cache_get(blockCache, dblkno + i);
410 			else
411 				newBuf->b_bcpointers[i] = block_cache_get_writable(blockCache, dblkno + i, -1);
412 			if (newBuf->b_bcpointers[i] == NULL) {
413 				put_buf(newBuf);
414 				return EIO;
415 			}
416 		}
417 
418 		ASSERT(cBlockCount * CACHED_BLOCK_SIZE == (u_long)newBuf->b_bufsize);
419 		for (i = 0; i < cBlockCount; i++) {
420 			memcpy(newBuf->b_data + (i * CACHED_BLOCK_SIZE), (caddr_t)newBuf->b_bcpointers[i],
421 				CACHED_BLOCK_SIZE);
422 		}
423 	}
424 
425 	newBuf->b_resid -= size;
426 	ASSERT(newBuf->b_resid == 0);
427 
428 	*bpp = newBuf;
429 
430 	return B_TO_POSIX_ERROR(status);
431 }
432 
433 
/*! Used by deextend to update metadata of pages in the last added cluster.
	Not applicable in Haiku, so this is an intentional no-op kept to satisfy the ported code.
*/
void
vfs_bio_clrbuf(struct buf* bp)
{
	(void)bp;
		// nothing to do in the port
}
442 
443 
/*! Used by deextend to zero out the remainder of a cluster beyond EOF. In the Haiku port we avoid
	file cache writes when the node is locked (as it is when deextend is called) to prevent
	deadlocks. This function therefore does nothing, and the data must be zero'd after return
	from deextend.
*/
void
vfs_bio_bzero_buf(struct buf* bp, int base, int size)
{
	(void)bp;
	(void)base;
	(void)size;
		// intentionally empty; see above
}
453 
454 
455 /*! Flush buffer to disk synchronously.
456 
457 */
458 int
459 bwrite(struct buf* bp)
460 {
461 	status_t status = _bwrite(bp);
462 	if (status != B_OK) {
463 		put_buf(bp);
464 		return B_TO_POSIX_ERROR(status);
465 	}
466 
467 	if (bp->b_vreg != NULL) {
468 		// file cache
469 		if (bp->b_vreg->v_resizing == false) {
470 			bp->b_vreg->v_sync = true;
471 			status = file_cache_sync(bp->b_vreg->v_cache);
472 			bp->b_vreg->v_sync = false;
473 		}
474 	} else {
475 		// block cache
476 		void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache;
477 
478 		if (bp->b_owned == false) {
479 			// single block
480 			status = block_cache_sync_etc(blockCache, bp->b_blkno, 1);
481 		} else {
482 			// multiple blocks
483 			status = block_cache_sync_etc(blockCache, bp->b_blkno,
484 				howmany(bp->b_bufsize, CACHED_BLOCK_SIZE));
485 		}
486 	}
487 
488 	put_buf(bp);
489 
490 	return B_TO_POSIX_ERROR(status);
491 }
492 
493 
494 /*! Added for the Haiku port. Ensure that buf->b_data points to 'size' bytes of zero'd memory.
495 
496 */
497 static status_t
498 allocate_data(struct buf* buf, int size)
499 {
500 	if (buf->b_data == NULL) {
501 		// Either this is a newly created buf, or we are recycling a buf that
502 		// has no memory allocated for b_data.
503 		buf->b_data = (caddr_t)calloc(size, sizeof(char));
504 		if (buf->b_data == NULL)
505 			return B_NO_MEMORY;
506 		buf->b_bufsize = size;
507 	} else {
508 		// This is an existing buf with space allocated for b_data; maybe we can reuse it.
509 		if (buf->b_bufsize == size) {
510 			bzero(buf->b_data, buf->b_bufsize);
511 		} else {
512 			free(buf->b_data, 0);
513 			buf->b_data = (caddr_t)calloc(size, sizeof(char));
514 			if (buf->b_data == NULL)
515 				return B_NO_MEMORY;
516 			buf->b_bufsize = size;
517 		}
518 	}
519 
520 	return B_OK;
521 }
522 
523 
524 /*! Added for the Haiku port. Either add buf to a list of unused bufs, or free it (and b_data, if
525 	necessary).
526 */
527 static status_t
528 put_buf(struct buf* buf)
529 {
530 	struct vnode* deviceNode = buf->b_vp;
531 	struct msdosfsmount* fatVolume = (struct msdosfsmount*)deviceNode->v_rdev->si_mountpt->mnt_data;
532 
533 	rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW);
534 	if (buf->b_owned != 0) {
535 		if ((u_long)buf->b_bufsize == fatVolume->pm_bpcluster
536 			&& deviceNode->v_bufobj.bo_clusters < BUF_CACHE_SIZE) {
537 			SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, buf, link);
538 			++deviceNode->v_bufobj.bo_clusters;
539 		} else if ((u_long)buf->b_bufsize == fatVolume->pm_fatblocksize
540 			&& deviceNode->v_bufobj.bo_fatblocks < BUF_CACHE_SIZE) {
541 			SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_fatbufs, buf, link);
542 			++deviceNode->v_bufobj.bo_fatblocks;
543 		} else {
544 			free(buf->b_data, 0);
545 			free(buf, 0);
546 		}
547 	} else if (deviceNode->v_bufobj.bo_empties < BUF_CACHE_SIZE) {
548 		buf->b_data = NULL;
549 		buf->b_bufsize = 0;
550 		SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_emptybufs, buf, link);
551 		++deviceNode->v_bufobj.bo_empties;
552 	} else {
553 		free(buf, 0);
554 	}
555 	rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW);
556 
557 	return B_OK;
558 }
559