/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004 Poul-Henning Kamp
 * Copyright (c) 1994,1997 John S. Dyson
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


// Modified to support the Haiku FAT driver. These functions, as implemented here, assume
// that the volume's block cache was created with a blockSize equal to DEV_BSIZE. We also
// support access to the file cache via these functions, even though the driver doesn't
// need that capability in its current form.
39 40 #include "sys/param.h" 41 #include "sys/systm.h" 42 #include "sys/buf.h" 43 #include "sys/malloc.h" 44 #include "sys/vnode.h" 45 #include "sys/conf.h" 46 47 #include "fs/msdosfs/bpb.h" 48 #include "fs/msdosfs/denode.h" 49 #include "fs/msdosfs/direntry.h" 50 #include "fs/msdosfs/msdosfsmount.h" 51 52 #include "dosfs.h" 53 54 55 #ifdef USER 56 #define dprintf printf 57 #endif 58 59 60 int msdosfs_bmap(struct vnode* a_vp, daddr_t a_bn, struct bufobj** a_bop, daddr_t* a_bnp, 61 int* a_runp, int* a_runb); 62 int getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo, 63 int flags, struct buf** bpp); 64 65 static status_t allocate_data(struct buf* buf, int size); 66 static status_t put_buf(struct buf* buf); 67 68 69 /*! The FAT driver uses this in combination with vm_page_count_severe to detect low system 70 resources. However, there is no analagous Haiku function to map this to. 71 */ 72 int 73 buf_dirty_count_severe(void) 74 { 75 return 0; 76 } 77 78 79 /*! Get a buffer with the specified data. 80 @param blkno The logical block being requested. If the vnode type is VREG (blkno is relative 81 to the start of the file), msdosfs_bmap will be called to convert blkno into a block number 82 relative to the start of the volume. If the vnode type is VBLK, blkno is already relative to 83 the start of the volume. 84 @param cred Ignored in the port. 85 @post bpp Points to the requested struct buf*, if successful. If an error is returned, *bpp is 86 NULL. 87 */ 88 int 89 bread(struct vnode* vp, daddr_t blkno, int size, struct ucred* cred, struct buf** bpp) 90 { 91 struct buf* buf = NULL; 92 int error; 93 94 error = getblkx(vp, blkno, blkno, size, 0, 0, 0, &buf); 95 96 if (error == 0) 97 *bpp = buf; 98 99 return error; 100 } 101 102 103 /*! Added for the Haiku port: common initial steps for bdwrite, bawrite, and bwrite. 
104 105 */ 106 static status_t 107 _bwrite(struct buf* buf) 108 { 109 struct vnode* deviceNode = buf->b_vp; 110 struct mount* bsdVolume = deviceNode->v_rdev->si_mountpt; 111 void* blockCache = bsdVolume->mnt_cache; 112 struct msdosfsmount* fatVolume = (struct msdosfsmount*)bsdVolume->mnt_data; 113 status_t status = B_OK; 114 115 ASSERT(MOUNTED_READ_ONLY(fatVolume) == 0); 116 // we should not have gotten this far if this is a read-only volume 117 118 if (buf->b_vreg != NULL) { 119 // copy b_data to the file cache 120 struct vnode* bsdNode = buf->b_vreg; 121 struct denode* fatNode = (struct denode*)bsdNode->v_data; 122 off_t fileOffset = 0; 123 size_t bytesWritten = 0; 124 125 if (bsdNode->v_resizing == true) 126 return status; 127 128 ASSERT((fatNode->de_Attributes & ATTR_READONLY) == 0); 129 130 fileOffset = de_cn2off(fatVolume, buf->b_lblkno); 131 ASSERT_ALWAYS((u_long)(fileOffset + buf->b_bufsize) <= fatNode->de_FileSize); 132 133 bytesWritten = (size_t)buf->b_bufsize; 134 status = file_cache_write(bsdNode->v_cache, NULL, fileOffset, buf->b_data, &bytesWritten); 135 if (bytesWritten != (size_t)buf->b_bufsize) 136 return EIO; 137 } else if (buf->b_owned == false) { 138 // put the single block cache block that was modified 139 block_cache_put(blockCache, buf->b_blkno); 140 } else { 141 // copy b_data into mutiple block cache blocks and put them 142 uint32 cBlockCount = buf->b_bufsize / CACHED_BLOCK_SIZE; 143 uint32 i; 144 for (i = 0; i < cBlockCount && buf->b_bcpointers[i] != NULL; ++i) { 145 memcpy((caddr_t)buf->b_bcpointers[i], buf->b_data + (i * CACHED_BLOCK_SIZE), 146 CACHED_BLOCK_SIZE); 147 block_cache_put(blockCache, buf->b_blkno + i); 148 buf->b_bcpointers[i] = NULL; 149 } 150 } 151 152 return status; 153 } 154 155 156 /*! The block_cache block(s) corresponding to bp are put or, if a regular file is being 157 written, the file cache is updated. Nothing is flushed to disk at this time. 
158 */ 159 void 160 bdwrite(struct buf* bp) 161 { 162 if (_bwrite(bp) != B_OK) 163 return; 164 165 put_buf(bp); 166 167 return; 168 } 169 170 171 /*! In FreeBSD, this flushes bp to disk asynchronously. However, Haiku's block cache 172 has no asynchronous flush option, so the operation is only asynchronous if we are 173 working with the file cache (i.e. when writing to a regular file). The driver 174 only uses bawrite if it detects low system resources. 175 */ 176 void 177 bawrite(struct buf* bp) 178 { 179 _bwrite(bp); 180 181 if (bp->b_vreg != NULL) { 182 if (bp->b_vreg->v_resizing == false) 183 file_cache_sync(bp->b_vreg->v_cache); 184 } else { 185 void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache; 186 187 if (bp->b_owned == false) { 188 block_cache_sync_etc(blockCache, bp->b_blkno, 1); 189 } else { 190 block_cache_sync_etc(blockCache, bp->b_blkno, 191 howmany(bp->b_bufsize, CACHED_BLOCK_SIZE)); 192 } 193 } 194 195 put_buf(bp); 196 197 return; 198 } 199 200 201 /*! Each bread call must be balanced with either a b(d/a)write (to write changes) or a brelse. 202 203 */ 204 void 205 brelse(struct buf* bp) 206 { 207 if (bp->b_vreg != NULL) { 208 put_buf(bp); 209 return; 210 } 211 212 struct mount* bsdVolume = bp->b_vp->v_rdev->si_mountpt; 213 void* blockCache = bsdVolume->mnt_cache; 214 bool readOnly = MOUNTED_READ_ONLY(VFSTOMSDOSFS(bsdVolume)); 215 216 if (bp->b_owned == false) { 217 if (readOnly == true) 218 block_cache_set_dirty(blockCache, bp->b_blkno, false, -1); 219 block_cache_put(blockCache, bp->b_blkno); 220 put_buf(bp); 221 } else { 222 uint32 cBlockCount = bp->b_bufsize / CACHED_BLOCK_SIZE; 223 uint32 i; 224 for (i = 0; i < cBlockCount && bp->b_bcpointers[i] != NULL; ++i) { 225 if (readOnly == true) 226 block_cache_set_dirty(blockCache, bp->b_blkno + i, false, -1); 227 block_cache_put(blockCache, bp->b_blkno + i); 228 bp->b_bcpointers[i] = NULL; 229 } 230 231 put_buf(bp); 232 } 233 234 return; 235 } 236 237 238 /*! 
Similar to bread, but can be used when it's not necessary to read the existing contents of 239 the block. As currently implemented, it is not any faster than bread. The last 3 parameters 240 are ignored; the driver always passes 0 for each of them. 241 @param size The number of blocks to get. 242 */ 243 struct buf* 244 getblk(struct vnode* vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags) 245 { 246 struct buf* buf = NULL; 247 int error = 0; 248 249 error = getblkx(vp, blkno, blkno, size, slpflag, slptimeo, flags, &buf); 250 if (error != 0) 251 return NULL; 252 253 return buf; 254 } 255 256 257 /*! Return a specified block in a BSD-style struct buf. 258 @param blkno If vp is the device node, a disk block number in units of DEV_BSIZE; otherwise, a 259 file-relative block number in units of cluster size. 260 @param dblkno Disk block number, if known by the client. If vp is not the device node, getblkx 261 will calculate the disk block number from blkno and ignore this parameter. 262 @param splflag Ignored in the port. 263 @param slptimeo Ignored in the port. 264 @param flags Ignored in the port. 
265 */ 266 int 267 getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo, 268 int flags, struct buf** bpp) 269 { 270 struct msdosfsmount* fatVolume; 271 struct vnode* deviceNode; 272 status_t status = B_OK; 273 274 bool readOnly = true; 275 bool foundExisting = false; 276 277 uint32 i; 278 void* blockCache = NULL; 279 uint32 cBlockCount; 280 // the number of block cache blocks spanned by the client's request 281 struct buf* newBuf = NULL; 282 // the buf to be returned 283 284 if (vp->v_type == VREG) { 285 fatVolume = vp->v_mount->mnt_data; 286 // convert blkno from file-relative to volume-relative 287 msdosfs_bmap(vp, blkno, NULL, &dblkno, NULL, NULL); 288 // output (dblkno) is always in units of DEV_BSIZE, even if blkno is in clusters 289 blockCache = vp->v_mount->mnt_cache; 290 readOnly 291 = MOUNTED_READ_ONLY(fatVolume) || ((VTODE(vp))->de_Attributes & ATTR_READONLY) != 0; 292 deviceNode = fatVolume->pm_devvp; 293 } else if (vp->v_type == VBLK) { 294 fatVolume = vp->v_rdev->si_mountpt->mnt_data; 295 blockCache = vp->v_rdev->si_mountpt->mnt_cache; 296 readOnly = MOUNTED_READ_ONLY(fatVolume); 297 deviceNode = vp; 298 } else { 299 return ENOTSUP; 300 } 301 302 // Before allocating memory for a new struct buf, try to reuse an existing one 303 // in the device vnode's lists. 304 rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW); 305 if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG 306 && SLIST_EMPTY(&deviceNode->v_bufobj.bo_emptybufs) == false) { 307 // Get a buf with no data space. It will just point to a block cache block. 308 newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_emptybufs); 309 SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_emptybufs, link); 310 --deviceNode->v_bufobj.bo_empties; 311 foundExisting = true; 312 } else if (size == (int)fatVolume->pm_bpcluster 313 && SLIST_EMPTY(&deviceNode->v_bufobj.bo_clusterbufs) == false) { 314 // Get a buf with cluster-size data storage from the free list. 
315 newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_clusterbufs); 316 SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, link); 317 --deviceNode->v_bufobj.bo_clusters; 318 foundExisting = true; 319 } else if (size == (int)fatVolume->pm_fatblocksize 320 && SLIST_EMPTY(&deviceNode->v_bufobj.bo_fatbufs) == false) { 321 // This branch will never be reached in FAT16 or FAT32 so long as pm_fatblocksize and 322 // CACHED_BLOCK_SIZE are both 512. 323 newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_fatbufs); 324 SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_fatbufs, link); 325 --deviceNode->v_bufobj.bo_fatblocks; 326 foundExisting = true; 327 } 328 rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW); 329 330 if (foundExisting == false) { 331 newBuf = malloc(sizeof(struct buf), 0, 0); 332 newBuf->b_data = NULL; 333 newBuf->b_bufsize = 0; 334 for (i = 0; i < 128; ++i) 335 newBuf->b_bcpointers[i] = NULL; 336 } 337 338 // set up / reset the buf 339 newBuf->b_bcount = size; 340 newBuf->b_resid = size; 341 newBuf->b_blkno = dblkno; 342 // units of DEV_BSIZE, always 343 newBuf->b_flags = 0; 344 newBuf->b_lblkno = blkno; 345 // units depend on vnode type 346 newBuf->b_vp = deviceNode; 347 // note that b_vp does not point to the node passed as vp, unless vp is the deviceNode 348 newBuf->b_owned = false; 349 newBuf->b_vreg = vp->v_type == VREG ? vp : NULL; 350 351 ASSERT(size == newBuf->b_resid); 352 cBlockCount = howmany(size, CACHED_BLOCK_SIZE); 353 354 // Three branches: 355 // For regular files, copy from file cache into b_data. 356 // Otherwise, if the requested size equals the cached block size, use the block cache directly. 357 // Otherwise, copy from the block cache into b_data. 358 if (vp->v_type == VREG) { 359 // The occasions when regular file data is accessed through the ported BSD code 360 // are limited (e.g. deextend) and occur when the node is locked. If we go down this 361 // branch, we tend to return early because vp->v_resizing is true. 
362 363 off_t fileOffset; 364 size_t bytesRead; 365 366 newBuf->b_owned = true; 367 status = allocate_data(newBuf, size); 368 if (status != B_OK) 369 return B_TO_POSIX_ERROR(status); 370 371 // Don't use the file cache while resizing; wait until node lock is released to avoid 372 // deadlocks. 373 if (vp->v_resizing == true) { 374 (*bpp) = newBuf; 375 // we need to return a buffer with b_data allocated even in this case, 376 // because detrunc may zero out the unused space at the end of the last cluster 377 return B_TO_POSIX_ERROR(status); 378 } 379 380 fileOffset = de_cn2off(fatVolume, blkno); 381 382 ASSERT(size <= (int)newBuf->b_bufsize); 383 bytesRead = (size_t)size; 384 status = file_cache_read(vp->v_cache, NULL, fileOffset, newBuf->b_data, &bytesRead); 385 if (status != B_OK) { 386 put_buf(newBuf); 387 return B_TO_POSIX_ERROR(status); 388 } 389 if (bytesRead != (size_t)size) { 390 put_buf(newBuf); 391 return EIO; 392 } 393 } else if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG) { 394 if (readOnly == true) 395 newBuf->b_data = (void*)block_cache_get(blockCache, dblkno); 396 else 397 newBuf->b_data = block_cache_get_writable(blockCache, dblkno, -1); 398 if (newBuf->b_data == NULL) { 399 put_buf(newBuf); 400 return EIO; 401 } 402 newBuf->b_bufsize = CACHED_BLOCK_SIZE; 403 } else { 404 // need to get more than one cached block and copy them to make a continuous buffer 405 newBuf->b_owned = true; 406 status = allocate_data(newBuf, size); 407 if (status != 0) 408 return B_TO_POSIX_ERROR(status); 409 410 #ifdef _KERNEL_MODE 411 // for high block counts, try to get all blocks in one disk read 412 if (cBlockCount > 4) { 413 size_t prefetchBlocks = cBlockCount; 414 block_cache_prefetch(blockCache, dblkno, &prefetchBlocks); 415 } 416 #endif // _KERNEL_MODE 417 418 for (i = 0; i < cBlockCount && status == B_OK; i++) { 419 if (readOnly == true) 420 newBuf->b_bcpointers[i] = (void*)block_cache_get(blockCache, dblkno + i); 421 else 422 newBuf->b_bcpointers[i] = 
block_cache_get_writable(blockCache, dblkno + i, -1); 423 if (newBuf->b_bcpointers[i] == NULL) { 424 put_buf(newBuf); 425 return EIO; 426 } 427 } 428 429 ASSERT(cBlockCount * CACHED_BLOCK_SIZE == (u_long)newBuf->b_bufsize); 430 for (i = 0; i < cBlockCount; i++) { 431 memcpy(newBuf->b_data + (i * CACHED_BLOCK_SIZE), (caddr_t)newBuf->b_bcpointers[i], 432 CACHED_BLOCK_SIZE); 433 } 434 } 435 436 newBuf->b_resid -= size; 437 ASSERT(newBuf->b_resid == 0); 438 439 *bpp = newBuf; 440 441 return B_TO_POSIX_ERROR(status); 442 } 443 444 445 /*! Used by deextend to update metadata of pages in the last added cluster. 446 Not applicable in Haiku. 447 */ 448 void 449 vfs_bio_clrbuf(struct buf* bp) 450 { 451 return; 452 } 453 454 455 /*! Used by deextend to zero out the remainder of a cluster beyond EOF. In the Haiku port we avoid 456 file cache writes when the node is locked (as it is when deextend is called) to prevent 457 deadlocks. This data must therefore be zero'd after return from deextend. 458 */ 459 void 460 vfs_bio_bzero_buf(struct buf* bp, int base, int size) 461 { 462 return; 463 } 464 465 466 /*! Flush buffer to disk synchronously. 467 468 */ 469 int 470 bwrite(struct buf* bp) 471 { 472 status_t status = _bwrite(bp); 473 if (status != B_OK) { 474 put_buf(bp); 475 return B_TO_POSIX_ERROR(status); 476 } 477 478 if (bp->b_vreg != NULL) { 479 // file cache 480 if (bp->b_vreg->v_resizing == false) { 481 bp->b_vreg->v_sync = true; 482 status = file_cache_sync(bp->b_vreg->v_cache); 483 bp->b_vreg->v_sync = false; 484 } 485 } else { 486 // block cache 487 void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache; 488 489 if (bp->b_owned == false) { 490 // single block 491 status = block_cache_sync_etc(blockCache, bp->b_blkno, 1); 492 } else { 493 // multiple blocks 494 status = block_cache_sync_etc(blockCache, bp->b_blkno, 495 howmany(bp->b_bufsize, CACHED_BLOCK_SIZE)); 496 } 497 } 498 499 put_buf(bp); 500 501 return B_TO_POSIX_ERROR(status); 502 } 503 504 505 /*! 
Added for the Haiku port. Ensure that buf->b_data points to 'size' bytes of zero'd memory. 506 507 */ 508 static status_t 509 allocate_data(struct buf* buf, int size) 510 { 511 if (buf->b_data == NULL) { 512 // Either this is a newly created buf, or we are recycling a buf that 513 // has no memory allocated for b_data. 514 buf->b_data = (caddr_t)calloc(size, sizeof(char)); 515 if (buf->b_data == NULL) 516 return B_NO_MEMORY; 517 buf->b_bufsize = size; 518 } else { 519 // This is an existing buf with space allocated for b_data; maybe we can reuse it. 520 if (buf->b_bufsize == size) { 521 bzero(buf->b_data, buf->b_bufsize); 522 } else { 523 free(buf->b_data, 0); 524 buf->b_data = (caddr_t)calloc(size, sizeof(char)); 525 if (buf->b_data == NULL) 526 return B_NO_MEMORY; 527 buf->b_bufsize = size; 528 } 529 } 530 531 return B_OK; 532 } 533 534 535 /*! Added for the Haiku port. Either add buf to a list of unused bufs, or free it (and b_data, if 536 necessary). 537 */ 538 static status_t 539 put_buf(struct buf* buf) 540 { 541 struct vnode* deviceNode = buf->b_vp; 542 struct msdosfsmount* fatVolume = (struct msdosfsmount*)deviceNode->v_rdev->si_mountpt->mnt_data; 543 544 rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW); 545 if (buf->b_owned != 0) { 546 if ((u_long)buf->b_bufsize == fatVolume->pm_bpcluster 547 && deviceNode->v_bufobj.bo_clusters < BUF_CACHE_SIZE) { 548 SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, buf, link); 549 ++deviceNode->v_bufobj.bo_clusters; 550 } else if ((u_long)buf->b_bufsize == fatVolume->pm_fatblocksize 551 && deviceNode->v_bufobj.bo_fatblocks < BUF_CACHE_SIZE) { 552 SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_fatbufs, buf, link); 553 ++deviceNode->v_bufobj.bo_fatblocks; 554 } else { 555 free(buf->b_data, 0); 556 free(buf, 0); 557 } 558 } else if (deviceNode->v_bufobj.bo_empties < BUF_CACHE_SIZE) { 559 buf->b_data = NULL; 560 buf->b_bufsize = 0; 561 SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_emptybufs, buf, link); 562 
++deviceNode->v_bufobj.bo_empties; 563 } else { 564 free(buf, 0); 565 } 566 rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW); 567 568 return B_OK; 569 } 570