/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004 Poul-Henning Kamp
 * Copyright (c) 1994,1997 John S. Dyson
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


// Modified to support the Haiku FAT driver. These functions, as implemented here, assume
// that the volume's block cache was created with a blockSize equal to DEV_BSIZE. We also
// support access to the file cache via these functions, even though the driver doesn't
// need that capability in its current form.

#include "sys/param.h"
#include "sys/systm.h"
#include "sys/buf.h"
#include "sys/malloc.h"
#include "sys/vnode.h"
#include "sys/conf.h"

#include "fs/msdosfs/bpb.h"
#include "fs/msdosfs/denode.h"
#include "fs/msdosfs/direntry.h"
#include "fs/msdosfs/msdosfsmount.h"

#include "dosfs.h"


#ifdef USER
#define dprintf printf
#endif


int msdosfs_bmap(struct vnode* a_vp, daddr_t a_bn, struct bufobj** a_bop, daddr_t* a_bnp,
	int* a_runp, int* a_runb);
int getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo,
	int flags, struct buf** bpp);

static status_t allocate_data(struct buf* buf, int size);
static status_t put_buf(struct buf* buf);


/*! The FAT driver uses this in combination with vm_page_count_severe to detect low system
	resources. However, there is no analogous Haiku function to map this to.
*/
int
buf_dirty_count_severe(void)
{
	return 0;
}


/*! Get a buffer with the specified data.
	@param blkno The logical block being requested. If the vnode type is VREG (blkno is relative
	to the start of the file), msdosfs_bmap will be called to convert blkno into a block number
	relative to the start of the volume. If the vnode type is VBLK, blkno is already relative to
	the start of the volume.
	@param cred Ignored in the port.
	@post bpp Points to the requested struct buf*, if successful. If an error is returned, *bpp is
	NULL.
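	In this port the data is supplied by Haiku's block cache (or by the file cache, for regular
	files) rather than by FreeBSD's buffer layer; see getblkx for the details.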
*/
int
bread(struct vnode* vp, daddr_t blkno, int size, struct ucred* cred, struct buf** bpp)
{
	struct buf* buf = NULL;
	int error;

	error = getblkx(vp, blkno, blkno, size, 0, 0, 0, &buf);

	if (error == 0)
		*bpp = buf;

	return error;
}


/*! Added for the Haiku port: common initial steps for bdwrite, bawrite, and bwrite.

*/
static status_t
_bwrite(struct buf* buf)
{
	struct vnode* deviceNode = buf->b_vp;
	struct mount* bsdVolume = deviceNode->v_rdev->si_mountpt;
	void* blockCache = bsdVolume->mnt_cache;
	struct msdosfsmount* fatVolume = (struct msdosfsmount*)bsdVolume->mnt_data;
	status_t status = B_OK;

	ASSERT(MOUNTED_READ_ONLY(fatVolume) == 0);
		// we should not have gotten this far if this is a read-only volume

	if (buf->b_vreg != NULL) {
		// copy b_data to the file cache
		struct vnode* bsdNode = buf->b_vreg;
		struct denode* fatNode = (struct denode*)bsdNode->v_data;
		off_t fileOffset = 0;
		size_t bytesWritten = 0;

		if (bsdNode->v_resizing == true)
			return status;

		ASSERT((fatNode->de_Attributes & ATTR_READONLY) == 0);

		fileOffset = de_cn2off(fatVolume, buf->b_lblkno);
		ASSERT_ALWAYS((u_long)(fileOffset + buf->b_bufsize) <= fatNode->de_FileSize);

		bytesWritten = (size_t)buf->b_bufsize;
		status = file_cache_write(bsdNode->v_cache, NULL, fileOffset, buf->b_data, &bytesWritten);
		if (bytesWritten != (size_t)buf->b_bufsize)
			return EIO;
	} else if (buf->b_owned == false) {
		// put the single block cache block that was modified
		block_cache_put(blockCache, buf->b_blkno);
	} else {
		// copy b_data into multiple block cache blocks and put them
		uint32 cBlockCount = buf->b_bufsize / CACHED_BLOCK_SIZE;
		uint32 i;
		for (i = 0; i < cBlockCount && buf->b_bcpointers[i] != NULL; ++i) {
			memcpy((caddr_t)buf->b_bcpointers[i], buf->b_data + (i * CACHED_BLOCK_SIZE),
				CACHED_BLOCK_SIZE);
			block_cache_put(blockCache, buf->b_blkno + i);
			buf->b_bcpointers[i] = NULL;
		}
	}

	return status;
}


/*! The block_cache block(s) corresponding to bp are put or, if a regular file is being
	written, the file cache is updated. Nothing is flushed to disk at this time.
*/
void
bdwrite(struct buf* bp)
{
	if (_bwrite(bp) != B_OK)
		return;

	put_buf(bp);

	return;
}


/*! In FreeBSD, this flushes bp to disk asynchronously. However, Haiku's block cache
	has no asynchronous flush option, so the operation is only asynchronous if we are
	working with the file cache (i.e. when writing to a regular file). The driver
	only uses bawrite if it detects low system resources.
*/
void
bawrite(struct buf* bp)
{
	_bwrite(bp);

	if (bp->b_vreg != NULL) {
		if (bp->b_vreg->v_resizing == false)
			file_cache_sync(bp->b_vreg->v_cache);
	} else {
		void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache;

		if (bp->b_owned == false) {
			block_cache_sync_etc(blockCache, bp->b_blkno, 1);
		} else {
			block_cache_sync_etc(blockCache, bp->b_blkno,
				howmany(bp->b_bufsize, CACHED_BLOCK_SIZE));
		}
	}

	put_buf(bp);

	return;
}


/*! Each bread call must be balanced with either a b(d/a)write (to write changes) or a brelse.
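	Releasing means putting the block cache block(s) referenced by bp (with any dirty state
	discarded on read-only volumes) and then recycling or freeing the buf via put_buf.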

*/
void
brelse(struct buf* bp)
{
	struct mount* bsdVolume = bp->b_vp->v_rdev->si_mountpt;
	void* blockCache = bsdVolume->mnt_cache;
	bool readOnly = MOUNTED_READ_ONLY(VFSTOMSDOSFS(bsdVolume));

	if (bp->b_vreg != NULL) {
		put_buf(bp);
	} else if (bp->b_owned == false) {
		if (readOnly == true)
			block_cache_set_dirty(blockCache, bp->b_blkno, false, -1);
		block_cache_put(blockCache, bp->b_blkno);
		put_buf(bp);
	} else {
		uint32 cBlockCount = bp->b_bufsize / CACHED_BLOCK_SIZE;
		uint32 i;
		for (i = 0; i < cBlockCount && bp->b_bcpointers[i] != NULL; ++i) {
			if (readOnly == true)
				block_cache_set_dirty(blockCache, bp->b_blkno + i, false, -1);
			block_cache_put(blockCache, bp->b_blkno + i);
			bp->b_bcpointers[i] = NULL;
		}

		put_buf(bp);
	}

	return;
}


/*! Similar to bread, but can be used when it's not necessary to read the existing contents of
	the block. As currently implemented, it is not any faster than bread. The last 3 parameters
	are ignored; the driver always passes 0 for each of them.
	@param size The size of the requested buffer, in bytes.
*/
struct buf*
getblk(struct vnode* vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags)
{
	struct buf* buf = NULL;
	int error = 0;

	error = getblkx(vp, blkno, blkno, size, slpflag, slptimeo, flags, &buf);
	if (error != 0)
		return NULL;

	return buf;
}


/*! Return a specified block in a BSD-style struct buf.
	@param blkno If vp is the device node, a disk block number in units of DEV_BSIZE; otherwise, a
	file-relative block number in units of cluster size.
	@param dblkno Disk block number, if known by the client. If vp is not the device node, getblkx
	will calculate the disk block number from blkno and ignore this parameter.
	@param slpflag Ignored in the port.
	@param slptimeo Ignored in the port.
	@param flags Ignored in the port.
*/
int
getblkx(struct vnode* vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo,
	int flags, struct buf** bpp)
{
	struct msdosfsmount* fatVolume;
	struct vnode* deviceNode;
	status_t status = B_OK;

	bool readOnly = true;
	bool foundExisting = false;

	uint32 i;
	void* blockCache = NULL;
	uint32 cBlockCount;
		// the number of block cache blocks spanned by the client's request
	struct buf* newBuf = NULL;
		// the buf to be returned

	if (vp->v_type == VREG) {
		fatVolume = vp->v_mount->mnt_data;
		// convert blkno from file-relative to volume-relative
		msdosfs_bmap(vp, blkno, NULL, &dblkno, NULL, NULL);
			// output (dblkno) is always in units of DEV_BSIZE, even if blkno is in clusters
		blockCache = vp->v_mount->mnt_cache;
		readOnly
			= MOUNTED_READ_ONLY(fatVolume) || ((VTODE(vp))->de_Attributes & ATTR_READONLY) != 0;
		deviceNode = fatVolume->pm_devvp;
	} else if (vp->v_type == VBLK) {
		fatVolume = vp->v_rdev->si_mountpt->mnt_data;
		blockCache = vp->v_rdev->si_mountpt->mnt_cache;
		readOnly = MOUNTED_READ_ONLY(fatVolume);
		deviceNode = vp;
	} else {
		return ENOTSUP;
	}

	// Before allocating memory for a new struct buf, try to reuse an existing one
	// in the device vnode's lists.
	rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW);
	if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG
		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_emptybufs) == false) {
		// Get a buf with no data space.
		// It will just point to a block cache block.
		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_emptybufs);
		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_emptybufs, link);
		--deviceNode->v_bufobj.bo_empties;
		foundExisting = true;
	} else if (size == (int)fatVolume->pm_bpcluster
		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_clusterbufs) == false) {
		// Get a buf with cluster-size data storage from the free list.
		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_clusterbufs);
		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, link);
		--deviceNode->v_bufobj.bo_clusters;
		foundExisting = true;
	} else if (size == (int)fatVolume->pm_fatblocksize
		&& SLIST_EMPTY(&deviceNode->v_bufobj.bo_fatbufs) == false) {
		// This branch will never be reached in FAT16 or FAT32 so long as pm_fatblocksize and
		// CACHED_BLOCK_SIZE are both 512.
		newBuf = SLIST_FIRST(&deviceNode->v_bufobj.bo_fatbufs);
		SLIST_REMOVE_HEAD(&deviceNode->v_bufobj.bo_fatbufs, link);
		--deviceNode->v_bufobj.bo_fatblocks;
		foundExisting = true;
	}
	rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW);

	if (foundExisting == false) {
		newBuf = malloc(sizeof(struct buf), 0, 0);
		newBuf->b_data = NULL;
		newBuf->b_bufsize = 0;
		for (i = 0; i < 128; ++i)
			newBuf->b_bcpointers[i] = NULL;
	}

	// set up / reset the buf
	newBuf->b_bcount = size;
	newBuf->b_resid = size;
	newBuf->b_blkno = dblkno;
		// units of DEV_BSIZE, always
	newBuf->b_flags = 0;
	newBuf->b_lblkno = blkno;
		// units depend on vnode type
	newBuf->b_vp = deviceNode;
		// note that b_vp does not point to the node passed as vp, unless vp is the deviceNode
	newBuf->b_owned = false;
	newBuf->b_vreg = vp->v_type == VREG ? vp : NULL;

	ASSERT(size == newBuf->b_resid);
	cBlockCount = howmany(size, CACHED_BLOCK_SIZE);

	// Three branches:
	// For regular files, copy from file cache into b_data.
	// Otherwise, if the requested size equals the cached block size, use the block cache directly.
	// Otherwise, copy from the block cache into b_data.
	if (vp->v_type == VREG) {
		// The occasions when regular file data is accessed through the ported BSD code
		// are limited (e.g. deextend) and occur when the node is locked. If we go down this
		// branch, we tend to return early because vp->v_resizing is true.

		off_t fileOffset;
		size_t bytesRead;

		newBuf->b_owned = true;
		status = allocate_data(newBuf, size);
		if (status != B_OK)
			return B_TO_POSIX_ERROR(status);

		// Don't use the file cache while resizing; wait until node lock is released to avoid
		// deadlocks.
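		// In that case the caller just gets the zero-filled buffer that allocate_data prepared
		// above; no file cache read is attempted.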
		if (vp->v_resizing == true) {
			(*bpp) = newBuf;
				// we need to return a buffer with b_data allocated even in this case,
				// because detrunc may zero out the unused space at the end of the last cluster
			return B_TO_POSIX_ERROR(status);
		}

		fileOffset = de_cn2off(fatVolume, blkno);

		ASSERT(size <= (int)newBuf->b_bufsize);
		bytesRead = (size_t)size;
		status = file_cache_read(vp->v_cache, NULL, fileOffset, newBuf->b_data, &bytesRead);
		if (status != B_OK) {
			put_buf(newBuf);
			return B_TO_POSIX_ERROR(status);
		}
		if (bytesRead != (size_t)size) {
			put_buf(newBuf);
			return EIO;
		}
	} else if (size == CACHED_BLOCK_SIZE && vp->v_type != VREG) {
		if (readOnly == true)
			newBuf->b_data = (void*)block_cache_get(blockCache, dblkno);
		else
			newBuf->b_data = block_cache_get_writable(blockCache, dblkno, -1);
		if (newBuf->b_data == NULL) {
			put_buf(newBuf);
			return EIO;
		}
		newBuf->b_bufsize = CACHED_BLOCK_SIZE;
	} else {
		// need to get more than one cached block and copy them to make a contiguous buffer
		newBuf->b_owned = true;
		status = allocate_data(newBuf, size);
		if (status != 0)
			return B_TO_POSIX_ERROR(status);

		for (i = 0; i < cBlockCount && status == B_OK; i++) {
			if (readOnly == true)
				newBuf->b_bcpointers[i] = (void*)block_cache_get(blockCache, dblkno + i);
			else
				newBuf->b_bcpointers[i] = block_cache_get_writable(blockCache, dblkno + i, -1);
			if (newBuf->b_bcpointers[i] == NULL) {
				put_buf(newBuf);
				return EIO;
			}
		}

		ASSERT(cBlockCount * CACHED_BLOCK_SIZE == (u_long)newBuf->b_bufsize);
		for (i = 0; i < cBlockCount; i++) {
			memcpy(newBuf->b_data + (i * CACHED_BLOCK_SIZE), (caddr_t)newBuf->b_bcpointers[i],
				CACHED_BLOCK_SIZE);
		}
	}

	newBuf->b_resid -= size;
	ASSERT(newBuf->b_resid == 0);

	*bpp = newBuf;

	return B_TO_POSIX_ERROR(status);
}


/*! Used by deextend to update metadata of pages in the last added cluster.
	Not applicable in Haiku.
*/
void
vfs_bio_clrbuf(struct buf* bp)
{
	return;
}


/*! Used by deextend to zero out the remainder of a cluster beyond EOF. In the Haiku port we avoid
	file cache writes when the node is locked (as it is when deextend is called) to prevent
	deadlocks. This data must therefore be zeroed after return from deextend.
*/
void
vfs_bio_bzero_buf(struct buf* bp, int base, int size)
{
	return;
}


/*! Flush buffer to disk synchronously.

*/
int
bwrite(struct buf* bp)
{
	status_t status = _bwrite(bp);
	if (status != B_OK) {
		put_buf(bp);
		return B_TO_POSIX_ERROR(status);
	}

	if (bp->b_vreg != NULL) {
		// file cache
		if (bp->b_vreg->v_resizing == false) {
			bp->b_vreg->v_sync = true;
			status = file_cache_sync(bp->b_vreg->v_cache);
			bp->b_vreg->v_sync = false;
		}
	} else {
		// block cache
		void* blockCache = bp->b_vp->v_rdev->si_mountpt->mnt_cache;

		if (bp->b_owned == false) {
			// single block
			status = block_cache_sync_etc(blockCache, bp->b_blkno, 1);
		} else {
			// multiple blocks
			status = block_cache_sync_etc(blockCache, bp->b_blkno,
				howmany(bp->b_bufsize, CACHED_BLOCK_SIZE));
		}
	}

	put_buf(bp);

	return B_TO_POSIX_ERROR(status);
}


/*! Added for the Haiku port. Ensure that buf->b_data points to 'size' bytes of zeroed memory.
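	An existing allocation is reused (and simply zeroed) if it already has the requested size;
	otherwise b_data is freed and reallocated, and b_bufsize is updated to match.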

*/
static status_t
allocate_data(struct buf* buf, int size)
{
	if (buf->b_data == NULL) {
		// Either this is a newly created buf, or we are recycling a buf that
		// has no memory allocated for b_data.
		buf->b_data = (caddr_t)calloc(size, sizeof(char));
		if (buf->b_data == NULL)
			return B_NO_MEMORY;
		buf->b_bufsize = size;
	} else {
		// This is an existing buf with space allocated for b_data; maybe we can reuse it.
		if (buf->b_bufsize == size) {
			bzero(buf->b_data, buf->b_bufsize);
		} else {
			free(buf->b_data, 0);
			buf->b_data = (caddr_t)calloc(size, sizeof(char));
			if (buf->b_data == NULL)
				return B_NO_MEMORY;
			buf->b_bufsize = size;
		}
	}

	return B_OK;
}


/*! Added for the Haiku port. Either add buf to a list of unused bufs, or free it (and b_data, if
	necessary).
*/
static status_t
put_buf(struct buf* buf)
{
	struct vnode* deviceNode = buf->b_vp;
	struct msdosfsmount* fatVolume = (struct msdosfsmount*)deviceNode->v_rdev->si_mountpt->mnt_data;

	rw_lock_write_lock(&deviceNode->v_bufobj.bo_lock.haikuRW);
	if (buf->b_owned != 0) {
		if ((u_long)buf->b_bufsize == fatVolume->pm_bpcluster
			&& deviceNode->v_bufobj.bo_clusters < BUF_CACHE_SIZE) {
			SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_clusterbufs, buf, link);
			++deviceNode->v_bufobj.bo_clusters;
		} else if ((u_long)buf->b_bufsize == fatVolume->pm_fatblocksize
			&& deviceNode->v_bufobj.bo_fatblocks < BUF_CACHE_SIZE) {
			SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_fatbufs, buf, link);
			++deviceNode->v_bufobj.bo_fatblocks;
		} else {
			free(buf->b_data, 0);
			free(buf, 0);
		}
	} else if (deviceNode->v_bufobj.bo_empties < BUF_CACHE_SIZE) {
		buf->b_data = NULL;
		buf->b_bufsize = 0;
		SLIST_INSERT_HEAD(&deviceNode->v_bufobj.bo_emptybufs, buf, link);
		++deviceNode->v_bufobj.bo_empties;
	} else {
		free(buf, 0);
	}
	rw_lock_write_unlock(&deviceNode->v_bufobj.bo_lock.haikuRW);

	return B_OK;
}
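

/*! Added for illustration only: a minimal sketch of the usage pattern these functions support,
	assuming the caller already has the volume's device vnode (devNode) and a DEV_BSIZE-relative
	block number (blockNumber); both names are hypothetical, and error handling is trimmed.
	Every bread must be balanced by one of the write functions (to write changes) or by brelse;
	all of them consume bp.

	struct buf* bp = NULL;
	int error = bread(devNode, blockNumber, CACHED_BLOCK_SIZE, NULL, &bp);
		// cred is ignored in this port, so NULL is acceptable
	if (error == 0) {
		// on a writable volume, modify the CACHED_BLOCK_SIZE bytes at bp->b_data, then:
		bdwrite(bp);
			// or brelse(bp) if the contents were only read; bp is invalid after either call
	}
*/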