1 /* 2 * Copyright 2001-2008, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 //! inode access functions 7 8 9 #include "Debug.h" 10 #include "Inode.h" 11 #include "BPlusTree.h" 12 #include "Index.h" 13 14 15 #if defined(BFS_TRACING) && !defined(BFS_SHELL) && !defined(_BOOT_MODE) 16 namespace BFSInodeTracing { 17 18 class Create : public AbstractTraceEntry { 19 public: 20 Create(Inode* inode, Inode* parent, const char* name, int32 mode, 21 int openMode, uint32 type) 22 : 23 fInode(inode), 24 fID(inode->ID()), 25 fParent(parent), 26 fParentID(parent != NULL ? parent->ID() : 0), 27 fMode(mode), 28 fOpenMode(openMode), 29 fType(type) 30 { 31 if (name != NULL) 32 strlcpy(fName, name, sizeof(fName)); 33 else 34 fName[0] = '\0'; 35 36 Initialized(); 37 } 38 39 virtual void AddDump(TraceOutput& out) 40 { 41 out.Print("bfs:Create %Ld (%p), parent %Ld (%p), \"%s\", " 42 "mode %lx, omode %x, type %lx", fID, fInode, fParentID, 43 fParent, fName, fMode, fOpenMode, fType); 44 } 45 46 private: 47 Inode* fInode; 48 ino_t fID; 49 Inode* fParent; 50 ino_t fParentID; 51 char fName[32]; 52 int32 fMode; 53 int fOpenMode; 54 uint32 fType; 55 56 }; 57 58 class Remove : public AbstractTraceEntry { 59 public: 60 Remove(Inode* inode, const char* name) 61 : 62 fInode(inode), 63 fID(inode->ID()) 64 { 65 strlcpy(fName, name, sizeof(fName)); 66 Initialized(); 67 } 68 69 virtual void AddDump(TraceOutput& out) 70 { 71 out.Print("bfs:Remove %Ld (%p), \"%s\"", fID, fInode, fName); 72 } 73 74 private: 75 Inode* fInode; 76 ino_t fID; 77 char fName[32]; 78 }; 79 80 class Action : public AbstractTraceEntry { 81 public: 82 Action(const char* action, Inode* inode) 83 : 84 fInode(inode), 85 fID(inode->ID()) 86 { 87 strlcpy(fAction, action, sizeof(fAction)); 88 Initialized(); 89 } 90 91 virtual void AddDump(TraceOutput& out) 92 { 93 out.Print("bfs:%s %Ld (%p)\n", fAction, fID, fInode); 94 } 95 96 private: 97 Inode* fInode; 98 ino_t fID; 99 char fAction[16]; 100 }; 101 102 class Resize : public AbstractTraceEntry { 103 public: 104 Resize(Inode* inode, off_t oldSize, off_t newSize, bool trim) 105 : 106 fInode(inode), 107 fID(inode->ID()), 108 fOldSize(oldSize), 109 fNewSize(newSize), 110 fTrim(trim) 111 { 112 Initialized(); 113 } 114 115 virtual void AddDump(TraceOutput& out) 116 { 117 out.Print("bfs:%s %Ld (%p), %Ld -> %Ld", fTrim ? "Trim" : "Resize", 118 fID, fInode, fOldSize, fNewSize); 119 } 120 121 private: 122 Inode* fInode; 123 ino_t fID; 124 off_t fOldSize; 125 off_t fNewSize; 126 bool fTrim; 127 }; 128 129 } // namespace BFSInodeTracing 130 131 # define T(x) new(std::nothrow) BFSInodeTracing::x; 132 #else 133 # define T(x) ; 134 #endif 135 136 class InodeAllocator { 137 public: 138 InodeAllocator(Transaction &transaction); 139 ~InodeAllocator(); 140 141 status_t New(block_run *parentRun, mode_t mode, block_run &run, 142 Inode **_inode); 143 status_t CreateTree(); 144 status_t Keep(); 145 146 private: 147 static void _TransactionListener(int32 id, int32 event, void *_inode); 148 149 Transaction *fTransaction; 150 block_run fRun; 151 Inode *fInode; 152 }; 153 154 155 InodeAllocator::InodeAllocator(Transaction &transaction) 156 : 157 fTransaction(&transaction), 158 fInode(NULL) 159 { 160 } 161 162 163 InodeAllocator::~InodeAllocator() 164 { 165 if (fTransaction != NULL) { 166 Volume *volume = fTransaction->GetVolume(); 167 168 if (fInode != NULL) { 169 fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 170 // this unblocks any pending bfs_read_vnode() calls 171 fInode->Free(*fTransaction); 172 remove_vnode(volume->FSVolume(), fInode->ID()); 173 } else 174 volume->Free(*fTransaction, fRun); 175 } 176 177 delete fInode; 178 } 179 180 181 status_t 182 InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, 183 Inode **_inode) 184 { 185 Volume *volume = fTransaction->GetVolume(); 186 187 status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode, 188 fRun); 189 if (status < B_OK) { 190 // don't free the space in the destructor, because 191 // the allocation failed 192 fTransaction = NULL; 193 RETURN_ERROR(status); 194 } 195 196 run = fRun; 197 fInode = new Inode(volume, *fTransaction, volume->ToVnode(run), mode, run); 198 if (fInode == NULL) 199 RETURN_ERROR(B_NO_MEMORY); 200 201 if (volume->ID() >= 0) { 202 status = new_vnode(volume->FSVolume(), fInode->ID(), fInode, 203 &gBFSVnodeOps); 204 if (status < B_OK) { 205 delete fInode; 206 fInode = NULL; 207 RETURN_ERROR(status); 208 } 209 } 210 211 *_inode = fInode; 212 return B_OK; 213 } 214 215 216 status_t 217 InodeAllocator::CreateTree() 218 { 219 Volume *volume = fTransaction->GetVolume(); 220 221 // force S_STR_INDEX to be set, if no type is set 222 if ((fInode->Mode() & S_INDEX_TYPES) == 0) 223 fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX); 224 225 BPlusTree *tree = fInode->fTree = new BPlusTree(*fTransaction, fInode); 226 if (tree == NULL || tree->InitCheck() < B_OK) 227 return B_ERROR; 228 229 if (fInode->IsRegularNode()) { 230 if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK 231 || tree->Insert(*fTransaction, "..", 232 volume->ToVnode(fInode->Parent())) < B_OK) 233 return B_ERROR; 234 } 235 return B_OK; 236 } 237 238 239 status_t 240 InodeAllocator::Keep() 241 { 242 ASSERT(fInode != NULL && fTransaction != NULL); 243 Volume *volume = fTransaction->GetVolume(); 244 245 status_t status = fInode->WriteBack(*fTransaction); 246 if (status < B_OK) { 247 FATAL(("writing new inode %Ld failed!\n", fInode->ID())); 248 return status; 249 } 250 251 if (!fInode->IsSymLink() && volume->ID() >= 0) { 252 status = publish_vnode(volume->FSVolume(), fInode->ID(), fInode, 253 &gBFSVnodeOps, fInode->Mode(), 0); 254 } 255 256 if (status == B_OK) { 257 cache_add_transaction_listener(volume->BlockCache(), fTransaction->ID(), 258 TRANSACTION_ABORTED, &_TransactionListener, fInode); 259 } 260 261 fTransaction = NULL; 262 fInode = NULL; 263 264 return status; 265 } 266 267 268 /*static*/ void 269 InodeAllocator::_TransactionListener(int32 id, int32 event, void *_inode) 270 { 271 Inode *inode = (Inode *)_inode; 272 273 if (event == TRANSACTION_ABORTED) 274 panic("transaction %d aborted, inode %p still around!\n", (int)id, inode); 275 } 276 277 278 // #pragma mark - 279 280 281 status_t 282 bfs_inode::InitCheck(Volume *volume) 283 { 284 if (Magic1() != INODE_MAGIC1 285 || !(Flags() & INODE_IN_USE) 286 || inode_num.Length() != 1 287 // matches inode size? 288 || (uint32)InodeSize() != volume->InodeSize() 289 // parent resides on disk? 290 || parent.AllocationGroup() > int32(volume->AllocationGroups()) 291 || parent.AllocationGroup() < 0 292 || parent.Start() > (1L << volume->AllocationGroupShift()) 293 || parent.Length() != 1 294 // attributes, too? 295 || attributes.AllocationGroup() > int32(volume->AllocationGroups()) 296 || attributes.AllocationGroup() < 0 297 || attributes.Start() > (1L << volume->AllocationGroupShift())) 298 RETURN_ERROR(B_BAD_DATA); 299 300 if (Flags() & INODE_DELETED) 301 return B_NOT_ALLOWED; 302 303 // ToDo: Add some tests to check the integrity of the other stuff here, 304 // especially for the data_stream! 305 306 return B_OK; 307 } 308 309 310 // #pragma mark - Inode 311 312 313 Inode::Inode(Volume *volume, ino_t id) 314 : 315 fVolume(volume), 316 fID(id), 317 fTree(NULL), 318 fAttributes(NULL), 319 fCache(NULL), 320 fMap(NULL) 321 { 322 PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume, id, this)); 323 324 NodeGetter node(volume, this); 325 memcpy(&fNode, node.Node(), sizeof(bfs_inode)); 326 327 char lockName[B_OS_NAME_LENGTH]; 328 snprintf(lockName, sizeof(lockName), "bfs inode %d.%d", 329 (int)BlockRun().AllocationGroup(), BlockRun().Start()); 330 fLock.Initialize(lockName); 331 332 // these two will help to maintain the indices 333 fOldSize = Size(); 334 fOldLastModified = LastModified(); 335 336 if (IsContainer()) 337 fTree = new BPlusTree(this); 338 if (IsFile() || IsAttribute()) { 339 SetFileCache(file_cache_create(fVolume->ID(), ID(), Size())); 340 SetMap(file_map_create(volume->ID(), ID(), Size())); 341 } 342 } 343 344 345 Inode::Inode(Volume *volume, Transaction &transaction, ino_t id, mode_t mode, 346 block_run &run) 347 : 348 fVolume(volume), 349 fID(id), 350 fTree(NULL), 351 fAttributes(NULL), 352 fCache(NULL), 353 fMap(NULL) 354 { 355 PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n", 356 volume, &transaction, id, this)); 357 358 char lockName[B_OS_NAME_LENGTH]; 359 snprintf(lockName, sizeof(lockName), "bfs inode+%d.%d", 360 (int)run.AllocationGroup(), run.Start()); 361 fLock.Initialize(lockName); 362 363 NodeGetter node(volume, transaction, this, true); 364 memset(&fNode, 0, sizeof(bfs_inode)); 365 366 // Initialize the bfs_inode structure -- it's not written back to disk 367 // here, because it will be done so already when the inode could be 368 // created completely. 369 370 Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1); 371 Node().inode_num = run; 372 Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode); 373 Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 374 375 Node().create_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) 376 << INODE_TIME_SHIFT); 377 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64(Node().create_time 378 | (volume->GetUniqueID() & INODE_TIME_MASK)); 379 // we use Volume::GetUniqueID() to avoid having too many duplicates 380 // in the last_modified index 381 382 Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize()); 383 384 // these two will help to maintain the indices 385 fOldSize = Size(); 386 fOldLastModified = LastModified(); 387 } 388 389 390 Inode::~Inode() 391 { 392 PRINT(("Inode::~Inode() @ %p\n", this)); 393 394 file_cache_delete(FileCache()); 395 file_map_delete(Map()); 396 delete fTree; 397 } 398 399 400 status_t 401 Inode::InitCheck(bool checkNode) 402 { 403 // test inode magic and flags 404 if (checkNode) { 405 status_t status = Node().InitCheck(fVolume); 406 if (status == B_BUSY) 407 return B_BUSY; 408 409 if (status < B_OK) { 410 FATAL(("inode at block %Ld corrupt!\n", BlockNumber())); 411 RETURN_ERROR(B_BAD_DATA); 412 } 413 } 414 415 if (IsContainer()) { 416 // inodes that have a B+tree 417 if (fTree == NULL) 418 RETURN_ERROR(B_NO_MEMORY); 419 420 status_t status = fTree->InitCheck(); 421 if (status < B_OK) { 422 FATAL(("inode tree at block %Ld corrupt!\n", BlockNumber())); 423 RETURN_ERROR(B_BAD_DATA); 424 } 425 } 426 427 // it's more important to know that the inode is corrupt 428 // so we check for the lock not until here 429 return fLock.InitCheck(); 430 } 431 432 433 status_t 434 Inode::WriteBack(Transaction &transaction) 435 { 436 NodeGetter node(fVolume, transaction, this); 437 if (node.WritableNode() == NULL) 438 return B_IO_ERROR; 439 440 memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode)); 441 return B_OK; 442 } 443 444 445 status_t 446 Inode::CheckPermissions(int accessMode) const 447 { 448 uid_t user = geteuid(); 449 gid_t group = getegid(); 450 451 // you never have write access to a read-only volume 452 if (accessMode & W_OK && fVolume->IsReadOnly()) 453 return B_READ_ONLY_DEVICE; 454 455 // root users always have full access (but they can't execute files without 456 // any execute permissions set) 457 if (user == 0) { 458 if (!((accessMode & X_OK) != 0 && (Mode() & S_IXUSR) == 0) 459 || (Mode() & S_DIRECTORY) != 0) { 460 return B_OK; 461 } 462 } 463 464 // shift mode bits, to check directly against accessMode 465 mode_t mode = Mode(); 466 if (user == (uid_t)fNode.UserID()) 467 mode >>= 6; 468 else if (group == (gid_t)fNode.GroupID()) 469 mode >>= 3; 470 471 if (accessMode & ~(mode & S_IRWXO)) 472 return B_NOT_ALLOWED; 473 474 return B_OK; 475 } 476 477 478 // #pragma mark - attributes 479 480 481 void 482 Inode::_AddIterator(AttributeIterator *iterator) 483 { 484 if (fSmallDataLock.Lock() < B_OK) 485 return; 486 487 fIterators.Add(iterator); 488 489 fSmallDataLock.Unlock(); 490 } 491 492 493 void 494 Inode::_RemoveIterator(AttributeIterator *iterator) 495 { 496 if (fSmallDataLock.Lock() < B_OK) 497 return; 498 499 fIterators.Remove(iterator); 500 501 fSmallDataLock.Unlock(); 502 } 503 504 505 /*! Tries to free up "bytes" space in the small_data section by moving 506 attributes to real files. Used for system attributes like the name. 507 You need to hold the fSmallDataLock when you call this method 508 */ 509 status_t 510 Inode::_MakeSpaceForSmallData(Transaction &transaction, bfs_inode *node, 511 const char *name, int32 bytes) 512 { 513 ASSERT(fSmallDataLock.IsLocked()); 514 515 while (bytes > 0) { 516 small_data *item = node->SmallDataStart(), *max = NULL; 517 int32 index = 0, maxIndex = 0; 518 for (; !item->IsLast(node); item = item->Next(), index++) { 519 // should not remove those 520 if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name())) 521 continue; 522 523 if (max == NULL || max->Size() < item->Size()) { 524 maxIndex = index; 525 max = item; 526 } 527 528 // Remove the first one large enough to free the needed amount of 529 // bytes 530 if (bytes < (int32)item->Size()) 531 break; 532 } 533 534 if (item->IsLast(node) || (int32)item->Size() < bytes) 535 return B_ERROR; 536 537 bytes -= max->Size(); 538 539 // Move the attribute to a real attribute file 540 // Luckily, this doesn't cause any index updates 541 542 Inode *attribute; 543 status_t status = CreateAttribute(transaction, item->Name(), 544 item->Type(), &attribute); 545 if (status < B_OK) 546 RETURN_ERROR(status); 547 548 size_t length = item->DataSize(); 549 status = attribute->WriteAt(transaction, 0, item->Data(), &length); 550 551 ReleaseAttribute(attribute); 552 553 if (status < B_OK) { 554 Vnode vnode(fVolume, Attributes()); 555 Inode *attributes; 556 if (vnode.Get(&attributes) < B_OK 557 || attributes->Remove(transaction, name) < B_OK) { 558 FATAL(("Could not remove newly created attribute!\n")); 559 } 560 561 RETURN_ERROR(status); 562 } 563 564 _RemoveSmallData(node, max, maxIndex); 565 } 566 return B_OK; 567 } 568 569 570 /*! Private function which removes the given attribute from the small_data 571 section. 572 You need to hold the fSmallDataLock when you call this method 573 */ 574 status_t 575 Inode::_RemoveSmallData(bfs_inode *node, small_data *item, int32 index) 576 { 577 ASSERT(fSmallDataLock.IsLocked()); 578 579 small_data *next = item->Next(); 580 if (!next->IsLast(node)) { 581 // find the last attribute 582 small_data *last = next; 583 while (!last->IsLast(node)) 584 last = last->Next(); 585 586 int32 size = (uint8 *)last - (uint8 *)next; 587 if (size < 0 588 || size > (uint8 *)node + fVolume->BlockSize() - (uint8 *)next) 589 return B_BAD_DATA; 590 591 memmove(item, next, size); 592 593 // Move the "last" one to its new location and 594 // correctly terminate the small_data section 595 last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item)); 596 memset(last, 0, (uint8 *)node + fVolume->BlockSize() - (uint8 *)last); 597 } else 598 memset(item, 0, item->Size()); 599 600 // update all current iterators 601 AttributeIterator *iterator = NULL; 602 while ((iterator = fIterators.Next(iterator)) != NULL) { 603 iterator->Update(index, -1); 604 } 605 606 return B_OK; 607 } 608 609 610 //! Removes the given attribute from the small_data section. 611 status_t 612 Inode::_RemoveSmallData(Transaction &transaction, NodeGetter &nodeGetter, 613 const char *name) 614 { 615 if (name == NULL) 616 return B_BAD_VALUE; 617 618 bfs_inode *node = nodeGetter.WritableNode(); 619 SimpleLocker locker(fSmallDataLock); 620 621 // search for the small_data item 622 623 small_data *item = node->SmallDataStart(); 624 int32 index = 0; 625 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 626 item = item->Next(); 627 index++; 628 } 629 630 if (item->IsLast(node)) 631 return B_ENTRY_NOT_FOUND; 632 633 nodeGetter.MakeWritable(transaction); 634 status_t status = _RemoveSmallData(node, item, index); 635 if (status == B_OK) 636 status = WriteBack(transaction); 637 638 return status; 639 } 640 641 642 /*! Try to place the given attribute in the small_data section - if the 643 new attribute is too big to fit in that section, it returns B_DEVICE_FULL. 644 In that case, the attribute should be written to a real attribute file; 645 it's the caller's responsibility to remove any existing attributes in the 646 small data section if that's the case. 647 648 Note that you need to write back the inode yourself after having called that 649 method - it's a bad API decision that it needs a transaction but enforces 650 you to write back the inode all by yourself, but it's just more efficient 651 in most cases... 652 */ 653 status_t 654 Inode::_AddSmallData(Transaction &transaction, NodeGetter &nodeGetter, 655 const char *name, uint32 type, const uint8 *data, size_t length, bool force) 656 { 657 // TODO: support new write attr semantics and write offset! 658 bfs_inode *node = nodeGetter.WritableNode(); 659 660 if (node == NULL || name == NULL || data == NULL) 661 return B_BAD_VALUE; 662 663 // reject any requests that can't fit into the small_data section 664 uint32 nameLength = strlen(name); 665 uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1; 666 if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode)) 667 return B_DEVICE_FULL; 668 669 nodeGetter.MakeWritable(transaction); 670 SimpleLocker locker(fSmallDataLock); 671 672 // Find the last item or one with the same name we have to add 673 small_data *item = node->SmallDataStart(); 674 int32 index = 0; 675 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 676 item = item->Next(); 677 index++; 678 } 679 680 // is the attribute already in the small_data section? 681 // then just replace the data part of that one 682 if (!item->IsLast(node)) { 683 // find last attribute 684 small_data *last = item; 685 while (!last->IsLast(node)) 686 last = last->Next(); 687 688 // try to change the attributes value 689 if (item->data_size > length 690 || force 691 || ((uint8 *)last + length - item->DataSize()) 692 <= ((uint8 *)node + fVolume->InodeSize())) { 693 // Make room for the new attribute if needed (and we are forced 694 // to do so) 695 if (force && ((uint8 *)last + length - item->DataSize()) 696 > ((uint8 *)node + fVolume->InodeSize())) { 697 // We also take the free space at the end of the small_data 698 // section into account, and request only what's really needed 699 uint32 needed = length - item->DataSize() - 700 (uint32)((uint8 *)node + fVolume->InodeSize() 701 - (uint8 *)last); 702 703 if (_MakeSpaceForSmallData(transaction, node, name, needed) 704 < B_OK) 705 return B_ERROR; 706 707 // reset our pointers 708 item = node->SmallDataStart(); 709 index = 0; 710 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 711 item = item->Next(); 712 index++; 713 } 714 715 last = item; 716 while (!last->IsLast(node)) 717 last = last->Next(); 718 } 719 720 // Normally, we can just overwrite the attribute data as the size 721 // is specified by the type and does not change that often 722 if (length != item->DataSize()) { 723 // move the attributes after the current one 724 small_data *next = item->Next(); 725 if (!next->IsLast(node)) { 726 memmove((uint8 *)item + spaceNeeded, next, 727 (uint8 *)last - (uint8 *)next); 728 } 729 730 // Move the "last" one to its new location and 731 // correctly terminate the small_data section 732 last = (small_data *)((uint8 *)last 733 - ((uint8 *)next - ((uint8 *)item + spaceNeeded))); 734 if ((uint8 *)last < (uint8 *)node + fVolume->BlockSize()) { 735 memset(last, 0, (uint8 *)node + fVolume->BlockSize() 736 - (uint8 *)last); 737 } 738 739 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 740 } 741 742 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 743 memcpy(item->Data(), data, length); 744 item->Data()[length] = '\0'; 745 746 return B_OK; 747 } 748 749 return B_DEVICE_FULL; 750 } 751 752 // try to add the new attribute! 753 754 if ((uint8 *)item + spaceNeeded > (uint8 *)node + fVolume->InodeSize()) { 755 // there is not enough space for it! 756 if (!force) 757 return B_DEVICE_FULL; 758 759 // make room for the new attribute 760 if (_MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK) 761 return B_ERROR; 762 763 // get new last item! 764 item = node->SmallDataStart(); 765 index = 0; 766 while (!item->IsLast(node)) { 767 item = item->Next(); 768 index++; 769 } 770 } 771 772 memset(item, 0, spaceNeeded); 773 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 774 item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength); 775 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 776 strcpy(item->Name(), name); 777 memcpy(item->Data(), data, length); 778 779 // correctly terminate the small_data section 780 item = item->Next(); 781 if (!item->IsLast(node)) 782 memset(item, 0, (uint8 *)node + fVolume->InodeSize() - (uint8 *)item); 783 784 // update all current iterators 785 AttributeIterator *iterator = NULL; 786 while ((iterator = fIterators.Next(iterator)) != NULL) { 787 iterator->Update(index, 1); 788 } 789 790 return B_OK; 791 } 792 793 794 /*! Iterates through the small_data section of an inode. 795 To start at the beginning of this section, you let smallData 796 point to NULL, like: 797 small_data *data = NULL; 798 while (inode->GetNextSmallData(&data) { ... } 799 800 This function is reentrant and doesn't allocate any memory; 801 you can safely stop calling it at any point (you don't need 802 to iterate through the whole list). 803 You need to hold the fSmallDataLock when you call this method 804 */ 805 status_t 806 Inode::_GetNextSmallData(bfs_inode *node, small_data **_smallData) const 807 { 808 if (node == NULL) 809 RETURN_ERROR(B_BAD_VALUE); 810 811 ASSERT(fSmallDataLock.IsLocked()); 812 813 small_data *data = *_smallData; 814 815 // begin from the start? 816 if (data == NULL) 817 data = node->SmallDataStart(); 818 else 819 data = data->Next(); 820 821 // is already last item? 822 if (data->IsLast(node)) 823 return B_ENTRY_NOT_FOUND; 824 825 *_smallData = data; 826 827 return B_OK; 828 } 829 830 831 /*! Finds the attribute "name" in the small data section, and 832 returns a pointer to it (or NULL if it doesn't exist). 833 You need to hold the fSmallDataLock when you call this method 834 */ 835 small_data * 836 Inode::FindSmallData(const bfs_inode *node, const char *name) const 837 { 838 ASSERT(fSmallDataLock.IsLocked()); 839 840 small_data *smallData = NULL; 841 while (_GetNextSmallData(const_cast<bfs_inode *>(node), &smallData) 842 == B_OK) { 843 if (!strcmp(smallData->Name(), name)) 844 return smallData; 845 } 846 return NULL; 847 } 848 849 850 /*! Returns a pointer to the node's name if present in the small data 851 section, NULL otherwise. 852 You need to hold the fSmallDataLock when you call this method 853 */ 854 const char * 855 Inode::Name(const bfs_inode *node) const 856 { 857 ASSERT(fSmallDataLock.IsLocked()); 858 859 small_data *smallData = NULL; 860 while (_GetNextSmallData((bfs_inode *)node, &smallData) == B_OK) { 861 if (*smallData->Name() == FILE_NAME_NAME 862 && smallData->NameSize() == FILE_NAME_NAME_LENGTH) 863 return (const char *)smallData->Data(); 864 } 865 return NULL; 866 } 867 868 869 /*! Copies the node's name into the provided buffer. 870 The buffer should be B_FILE_NAME_LENGTH bytes large. 871 */ 872 status_t 873 Inode::GetName(char *buffer, size_t size) const 874 { 875 NodeGetter node(fVolume, this); 876 SimpleLocker locker(fSmallDataLock); 877 878 const char *name = Name(node.Node()); 879 if (name == NULL) 880 return B_ENTRY_NOT_FOUND; 881 882 strlcpy(buffer, name, size); 883 return B_OK; 884 } 885 886 887 /*! Changes or set the name of a file: in the inode small_data section only, it 888 doesn't change it in the parent directory's b+tree. 889 Note that you need to write back the inode yourself after having called 890 that method. It suffers from the same API decision as AddSmallData() does 891 (and for the same reason). 892 */ 893 status_t 894 Inode::SetName(Transaction &transaction, const char *name) 895 { 896 if (name == NULL || *name == '\0') 897 return B_BAD_VALUE; 898 899 NodeGetter node(fVolume, transaction, this); 900 const char nameTag[2] = {FILE_NAME_NAME, 0}; 901 902 return _AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 903 (uint8 *)name, strlen(name), true); 904 } 905 906 907 status_t 908 Inode::_RemoveAttribute(Transaction &transaction, const char *name, 909 bool hasIndex, Index *index) 910 { 911 // remove the attribute file if it exists 912 Vnode vnode(fVolume, Attributes()); 913 Inode *attributes; 914 status_t status = vnode.Get(&attributes); 915 if (status < B_OK) 916 return status; 917 918 // update index 919 if (index != NULL) { 920 Inode *attribute; 921 if ((hasIndex || fVolume->CheckForLiveQuery(name)) 922 && GetAttribute(name, &attribute) == B_OK) { 923 uint8 data[BPLUSTREE_MAX_KEY_LENGTH]; 924 size_t length = BPLUSTREE_MAX_KEY_LENGTH; 925 if (attribute->ReadAt(0, data, &length) == B_OK) { 926 index->Update(transaction, name, attribute->Type(), data, 927 length, NULL, 0, this); 928 } 929 930 ReleaseAttribute(attribute); 931 } 932 } 933 934 if ((status = attributes->Remove(transaction, name)) < B_OK) 935 return status; 936 937 if (attributes->IsEmpty()) { 938 // remove attribute directory (don't fail if that can't be done) 939 if (remove_vnode(fVolume->FSVolume(), attributes->ID()) == B_OK) { 940 // update the inode, so that no one will ever doubt it's deleted :-) 941 attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 942 if (attributes->WriteBack(transaction) == B_OK) { 943 Attributes().SetTo(0, 0, 0); 944 WriteBack(transaction); 945 } else 946 unremove_vnode(fVolume->FSVolume(), attributes->ID()); 947 } 948 } 949 950 return status; 951 } 952 953 954 /*! Reads data from the specified attribute. 955 This is a high-level attribute function that understands attributes 956 in the small_data section as well as real attribute files. 957 */ 958 status_t 959 Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, 960 size_t *_length) 961 { 962 if (pos < 0) 963 pos = 0; 964 965 // search in the small_data section (which has to be locked first) 966 { 967 NodeGetter node(fVolume, this); 968 SimpleLocker locker(fSmallDataLock); 969 970 small_data *smallData = FindSmallData(node.Node(), name); 971 if (smallData != NULL) { 972 size_t length = *_length; 973 if (pos >= smallData->data_size) { 974 *_length = 0; 975 return B_OK; 976 } 977 if (length + pos > smallData->DataSize()) 978 length = smallData->DataSize() - pos; 979 980 memcpy(buffer, smallData->Data() + pos, length); 981 *_length = length; 982 return B_OK; 983 } 984 } 985 986 // search in the attribute directory 987 Inode *attribute; 988 status_t status = GetAttribute(name, &attribute); 989 if (status == B_OK) { 990 status = attribute->ReadAt(pos, (uint8 *)buffer, _length); 991 992 ReleaseAttribute(attribute); 993 } 994 995 RETURN_ERROR(status); 996 } 997 998 999 /*! Writes data to the specified attribute. 1000 This is a high-level attribute function that understands attributes 1001 in the small_data section as well as real attribute files. 1002 */ 1003 status_t 1004 Inode::WriteAttribute(Transaction &transaction, const char *name, int32 type, 1005 off_t pos, const uint8 *buffer, size_t *_length) 1006 { 1007 // needed to maintain the index 1008 uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL; 1009 size_t oldLength = 0; 1010 1011 // TODO: we actually depend on that the contents of "buffer" are constant. 1012 // If they get changed during the write (hey, user programs), we may mess 1013 // up our index trees! 1014 // TODO: for attribute files, we need to log the first 1015 // BPLUSTREE_MAX_KEY_LENGTH bytes of the data stream, or the same as above 1016 // might happen. 1017 1018 Index index(fVolume); 1019 index.SetTo(name); 1020 1021 Inode *attribute = NULL; 1022 status_t status = B_OK; 1023 1024 if (GetAttribute(name, &attribute) < B_OK) { 1025 // save the old attribute data 1026 NodeGetter node(fVolume, transaction, this); 1027 fSmallDataLock.Lock(); 1028 1029 small_data *smallData = FindSmallData(node.Node(), name); 1030 if (smallData != NULL) { 1031 oldLength = smallData->DataSize(); 1032 if (oldLength > 0) { 1033 if (oldLength > BPLUSTREE_MAX_KEY_LENGTH) 1034 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 1035 memcpy(oldData = oldBuffer, smallData->Data(), oldLength); 1036 } 1037 } 1038 fSmallDataLock.Unlock(); 1039 1040 // if the attribute doesn't exist yet (as a file), try to put it in the 1041 // small_data section first - if that fails (due to insufficent space), 1042 // create a real attribute file 1043 status = _AddSmallData(transaction, node, name, type, buffer, *_length); 1044 if (status == B_DEVICE_FULL) { 1045 if (smallData != NULL) { 1046 // remove the old attribute from the small data section - there 1047 // is no space left for the new data 1048 status = _RemoveSmallData(transaction, node, name); 1049 } else 1050 status = B_OK; 1051 1052 if (status == B_OK) 1053 status = CreateAttribute(transaction, name, type, &attribute); 1054 if (status < B_OK) 1055 RETURN_ERROR(status); 1056 } else if (status == B_OK) 1057 status = WriteBack(transaction); 1058 } 1059 1060 if (attribute != NULL) { 1061 if (attribute->Lock().LockWrite() == B_OK) { 1062 // Save the old attribute data (if this fails, oldLength will 1063 // reflect it) 1064 if (fVolume->CheckForLiveQuery(name) && attribute->Size() > 0) { 1065 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 1066 if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK) 1067 oldData = oldBuffer; 1068 } 1069 1070 // check if the data fits into the small_data section again 1071 NodeGetter node(fVolume, transaction, this); 1072 status = _AddSmallData(transaction, node, name, type, buffer, 1073 *_length); 1074 1075 if (status == B_OK) { 1076 // it does - remove its file 1077 attribute->Lock().UnlockWrite(); 1078 status = _RemoveAttribute(transaction, name, false, NULL); 1079 } else { 1080 // The attribute type might have been changed - we need to 1081 // adopt the new one 1082 attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type); 1083 status = attribute->WriteBack(transaction); 1084 attribute->Lock().UnlockWrite(); 1085 1086 if (status == B_OK) { 1087 status = attribute->WriteAt(transaction, pos, buffer, 1088 _length); 1089 } 1090 } 1091 } else 1092 status = B_ERROR; 1093 1094 ReleaseAttribute(attribute); 1095 } 1096 1097 // TODO: find a better way than this "pos" thing (the begin of the old key 1098 // must be copied to the start of the new one for a comparison) 1099 if (status == B_OK && pos == 0) { 1100 // index only the first BPLUSTREE_MAX_KEY_LENGTH bytes 1101 uint16 length = *_length; 1102 if (length > BPLUSTREE_MAX_KEY_LENGTH) 1103 length = BPLUSTREE_MAX_KEY_LENGTH; 1104 1105 // Update index. Note, Index::Update() may be called even if 1106 // initializing the index failed - it will just update the live 1107 // queries in this case 1108 if (pos < length || pos < oldLength) { 1109 index.Update(transaction, name, type, oldData, oldLength, buffer, 1110 length, this); 1111 } 1112 } 1113 return status; 1114 } 1115 1116 1117 /*! Removes the specified attribute from the inode. 1118 This is a high-level attribute function that understands attributes 1119 in the small_data section as well as real attribute files. 1120 */ 1121 status_t 1122 Inode::RemoveAttribute(Transaction &transaction, const char *name) 1123 { 1124 Index index(fVolume); 1125 bool hasIndex = index.SetTo(name) == B_OK; 1126 NodeGetter node(fVolume, this); 1127 1128 // update index for attributes in the small_data section 1129 { 1130 fSmallDataLock.Lock(); 1131 1132 small_data *smallData = FindSmallData(node.Node(), name); 1133 if (smallData != NULL) { 1134 uint32 length = smallData->DataSize(); 1135 if (length > BPLUSTREE_MAX_KEY_LENGTH) 1136 length = BPLUSTREE_MAX_KEY_LENGTH; 1137 index.Update(transaction, name, smallData->Type(), 1138 smallData->Data(), length, NULL, 0, this); 1139 } 1140 fSmallDataLock.Unlock(); 1141 } 1142 1143 status_t status = _RemoveSmallData(transaction, node, name); 1144 if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) { 1145 // remove the attribute file if it exists 1146 status = _RemoveAttribute(transaction, name, hasIndex, &index); 1147 } 1148 return status; 1149 } 1150 1151 1152 status_t 1153 Inode::GetAttribute(const char *name, Inode **_attribute) 1154 { 1155 // does this inode even have attributes? 1156 if (Attributes().IsZero()) 1157 return B_ENTRY_NOT_FOUND; 1158 1159 Vnode vnode(fVolume, Attributes()); 1160 Inode *attributes; 1161 if (vnode.Get(&attributes) < B_OK) { 1162 FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", 1163 name)); 1164 return B_ERROR; 1165 } 1166 1167 BPlusTree *tree; 1168 status_t status = attributes->GetTree(&tree); 1169 if (status == B_OK) { 1170 ino_t id; 1171 status = tree->Find((uint8 *)name, (uint16)strlen(name), &id); 1172 if (status == B_OK) { 1173 Vnode vnode(fVolume, id); 1174 Inode *inode; 1175 // Check if the attribute is really an attribute 1176 if (vnode.Get(&inode) < B_OK || !inode->IsAttribute()) 1177 return B_ERROR; 1178 1179 *_attribute = inode; 1180 vnode.Keep(); 1181 return B_OK; 1182 } 1183 } 1184 return status; 1185 } 1186 1187 1188 void 1189 Inode::ReleaseAttribute(Inode *attribute) 1190 { 1191 if (attribute == NULL) 1192 return; 1193 1194 put_vnode(fVolume->FSVolume(), attribute->ID()); 1195 } 1196 1197 1198 status_t 1199 Inode::CreateAttribute(Transaction &transaction, const char *name, uint32 type, 1200 Inode **attribute) 1201 { 1202 // do we need to create the attribute directory first? 1203 if (Attributes().IsZero()) { 1204 status_t status = Inode::Create(transaction, this, NULL, 1205 S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL); 1206 if (status < B_OK) 1207 RETURN_ERROR(status); 1208 } 1209 Vnode vnode(fVolume, Attributes()); 1210 Inode *attributes; 1211 if (vnode.Get(&attributes) < B_OK) 1212 return B_ERROR; 1213 1214 // Inode::Create() locks the inode for us 1215 return Inode::Create(transaction, attributes, name, 1216 S_ATTR | S_FILE | 0666, 0, type, NULL, NULL, attribute); 1217 } 1218 1219 1220 // #pragma mark - directory tree 1221 1222 1223 /*! Gives the caller direct access to the b+tree for a given directory. 1224 The tree is no longer created on demand, but when the inode is first 1225 created. That will report any potential errors upfront, saves locking, 1226 and should work as good (though a bit slower). 1227 */ 1228 status_t 1229 Inode::GetTree(BPlusTree **tree) 1230 { 1231 if (fTree) { 1232 *tree = fTree; 1233 return B_OK; 1234 } 1235 1236 RETURN_ERROR(B_BAD_VALUE); 1237 } 1238 1239 1240 bool 1241 Inode::IsEmpty() 1242 { 1243 BPlusTree *tree; 1244 status_t status = GetTree(&tree); 1245 if (status < B_OK) 1246 return status; 1247 1248 TreeIterator iterator(tree); 1249 1250 // index and attribute directories are really empty when they are 1251 // empty - directories for standard files always contain ".", and 1252 // "..", so we need to ignore those two 1253 1254 uint32 count = 0; 1255 char name[BPLUSTREE_MAX_KEY_LENGTH]; 1256 uint16 length; 1257 ino_t id; 1258 while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, 1259 &id) == B_OK) { 1260 if (Mode() & (S_ATTR_DIR | S_INDEX_DIR)) 1261 return false; 1262 1263 if (++count > 2 || (strcmp(".", name) && strcmp("..", name))) 1264 return false; 1265 } 1266 return true; 1267 } 1268 1269 1270 // #pragma mark - data stream 1271 1272 1273 /*! Finds the block_run where "pos" is located in the data_stream of 1274 the inode. 1275 If successful, "offset" will then be set to the file offset 1276 of the block_run returned; so "pos - offset" is for the block_run 1277 what "pos" is for the whole stream. 1278 The caller has to make sure that "pos" is inside the stream. 1279 */ 1280 status_t 1281 Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset) 1282 { 1283 data_stream *data = &Node().data; 1284 1285 // find matching block run 1286 1287 if (data->MaxDirectRange() > 0 && pos >= data->MaxDirectRange()) { 1288 if (data->MaxDoubleIndirectRange() > 0 1289 && pos >= data->MaxIndirectRange()) { 1290 // access to double indirect blocks 1291 1292 CachedBlock cached(fVolume); 1293 1294 off_t start = pos - data->MaxIndirectRange(); 1295 int32 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT 1296 + cached.BlockShift())) 1297 * (fVolume->BlockSize() / sizeof(block_run)); 1298 int32 directSize = NUM_ARRAY_BLOCKS << cached.BlockShift(); 1299 int32 index = start / indirectSize; 1300 int32 runsPerBlock = cached.BlockSize() / sizeof(block_run); 1301 1302 block_run *indirect = (block_run *)cached.SetTo( 1303 fVolume->ToBlock(data->double_indirect) + index / runsPerBlock); 1304 if (indirect == NULL) 1305 RETURN_ERROR(B_ERROR); 1306 1307 int32 current = (start % indirectSize) / directSize; 1308 1309 indirect = (block_run *)cached.SetTo( 1310 fVolume->ToBlock(indirect[index % runsPerBlock]) 1311 + current / runsPerBlock); 1312 if (indirect == NULL) 1313 RETURN_ERROR(B_ERROR); 1314 1315 run = indirect[current % runsPerBlock]; 1316 offset = data->MaxIndirectRange() + (index * indirectSize) 1317 + (current * directSize); 1318 } else { 1319 // access to indirect blocks 1320 1321 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1322 off_t runBlockEnd = data->MaxDirectRange(); 1323 1324 CachedBlock cached(fVolume); 1325 off_t block = fVolume->ToBlock(data->indirect); 1326 1327 for (int32 i = 0; i < data->indirect.Length(); i++) { 1328 block_run *indirect = (block_run *)cached.SetTo(block + i); 1329 if (indirect == NULL) 1330 RETURN_ERROR(B_IO_ERROR); 1331 1332 int32 current = -1; 1333 while (++current < runsPerBlock) { 1334 if (indirect[current].IsZero()) 1335 break; 1336 1337 runBlockEnd += indirect[current].Length() 1338 << cached.BlockShift(); 1339 if (runBlockEnd > pos) { 1340 run = indirect[current]; 1341 offset = runBlockEnd - (run.Length() 1342 << cached.BlockShift()); 1343 return fVolume->ValidateBlockRun(run); 1344 } 1345 } 1346 } 1347 RETURN_ERROR(B_ERROR); 1348 } 1349 } else { 1350 // access from direct blocks 1351 1352 off_t runBlockEnd = 0LL; 1353 int32 current = -1; 1354 1355 while (++current < NUM_DIRECT_BLOCKS) { 1356 if (data->direct[current].IsZero()) 1357 break; 1358 1359 runBlockEnd += data->direct[current].Length() 1360 << fVolume->BlockShift(); 1361 if (runBlockEnd > pos) { 1362 run = data->direct[current]; 1363 offset = runBlockEnd - (run.Length() << fVolume->BlockShift()); 1364 return fVolume->ValidateBlockRun(run); 1365 } 1366 } 1367 1368 return B_ENTRY_NOT_FOUND; 1369 } 1370 return fVolume->ValidateBlockRun(run); 1371 } 1372 1373 1374 status_t 1375 Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length) 1376 { 1377 size_t length = *_length; 1378 1379 // set/check boundaries for pos/length 1380 if (pos < 0) 1381 return B_BAD_VALUE; 1382 1383 ReadLocked locker(Lock()); 1384 1385 if (pos >= Size() || length == 0) { 1386 *_length = 0; 1387 return B_NO_ERROR; 1388 } 1389 1390 locker.Unlock(); 1391 1392 return file_cache_read(FileCache(), NULL, pos, buffer, _length); 1393 } 1394 1395 1396 status_t 1397 Inode::WriteAt(Transaction &transaction, off_t pos, const uint8 *buffer, 1398 size_t *_length) 1399 { 1400 WriteLocked locker(Lock()); 1401 if (locker.IsLocked() < B_OK) 1402 RETURN_ERROR(B_ERROR); 1403 1404 // update the last modification time in memory, it will be written 1405 // back to the inode, and the index when the file is closed 1406 // ToDo: should update the internal last modified time only at this point! 1407 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) 1408 << INODE_TIME_SHIFT); 1409 1410 // TODO: support INODE_LOGGED! 1411 1412 size_t length = *_length; 1413 bool changeSize = false; 1414 1415 // set/check boundaries for pos/length 1416 if (pos < 0) 1417 return B_BAD_VALUE; 1418 1419 if (pos + length > Size()) 1420 changeSize = true; 1421 1422 locker.Unlock(); 1423 1424 // the transaction doesn't have to be started already 1425 if (changeSize && !transaction.IsStarted()) 1426 transaction.Start(fVolume, BlockNumber()); 1427 1428 locker.Lock(); 1429 1430 if (pos + length > Size()) { 1431 // let's grow the data stream to the size needed 1432 status_t status = SetFileSize(transaction, pos + length); 1433 if (status < B_OK) { 1434 *_length = 0; 1435 RETURN_ERROR(status); 1436 } 1437 // TODO: In theory we would need to update the file size 1438 // index here as part of the current transaction - this might just 1439 // be a bit too expensive, but worth a try. 1440 1441 // we need to write back the inode here because it has to 1442 // go into this transaction (we cannot wait until the file 1443 // is closed) 1444 status = WriteBack(transaction); 1445 if (status < B_OK) 1446 return status; 1447 } 1448 1449 // If we don't want to write anything, we can now return (we may 1450 // just have changed the file size using the position parameter) 1451 if (length == 0) 1452 return B_OK; 1453 1454 locker.Unlock(); 1455 1456 return file_cache_write(FileCache(), NULL, pos, buffer, _length); 1457 } 1458 1459 1460 /*! Fills the gap between the old file size and the new file size 1461 with zeros. 1462 It's more or less a copy of Inode::WriteAt() but it can handle 1463 length differences of more than just 4 GB, and it never uses 1464 the log, even if the INODE_LOGGED flag is set. 1465 */ 1466 status_t 1467 Inode::FillGapWithZeros(off_t pos, off_t newSize) 1468 { 1469 while (pos < newSize) { 1470 size_t size; 1471 if (newSize > pos + 1024 * 1024 * 1024) 1472 size = 1024 * 1024 * 1024; 1473 else 1474 size = newSize - pos; 1475 1476 status_t status = file_cache_write(FileCache(), NULL, pos, NULL, &size); 1477 if (status < B_OK) 1478 return status; 1479 1480 pos += size; 1481 } 1482 1483 return B_OK; 1484 } 1485 1486 1487 /*! Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing 1488 the indirect and double indirect range uses this method. 1489 The allocated block_run is saved in "run" 1490 */ 1491 status_t 1492 Inode::_AllocateBlockArray(Transaction &transaction, block_run &run) 1493 { 1494 if (!run.IsZero()) 1495 return B_BAD_VALUE; 1496 1497 status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, 1498 run, NUM_ARRAY_BLOCKS); 1499 if (status < B_OK) 1500 return status; 1501 1502 // make sure those blocks are empty 1503 CachedBlock cached(fVolume); 1504 off_t block = fVolume->ToBlock(run); 1505 1506 for (int32 i = 0; i < run.Length(); i++) { 1507 block_run *runs = (block_run *)cached.SetToWritable(transaction, 1508 block + i, true); 1509 if (runs == NULL) 1510 return B_IO_ERROR; 1511 } 1512 return B_OK; 1513 } 1514 1515 1516 status_t 1517 Inode::_GrowStream(Transaction &transaction, off_t size) 1518 { 1519 data_stream *data = &Node().data; 1520 1521 // is the data stream already large enough to hold the new size? 1522 // (can be the case with preallocated blocks) 1523 if (size < data->MaxDirectRange() 1524 || size < data->MaxIndirectRange() 1525 || size < data->MaxDoubleIndirectRange()) { 1526 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1527 return B_OK; 1528 } 1529 1530 // how many bytes are still needed? (unused ranges are always zero) 1531 uint16 minimum = 1; 1532 off_t bytes; 1533 if (data->Size() < data->MaxDoubleIndirectRange()) { 1534 bytes = size - data->MaxDoubleIndirectRange(); 1535 // The double indirect range can only handle multiple of 1536 // NUM_ARRAY_BLOCKS 1537 minimum = NUM_ARRAY_BLOCKS; 1538 } else if (data->Size() < data->MaxIndirectRange()) 1539 bytes = size - data->MaxIndirectRange(); 1540 else if (data->Size() < data->MaxDirectRange()) 1541 bytes = size - data->MaxDirectRange(); 1542 else 1543 bytes = size - data->Size(); 1544 1545 // do we have enough free blocks on the disk? 1546 off_t blocksRequested = (bytes + fVolume->BlockSize() - 1) 1547 >> fVolume->BlockShift(); 1548 if (blocksRequested > fVolume->FreeBlocks()) 1549 return B_DEVICE_FULL; 1550 1551 off_t blocksNeeded = blocksRequested; 1552 // because of preallocations and partial allocations, the number of 1553 // blocks we need to allocate may be different from the one we request 1554 // from the block allocator 1555 1556 // Should we preallocate some blocks (currently, always 64k)? 1557 // Attributes, attribute directories, and long symlinks usually won't get 1558 // that big, and should stay close to the inode - preallocating could be 1559 // counterproductive. 1560 // Also, if free disk space is tight, we probably don't want to do this as 1561 // well. 1562 if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink() 1563 && blocksRequested < (65536 >> fVolume->BlockShift()) 1564 && fVolume->FreeBlocks() > 128) 1565 blocksRequested = 65536 >> fVolume->BlockShift(); 1566 1567 while (blocksNeeded > 0) { 1568 // the requested blocks do not need to be returned with a 1569 // single allocation, so we need to iterate until we have 1570 // enough blocks allocated 1571 block_run run; 1572 status_t status = fVolume->Allocate(transaction, this, blocksRequested, 1573 run, minimum); 1574 if (status < B_OK) 1575 return status; 1576 1577 // okay, we have the needed blocks, so just distribute them to the 1578 // different ranges of the stream (direct, indirect & double indirect) 1579 1580 // TODO: if anything goes wrong here, we probably want to free the 1581 // blocks that couldn't be distributed into the stream! 1582 1583 blocksNeeded -= run.Length(); 1584 // don't preallocate if the first allocation was already too small 1585 blocksRequested = blocksNeeded; 1586 if (minimum > 1) { 1587 // make sure that "blocks" is a multiple of minimum 1588 blocksRequested = (blocksRequested + minimum - 1) & ~(minimum - 1); 1589 } 1590 1591 // Direct block range 1592 1593 if (data->Size() <= data->MaxDirectRange()) { 1594 // let's try to put them into the direct block range 1595 int32 free = 0; 1596 for (; free < NUM_DIRECT_BLOCKS; free++) 1597 if (data->direct[free].IsZero()) 1598 break; 1599 1600 if (free < NUM_DIRECT_BLOCKS) { 1601 // can we merge the last allocated run with the new one? 1602 int32 last = free - 1; 1603 if (free > 0 && data->direct[last].MergeableWith(run)) { 1604 data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16( 1605 data->direct[last].Length() + run.Length()); 1606 } else 1607 data->direct[free] = run; 1608 1609 data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64( 1610 data->MaxDirectRange() 1611 + run.Length() * fVolume->BlockSize()); 1612 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 1613 ? data->max_direct_range : size); 1614 continue; 1615 } 1616 } 1617 1618 // Indirect block range 1619 1620 if (data->Size() <= data->MaxIndirectRange() 1621 || !data->MaxIndirectRange()) { 1622 CachedBlock cached(fVolume); 1623 block_run *runs = NULL; 1624 uint32 free = 0; 1625 off_t block; 1626 1627 // if there is no indirect block yet, create one 1628 if (data->indirect.IsZero()) { 1629 status = _AllocateBlockArray(transaction, data->indirect); 1630 if (status < B_OK) 1631 return status; 1632 1633 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1634 data->MaxDirectRange()); 1635 // insert the block_run in the first block 1636 runs = (block_run *)cached.SetTo(data->indirect); 1637 } else { 1638 uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run); 1639 block = fVolume->ToBlock(data->indirect); 1640 1641 // search first empty entry 1642 int32 i = 0; 1643 for (; i < data->indirect.Length(); i++) { 1644 if ((runs = (block_run *)cached.SetTo(block + i)) == NULL) 1645 return B_IO_ERROR; 1646 1647 for (free = 0; free < numberOfRuns; free++) 1648 if (runs[free].IsZero()) 1649 break; 1650 1651 if (free < numberOfRuns) 1652 break; 1653 } 1654 if (i == data->indirect.Length()) 1655 runs = NULL; 1656 } 1657 1658 if (runs != NULL) { 1659 // try to insert the run to the last one - note that this 1660 // doesn't take block borders into account, so it could be 1661 // further optimized 1662 cached.MakeWritable(transaction); 1663 1664 int32 last = free - 1; 1665 if (free > 0 && runs[last].MergeableWith(run)) { 1666 runs[last].length = HOST_ENDIAN_TO_BFS_INT16( 1667 runs[last].Length() + run.Length()); 1668 } else 1669 runs[free] = run; 1670 1671 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1672 data->MaxIndirectRange() 1673 + (run.Length() << fVolume->BlockShift())); 1674 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 1675 ? data->MaxIndirectRange() : size); 1676 continue; 1677 } 1678 } 1679 1680 // Double indirect block range 1681 1682 if (data->Size() <= data->MaxDoubleIndirectRange() 1683 || !data->max_double_indirect_range) { 1684 while ((run.Length() % NUM_ARRAY_BLOCKS) != 0) { 1685 // The number of allocated blocks isn't a multiple of 1686 // NUM_ARRAY_BLOCKS, so we have to change this. This can happen 1687 // the first time the stream grows into the double 1688 // indirect range. 1689 // First, free the remaining blocks that don't fit into a 1690 // multiple of NUM_ARRAY_BLOCKS 1691 int32 rest = run.Length() % NUM_ARRAY_BLOCKS; 1692 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest); 1693 1694 status = fVolume->Free(transaction, 1695 block_run::Run(run.AllocationGroup(), 1696 run.Start() + run.Length(), rest)); 1697 if (status < B_OK) 1698 return status; 1699 1700 blocksNeeded += rest; 1701 blocksRequested = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) 1702 & ~(NUM_ARRAY_BLOCKS - 1); 1703 minimum = NUM_ARRAY_BLOCKS; 1704 // we make sure here that we have at minimum 1705 // NUM_ARRAY_BLOCKS allocated, so if the allocation 1706 // succeeds, we don't run into an endless loop 1707 1708 // Are there any blocks left in the run? If not, allocate 1709 // a new one 1710 if (run.length == 0) 1711 continue; 1712 } 1713 1714 // if there is no double indirect block yet, create one 1715 if (data->double_indirect.IsZero()) { 1716 status = _AllocateBlockArray(transaction, 1717 data->double_indirect); 1718 if (status < B_OK) 1719 return status; 1720 1721 data->max_double_indirect_range = data->max_indirect_range; 1722 } 1723 1724 // calculate the index where to insert the new blocks 1725 1726 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1727 int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) 1728 << fVolume->BlockShift()) * runsPerBlock; 1729 int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift(); 1730 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1731 1732 off_t start = data->MaxDoubleIndirectRange() 1733 - data->MaxIndirectRange(); 1734 int32 indirectIndex = start / indirectSize; 1735 int32 index = start / directSize; 1736 1737 // distribute the blocks to the array and allocate 1738 // new array blocks when needed 1739 1740 CachedBlock cached(fVolume); 1741 CachedBlock cachedDirect(fVolume); 1742 block_run *array = NULL; 1743 uint32 runLength = run.Length(); 1744 1745 while (run.length != 0) { 1746 // get the indirect array block 1747 if (array == NULL) { 1748 array = (block_run *)cached.SetTo(fVolume->ToBlock( 1749 data->double_indirect) + indirectIndex / runsPerBlock); 1750 if (array == NULL) 1751 return B_IO_ERROR; 1752 } 1753 1754 do { 1755 // do we need a new array block? 1756 if (array[indirectIndex % runsPerBlock].IsZero()) { 1757 cached.MakeWritable(transaction); 1758 1759 status = _AllocateBlockArray(transaction, 1760 array[indirectIndex % runsPerBlock]); 1761 if (status < B_OK) 1762 return status; 1763 } 1764 1765 block_run *runs = (block_run *)cachedDirect.SetToWritable( 1766 transaction, fVolume->ToBlock(array[indirectIndex 1767 % runsPerBlock]) + index / runsPerBlock); 1768 if (runs == NULL) 1769 return B_IO_ERROR; 1770 1771 do { 1772 // insert the block_run into the array 1773 runs[index % runsPerBlock] = run; 1774 runs[index % runsPerBlock].length 1775 = HOST_ENDIAN_TO_BFS_INT16(NUM_ARRAY_BLOCKS); 1776 1777 // alter the remaining block_run 1778 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() 1779 + NUM_ARRAY_BLOCKS); 1780 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() 1781 - NUM_ARRAY_BLOCKS); 1782 } while ((++index % runsPerBlock) != 0 && run.length); 1783 } while ((index % runsPerArray) != 0 && run.length); 1784 1785 if (++indirectIndex % runsPerBlock == 0) { 1786 array = NULL; 1787 index = 0; 1788 } 1789 } 1790 1791 data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1792 data->MaxDoubleIndirectRange() 1793 + (runLength << fVolume->BlockShift())); 1794 data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64( 1795 data->max_double_indirect_range) : size; 1796 1797 continue; 1798 } 1799 1800 RETURN_ERROR(EFBIG); 1801 } 1802 // update the size of the data stream 1803 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1804 1805 return B_OK; 1806 } 1807 1808 1809 status_t 1810 Inode::_FreeStaticStreamArray(Transaction &transaction, int32 level, 1811 block_run run, off_t size, off_t offset, off_t &max) 1812 { 1813 int32 indirectSize = 0; 1814 if (level == 0) 1815 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift())) 1816 * (fVolume->BlockSize() / sizeof(block_run)); 1817 else if (level == 1) 1818 indirectSize = 4 << fVolume->BlockShift(); 1819 1820 off_t start; 1821 if (size > offset) 1822 start = size - offset; 1823 else 1824 start = 0; 1825 1826 int32 index = start / indirectSize; 1827 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1828 1829 CachedBlock cached(fVolume); 1830 off_t blockNumber = fVolume->ToBlock(run); 1831 1832 // set the file offset to the current block run 1833 offset += (off_t)index * indirectSize; 1834 1835 for (int32 i = index / runsPerBlock; i < run.Length(); i++) { 1836 block_run *array = (block_run *)cached.SetToWritable(transaction, 1837 blockNumber + i); 1838 if (array == NULL) 1839 RETURN_ERROR(B_ERROR); 1840 1841 for (index = index % runsPerBlock; index < runsPerBlock; index++) { 1842 if (array[index].IsZero()) { 1843 // we also want to break out of the outer loop 1844 i = run.Length(); 1845 break; 1846 } 1847 1848 status_t status = B_OK; 1849 if (level == 0) { 1850 status = _FreeStaticStreamArray(transaction, 1, array[index], 1851 size, offset, max); 1852 } else if (offset >= size) 1853 status = fVolume->Free(transaction, array[index]); 1854 else 1855 max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize); 1856 1857 if (status < B_OK) 1858 RETURN_ERROR(status); 1859 1860 if (offset >= size) 1861 array[index].SetTo(0, 0, 0); 1862 1863 offset += indirectSize; 1864 } 1865 index = 0; 1866 } 1867 return B_OK; 1868 } 1869 1870 1871 /*! Frees all block_runs in the array which come after the specified size. 1872 It also trims the last block_run that contain the size. 1873 "offset" and "max" are maintained until the last block_run that doesn't 1874 have to be freed - after this, the values won't be correct anymore, but 1875 will still assure correct function for all subsequent calls. 1876 "max" is considered to be in file system byte order. 1877 */ 1878 status_t 1879 Inode::_FreeStreamArray(Transaction &transaction, block_run *array, 1880 uint32 arrayLength, off_t size, off_t &offset, off_t &max) 1881 { 1882 PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n", 1883 arrayLength, size, offset, max)); 1884 1885 off_t newOffset = offset; 1886 uint32 i = 0; 1887 for (; i < arrayLength; i++, offset = newOffset) { 1888 if (array[i].IsZero()) 1889 break; 1890 1891 newOffset += (off_t)array[i].Length() << fVolume->BlockShift(); 1892 if (newOffset <= size) 1893 continue; 1894 1895 block_run run = array[i]; 1896 1897 // determine the block_run to be freed 1898 if (newOffset > size && offset < size) { 1899 // free partial block_run (and update the original block_run) 1900 run.start = HOST_ENDIAN_TO_BFS_INT16(array[i].Start() 1901 + ((size + fVolume->BlockSize() - 1 - offset) 1902 >> fVolume->BlockShift())); 1903 // round to next block 1904 array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start() 1905 - array[i].Start()); 1906 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() 1907 - array[i].Length()); 1908 1909 if (run.length == 0) 1910 continue; 1911 1912 // update maximum range 1913 max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length() 1914 << fVolume->BlockShift())); 1915 } else { 1916 // free the whole block_run 1917 array[i].SetTo(0, 0, 0); 1918 1919 if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset) 1920 max = HOST_ENDIAN_TO_BFS_INT64(offset); 1921 } 1922 1923 if (fVolume->Free(transaction, run) < B_OK) 1924 return B_IO_ERROR; 1925 } 1926 return B_OK; 1927 } 1928 1929 1930 status_t 1931 Inode::_ShrinkStream(Transaction &transaction, off_t size) 1932 { 1933 data_stream *data = &Node().data; 1934 status_t status; 1935 1936 if (data->MaxDoubleIndirectRange() > size) { 1937 off_t *maxDoubleIndirect = &data->max_double_indirect_range; 1938 // gcc 4 work-around: "error: cannot bind packed field 1939 // 'data->data_stream::max_double_indirect_range' to 'off_t&'" 1940 status = _FreeStaticStreamArray(transaction, 0, data->double_indirect, 1941 size, data->MaxIndirectRange(), *maxDoubleIndirect); 1942 if (status < B_OK) 1943 return status; 1944 1945 if (size <= data->MaxIndirectRange()) { 1946 fVolume->Free(transaction, data->double_indirect); 1947 data->double_indirect.SetTo(0, 0, 0); 1948 data->max_double_indirect_range = 0; 1949 } 1950 } 1951 1952 if (data->MaxIndirectRange() > size) { 1953 CachedBlock cached(fVolume); 1954 off_t block = fVolume->ToBlock(data->indirect); 1955 off_t offset = data->MaxDirectRange(); 1956 1957 for (int32 i = 0; i < data->indirect.Length(); i++) { 1958 block_run *array = (block_run *)cached.SetToWritable(transaction, 1959 block + i); 1960 if (array == NULL) 1961 break; 1962 1963 off_t *maxIndirect = &data->max_indirect_range; 1964 // gcc 4 work-around: "error: cannot bind packed field 1965 // 'data->data_stream::max_indirect_range' to 'off_t&'" 1966 if (_FreeStreamArray(transaction, array, fVolume->BlockSize() 1967 / sizeof(block_run), size, offset, *maxIndirect) != B_OK) 1968 return B_IO_ERROR; 1969 } 1970 if (data->max_direct_range == data->max_indirect_range) { 1971 fVolume->Free(transaction, data->indirect); 1972 data->indirect.SetTo(0, 0, 0); 1973 data->max_indirect_range = 0; 1974 } 1975 } 1976 1977 if (data->MaxDirectRange() > size) { 1978 off_t offset = 0; 1979 off_t *maxDirect = &data->max_direct_range; 1980 // gcc 4 work-around: "error: cannot bind packed field 1981 // 'data->data_stream::max_direct_range' to 'off_t&'" 1982 status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, 1983 size, offset, *maxDirect); 1984 if (status < B_OK) 1985 return status; 1986 } 1987 1988 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1989 return B_OK; 1990 } 1991 1992 1993 status_t 1994 Inode::SetFileSize(Transaction &transaction, off_t size) 1995 { 1996 if (size < 0) 1997 return B_BAD_VALUE; 1998 1999 off_t oldSize = Size(); 2000 2001 if (size == oldSize) 2002 return B_OK; 2003 2004 T(Resize(this, oldSize, size, false)); 2005 2006 // should the data stream grow or shrink? 2007 status_t status; 2008 if (size > oldSize) { 2009 status = _GrowStream(transaction, size); 2010 if (status < B_OK) { 2011 // if the growing of the stream fails, the whole operation 2012 // fails, so we should shrink the stream to its former size 2013 _ShrinkStream(transaction, oldSize); 2014 } 2015 } else 2016 status = _ShrinkStream(transaction, size); 2017 2018 if (status < B_OK) 2019 return status; 2020 2021 file_cache_set_size(FileCache(), size); 2022 file_map_set_size(Map(), size); 2023 2024 return WriteBack(transaction); 2025 } 2026 2027 2028 status_t 2029 Inode::Append(Transaction &transaction, off_t bytes) 2030 { 2031 return SetFileSize(transaction, Size() + bytes); 2032 } 2033 2034 2035 /*! Checks wether or not this inode's data stream needs to be trimmed 2036 because of an earlier preallocation. 2037 Returns true if there are any blocks to be trimmed. 2038 */ 2039 bool 2040 Inode::NeedsTrimming() 2041 { 2042 // We never trim preallocated index blocks to make them grow as smooth as 2043 // possible. There are only few indices anyway, so this doesn't hurt. 2044 // Also, if an inode is already in deleted state, we don't bother trimming 2045 // it. 2046 if (IsIndex() || IsDeleted()) 2047 return false; 2048 2049 off_t roundedSize = (Size() + fVolume->BlockSize() - 1) 2050 & ~(fVolume->BlockSize() - 1); 2051 2052 return Node().data.MaxDirectRange() > roundedSize 2053 || Node().data.MaxIndirectRange() > roundedSize 2054 || Node().data.MaxDoubleIndirectRange() > roundedSize; 2055 } 2056 2057 2058 status_t 2059 Inode::TrimPreallocation(Transaction &transaction) 2060 { 2061 T(Resize(this, max_c(Node().data.MaxDirectRange(), 2062 Node().data.MaxIndirectRange()), Size(), true)); 2063 2064 status_t status = _ShrinkStream(transaction, Size()); 2065 if (status < B_OK) 2066 return status; 2067 2068 return WriteBack(transaction); 2069 } 2070 2071 2072 //! Frees the file's data stream and removes all attributes 2073 status_t 2074 Inode::Free(Transaction &transaction) 2075 { 2076 FUNCTION(); 2077 2078 // Perhaps there should be an implementation of Inode::ShrinkStream() that 2079 // just frees the data_stream, but doesn't change the inode (since it is 2080 // freed anyway) - that would make an undelete command possible 2081 status_t status = SetFileSize(transaction, 0); 2082 if (status < B_OK) 2083 return status; 2084 2085 // Free all attributes, and remove their indices 2086 { 2087 // We have to limit the scope of AttributeIterator, so that its 2088 // destructor is not called after the inode is deleted 2089 AttributeIterator iterator(this); 2090 2091 char name[B_FILE_NAME_LENGTH]; 2092 uint32 type; 2093 size_t length; 2094 ino_t id; 2095 while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK) { 2096 RemoveAttribute(transaction, name); 2097 } 2098 } 2099 2100 if (WriteBack(transaction) < B_OK) 2101 return B_IO_ERROR; 2102 2103 return fVolume->Free(transaction, BlockRun()); 2104 } 2105 2106 2107 //! Synchronizes (writes back to disk) the file stream of the inode. 2108 status_t 2109 Inode::Sync() 2110 { 2111 if (FileCache()) 2112 return file_cache_sync(FileCache()); 2113 2114 // We may also want to flush the attribute's data stream to 2115 // disk here... (do we?) 2116 2117 if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0) 2118 return B_OK; 2119 2120 data_stream *data = &Node().data; 2121 status_t status = B_OK; 2122 2123 // flush direct range 2124 2125 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 2126 if (data->direct[i].IsZero()) 2127 return B_OK; 2128 2129 status = block_cache_sync_etc(fVolume->BlockCache(), 2130 fVolume->ToBlock(data->direct[i]), data->direct[i].Length()); 2131 if (status != B_OK) 2132 return status; 2133 } 2134 2135 // flush indirect range 2136 2137 if (data->max_indirect_range == 0) 2138 return B_OK; 2139 2140 CachedBlock cached(fVolume); 2141 off_t block = fVolume->ToBlock(data->indirect); 2142 int32 count = fVolume->BlockSize() / sizeof(block_run); 2143 2144 for (int32 j = 0; j < data->indirect.Length(); j++) { 2145 block_run *runs = (block_run *)cached.SetTo(block + j); 2146 if (runs == NULL) 2147 break; 2148 2149 for (int32 i = 0; i < count; i++) { 2150 if (runs[i].IsZero()) 2151 return B_OK; 2152 2153 status = block_cache_sync_etc(fVolume->BlockCache(), 2154 fVolume->ToBlock(runs[i]), runs[i].Length()); 2155 if (status != B_OK) 2156 return status; 2157 } 2158 } 2159 2160 // flush double indirect range 2161 2162 if (data->max_double_indirect_range == 0) 2163 return B_OK; 2164 2165 off_t indirectBlock = fVolume->ToBlock(data->double_indirect); 2166 2167 for (int32 l = 0; l < data->double_indirect.Length(); l++) { 2168 block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l); 2169 if (indirectRuns == NULL) 2170 return B_FILE_ERROR; 2171 2172 CachedBlock directCached(fVolume); 2173 2174 for (int32 k = 0; k < count; k++) { 2175 if (indirectRuns[k].IsZero()) 2176 return B_OK; 2177 2178 block = fVolume->ToBlock(indirectRuns[k]); 2179 for (int32 j = 0; j < indirectRuns[k].Length(); j++) { 2180 block_run *runs = (block_run *)directCached.SetTo(block + j); 2181 if (runs == NULL) 2182 return B_FILE_ERROR; 2183 2184 for (int32 i = 0; i < count; i++) { 2185 if (runs[i].IsZero()) 2186 return B_OK; 2187 2188 // TODO: combine single block_runs to bigger ones when 2189 // they are adjacent 2190 status = block_cache_sync_etc(fVolume->BlockCache(), 2191 fVolume->ToBlock(runs[i]), runs[i].Length()); 2192 if (status != B_OK) 2193 return status; 2194 } 2195 } 2196 } 2197 } 2198 return B_OK; 2199 } 2200 2201 2202 // #pragma mark - creation/deletion 2203 2204 2205 status_t 2206 Inode::Remove(Transaction &transaction, const char *name, ino_t *_id, 2207 bool isDirectory) 2208 { 2209 BPlusTree *tree; 2210 if (GetTree(&tree) != B_OK) 2211 RETURN_ERROR(B_BAD_VALUE); 2212 2213 RecursiveLocker locker(fVolume->Lock()); 2214 2215 // does the file even exist? 2216 off_t id; 2217 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK) 2218 return B_ENTRY_NOT_FOUND; 2219 2220 if (_id) 2221 *_id = id; 2222 2223 Vnode vnode(fVolume, id); 2224 Inode *inode; 2225 status_t status = vnode.Get(&inode); 2226 if (status < B_OK) { 2227 REPORT_ERROR(status); 2228 return B_ENTRY_NOT_FOUND; 2229 } 2230 2231 T(Remove(inode, name)); 2232 2233 // Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR 2234 // bit is set for indices in BFS, not for attribute directories) - but you 2235 // should really be able to do whatever you want with your indices 2236 // without having to remove all files first :) 2237 if (!inode->IsIndex()) { 2238 // if it's not of the correct type, don't delete it! 2239 if (inode->IsContainer() != isDirectory) 2240 return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY; 2241 2242 // only delete empty directories 2243 if (isDirectory && !inode->IsEmpty()) 2244 return B_DIRECTORY_NOT_EMPTY; 2245 } 2246 2247 // remove_vnode() allows the inode to be accessed until the last put_vnode() 2248 status = remove_vnode(fVolume->FSVolume(), id); 2249 if (status != B_OK) 2250 return status; 2251 2252 if (tree->Remove(transaction, name, id) < B_OK) { 2253 unremove_vnode(fVolume->FSVolume(), id); 2254 RETURN_ERROR(B_ERROR); 2255 } 2256 2257 #ifdef DEBUG 2258 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) == B_OK) { 2259 DIE(("deleted entry still there")); 2260 } 2261 #endif 2262 2263 // update the inode, so that no one will ever doubt it's deleted :-) 2264 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 2265 inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 2266 2267 // In balance to the Inode::Create() method, the main indices 2268 // are updated here (name, size, & last_modified) 2269 2270 Index index(fVolume); 2271 if (inode->IsRegularNode()) { 2272 index.RemoveName(transaction, name, inode); 2273 // If removing from the index fails, it is not regarded as a 2274 // fatal error and will not be reported back! 2275 // Deleted inodes won't be visible in queries anyway. 2276 } 2277 2278 if (inode->IsFile() || inode->IsSymLink()) { 2279 if (inode->IsFile()) 2280 index.RemoveSize(transaction, inode); 2281 index.RemoveLastModified(transaction, inode); 2282 } 2283 2284 return inode->WriteBack(transaction); 2285 } 2286 2287 2288 /*! Creates the inode with the specified parent directory, and automatically 2289 adds the created inode to that parent directory. If an attribute directory 2290 is created, it will also automatically be added to the parent inode as 2291 such. However, the indices root node, and the regular root node won't be 2292 added to the super block. 2293 It will also create the initial B+tree for the inode if it's a directory 2294 of any kind. 2295 If the "_id" or "_inode" variable is given and non-NULL to store the 2296 inode's ID, the inode stays locked - you have to call put_vnode() if you 2297 don't use it anymore. 2298 If the node already exists, this method will fail if O_EXCL is set, or it's 2299 a directory or a symlink. Otherwise, it will just be returned. If O_TRUNC 2300 has been specified, the file will also be truncated. 2301 */ 2302 status_t 2303 Inode::Create(Transaction &transaction, Inode *parent, const char *name, 2304 int32 mode, int openMode, uint32 type, bool *_created, ino_t *_id, 2305 Inode **_inode) 2306 { 2307 FUNCTION_START(("name = %s, mode = %ld\n", name, mode)); 2308 2309 block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0); 2310 Volume *volume = transaction.GetVolume(); 2311 BPlusTree *tree = NULL; 2312 2313 if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) { 2314 // check if the file already exists in the directory 2315 if (parent->GetTree(&tree) != B_OK) 2316 RETURN_ERROR(B_BAD_VALUE); 2317 } 2318 2319 WriteLocked locker(parent != NULL ? &parent->Lock() : NULL); 2320 // the parent directory is locked during the whole inode creation 2321 2322 if (tree != NULL) { 2323 // Does the file already exist? 2324 off_t offset; 2325 if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) { 2326 // Return if the file should be a directory/link or opened in 2327 // exclusive mode 2328 if (S_ISDIR(mode) || S_ISLNK(mode) || openMode & O_EXCL) 2329 return B_FILE_EXISTS; 2330 2331 Vnode vnode(volume, offset); 2332 Inode *inode; 2333 status_t status = vnode.Get(&inode); 2334 if (status < B_OK) { 2335 REPORT_ERROR(status); 2336 return B_ENTRY_NOT_FOUND; 2337 } 2338 2339 // if it's a directory, bail out! 2340 if (inode->IsDirectory()) 2341 return B_IS_A_DIRECTORY; 2342 2343 // we want to open the file, so we should have the rights to do so 2344 if (inode->CheckPermissions(openModeToAccess(openMode)) != B_OK) 2345 return B_NOT_ALLOWED; 2346 2347 if (openMode & O_TRUNC) { 2348 // we need write access in order to truncate the file 2349 status = inode->CheckPermissions(W_OK); 2350 if (status != B_OK) 2351 return status; 2352 2353 // truncate the existing file 2354 WriteLocked locked(inode->Lock()); 2355 2356 status_t status = inode->SetFileSize(transaction, 0); 2357 if (status >= B_OK) 2358 status = inode->WriteBack(transaction); 2359 2360 if (status < B_OK) 2361 return status; 2362 } 2363 2364 if (_created) 2365 *_created = false; 2366 if (_id) 2367 *_id = inode->ID(); 2368 if (_inode) 2369 *_inode = inode; 2370 2371 // Only keep the vnode in memory if the _id or _inode pointer is 2372 // provided 2373 if (_id != NULL || _inode != NULL) 2374 vnode.Keep(); 2375 2376 return B_OK; 2377 } 2378 } else if (parent && (mode & S_ATTR_DIR) == 0) 2379 return B_BAD_VALUE; 2380 2381 status_t status; 2382 2383 // do we have the power to create new files at all? 2384 if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK) 2385 RETURN_ERROR(status); 2386 2387 // allocate space for the new inode 2388 InodeAllocator allocator(transaction); 2389 block_run run; 2390 Inode *inode; 2391 status = allocator.New(&parentRun, mode, run, &inode); 2392 if (status < B_OK) 2393 return status; 2394 2395 T(Create(inode, parent, name, mode, openMode, type)); 2396 2397 // Initialize the parts of the bfs_inode structure that 2398 // InodeAllocator::New() hasn't touched yet 2399 2400 bfs_inode *node = &inode->Node(); 2401 2402 if (parent == NULL) { 2403 // we set the parent to itself in this case 2404 // (only happens for the root and indices node) 2405 node->parent = run; 2406 } else 2407 node->parent = parentRun; 2408 2409 node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid()); 2410 node->gid = HOST_ENDIAN_TO_BFS_INT32(parent 2411 ? parent->Node().GroupID() : getegid()); 2412 // the group ID is inherited from the parent, if available 2413 2414 node->type = HOST_ENDIAN_TO_BFS_INT32(type); 2415 2416 inode->WriteBack(transaction); 2417 // make sure the initialized node is available to others 2418 2419 // only add the name to regular files, directories, or symlinks 2420 // don't add it to attributes, or indices 2421 if (tree && inode->IsRegularNode() 2422 && inode->SetName(transaction, name) < B_OK) 2423 return B_ERROR; 2424 2425 // Initialize b+tree if it's a directory (and add "." & ".." if it's 2426 // a standard directory for files - not for attributes or indices) 2427 if (inode->IsContainer()) { 2428 status = allocator.CreateTree(); 2429 if (status < B_OK) 2430 return status; 2431 } 2432 2433 // Add a link to the inode from the parent, depending on its type 2434 // (the vnode is not published yet, so it is safe to make the inode 2435 // accessable to the file system here) 2436 if (tree) { 2437 status = tree->Insert(transaction, name, inode->ID()); 2438 } else if (parent && (mode & S_ATTR_DIR) != 0) { 2439 parent->Attributes() = run; 2440 status = parent->WriteBack(transaction); 2441 } 2442 2443 // Note, we only care if the inode could be made accessable for the 2444 // two cases above; the root node or the indices root node must 2445 // handle this case on their own (or other cases where "parent" is 2446 // NULL) 2447 if (status < B_OK) 2448 RETURN_ERROR(status); 2449 2450 // Update the main indices (name, size & last_modified) 2451 // (live queries might want to access us after this) 2452 2453 Index index(volume); 2454 if (inode->IsRegularNode() && name != NULL) { 2455 // the name index only contains regular files 2456 // (but not the root node where name == NULL) 2457 status = index.InsertName(transaction, name, inode); 2458 if (status < B_OK && status != B_BAD_INDEX) { 2459 // We have to remove the node from the parent at this point, 2460 // because the InodeAllocator destructor can't handle this 2461 // case (and if it fails, we can't do anything about it...) 2462 if (tree) 2463 tree->Remove(transaction, name, inode->ID()); 2464 else if (parent != NULL && (mode & S_ATTR_DIR) != 0) 2465 parent->Node().attributes.SetTo(0, 0, 0); 2466 2467 RETURN_ERROR(status); 2468 } 2469 } 2470 2471 inode->UpdateOldLastModified(); 2472 2473 // The "size" & "last_modified" indices don't contain directories 2474 if (inode->IsFile() || inode->IsSymLink()) { 2475 // if adding to these indices fails, the inode creation will not be 2476 // harmed; they are considered less important than the "name" index 2477 if (inode->IsFile()) 2478 index.InsertSize(transaction, inode); 2479 index.InsertLastModified(transaction, inode); 2480 } 2481 2482 // Everything worked well until this point, we have a fully 2483 // initialized inode, and we want to keep it 2484 allocator.Keep(); 2485 2486 if (inode->IsFile() || inode->IsAttribute()) { 2487 inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(), 2488 inode->Size())); 2489 inode->SetMap(file_map_create(volume->ID(), inode->ID(), 2490 inode->Size())); 2491 } 2492 2493 if (_created) 2494 *_created = true; 2495 if (_id != NULL) 2496 *_id = inode->ID(); 2497 if (_inode != NULL) 2498 *_inode = inode; 2499 2500 // if either _id or _inode is passed, we will keep the inode locked 2501 if (_id == NULL && _inode == NULL) 2502 put_vnode(volume->FSVolume(), inode->ID()); 2503 2504 return B_OK; 2505 } 2506 2507 2508 // #pragma mark - AttributeIterator 2509 2510 2511 AttributeIterator::AttributeIterator(Inode *inode) 2512 : 2513 fCurrentSmallData(0), 2514 fInode(inode), 2515 fAttributes(NULL), 2516 fIterator(NULL), 2517 fBuffer(NULL) 2518 { 2519 inode->_AddIterator(this); 2520 } 2521 2522 2523 AttributeIterator::~AttributeIterator() 2524 { 2525 if (fAttributes) 2526 put_vnode(fAttributes->GetVolume()->FSVolume(), fAttributes->ID()); 2527 2528 delete fIterator; 2529 fInode->_RemoveIterator(this); 2530 } 2531 2532 2533 status_t 2534 AttributeIterator::Rewind() 2535 { 2536 fCurrentSmallData = 0; 2537 2538 if (fIterator != NULL) 2539 fIterator->Rewind(); 2540 2541 return B_OK; 2542 } 2543 2544 2545 status_t 2546 AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, 2547 ino_t *_id) 2548 { 2549 // read attributes out of the small data section 2550 2551 if (fCurrentSmallData >= 0) { 2552 NodeGetter nodeGetter(fInode->GetVolume(), fInode); 2553 const bfs_inode *node = nodeGetter.Node(); 2554 const small_data *item = ((bfs_inode *)node)->SmallDataStart(); 2555 2556 fInode->SmallDataLock().Lock(); 2557 2558 int32 i = 0; 2559 for (;;item = item->Next()) { 2560 if (item->IsLast(node)) 2561 break; 2562 2563 if (item->NameSize() == FILE_NAME_NAME_LENGTH 2564 && *item->Name() == FILE_NAME_NAME) 2565 continue; 2566 2567 if (i++ == fCurrentSmallData) 2568 break; 2569 } 2570 2571 if (!item->IsLast(node)) { 2572 strncpy(name, item->Name(), B_FILE_NAME_LENGTH); 2573 *_type = item->Type(); 2574 *_length = item->NameSize(); 2575 *_id = (ino_t)fCurrentSmallData; 2576 2577 fCurrentSmallData = i; 2578 } 2579 else { 2580 // stop traversing the small_data section 2581 fCurrentSmallData = -1; 2582 } 2583 2584 fInode->SmallDataLock().Unlock(); 2585 2586 if (fCurrentSmallData != -1) 2587 return B_OK; 2588 } 2589 2590 // read attributes out of the attribute directory 2591 2592 if (fInode->Attributes().IsZero()) 2593 return B_ENTRY_NOT_FOUND; 2594 2595 Volume *volume = fInode->GetVolume(); 2596 2597 // if you haven't yet access to the attributes directory, get it 2598 if (fAttributes == NULL) { 2599 if (get_vnode(volume->FSVolume(), volume->ToVnode(fInode->Attributes()), 2600 (void **)&fAttributes) != B_OK) { 2601 FATAL(("get_vnode() failed in AttributeIterator::GetNext(ino_t" 2602 " = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2603 return B_ENTRY_NOT_FOUND; 2604 } 2605 2606 BPlusTree *tree; 2607 if (fAttributes->GetTree(&tree) < B_OK 2608 || (fIterator = new TreeIterator(tree)) == NULL) { 2609 FATAL(("could not get tree in AttributeIterator::GetNext(ino_t" 2610 " = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2611 return B_ENTRY_NOT_FOUND; 2612 } 2613 } 2614 2615 uint16 length; 2616 ino_t id; 2617 status_t status = fIterator->GetNextEntry(name, &length, 2618 B_FILE_NAME_LENGTH, &id); 2619 if (status < B_OK) 2620 return status; 2621 2622 Vnode vnode(volume,id); 2623 Inode *attribute; 2624 if ((status = vnode.Get(&attribute)) == B_OK) { 2625 *_type = attribute->Type(); 2626 *_length = length; 2627 *_id = id; 2628 } 2629 2630 return status; 2631 } 2632 2633 2634 void 2635 AttributeIterator::Update(uint16 index, int8 change) 2636 { 2637 // fCurrentSmallData points already to the next item 2638 if (index < fCurrentSmallData) 2639 fCurrentSmallData += change; 2640 } 2641 2642