1 /* 2 * Copyright 2001-2007, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 //! inode access functions 7 8 9 #include "Debug.h" 10 #include "Inode.h" 11 #include "BPlusTree.h" 12 #include "Index.h" 13 14 15 class InodeAllocator { 16 public: 17 InodeAllocator(Transaction &transaction); 18 ~InodeAllocator(); 19 20 status_t New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode); 21 status_t CreateTree(); 22 status_t Keep(); 23 24 private: 25 Transaction *fTransaction; 26 block_run fRun; 27 Inode *fInode; 28 }; 29 30 31 InodeAllocator::InodeAllocator(Transaction &transaction) 32 : 33 fTransaction(&transaction), 34 fInode(NULL) 35 { 36 } 37 38 39 InodeAllocator::~InodeAllocator() 40 { 41 if (fTransaction != NULL) { 42 Volume *volume = fTransaction->GetVolume(); 43 44 if (fInode != NULL) { 45 fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 46 // this unblocks any pending bfs_read_vnode() calls 47 fInode->Free(*fTransaction); 48 remove_vnode(volume->ID(), fInode->ID()); 49 } else 50 volume->Free(*fTransaction, fRun); 51 } 52 53 delete fInode; 54 } 55 56 57 status_t 58 InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode) 59 { 60 Volume *volume = fTransaction->GetVolume(); 61 62 status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode, fRun); 63 if (status < B_OK) { 64 // don't free the space in the destructor, because 65 // the allocation failed 66 fTransaction = NULL; 67 RETURN_ERROR(status); 68 } 69 70 run = fRun; 71 fInode = new Inode(volume, *fTransaction, volume->ToVnode(run), mode, run); 72 if (fInode == NULL) 73 RETURN_ERROR(B_NO_MEMORY); 74 75 if (volume->ID() >= 0) { 76 status = new_vnode(volume->ID(), fInode->ID(), fInode); 77 if (status < B_OK) { 78 delete fInode; 79 fInode = NULL; 80 RETURN_ERROR(status); 81 } 82 } 83 84 *_inode = fInode; 85 return B_OK; 86 } 87 88 89 status_t 90 InodeAllocator::CreateTree() 91 { 92 Volume *volume = fTransaction->GetVolume(); 93 94 // force S_STR_INDEX to be set, if no type is set 95 if ((fInode->Mode() & S_INDEX_TYPES) == 0) 96 fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX); 97 98 BPlusTree *tree = fInode->fTree = new BPlusTree(*fTransaction, fInode); 99 if (tree == NULL || tree->InitCheck() < B_OK) 100 return B_ERROR; 101 102 if (fInode->IsRegularNode()) { 103 if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK 104 || tree->Insert(*fTransaction, "..", volume->ToVnode(fInode->Parent())) < B_OK) 105 return B_ERROR; 106 } 107 return B_OK; 108 } 109 110 111 status_t 112 InodeAllocator::Keep() 113 { 114 ASSERT(fInode != NULL && fTransaction != NULL); 115 Volume *volume = fTransaction->GetVolume(); 116 117 status_t status = fInode->WriteBack(*fTransaction); 118 if (status < B_OK) { 119 FATAL(("writing new inode %Ld failed!\n", fInode->ID())); 120 return status; 121 } 122 123 if (!fInode->IsSymLink() && volume->ID() >= 0) 124 status = publish_vnode(volume->ID(), fInode->ID(), fInode); 125 126 fTransaction = NULL; 127 fInode = NULL; 128 129 return status; 130 } 131 132 133 // #pragma mark - 134 135 136 status_t 137 bfs_inode::InitCheck(Volume *volume) 138 { 139 if (Magic1() != INODE_MAGIC1 140 || !(Flags() & INODE_IN_USE) 141 || inode_num.Length() != 1 142 // matches inode size? 143 || (uint32)InodeSize() != volume->InodeSize() 144 // parent resides on disk? 145 || parent.AllocationGroup() > int32(volume->AllocationGroups()) 146 || parent.AllocationGroup() < 0 147 || parent.Start() > (1L << volume->AllocationGroupShift()) 148 || parent.Length() != 1 149 // attributes, too? 150 || attributes.AllocationGroup() > int32(volume->AllocationGroups()) 151 || attributes.AllocationGroup() < 0 152 || attributes.Start() > (1L << volume->AllocationGroupShift())) 153 RETURN_ERROR(B_BAD_DATA); 154 155 if (Flags() & INODE_DELETED) 156 return B_NOT_ALLOWED; 157 158 // ToDo: Add some tests to check the integrity of the other stuff here, 159 // especially for the data_stream! 160 161 return B_OK; 162 } 163 164 165 // #pragma mark - Inode 166 167 168 Inode::Inode(Volume *volume, ino_t id) 169 : 170 fVolume(volume), 171 fID(id), 172 fTree(NULL), 173 fAttributes(NULL), 174 fCache(NULL) 175 { 176 PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume, id, this)); 177 178 NodeGetter node(volume, this); 179 memcpy(&fNode, node.Node(), sizeof(bfs_inode)); 180 181 char lockName[B_OS_NAME_LENGTH]; 182 snprintf(lockName, sizeof(lockName), "bfs inode %d.%d", 183 (int)BlockRun().AllocationGroup(), BlockRun().Start()); 184 fLock.Initialize(lockName); 185 186 // these two will help to maintain the indices 187 fOldSize = Size(); 188 fOldLastModified = LastModified(); 189 190 if (IsContainer()) 191 fTree = new BPlusTree(this); 192 if (IsFile() || IsAttribute()) 193 SetFileCache(file_cache_create(fVolume->ID(), ID(), Size(), fVolume->Device())); 194 } 195 196 197 Inode::Inode(Volume *volume, Transaction &transaction, ino_t id, mode_t mode, 198 block_run &run) 199 : 200 fVolume(volume), 201 fID(id), 202 fTree(NULL), 203 fAttributes(NULL), 204 fCache(NULL) 205 { 206 PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n", 207 volume, &transaction, id, this)); 208 209 char lockName[B_OS_NAME_LENGTH]; 210 snprintf(lockName, sizeof(lockName), "bfs inode+%d.%d", 211 (int)run.AllocationGroup(), run.Start()); 212 fLock.Initialize(lockName); 213 214 NodeGetter node(volume, transaction, this, true); 215 memset(&fNode, 0, sizeof(bfs_inode)); 216 217 // Initialize the bfs_inode structure -- it's not written back to disk 218 // here, because it will be done so already when the inode could be 219 // created completely. 220 221 Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1); 222 Node().inode_num = run; 223 Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode); 224 Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 225 226 Node().create_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) << INODE_TIME_SHIFT); 227 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64(Node().create_time 228 | (volume->GetUniqueID() & INODE_TIME_MASK)); 229 // we use Volume::GetUniqueID() to avoid having too many duplicates in the 230 // last_modified index 231 232 Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize()); 233 234 // these two will help to maintain the indices 235 fOldSize = Size(); 236 fOldLastModified = LastModified(); 237 } 238 239 240 Inode::~Inode() 241 { 242 PRINT(("Inode::~Inode() @ %p\n", this)); 243 244 file_cache_delete(FileCache()); 245 delete fTree; 246 } 247 248 249 status_t 250 Inode::InitCheck(bool checkNode) 251 { 252 // test inode magic and flags 253 if (checkNode) { 254 status_t status = Node().InitCheck(fVolume); 255 if (status == B_BUSY) 256 return B_BUSY; 257 258 if (status < B_OK) { 259 FATAL(("inode at block %Ld corrupt!\n", BlockNumber())); 260 RETURN_ERROR(B_BAD_DATA); 261 } 262 } 263 264 if (IsContainer()) { 265 // inodes that have a 266 if (fTree == NULL) 267 RETURN_ERROR(B_NO_MEMORY); 268 269 status_t status = fTree->InitCheck(); 270 if (status < B_OK) { 271 FATAL(("inode tree at block %Ld corrupt!\n", BlockNumber())); 272 RETURN_ERROR(B_BAD_DATA); 273 } 274 } 275 276 // it's more important to know that the inode is corrupt 277 // so we check for the lock not until here 278 return fLock.InitCheck(); 279 } 280 281 282 status_t 283 Inode::WriteBack(Transaction &transaction) 284 { 285 NodeGetter node(fVolume, transaction, this); 286 if (node.WritableNode() == NULL) 287 return B_IO_ERROR; 288 289 memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode)); 290 return B_OK; 291 } 292 293 294 status_t 295 Inode::CheckPermissions(int accessMode) const 296 { 297 uid_t user = geteuid(); 298 gid_t group = getegid(); 299 300 // you never have write access to a read-only volume 301 if (accessMode & W_OK && fVolume->IsReadOnly()) 302 return B_READ_ONLY_DEVICE; 303 304 // root users always have full access (but they can't execute anything) 305 if (user == 0 && !((accessMode & X_OK) && (Mode() & S_IXUSR) == 0)) 306 return B_OK; 307 308 // shift mode bits, to check directly against accessMode 309 mode_t mode = Mode(); 310 if (user == (uid_t)fNode.UserID()) 311 mode >>= 6; 312 else if (group == (gid_t)fNode.GroupID()) 313 mode >>= 3; 314 315 if (accessMode & ~(mode & S_IRWXO)) 316 return B_NOT_ALLOWED; 317 318 return B_OK; 319 } 320 321 322 // #pragma mark - attributes 323 324 325 void 326 Inode::_AddIterator(AttributeIterator *iterator) 327 { 328 if (fSmallDataLock.Lock() < B_OK) 329 return; 330 331 fIterators.Add(iterator); 332 333 fSmallDataLock.Unlock(); 334 } 335 336 337 void 338 Inode::_RemoveIterator(AttributeIterator *iterator) 339 { 340 if (fSmallDataLock.Lock() < B_OK) 341 return; 342 343 fIterators.Remove(iterator); 344 345 fSmallDataLock.Unlock(); 346 } 347 348 349 /*! 350 Tries to free up "bytes" space in the small_data section by moving 351 attributes to real files. Used for system attributes like the name. 352 You need to hold the fSmallDataLock when you call this method 353 */ 354 status_t 355 Inode::_MakeSpaceForSmallData(Transaction &transaction, bfs_inode *node, 356 const char *name, int32 bytes) 357 { 358 ASSERT(fSmallDataLock.IsLocked()); 359 360 while (bytes > 0) { 361 small_data *item = node->SmallDataStart(), *max = NULL; 362 int32 index = 0, maxIndex = 0; 363 for (; !item->IsLast(node); item = item->Next(), index++) { 364 // should not remove those 365 if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name())) 366 continue; 367 368 if (max == NULL || max->Size() < item->Size()) { 369 maxIndex = index; 370 max = item; 371 } 372 373 // remove the first one large enough to free the needed amount of bytes 374 if (bytes < (int32)item->Size()) 375 break; 376 } 377 378 if (item->IsLast(node) || (int32)item->Size() < bytes) 379 return B_ERROR; 380 381 bytes -= max->Size(); 382 383 // Move the attribute to a real attribute file 384 // Luckily, this doesn't cause any index updates 385 386 Inode *attribute; 387 status_t status = CreateAttribute(transaction, item->Name(), item->Type(), 388 &attribute); 389 if (status < B_OK) 390 RETURN_ERROR(status); 391 392 size_t length = item->DataSize(); 393 status = attribute->WriteAt(transaction, 0, item->Data(), &length); 394 395 ReleaseAttribute(attribute); 396 397 if (status < B_OK) { 398 Vnode vnode(fVolume, Attributes()); 399 Inode *attributes; 400 if (vnode.Get(&attributes) < B_OK 401 || attributes->Remove(transaction, name) < B_OK) { 402 FATAL(("Could not remove newly created attribute!\n")); 403 } 404 405 RETURN_ERROR(status); 406 } 407 408 _RemoveSmallData(node, max, maxIndex); 409 } 410 return B_OK; 411 } 412 413 414 /*! 415 Private function which removes the given attribute from the small_data 416 section. 417 You need to hold the fSmallDataLock when you call this method 418 */ 419 status_t 420 Inode::_RemoveSmallData(bfs_inode *node, small_data *item, int32 index) 421 { 422 ASSERT(fSmallDataLock.IsLocked()); 423 424 small_data *next = item->Next(); 425 if (!next->IsLast(node)) { 426 // find the last attribute 427 small_data *last = next; 428 while (!last->IsLast(node)) 429 last = last->Next(); 430 431 int32 size = (uint8 *)last - (uint8 *)next; 432 if (size < 0 || size > (uint8 *)node + fVolume->BlockSize() - (uint8 *)next) 433 return B_BAD_DATA; 434 435 memmove(item, next, size); 436 437 // Move the "last" one to its new location and 438 // correctly terminate the small_data section 439 last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item)); 440 memset(last, 0, (uint8 *)node + fVolume->BlockSize() - (uint8 *)last); 441 } else 442 memset(item, 0, item->Size()); 443 444 // update all current iterators 445 AttributeIterator *iterator = NULL; 446 while ((iterator = fIterators.Next(iterator)) != NULL) { 447 iterator->Update(index, -1); 448 } 449 450 return B_OK; 451 } 452 453 454 //! Removes the given attribute from the small_data section. 455 status_t 456 Inode::_RemoveSmallData(Transaction &transaction, NodeGetter &nodeGetter, 457 const char *name) 458 { 459 if (name == NULL) 460 return B_BAD_VALUE; 461 462 bfs_inode *node = nodeGetter.WritableNode(); 463 SimpleLocker locker(fSmallDataLock); 464 465 // search for the small_data item 466 467 small_data *item = node->SmallDataStart(); 468 int32 index = 0; 469 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 470 item = item->Next(); 471 index++; 472 } 473 474 if (item->IsLast(node)) 475 return B_ENTRY_NOT_FOUND; 476 477 nodeGetter.MakeWritable(transaction); 478 status_t status = _RemoveSmallData(node, item, index); 479 if (status == B_OK) 480 status = WriteBack(transaction); 481 482 return status; 483 } 484 485 486 /*! 487 Try to place the given attribute in the small_data section - if the 488 new attribute is too big to fit in that section, it returns B_DEVICE_FULL. 489 In that case, the attribute should be written to a real attribute file; 490 it's the caller's responsibility to remove any existing attributes in the small 491 data section if that's the case. 492 493 Note that you need to write back the inode yourself after having called that 494 method - it's a bad API decision that it needs a transaction but enforces you 495 to write back the inode all by yourself, but it's just more efficient in most 496 cases... 497 */ 498 status_t 499 Inode::_AddSmallData(Transaction &transaction, NodeGetter &nodeGetter, 500 const char *name, uint32 type, const uint8 *data, size_t length, bool force) 501 { 502 // TODO: support new write attr semantics and write offset! 503 bfs_inode *node = nodeGetter.WritableNode(); 504 505 if (node == NULL || name == NULL || data == NULL) 506 return B_BAD_VALUE; 507 508 // reject any requests that can't fit into the small_data section 509 uint32 nameLength = strlen(name); 510 uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1; 511 if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode)) 512 return B_DEVICE_FULL; 513 514 nodeGetter.MakeWritable(transaction); 515 SimpleLocker locker(fSmallDataLock); 516 517 // Find the last item or one with the same name we have to add 518 small_data *item = node->SmallDataStart(); 519 int32 index = 0; 520 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 521 item = item->Next(); 522 index++; 523 } 524 525 // is the attribute already in the small_data section? 526 // then just replace the data part of that one 527 if (!item->IsLast(node)) { 528 // find last attribute 529 small_data *last = item; 530 while (!last->IsLast(node)) 531 last = last->Next(); 532 533 // try to change the attributes value 534 if (item->data_size > length 535 || force 536 || ((uint8 *)last + length - item->DataSize()) <= ((uint8 *)node + fVolume->InodeSize())) { 537 // make room for the new attribute if needed (and we are forced to do so) 538 if (force 539 && ((uint8 *)last + length - item->DataSize()) > ((uint8 *)node + fVolume->InodeSize())) { 540 // We also take the free space at the end of the small_data section 541 // into account, and request only what's really needed 542 uint32 needed = length - item->DataSize() - 543 (uint32)((uint8 *)node + fVolume->InodeSize() - (uint8 *)last); 544 545 if (_MakeSpaceForSmallData(transaction, node, name, needed) < B_OK) 546 return B_ERROR; 547 548 // reset our pointers 549 item = node->SmallDataStart(); 550 index = 0; 551 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 552 item = item->Next(); 553 index++; 554 } 555 556 last = item; 557 while (!last->IsLast(node)) 558 last = last->Next(); 559 } 560 561 // Normally, we can just overwrite the attribute data as the size 562 // is specified by the type and does not change that often 563 if (length != item->DataSize()) { 564 // move the attributes after the current one 565 small_data *next = item->Next(); 566 if (!next->IsLast(node)) 567 memmove((uint8 *)item + spaceNeeded, next, (uint8 *)last - (uint8 *)next); 568 569 // Move the "last" one to its new location and 570 // correctly terminate the small_data section 571 last = (small_data *)((uint8 *)last - ((uint8 *)next - ((uint8 *)item + spaceNeeded))); 572 if ((uint8 *)last < (uint8 *)node + fVolume->BlockSize()) 573 memset(last, 0, (uint8 *)node + fVolume->BlockSize() - (uint8 *)last); 574 575 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 576 } 577 578 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 579 memcpy(item->Data(), data, length); 580 item->Data()[length] = '\0'; 581 582 return B_OK; 583 } 584 585 return B_DEVICE_FULL; 586 } 587 588 // try to add the new attribute! 589 590 if ((uint8 *)item + spaceNeeded > (uint8 *)node + fVolume->InodeSize()) { 591 // there is not enough space for it! 592 if (!force) 593 return B_DEVICE_FULL; 594 595 // make room for the new attribute 596 if (_MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK) 597 return B_ERROR; 598 599 // get new last item! 600 item = node->SmallDataStart(); 601 index = 0; 602 while (!item->IsLast(node)) { 603 item = item->Next(); 604 index++; 605 } 606 } 607 608 memset(item, 0, spaceNeeded); 609 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 610 item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength); 611 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 612 strcpy(item->Name(), name); 613 memcpy(item->Data(), data, length); 614 615 // correctly terminate the small_data section 616 item = item->Next(); 617 if (!item->IsLast(node)) 618 memset(item, 0, (uint8 *)node + fVolume->InodeSize() - (uint8 *)item); 619 620 // update all current iterators 621 AttributeIterator *iterator = NULL; 622 while ((iterator = fIterators.Next(iterator)) != NULL) { 623 iterator->Update(index, 1); 624 } 625 626 return B_OK; 627 } 628 629 630 /*! 631 Iterates through the small_data section of an inode. 632 To start at the beginning of this section, you let smallData 633 point to NULL, like: 634 small_data *data = NULL; 635 while (inode->GetNextSmallData(&data) { ... } 636 637 This function is reentrant and doesn't allocate any memory; 638 you can safely stop calling it at any point (you don't need 639 to iterate through the whole list). 640 You need to hold the fSmallDataLock when you call this method 641 */ 642 status_t 643 Inode::_GetNextSmallData(bfs_inode *node, small_data **_smallData) const 644 { 645 if (node == NULL) 646 RETURN_ERROR(B_BAD_VALUE); 647 648 ASSERT(fSmallDataLock.IsLocked()); 649 650 small_data *data = *_smallData; 651 652 // begin from the start? 653 if (data == NULL) 654 data = node->SmallDataStart(); 655 else 656 data = data->Next(); 657 658 // is already last item? 659 if (data->IsLast(node)) 660 return B_ENTRY_NOT_FOUND; 661 662 *_smallData = data; 663 664 return B_OK; 665 } 666 667 668 /*! 669 Finds the attribute "name" in the small data section, and 670 returns a pointer to it (or NULL if it doesn't exist). 671 You need to hold the fSmallDataLock when you call this method 672 */ 673 small_data * 674 Inode::FindSmallData(const bfs_inode *node, const char *name) const 675 { 676 ASSERT(fSmallDataLock.IsLocked()); 677 678 small_data *smallData = NULL; 679 while (_GetNextSmallData(const_cast<bfs_inode *>(node), &smallData) == B_OK) { 680 if (!strcmp(smallData->Name(), name)) 681 return smallData; 682 } 683 return NULL; 684 } 685 686 687 /*! 688 Returns a pointer to the node's name if present in the small data 689 section, NULL otherwise. 690 You need to hold the fSmallDataLock when you call this method 691 */ 692 const char * 693 Inode::Name(const bfs_inode *node) const 694 { 695 ASSERT(fSmallDataLock.IsLocked()); 696 697 small_data *smallData = NULL; 698 while (_GetNextSmallData((bfs_inode *)node, &smallData) == B_OK) { 699 if (*smallData->Name() == FILE_NAME_NAME 700 && smallData->NameSize() == FILE_NAME_NAME_LENGTH) 701 return (const char *)smallData->Data(); 702 } 703 return NULL; 704 } 705 706 707 /*! 708 Copies the node's name into the provided buffer. 709 The buffer should be B_FILE_NAME_LENGTH bytes large. 710 */ 711 status_t 712 Inode::GetName(char *buffer, size_t size) const 713 { 714 NodeGetter node(fVolume, this); 715 SimpleLocker locker(fSmallDataLock); 716 717 const char *name = Name(node.Node()); 718 if (name == NULL) 719 return B_ENTRY_NOT_FOUND; 720 721 strlcpy(buffer, name, size); 722 return B_OK; 723 } 724 725 726 /*! 727 Changes or set the name of a file: in the inode small_data section only, it 728 doesn't change it in the parent directory's b+tree. 729 Note that you need to write back the inode yourself after having called 730 that method. It suffers from the same API decision as AddSmallData() does 731 (and for the same reason). 732 */ 733 status_t 734 Inode::SetName(Transaction &transaction, const char *name) 735 { 736 if (name == NULL || *name == '\0') 737 return B_BAD_VALUE; 738 739 NodeGetter node(fVolume, transaction, this); 740 const char nameTag[2] = {FILE_NAME_NAME, 0}; 741 742 return _AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 743 (uint8 *)name, strlen(name), true); 744 } 745 746 747 status_t 748 Inode::_RemoveAttribute(Transaction &transaction, const char *name, 749 bool hasIndex, Index *index) 750 { 751 // remove the attribute file if it exists 752 Vnode vnode(fVolume, Attributes()); 753 Inode *attributes; 754 status_t status = vnode.Get(&attributes); 755 if (status < B_OK) 756 return status; 757 758 // update index 759 if (index != NULL) { 760 Inode *attribute; 761 if ((hasIndex || fVolume->CheckForLiveQuery(name)) 762 && GetAttribute(name, &attribute) == B_OK) { 763 uint8 data[BPLUSTREE_MAX_KEY_LENGTH]; 764 size_t length = BPLUSTREE_MAX_KEY_LENGTH; 765 if (attribute->ReadAt(0, data, &length) == B_OK) { 766 index->Update(transaction, name, attribute->Type(), data, 767 length, NULL, 0, this); 768 } 769 770 ReleaseAttribute(attribute); 771 } 772 } 773 774 if ((status = attributes->Remove(transaction, name)) < B_OK) 775 return status; 776 777 if (attributes->IsEmpty()) { 778 // remove attribute directory (don't fail if that can't be done) 779 if (remove_vnode(fVolume->ID(), attributes->ID()) == B_OK) { 780 // update the inode, so that no one will ever doubt it's deleted :-) 781 attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 782 if (attributes->WriteBack(transaction) == B_OK) { 783 Attributes().SetTo(0, 0, 0); 784 WriteBack(transaction); 785 } else 786 unremove_vnode(fVolume->ID(), attributes->ID()); 787 } 788 } 789 790 return status; 791 } 792 793 794 /*! 795 Reads data from the specified attribute. 796 This is a high-level attribute function that understands attributes 797 in the small_data section as well as real attribute files. 798 */ 799 status_t 800 Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, 801 size_t *_length) 802 { 803 if (pos < 0) 804 pos = 0; 805 806 // search in the small_data section (which has to be locked first) 807 { 808 NodeGetter node(fVolume, this); 809 SimpleLocker locker(fSmallDataLock); 810 811 small_data *smallData = FindSmallData(node.Node(), name); 812 if (smallData != NULL) { 813 size_t length = *_length; 814 if (pos >= smallData->data_size) { 815 *_length = 0; 816 return B_OK; 817 } 818 if (length + pos > smallData->DataSize()) 819 length = smallData->DataSize() - pos; 820 821 memcpy(buffer, smallData->Data() + pos, length); 822 *_length = length; 823 return B_OK; 824 } 825 } 826 827 // search in the attribute directory 828 Inode *attribute; 829 status_t status = GetAttribute(name, &attribute); 830 if (status == B_OK) { 831 if (attribute->Lock().Lock() == B_OK) { 832 status = attribute->ReadAt(pos, (uint8 *)buffer, _length); 833 attribute->Lock().Unlock(); 834 } else 835 status = B_ERROR; 836 837 ReleaseAttribute(attribute); 838 } 839 840 RETURN_ERROR(status); 841 } 842 843 844 /*! 845 Writes data to the specified attribute. 846 This is a high-level attribute function that understands attributes 847 in the small_data section as well as real attribute files. 848 */ 849 status_t 850 Inode::WriteAttribute(Transaction &transaction, const char *name, int32 type, 851 off_t pos, const uint8 *buffer, size_t *_length) 852 { 853 // needed to maintain the index 854 uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL; 855 size_t oldLength = 0; 856 857 // TODO: we actually depend on that the contents of "buffer" are constant. 858 // If they get changed during the write (hey, user programs), we may mess 859 // up our index trees! 860 861 Index index(fVolume); 862 index.SetTo(name); 863 864 Inode *attribute = NULL; 865 status_t status = B_OK; 866 867 if (GetAttribute(name, &attribute) < B_OK) { 868 // save the old attribute data 869 NodeGetter node(fVolume, transaction, this); 870 fSmallDataLock.Lock(); 871 872 small_data *smallData = FindSmallData(node.Node(), name); 873 if (smallData != NULL) { 874 oldLength = smallData->DataSize(); 875 if (oldLength > 0) { 876 if (oldLength > BPLUSTREE_MAX_KEY_LENGTH) 877 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 878 memcpy(oldData = oldBuffer, smallData->Data(), oldLength); 879 } 880 } 881 fSmallDataLock.Unlock(); 882 883 // if the attribute doesn't exist yet (as a file), try to put it in the 884 // small_data section first - if that fails (due to insufficent space), 885 // create a real attribute file 886 status = _AddSmallData(transaction, node, name, type, buffer, *_length); 887 if (status == B_DEVICE_FULL) { 888 if (smallData != NULL) { 889 // remove the old attribute from the small data section - there 890 // is no space left for the new data 891 status = _RemoveSmallData(transaction, node, name); 892 } else 893 status = B_OK; 894 895 if (status == B_OK) 896 status = CreateAttribute(transaction, name, type, &attribute); 897 if (status < B_OK) 898 RETURN_ERROR(status); 899 } else if (status == B_OK) 900 status = WriteBack(transaction); 901 } 902 903 if (attribute != NULL) { 904 if (attribute->Lock().LockWrite() == B_OK) { 905 // save the old attribute data (if this fails, oldLength will reflect it) 906 if (fVolume->CheckForLiveQuery(name) && attribute->Size() > 0) { 907 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 908 if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK) 909 oldData = oldBuffer; 910 } 911 912 // check if the data fits into the small_data section again 913 NodeGetter node(fVolume, transaction, this); 914 status = _AddSmallData(transaction, node, name, type, buffer, *_length); 915 if (status == B_OK) { 916 // it does - remove its file 917 status = _RemoveAttribute(transaction, name, false, NULL); 918 } else { 919 status = attribute->WriteAt(transaction, pos, buffer, _length); 920 if (status == B_OK) { 921 // The attribute type might have been changed - we need to adopt 922 // the new one - the attribute's inode will be written back on close 923 attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type); 924 } 925 926 attribute->Lock().UnlockWrite(); 927 } 928 } else 929 status = B_ERROR; 930 931 ReleaseAttribute(attribute); 932 } 933 934 // TODO: find a better way than this "pos" thing (the begin of the old key 935 // must be copied to the start of the new one for a comparison) 936 if (status == B_OK && pos == 0) { 937 // index only the first BPLUSTREE_MAX_KEY_LENGTH bytes 938 uint16 length = *_length; 939 if (length > BPLUSTREE_MAX_KEY_LENGTH) 940 length = BPLUSTREE_MAX_KEY_LENGTH; 941 942 // Update index. Note, Index::Update() may be called even if initializing 943 // the index failed - it will just update the live queries in this case 944 if (pos < length || pos < oldLength) { 945 index.Update(transaction, name, type, oldData, oldLength, buffer, 946 length, this); 947 } 948 } 949 return status; 950 } 951 952 953 /*! 954 Removes the specified attribute from the inode. 955 This is a high-level attribute function that understands attributes 956 in the small_data section as well as real attribute files. 957 */ 958 status_t 959 Inode::RemoveAttribute(Transaction &transaction, const char *name) 960 { 961 Index index(fVolume); 962 bool hasIndex = index.SetTo(name) == B_OK; 963 NodeGetter node(fVolume, this); 964 965 // update index for attributes in the small_data section 966 { 967 fSmallDataLock.Lock(); 968 969 small_data *smallData = FindSmallData(node.Node(), name); 970 if (smallData != NULL) { 971 uint32 length = smallData->DataSize(); 972 if (length > BPLUSTREE_MAX_KEY_LENGTH) 973 length = BPLUSTREE_MAX_KEY_LENGTH; 974 index.Update(transaction, name, smallData->Type(), smallData->Data(), 975 length, NULL, 0, this); 976 } 977 fSmallDataLock.Unlock(); 978 } 979 980 status_t status = _RemoveSmallData(transaction, node, name); 981 if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) { 982 // remove the attribute file if it exists 983 status = _RemoveAttribute(transaction, name, hasIndex, &index); 984 } 985 return status; 986 } 987 988 989 status_t 990 Inode::GetAttribute(const char *name, Inode **_attribute) 991 { 992 // does this inode even have attributes? 993 if (Attributes().IsZero()) 994 return B_ENTRY_NOT_FOUND; 995 996 Vnode vnode(fVolume, Attributes()); 997 Inode *attributes; 998 if (vnode.Get(&attributes) < B_OK) { 999 FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", name)); 1000 return B_ERROR; 1001 } 1002 1003 BPlusTree *tree; 1004 status_t status = attributes->GetTree(&tree); 1005 if (status == B_OK) { 1006 ino_t id; 1007 status = tree->Find((uint8 *)name, (uint16)strlen(name), &id); 1008 if (status == B_OK) { 1009 Vnode vnode(fVolume, id); 1010 Inode *inode; 1011 // Check if the attribute is really an attribute 1012 if (vnode.Get(&inode) < B_OK || !inode->IsAttribute()) 1013 return B_ERROR; 1014 1015 *_attribute = inode; 1016 vnode.Keep(); 1017 return B_OK; 1018 } 1019 } 1020 return status; 1021 } 1022 1023 1024 void 1025 Inode::ReleaseAttribute(Inode *attribute) 1026 { 1027 if (attribute == NULL) 1028 return; 1029 1030 put_vnode(fVolume->ID(), attribute->ID()); 1031 } 1032 1033 1034 status_t 1035 Inode::CreateAttribute(Transaction &transaction, const char *name, uint32 type, 1036 Inode **attribute) 1037 { 1038 // do we need to create the attribute directory first? 1039 if (Attributes().IsZero()) { 1040 status_t status = Inode::Create(transaction, this, NULL, 1041 S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL); 1042 if (status < B_OK) 1043 RETURN_ERROR(status); 1044 } 1045 Vnode vnode(fVolume, Attributes()); 1046 Inode *attributes; 1047 if (vnode.Get(&attributes) < B_OK) 1048 return B_ERROR; 1049 1050 // Inode::Create() locks the inode for us 1051 return Inode::Create(transaction, attributes, name, 1052 S_ATTR | S_FILE | 0666, 0, type, NULL, NULL, attribute); 1053 } 1054 1055 1056 // #pragma mark - directory tree 1057 1058 1059 /*! 1060 Gives the caller direct access to the b+tree for a given directory. 1061 The tree is no longer created on demand, but when the inode is first 1062 created. That will report any potential errors upfront, saves locking, 1063 and should work as good (though a bit slower). 1064 */ 1065 status_t 1066 Inode::GetTree(BPlusTree **tree) 1067 { 1068 if (fTree) { 1069 *tree = fTree; 1070 return B_OK; 1071 } 1072 1073 RETURN_ERROR(B_BAD_VALUE); 1074 } 1075 1076 1077 bool 1078 Inode::IsEmpty() 1079 { 1080 BPlusTree *tree; 1081 status_t status = GetTree(&tree); 1082 if (status < B_OK) 1083 return status; 1084 1085 TreeIterator iterator(tree); 1086 1087 // index and attribute directories are really empty when they are 1088 // empty - directories for standard files always contain ".", and 1089 // "..", so we need to ignore those two 1090 1091 uint32 count = 0; 1092 char name[BPLUSTREE_MAX_KEY_LENGTH]; 1093 uint16 length; 1094 ino_t id; 1095 while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, 1096 &id) == B_OK) { 1097 if (Mode() & (S_ATTR_DIR | S_INDEX_DIR)) 1098 return false; 1099 1100 if (++count > 2 || strcmp(".", name) && strcmp("..", name)) 1101 return false; 1102 } 1103 return true; 1104 } 1105 1106 1107 // #pragma mark - data stream 1108 1109 1110 /*! 1111 Finds the block_run where "pos" is located in the data_stream of 1112 the inode. 1113 If successful, "offset" will then be set to the file offset 1114 of the block_run returned; so "pos - offset" is for the block_run 1115 what "pos" is for the whole stream. 1116 The caller has to make sure that "pos" is inside the stream. 1117 */ 1118 status_t 1119 Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset) 1120 { 1121 data_stream *data = &Node().data; 1122 1123 // find matching block run 1124 1125 if (data->MaxDirectRange() > 0 && pos >= data->MaxDirectRange()) { 1126 if (data->MaxDoubleIndirectRange() > 0 && pos >= data->MaxIndirectRange()) { 1127 // access to double indirect blocks 1128 1129 CachedBlock cached(fVolume); 1130 1131 off_t start = pos - data->MaxIndirectRange(); 1132 int32 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT 1133 + cached.BlockShift())) 1134 * (fVolume->BlockSize() / sizeof(block_run)); 1135 int32 directSize = NUM_ARRAY_BLOCKS << cached.BlockShift(); 1136 int32 index = start / indirectSize; 1137 int32 runsPerBlock = cached.BlockSize() / sizeof(block_run); 1138 1139 block_run *indirect = (block_run *)cached.SetTo( 1140 fVolume->ToBlock(data->double_indirect) + index / runsPerBlock); 1141 if (indirect == NULL) 1142 RETURN_ERROR(B_ERROR); 1143 1144 //printf("\tstart = %Ld, indirectSize = %ld, directSize = %ld, index = %ld\n",start,indirectSize,directSize,index); 1145 //printf("\tlook for indirect block at %ld,%d\n",indirect[index].allocation_group,indirect[index].start); 1146 1147 int32 current = (start % indirectSize) / directSize; 1148 1149 indirect = (block_run *)cached.SetTo( 1150 fVolume->ToBlock(indirect[index % runsPerBlock]) 1151 + current / runsPerBlock); 1152 if (indirect == NULL) 1153 RETURN_ERROR(B_ERROR); 1154 1155 run = indirect[current % runsPerBlock]; 1156 offset = data->MaxIndirectRange() + (index * indirectSize) 1157 + (current * directSize); 1158 //printf("\tfCurrent = %ld, fRunFileOffset = %Ld, fRunBlockEnd = %Ld, fRun = %ld,%d\n",fCurrent,fRunFileOffset,fRunBlockEnd,fRun.allocation_group,fRun.start); 1159 } else { 1160 // access to indirect blocks 1161 1162 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1163 off_t runBlockEnd = data->MaxDirectRange(); 1164 1165 CachedBlock cached(fVolume); 1166 off_t block = fVolume->ToBlock(data->indirect); 1167 1168 for (int32 i = 0; i < data->indirect.Length(); i++) { 1169 block_run *indirect = (block_run *)cached.SetTo(block + i); 1170 if (indirect == NULL) 1171 RETURN_ERROR(B_IO_ERROR); 1172 1173 int32 current = -1; 1174 while (++current < runsPerBlock) { 1175 if (indirect[current].IsZero()) 1176 break; 1177 1178 runBlockEnd += indirect[current].Length() 1179 << cached.BlockShift(); 1180 if (runBlockEnd > pos) { 1181 run = indirect[current]; 1182 offset = runBlockEnd - (run.Length() 1183 << cached.BlockShift()); 1184 //printf("reading from indirect block: %ld,%d\n",fRun.allocation_group,fRun.start); 1185 //printf("### indirect-run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset); 1186 return fVolume->ValidateBlockRun(run); 1187 } 1188 } 1189 } 1190 RETURN_ERROR(B_ERROR); 1191 } 1192 } else { 1193 // access from direct blocks 1194 1195 off_t runBlockEnd = 0LL; 1196 int32 current = -1; 1197 1198 while (++current < NUM_DIRECT_BLOCKS) { 1199 if (data->direct[current].IsZero()) 1200 break; 1201 1202 runBlockEnd += data->direct[current].Length() 1203 << fVolume->BlockShift(); 1204 if (runBlockEnd > pos) { 1205 run = data->direct[current]; 1206 offset = runBlockEnd - (run.Length() << fVolume->BlockShift()); 1207 //printf("### run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset); 1208 return fVolume->ValidateBlockRun(run); 1209 } 1210 } 1211 1212 //PRINT(("FindBlockRun() failed in direct range: size = %Ld, pos = %Ld\n",data->size,pos)); 1213 return B_ENTRY_NOT_FOUND; 1214 } 1215 return fVolume->ValidateBlockRun(run); 1216 } 1217 1218 1219 status_t 1220 Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length) 1221 { 1222 #if 0 1223 // call the right ReadAt() method, depending on the inode flags 1224 1225 if (Flags() & INODE_LOGGED) 1226 return ((Stream<Access::Logged> *)this)->ReadAt(pos, buffer, _length); 1227 1228 return ((Stream<Access::Cached> *)this)->ReadAt(pos, buffer, _length); 1229 #endif 1230 size_t length = *_length; 1231 1232 // set/check boundaries for pos/length 1233 if (pos < 0) 1234 return B_BAD_VALUE; 1235 1236 if (pos >= Size() || length == 0) { 1237 *_length = 0; 1238 return B_NO_ERROR; 1239 } 1240 1241 return file_cache_read(FileCache(), pos, buffer, _length); 1242 } 1243 1244 1245 status_t 1246 Inode::WriteAt(Transaction &transaction, off_t pos, const uint8 *buffer, 1247 size_t *_length) 1248 { 1249 // update the last modification time in memory, it will be written 1250 // back to the inode, and the index when the file is closed 1251 // ToDo: should update the internal last modified time only at this point! 1252 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) 1253 << INODE_TIME_SHIFT); 1254 1255 // TODO: support INODE_LOGGED! 1256 #if 0 1257 if (Flags() & INODE_LOGGED) 1258 return ((Stream<Access::Logged> *)this)->WriteAt(transaction, pos, buffer, _length); 1259 1260 return ((Stream<Access::Cached> *)this)->WriteAt(transaction, pos, buffer, _length); 1261 #endif 1262 size_t length = *_length; 1263 1264 // set/check boundaries for pos/length 1265 if (pos < 0) 1266 return B_BAD_VALUE; 1267 1268 if (pos + length > Size()) { 1269 off_t oldSize = Size(); 1270 1271 // the transaction doesn't have to be started already 1272 // ToDo: what's that INODE_NO_TRANSACTION flag good for again? 1273 if ((Flags() & INODE_NO_TRANSACTION) == 0 1274 && !transaction.IsStarted()) 1275 transaction.Start(fVolume, BlockNumber()); 1276 1277 // let's grow the data stream to the size needed 1278 status_t status = SetFileSize(transaction, pos + length); 1279 if (status < B_OK) { 1280 *_length = 0; 1281 RETURN_ERROR(status); 1282 } 1283 // If the position of the write was beyond the file size, we 1284 // have to fill the gap between that position and the old file 1285 // size with zeros. 1286 FillGapWithZeros(oldSize, pos); 1287 1288 // we need to write back the inode here because it has to 1289 // go into this transaction (we cannot wait until the file 1290 // is closed) 1291 status = WriteBack(transaction); 1292 if (status < B_OK) 1293 return status; 1294 } 1295 1296 // If we don't want to write anything, we can now return (we may 1297 // just have changed the file size using the position parameter) 1298 if (length == 0) 1299 return B_OK; 1300 1301 return file_cache_write(FileCache(), pos, buffer, _length); 1302 } 1303 1304 1305 /*! 1306 Fills the gap between the old file size and the new file size 1307 with zeros. 1308 It's more or less a copy of Inode::WriteAt() but it can handle 1309 length differences of more than just 4 GB, and it never uses 1310 the log, even if the INODE_LOGGED flag is set. 1311 */ 1312 status_t 1313 Inode::FillGapWithZeros(off_t pos, off_t newSize) 1314 { 1315 // ToDo: we currently do anything here, same as original BFS! 1316 //if (pos >= newSize) 1317 return B_OK; 1318 #if 0 1319 block_run run; 1320 off_t offset; 1321 if (FindBlockRun(pos, run, offset) < B_OK) 1322 RETURN_ERROR(B_BAD_VALUE); 1323 1324 off_t length = newSize - pos; 1325 uint32 bytesWritten = 0; 1326 uint32 blockSize = fVolume->BlockSize(); 1327 uint32 blockShift = fVolume->BlockShift(); 1328 uint8 *block; 1329 1330 // the first block_run we write could not be aligned to the block_size boundary 1331 // (write partial block at the beginning) 1332 1333 // pos % block_size == (pos - offset) % block_size, offset % block_size == 0 1334 if (pos % blockSize != 0) { 1335 run.start += (pos - offset) / blockSize; 1336 run.length -= (pos - offset) / blockSize; 1337 1338 CachedBlock cached(fVolume,run); 1339 if ((block = cached.Block()) == NULL) 1340 RETURN_ERROR(B_BAD_VALUE); 1341 1342 bytesWritten = blockSize - (pos % blockSize); 1343 if (length < bytesWritten) 1344 bytesWritten = length; 1345 1346 memset(block + (pos % blockSize), 0, bytesWritten); 1347 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1348 RETURN_ERROR(B_IO_ERROR); 1349 1350 pos += bytesWritten; 1351 1352 length -= bytesWritten; 1353 if (length == 0) 1354 return B_OK; 1355 1356 if (FindBlockRun(pos, run, offset) < B_OK) 1357 RETURN_ERROR(B_BAD_VALUE); 1358 } 1359 1360 while (length > 0) { 1361 // offset is the offset to the current pos in the block_run 1362 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift)); 1363 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift)); 1364 1365 CachedBlock cached(fVolume); 1366 off_t blockNumber = fVolume->ToBlock(run); 1367 for (int32 i = 0; i < run.Length(); i++) { 1368 if ((block = cached.SetTo(blockNumber + i, true)) == NULL) 1369 RETURN_ERROR(B_IO_ERROR); 1370 1371 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1372 RETURN_ERROR(B_IO_ERROR); 1373 } 1374 1375 int32 bytes = run.Length() << blockShift; 1376 length -= bytes; 1377 bytesWritten += bytes; 1378 1379 // since we don't respect a last partial block, length can be lower 1380 if (length <= 0) 1381 break; 1382 1383 pos += bytes; 1384 1385 if (FindBlockRun(pos, run, offset) < B_OK) 1386 RETURN_ERROR(B_BAD_VALUE); 1387 } 1388 return B_OK; 1389 #endif 1390 } 1391 1392 1393 /*! 1394 Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing 1395 the indirect and double indirect range uses this method. 1396 The allocated block_run is saved in "run" 1397 */ 1398 status_t 1399 Inode::_AllocateBlockArray(Transaction &transaction, block_run &run) 1400 { 1401 if (!run.IsZero()) 1402 return B_BAD_VALUE; 1403 1404 status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, 1405 run, NUM_ARRAY_BLOCKS); 1406 if (status < B_OK) 1407 return status; 1408 1409 // make sure those blocks are empty 1410 CachedBlock cached(fVolume); 1411 off_t block = fVolume->ToBlock(run); 1412 1413 for (int32 i = 0; i < run.Length(); i++) { 1414 block_run *runs = (block_run *)cached.SetToWritable(transaction, 1415 block + i, true); 1416 if (runs == NULL) 1417 return B_IO_ERROR; 1418 } 1419 return B_OK; 1420 } 1421 1422 1423 status_t 1424 Inode::_GrowStream(Transaction &transaction, off_t size) 1425 { 1426 data_stream *data = &Node().data; 1427 1428 // is the data stream already large enough to hold the new size? 1429 // (can be the case with preallocated blocks) 1430 if (size < data->MaxDirectRange() 1431 || size < data->MaxIndirectRange() 1432 || size < data->MaxDoubleIndirectRange()) { 1433 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1434 return B_OK; 1435 } 1436 1437 // how many bytes are still needed? (unused ranges are always zero) 1438 uint16 minimum = 1; 1439 off_t bytes; 1440 if (data->Size() < data->MaxDoubleIndirectRange()) { 1441 bytes = size - data->MaxDoubleIndirectRange(); 1442 // the double indirect range can only handle multiple of NUM_ARRAY_BLOCKS 1443 minimum = NUM_ARRAY_BLOCKS; 1444 } else if (data->Size() < data->MaxIndirectRange()) 1445 bytes = size - data->MaxIndirectRange(); 1446 else if (data->Size() < data->MaxDirectRange()) 1447 bytes = size - data->MaxDirectRange(); 1448 else 1449 bytes = size - data->Size(); 1450 1451 // do we have enough free blocks on the disk? 1452 off_t blocksRequested = (bytes + fVolume->BlockSize() - 1) 1453 >> fVolume->BlockShift(); 1454 if (blocksRequested > fVolume->FreeBlocks()) 1455 return B_DEVICE_FULL; 1456 1457 off_t blocksNeeded = blocksRequested; 1458 // because of preallocations and partial allocations, the number of 1459 // blocks we need to allocate may be different from the one we request 1460 // from the block allocator 1461 1462 // Should we preallocate some blocks (currently, always 64k)? 1463 // Attributes, attribute directories, and long symlinks usually won't get 1464 // that big, and should stay close to the inode - preallocating could be 1465 // counterproductive. 1466 // Also, if free disk space is tight, we probably don't want to do this as well. 1467 if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink() 1468 && blocksRequested < (65536 >> fVolume->BlockShift()) 1469 && fVolume->FreeBlocks() > 128) 1470 blocksRequested = 65536 >> fVolume->BlockShift(); 1471 1472 while (blocksNeeded > 0) { 1473 // the requested blocks do not need to be returned with a 1474 // single allocation, so we need to iterate until we have 1475 // enough blocks allocated 1476 block_run run; 1477 status_t status = fVolume->Allocate(transaction, this, blocksRequested, 1478 run, minimum); 1479 if (status < B_OK) 1480 return status; 1481 1482 // okay, we have the needed blocks, so just distribute them to the 1483 // different ranges of the stream (direct, indirect & double indirect) 1484 1485 // TODO: if anything goes wrong here, we probably want to free the 1486 // blocks that couldn't be distributed into the stream! 1487 1488 blocksNeeded -= run.Length(); 1489 // don't preallocate if the first allocation was already too small 1490 blocksRequested = blocksNeeded; 1491 if (minimum > 1) { 1492 // make sure that "blocks" is a multiple of minimum 1493 blocksRequested = (blocksRequested + minimum - 1) & ~(minimum - 1); 1494 } 1495 1496 // Direct block range 1497 1498 if (data->Size() <= data->MaxDirectRange()) { 1499 // let's try to put them into the direct block range 1500 int32 free = 0; 1501 for (; free < NUM_DIRECT_BLOCKS; free++) 1502 if (data->direct[free].IsZero()) 1503 break; 1504 1505 if (free < NUM_DIRECT_BLOCKS) { 1506 // can we merge the last allocated run with the new one? 1507 int32 last = free - 1; 1508 if (free > 0 && data->direct[last].MergeableWith(run)) { 1509 data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16( 1510 data->direct[last].Length() + run.Length()); 1511 } else 1512 data->direct[free] = run; 1513 1514 data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64( 1515 data->MaxDirectRange() + run.Length() * fVolume->BlockSize()); 1516 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 1517 ? data->max_direct_range : size); 1518 continue; 1519 } 1520 } 1521 1522 // Indirect block range 1523 1524 if (data->Size() <= data->MaxIndirectRange() 1525 || !data->MaxIndirectRange()) { 1526 CachedBlock cached(fVolume); 1527 block_run *runs = NULL; 1528 uint32 free = 0; 1529 off_t block; 1530 1531 // if there is no indirect block yet, create one 1532 if (data->indirect.IsZero()) { 1533 status = _AllocateBlockArray(transaction, data->indirect); 1534 if (status < B_OK) 1535 return status; 1536 1537 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1538 data->MaxDirectRange()); 1539 // insert the block_run in the first block 1540 runs = (block_run *)cached.SetTo(data->indirect); 1541 } else { 1542 uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run); 1543 block = fVolume->ToBlock(data->indirect); 1544 1545 // search first empty entry 1546 int32 i = 0; 1547 for (; i < data->indirect.Length(); i++) { 1548 if ((runs = (block_run *)cached.SetTo(block + i)) == NULL) 1549 return B_IO_ERROR; 1550 1551 for (free = 0; free < numberOfRuns; free++) 1552 if (runs[free].IsZero()) 1553 break; 1554 1555 if (free < numberOfRuns) 1556 break; 1557 } 1558 if (i == data->indirect.Length()) 1559 runs = NULL; 1560 } 1561 1562 if (runs != NULL) { 1563 // try to insert the run to the last one - note that this 1564 // doesn't take block borders into account, so it could be 1565 // further optimized 1566 cached.MakeWritable(transaction); 1567 1568 int32 last = free - 1; 1569 if (free > 0 && runs[last].MergeableWith(run)) { 1570 runs[last].length = HOST_ENDIAN_TO_BFS_INT16( 1571 runs[last].Length() + run.Length()); 1572 } else 1573 runs[free] = run; 1574 1575 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1576 data->MaxIndirectRange() 1577 + (run.Length() << fVolume->BlockShift())); 1578 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 1579 ? data->MaxIndirectRange() : size); 1580 continue; 1581 } 1582 } 1583 1584 // Double indirect block range 1585 1586 if (data->Size() <= data->MaxDoubleIndirectRange() 1587 || !data->max_double_indirect_range) { 1588 while ((run.Length() % NUM_ARRAY_BLOCKS) != 0) { 1589 // The number of allocated blocks isn't a multiple of 1590 // NUM_ARRAY_BLOCKS, so we have to change this. This can happen 1591 // the first time the stream grows into the double 1592 // indirect range. 1593 // First, free the remaining blocks that don't fit into a multiple 1594 // of NUM_ARRAY_BLOCKS 1595 int32 rest = run.Length() % NUM_ARRAY_BLOCKS; 1596 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest); 1597 1598 status = fVolume->Free(transaction, 1599 block_run::Run(run.AllocationGroup(), 1600 run.Start() + run.Length(), rest)); 1601 if (status < B_OK) 1602 return status; 1603 1604 blocksNeeded += rest; 1605 blocksRequested = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) 1606 & ~(NUM_ARRAY_BLOCKS - 1); 1607 minimum = NUM_ARRAY_BLOCKS; 1608 // we make sure here that we have at minimum 1609 // NUM_ARRAY_BLOCKS allocated, so if the allocation 1610 // succeeds, we don't run into an endless loop 1611 1612 // Are there any blocks left in the run? If not, allocate a new one 1613 if (run.length == 0) 1614 continue; 1615 } 1616 1617 // if there is no double indirect block yet, create one 1618 if (data->double_indirect.IsZero()) { 1619 status = _AllocateBlockArray(transaction, data->double_indirect); 1620 if (status < B_OK) 1621 return status; 1622 1623 data->max_double_indirect_range = data->max_indirect_range; 1624 } 1625 1626 // calculate the index where to insert the new blocks 1627 1628 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1629 int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) << fVolume->BlockShift()) 1630 * runsPerBlock; 1631 int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift(); 1632 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1633 1634 off_t start = data->MaxDoubleIndirectRange() - data->MaxIndirectRange(); 1635 int32 indirectIndex = start / indirectSize; 1636 int32 index = start / directSize; 1637 1638 // distribute the blocks to the array and allocate 1639 // new array blocks when needed 1640 1641 CachedBlock cached(fVolume); 1642 CachedBlock cachedDirect(fVolume); 1643 block_run *array = NULL; 1644 uint32 runLength = run.Length(); 1645 1646 // TODO: the following code is commented - it could be used to 1647 // preallocate all needed block arrays to see in advance if the 1648 // allocation will succeed. 1649 // I will probably remove it later, because it's no perfect solution 1650 // either: if the allocation was broken up before (blocksNeeded != 0), 1651 // it doesn't guarantee anything. 1652 // And since failing in this case is not that common, it doesn't have 1653 // to be optimized in that way. 1654 // Anyway, I wanted to have it in CVS - all those lines, and they will 1655 // be removed soon :-) 1656 /* 1657 // allocate new block arrays if needed 1658 1659 off_t block = -1; 1660 1661 for (int32 i = 0;i < needed;i++) { 1662 // get the block to insert the run into 1663 block = fVolume->ToBlock(data->double_indirect) + i + indirectIndex / runsPerBlock; 1664 if (cached.BlockNumber() != block) 1665 array = (block_run *)cached.SetTo(block); 1666 1667 if (array == NULL) 1668 return B_ERROR; 1669 1670 status = AllocateBlockArray(transaction, array[i + indirectIndex % runsPerBlock]); 1671 if (status < B_OK) 1672 return status; 1673 } 1674 */ 1675 1676 while (run.length != 0) { 1677 // get the indirect array block 1678 if (array == NULL) { 1679 array = (block_run *)cached.SetTo(fVolume->ToBlock( 1680 data->double_indirect) + indirectIndex / runsPerBlock); 1681 if (array == NULL) 1682 return B_IO_ERROR; 1683 } 1684 1685 do { 1686 // do we need a new array block? 1687 if (array[indirectIndex % runsPerBlock].IsZero()) { 1688 status = _AllocateBlockArray(transaction, 1689 array[indirectIndex % runsPerBlock]); 1690 if (status < B_OK) 1691 return status; 1692 } 1693 1694 block_run *runs = (block_run *)cachedDirect.SetToWritable( 1695 transaction, fVolume->ToBlock(array[indirectIndex 1696 % runsPerBlock]) + index / runsPerBlock); 1697 if (runs == NULL) 1698 return B_IO_ERROR; 1699 1700 do { 1701 // insert the block_run into the array 1702 runs[index % runsPerBlock] = run; 1703 runs[index % runsPerBlock].length 1704 = HOST_ENDIAN_TO_BFS_INT16(NUM_ARRAY_BLOCKS); 1705 1706 // alter the remaining block_run 1707 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() 1708 + NUM_ARRAY_BLOCKS); 1709 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() 1710 - NUM_ARRAY_BLOCKS); 1711 } while ((++index % runsPerBlock) != 0 && run.length); 1712 } while ((index % runsPerArray) != 0 && run.length); 1713 1714 if (++indirectIndex % runsPerBlock == 0) { 1715 array = NULL; 1716 index = 0; 1717 } 1718 } 1719 1720 data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1721 data->MaxDoubleIndirectRange() 1722 + (runLength << fVolume->BlockShift())); 1723 data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64( 1724 data->max_double_indirect_range) : size; 1725 1726 continue; 1727 } 1728 1729 RETURN_ERROR(EFBIG); 1730 } 1731 // update the size of the data stream 1732 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1733 1734 return B_OK; 1735 } 1736 1737 1738 status_t 1739 Inode::_FreeStaticStreamArray(Transaction &transaction, int32 level, 1740 block_run run, off_t size, off_t offset, off_t &max) 1741 { 1742 int32 indirectSize = 0; 1743 if (level == 0) 1744 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift())) 1745 * (fVolume->BlockSize() / sizeof(block_run)); 1746 else if (level == 1) 1747 indirectSize = 4 << fVolume->BlockShift(); 1748 1749 off_t start; 1750 if (size > offset) 1751 start = size - offset; 1752 else 1753 start = 0; 1754 1755 int32 index = start / indirectSize; 1756 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1757 1758 CachedBlock cached(fVolume); 1759 off_t blockNumber = fVolume->ToBlock(run); 1760 1761 // set the file offset to the current block run 1762 offset += (off_t)index * indirectSize; 1763 1764 for (int32 i = index / runsPerBlock; i < run.Length(); i++) { 1765 block_run *array = (block_run *)cached.SetToWritable(transaction, 1766 blockNumber + i); 1767 if (array == NULL) 1768 RETURN_ERROR(B_ERROR); 1769 1770 for (index = index % runsPerBlock; index < runsPerBlock; index++) { 1771 if (array[index].IsZero()) { 1772 // we also want to break out of the outer loop 1773 i = run.Length(); 1774 break; 1775 } 1776 1777 status_t status = B_OK; 1778 if (level == 0) { 1779 status = _FreeStaticStreamArray(transaction, 1, array[index], 1780 size, offset, max); 1781 } else if (offset >= size) 1782 status = fVolume->Free(transaction, array[index]); 1783 else 1784 max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize); 1785 1786 if (status < B_OK) 1787 RETURN_ERROR(status); 1788 1789 if (offset >= size) 1790 array[index].SetTo(0, 0, 0); 1791 1792 offset += indirectSize; 1793 } 1794 index = 0; 1795 } 1796 return B_OK; 1797 } 1798 1799 1800 /*! 1801 Frees all block_runs in the array which come after the specified size. 1802 It also trims the last block_run that contain the size. 1803 "offset" and "max" are maintained until the last block_run that doesn't 1804 have to be freed - after this, the values won't be correct anymore, but 1805 will still assure correct function for all subsequent calls. 1806 "max" is considered to be in file system byte order. 1807 */ 1808 status_t 1809 Inode::_FreeStreamArray(Transaction &transaction, block_run *array, 1810 uint32 arrayLength, off_t size, off_t &offset, off_t &max) 1811 { 1812 PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n", 1813 arrayLength, size, offset, max)); 1814 1815 off_t newOffset = offset; 1816 uint32 i = 0; 1817 for (; i < arrayLength; i++, offset = newOffset) { 1818 if (array[i].IsZero()) 1819 break; 1820 1821 newOffset += (off_t)array[i].Length() << fVolume->BlockShift(); 1822 if (newOffset <= size) 1823 continue; 1824 1825 block_run run = array[i]; 1826 1827 // determine the block_run to be freed 1828 if (newOffset > size && offset < size) { 1829 // free partial block_run (and update the original block_run) 1830 run.start = array[i].start 1831 + ((size - offset) >> fVolume->BlockShift()) + 1; 1832 array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start() 1833 - array[i].Start()); 1834 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() 1835 - array[i].Length()); 1836 1837 if (run.length == 0) 1838 continue; 1839 1840 // update maximum range 1841 max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length() 1842 << fVolume->BlockShift())); 1843 } else { 1844 // free the whole block_run 1845 array[i].SetTo(0, 0, 0); 1846 1847 if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset) 1848 max = HOST_ENDIAN_TO_BFS_INT64(offset); 1849 } 1850 1851 if (fVolume->Free(transaction, run) < B_OK) 1852 return B_IO_ERROR; 1853 } 1854 return B_OK; 1855 } 1856 1857 1858 status_t 1859 Inode::_ShrinkStream(Transaction &transaction, off_t size) 1860 { 1861 data_stream *data = &Node().data; 1862 status_t status; 1863 1864 if (data->MaxDoubleIndirectRange() > size) { 1865 off_t *maxDoubleIndirect = &data->max_double_indirect_range; 1866 // gcc 4 work-around: "error: cannot bind packed field 1867 // 'data->data_stream::max_double_indirect_range' to 'off_t&'" 1868 status = _FreeStaticStreamArray(transaction, 0, data->double_indirect, 1869 size, data->MaxIndirectRange(), *maxDoubleIndirect); 1870 if (status < B_OK) 1871 return status; 1872 1873 if (size <= data->MaxIndirectRange()) { 1874 fVolume->Free(transaction, data->double_indirect); 1875 data->double_indirect.SetTo(0, 0, 0); 1876 data->max_double_indirect_range = 0; 1877 } 1878 } 1879 1880 if (data->MaxIndirectRange() > size) { 1881 CachedBlock cached(fVolume); 1882 off_t block = fVolume->ToBlock(data->indirect); 1883 off_t offset = data->MaxDirectRange(); 1884 1885 for (int32 i = 0; i < data->indirect.Length(); i++) { 1886 block_run *array = (block_run *)cached.SetToWritable(transaction, 1887 block + i); 1888 if (array == NULL) 1889 break; 1890 1891 off_t *maxIndirect = &data->max_indirect_range; 1892 // gcc 4 work-around: "error: cannot bind packed field 1893 // 'data->data_stream::max_indirect_range' to 'off_t&'" 1894 if (_FreeStreamArray(transaction, array, fVolume->BlockSize() 1895 / sizeof(block_run), size, offset, *maxIndirect) != B_OK) 1896 return B_IO_ERROR; 1897 } 1898 if (data->max_direct_range == data->max_indirect_range) { 1899 fVolume->Free(transaction, data->indirect); 1900 data->indirect.SetTo(0, 0, 0); 1901 data->max_indirect_range = 0; 1902 } 1903 } 1904 1905 if (data->MaxDirectRange() > size) { 1906 off_t offset = 0; 1907 off_t *maxDirect = &data->max_direct_range; 1908 // gcc 4 work-around: "error: cannot bind packed field 1909 // 'data->data_stream::max_direct_range' to 'off_t&'" 1910 status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, 1911 size, offset, *maxDirect); 1912 if (status < B_OK) 1913 return status; 1914 } 1915 1916 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1917 return B_OK; 1918 } 1919 1920 1921 status_t 1922 Inode::SetFileSize(Transaction &transaction, off_t size) 1923 { 1924 if (size < 0) 1925 return B_BAD_VALUE; 1926 1927 off_t oldSize = Size(); 1928 1929 if (size == oldSize) 1930 return B_OK; 1931 1932 // should the data stream grow or shrink? 1933 status_t status; 1934 if (size > oldSize) { 1935 status = _GrowStream(transaction, size); 1936 if (status < B_OK) { 1937 // if the growing of the stream fails, the whole operation 1938 // fails, so we should shrink the stream to its former size 1939 _ShrinkStream(transaction, oldSize); 1940 } 1941 } else 1942 status = _ShrinkStream(transaction, size); 1943 1944 if (status < B_OK) 1945 return status; 1946 1947 file_cache_set_size(FileCache(), size); 1948 return WriteBack(transaction); 1949 } 1950 1951 1952 status_t 1953 Inode::Append(Transaction &transaction, off_t bytes) 1954 { 1955 return SetFileSize(transaction, Size() + bytes); 1956 } 1957 1958 1959 /*! 1960 Checks wether or not this inode's data stream needs to be trimmed 1961 because of an earlier preallocation. 1962 Returns true if there are any blocks to be trimmed. 1963 */ 1964 bool 1965 Inode::NeedsTrimming() 1966 { 1967 // We never trim preallocated index blocks to make them grow as smooth as possible. 1968 // There are only few indices anyway, so this doesn't hurt 1969 // Also, if an inode is already in deleted state, we don't bother trimming it 1970 if (IsIndex() || IsDeleted()) 1971 return false; 1972 1973 off_t roundedSize = (Size() + fVolume->BlockSize() - 1) 1974 & ~(fVolume->BlockSize() - 1); 1975 1976 return Node().data.MaxDirectRange() > roundedSize 1977 || Node().data.MaxIndirectRange() > roundedSize 1978 || Node().data.MaxDoubleIndirectRange() > roundedSize; 1979 } 1980 1981 1982 status_t 1983 Inode::TrimPreallocation(Transaction &transaction) 1984 { 1985 status_t status = _ShrinkStream(transaction, Size()); 1986 if (status < B_OK) 1987 return status; 1988 1989 return WriteBack(transaction); 1990 } 1991 1992 1993 //! Frees the file's data stream and removes all attributes 1994 status_t 1995 Inode::Free(Transaction &transaction) 1996 { 1997 FUNCTION(); 1998 1999 // Perhaps there should be an implementation of Inode::ShrinkStream() that 2000 // just frees the data_stream, but doesn't change the inode (since it is 2001 // freed anyway) - that would make an undelete command possible 2002 status_t status = SetFileSize(transaction, 0); 2003 if (status < B_OK) 2004 return status; 2005 2006 // Free all attributes, and remove their indices 2007 { 2008 // We have to limit the scope of AttributeIterator, so that its 2009 // destructor is not called after the inode is deleted 2010 AttributeIterator iterator(this); 2011 2012 char name[B_FILE_NAME_LENGTH]; 2013 uint32 type; 2014 size_t length; 2015 ino_t id; 2016 while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK) { 2017 RemoveAttribute(transaction, name); 2018 } 2019 } 2020 2021 if (WriteBack(transaction) < B_OK) 2022 return B_IO_ERROR; 2023 2024 return fVolume->Free(transaction, BlockRun()); 2025 } 2026 2027 2028 //! Synchronizes (writes back to disk) the file stream of the inode. 2029 status_t 2030 Inode::Sync() 2031 { 2032 if (FileCache()) 2033 return file_cache_sync(FileCache()); 2034 2035 // We may also want to flush the attribute's data stream to 2036 // disk here... (do we?) 2037 2038 if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0) 2039 return B_OK; 2040 2041 data_stream *data = &Node().data; 2042 status_t status = B_OK; 2043 2044 // flush direct range 2045 2046 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 2047 if (data->direct[i].IsZero()) 2048 return B_OK; 2049 2050 status = block_cache_sync_etc(fVolume->BlockCache(), 2051 fVolume->ToBlock(data->direct[i]), data->direct[i].Length()); 2052 if (status != B_OK) 2053 return status; 2054 } 2055 2056 // flush indirect range 2057 2058 if (data->max_indirect_range == 0) 2059 return B_OK; 2060 2061 CachedBlock cached(fVolume); 2062 off_t block = fVolume->ToBlock(data->indirect); 2063 int32 count = fVolume->BlockSize() / sizeof(block_run); 2064 2065 for (int32 j = 0; j < data->indirect.Length(); j++) { 2066 block_run *runs = (block_run *)cached.SetTo(block + j); 2067 if (runs == NULL) 2068 break; 2069 2070 for (int32 i = 0; i < count; i++) { 2071 if (runs[i].IsZero()) 2072 return B_OK; 2073 2074 status = block_cache_sync_etc(fVolume->BlockCache(), 2075 fVolume->ToBlock(runs[i]), runs[i].Length()); 2076 if (status != B_OK) 2077 return status; 2078 } 2079 } 2080 2081 // flush double indirect range 2082 2083 if (data->max_double_indirect_range == 0) 2084 return B_OK; 2085 2086 off_t indirectBlock = fVolume->ToBlock(data->double_indirect); 2087 2088 for (int32 l = 0; l < data->double_indirect.Length(); l++) { 2089 block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l); 2090 if (indirectRuns == NULL) 2091 return B_FILE_ERROR; 2092 2093 CachedBlock directCached(fVolume); 2094 2095 for (int32 k = 0; k < count; k++) { 2096 if (indirectRuns[k].IsZero()) 2097 return B_OK; 2098 2099 block = fVolume->ToBlock(indirectRuns[k]); 2100 for (int32 j = 0; j < indirectRuns[k].Length(); j++) { 2101 block_run *runs = (block_run *)directCached.SetTo(block + j); 2102 if (runs == NULL) 2103 return B_FILE_ERROR; 2104 2105 for (int32 i = 0; i < count; i++) { 2106 if (runs[i].IsZero()) 2107 return B_OK; 2108 2109 // TODO: combine single block_runs to bigger ones when 2110 // they are adjacent 2111 status = block_cache_sync_etc(fVolume->BlockCache(), 2112 fVolume->ToBlock(runs[i]), runs[i].Length()); 2113 if (status != B_OK) 2114 return status; 2115 } 2116 } 2117 } 2118 } 2119 return B_OK; 2120 } 2121 2122 2123 // #pragma mark - creation/deletion 2124 2125 2126 status_t 2127 Inode::Remove(Transaction &transaction, const char *name, ino_t *_id, 2128 bool isDirectory) 2129 { 2130 BPlusTree *tree; 2131 if (GetTree(&tree) != B_OK) 2132 RETURN_ERROR(B_BAD_VALUE); 2133 2134 RecursiveLocker locker(fVolume->Lock()); 2135 2136 // does the file even exist? 2137 off_t id; 2138 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK) 2139 return B_ENTRY_NOT_FOUND; 2140 2141 if (_id) 2142 *_id = id; 2143 2144 Vnode vnode(fVolume, id); 2145 Inode *inode; 2146 status_t status = vnode.Get(&inode); 2147 if (status < B_OK) { 2148 REPORT_ERROR(status); 2149 return B_ENTRY_NOT_FOUND; 2150 } 2151 2152 // Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR 2153 // bit is set for indices in BFS, not for attribute directories) - but you 2154 // should really be able to do whatever you want with your indices 2155 // without having to remove all files first :) 2156 if (!inode->IsIndex()) { 2157 // if it's not of the correct type, don't delete it! 2158 if (inode->IsContainer() != isDirectory) 2159 return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY; 2160 2161 // only delete empty directories 2162 if (isDirectory && !inode->IsEmpty()) 2163 return B_DIRECTORY_NOT_EMPTY; 2164 } 2165 2166 // remove_vnode() allows the inode to be accessed until the last put_vnode() 2167 status = remove_vnode(fVolume->ID(), id); 2168 if (status != B_OK) 2169 return status; 2170 2171 if (tree->Remove(transaction, name, id) < B_OK) { 2172 unremove_vnode(fVolume->ID(), id); 2173 RETURN_ERROR(B_ERROR); 2174 } 2175 2176 #ifdef DEBUG 2177 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) == B_OK) { 2178 DIE(("deleted entry still there")); 2179 } 2180 #endif 2181 2182 // update the inode, so that no one will ever doubt it's deleted :-) 2183 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 2184 inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 2185 2186 // In balance to the Inode::Create() method, the main indices 2187 // are updated here (name, size, & last_modified) 2188 2189 Index index(fVolume); 2190 if (inode->IsRegularNode()) { 2191 index.RemoveName(transaction, name, inode); 2192 // If removing from the index fails, it is not regarded as a 2193 // fatal error and will not be reported back! 2194 // Deleted inodes won't be visible in queries anyway. 2195 } 2196 2197 if (inode->IsFile() || inode->IsSymLink()) { 2198 if (inode->IsFile()) 2199 index.RemoveSize(transaction, inode); 2200 index.RemoveLastModified(transaction, inode); 2201 } 2202 2203 return inode->WriteBack(transaction); 2204 } 2205 2206 2207 /*! 2208 Creates the inode with the specified parent directory, and automatically 2209 adds the created inode to that parent directory. If an attribute directory 2210 is created, it will also automatically be added to the parent inode as 2211 such. However, the indices root node, and the regular root node won't be 2212 added to the super block. 2213 It will also create the initial B+tree for the inode if it's a directory 2214 of any kind. 2215 If the "_id" or "_inode" variable is given and non-NULL to store the 2216 inode's ID, the inode stays locked - you have to call put_vnode() if you 2217 don't use it anymore. 2218 If the node already exists, this method will fail if O_EXCL is set, or it's 2219 a directory or a symlink. Otherwise, it will just be returned. If O_TRUNC 2220 has been specified, the file will also be truncated. 2221 */ 2222 status_t 2223 Inode::Create(Transaction &transaction, Inode *parent, const char *name, 2224 int32 mode, int openMode, uint32 type, bool *_created, ino_t *_id, 2225 Inode **_inode) 2226 { 2227 FUNCTION_START(("name = %s, mode = %ld\n", name, mode)); 2228 2229 block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0); 2230 Volume *volume = transaction.GetVolume(); 2231 BPlusTree *tree = NULL; 2232 2233 if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) { 2234 // check if the file already exists in the directory 2235 if (parent->GetTree(&tree) != B_OK) 2236 RETURN_ERROR(B_BAD_VALUE); 2237 } 2238 2239 WriteLocked locker(parent != NULL ? &parent->Lock() : NULL); 2240 // the parent directory is locked during the whole inode creation 2241 2242 if (tree != NULL) { 2243 // does the file already exist? 2244 off_t offset; 2245 if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) { 2246 // return if the file should be a directory/link or opened in exclusive mode 2247 if (S_ISDIR(mode) || S_ISLNK(mode) || openMode & O_EXCL) 2248 return B_FILE_EXISTS; 2249 2250 Vnode vnode(volume, offset); 2251 Inode *inode; 2252 status_t status = vnode.Get(&inode); 2253 if (status < B_OK) { 2254 REPORT_ERROR(status); 2255 return B_ENTRY_NOT_FOUND; 2256 } 2257 2258 // if it's a directory, bail out! 2259 if (inode->IsDirectory()) 2260 return B_IS_A_DIRECTORY; 2261 2262 // we want to open the file, so we should have the rights to do so 2263 if (inode->CheckPermissions(openModeToAccess(openMode)) != B_OK) 2264 return B_NOT_ALLOWED; 2265 2266 if (openMode & O_TRUNC) { 2267 // we need write access in order to truncate the file 2268 status = inode->CheckPermissions(W_OK); 2269 if (status != B_OK) 2270 return status; 2271 2272 // truncate the existing file 2273 WriteLocked locked(inode->Lock()); 2274 2275 status_t status = inode->SetFileSize(transaction, 0); 2276 if (status >= B_OK) 2277 status = inode->WriteBack(transaction); 2278 2279 if (status < B_OK) 2280 return status; 2281 } 2282 2283 if (_created) 2284 *_created = false; 2285 if (_id) 2286 *_id = inode->ID(); 2287 if (_inode) 2288 *_inode = inode; 2289 2290 // only keep the vnode in memory if the _id or _inode pointer is provided 2291 if (_id != NULL || _inode != NULL) 2292 vnode.Keep(); 2293 2294 return B_OK; 2295 } 2296 } else if (parent && (mode & S_ATTR_DIR) == 0) 2297 return B_BAD_VALUE; 2298 2299 status_t status; 2300 2301 // do we have the power to create new files at all? 2302 if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK) 2303 RETURN_ERROR(status); 2304 2305 // allocate space for the new inode 2306 InodeAllocator allocator(transaction); 2307 block_run run; 2308 Inode *inode; 2309 status = allocator.New(&parentRun, mode, run, &inode); 2310 if (status < B_OK) 2311 return status; 2312 2313 // Initialize the parts of the bfs_inode structure that 2314 // InodeAllocator::New() hasn't touched yet 2315 2316 bfs_inode *node = &inode->Node(); 2317 2318 if (parent == NULL) { 2319 // we set the parent to itself in this case 2320 // (only happens for the root and indices node) 2321 node->parent = run; 2322 } else 2323 node->parent = parentRun; 2324 2325 node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid()); 2326 node->gid = HOST_ENDIAN_TO_BFS_INT32(parent 2327 ? parent->Node().GroupID() : getegid()); 2328 // the group ID is inherited from the parent, if available 2329 2330 node->type = HOST_ENDIAN_TO_BFS_INT32(type); 2331 2332 inode->WriteBack(transaction); 2333 // make sure the initialized node is available to others 2334 2335 // only add the name to regular files, directories, or symlinks 2336 // don't add it to attributes, or indices 2337 if (tree && inode->IsRegularNode() && inode->SetName(transaction, name) < B_OK) 2338 return B_ERROR; 2339 2340 // Initialize b+tree if it's a directory (and add "." & ".." if it's 2341 // a standard directory for files - not for attributes or indices) 2342 if (inode->IsContainer()) { 2343 status = allocator.CreateTree(); 2344 if (status < B_OK) 2345 return status; 2346 } 2347 2348 // Add a link to the inode from the parent, depending on its type 2349 // (the vnode is not published yet, so it is safe to make the inode 2350 // accessable to the file system here) 2351 if (tree) { 2352 status = tree->Insert(transaction, name, inode->ID()); 2353 } else if (parent && (mode & S_ATTR_DIR) != 0) { 2354 parent->Attributes() = run; 2355 status = parent->WriteBack(transaction); 2356 } 2357 2358 // Note, we only care if the inode could be made accessable for the 2359 // two cases above; the root node or the indices root node must 2360 // handle this case on their own (or other cases where "parent" is 2361 // NULL) 2362 if (status < B_OK) 2363 RETURN_ERROR(status); 2364 2365 // Update the main indices (name, size & last_modified) 2366 // (live queries might want to access us after this) 2367 2368 Index index(volume); 2369 if (inode->IsRegularNode() && name != NULL) { 2370 // the name index only contains regular files 2371 // (but not the root node where name == NULL) 2372 status = index.InsertName(transaction, name, inode); 2373 if (status < B_OK && status != B_BAD_INDEX) { 2374 // We have to remove the node from the parent at this point, 2375 // because the InodeAllocator destructor can't handle this 2376 // case (and if it fails, we can't do anything about it...) 2377 if (tree) 2378 tree->Remove(transaction, name, inode->ID()); 2379 else if (parent != NULL && (mode & S_ATTR_DIR) != 0) 2380 parent->Node().attributes.SetTo(0, 0, 0); 2381 2382 RETURN_ERROR(status); 2383 } 2384 } 2385 2386 inode->UpdateOldLastModified(); 2387 2388 // The "size" & "last_modified" indices don't contain directories 2389 if (inode->IsFile() || inode->IsSymLink()) { 2390 // if adding to these indices fails, the inode creation will not be harmed; 2391 // they are considered less important than the "name" index 2392 if (inode->IsFile()) 2393 index.InsertSize(transaction, inode); 2394 index.InsertLastModified(transaction, inode); 2395 } 2396 2397 // Everything worked well until this point, we have a fully 2398 // initialized inode, and we want to keep it 2399 allocator.Keep(); 2400 2401 if (inode->IsFile() || inode->IsAttribute()) { 2402 inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(), 2403 inode->Size(), volume->Device())); 2404 } 2405 2406 if (_created) 2407 *_created = true; 2408 if (_id != NULL) 2409 *_id = inode->ID(); 2410 if (_inode != NULL) 2411 *_inode = inode; 2412 2413 // if either _id or _inode is passed, we will keep the inode locked 2414 if (_id == NULL && _inode == NULL) 2415 put_vnode(volume->ID(), inode->ID()); 2416 2417 return B_OK; 2418 } 2419 2420 2421 // #pragma mark - AttributeIterator 2422 2423 2424 AttributeIterator::AttributeIterator(Inode *inode) 2425 : 2426 fCurrentSmallData(0), 2427 fInode(inode), 2428 fAttributes(NULL), 2429 fIterator(NULL), 2430 fBuffer(NULL) 2431 { 2432 inode->_AddIterator(this); 2433 } 2434 2435 2436 AttributeIterator::~AttributeIterator() 2437 { 2438 if (fAttributes) 2439 put_vnode(fAttributes->GetVolume()->ID(), fAttributes->ID()); 2440 2441 delete fIterator; 2442 fInode->_RemoveIterator(this); 2443 } 2444 2445 2446 status_t 2447 AttributeIterator::Rewind() 2448 { 2449 fCurrentSmallData = 0; 2450 2451 if (fIterator != NULL) 2452 fIterator->Rewind(); 2453 2454 return B_OK; 2455 } 2456 2457 2458 status_t 2459 AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, 2460 ino_t *_id) 2461 { 2462 // read attributes out of the small data section 2463 2464 if (fCurrentSmallData >= 0) { 2465 NodeGetter nodeGetter(fInode->GetVolume(), fInode); 2466 const bfs_inode *node = nodeGetter.Node(); 2467 const small_data *item = ((bfs_inode *)node)->SmallDataStart(); 2468 2469 fInode->SmallDataLock().Lock(); 2470 2471 int32 i = 0; 2472 for (;;item = item->Next()) { 2473 if (item->IsLast(node)) 2474 break; 2475 2476 if (item->NameSize() == FILE_NAME_NAME_LENGTH 2477 && *item->Name() == FILE_NAME_NAME) 2478 continue; 2479 2480 if (i++ == fCurrentSmallData) 2481 break; 2482 } 2483 2484 if (!item->IsLast(node)) { 2485 strncpy(name, item->Name(), B_FILE_NAME_LENGTH); 2486 *_type = item->Type(); 2487 *_length = item->NameSize(); 2488 *_id = (ino_t)fCurrentSmallData; 2489 2490 fCurrentSmallData = i; 2491 } 2492 else { 2493 // stop traversing the small_data section 2494 fCurrentSmallData = -1; 2495 } 2496 2497 fInode->SmallDataLock().Unlock(); 2498 2499 if (fCurrentSmallData != -1) 2500 return B_OK; 2501 } 2502 2503 // read attributes out of the attribute directory 2504 2505 if (fInode->Attributes().IsZero()) 2506 return B_ENTRY_NOT_FOUND; 2507 2508 Volume *volume = fInode->GetVolume(); 2509 2510 // if you haven't yet access to the attributes directory, get it 2511 if (fAttributes == NULL) { 2512 if (get_vnode(volume->ID(), volume->ToVnode(fInode->Attributes()), 2513 (void **)&fAttributes) != B_OK) { 2514 FATAL(("get_vnode() failed in AttributeIterator::GetNext(ino_t" 2515 " = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2516 return B_ENTRY_NOT_FOUND; 2517 } 2518 2519 BPlusTree *tree; 2520 if (fAttributes->GetTree(&tree) < B_OK 2521 || (fIterator = new TreeIterator(tree)) == NULL) { 2522 FATAL(("could not get tree in AttributeIterator::GetNext(ino_t" 2523 " = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2524 return B_ENTRY_NOT_FOUND; 2525 } 2526 } 2527 2528 uint16 length; 2529 ino_t id; 2530 status_t status = fIterator->GetNextEntry(name, &length, 2531 B_FILE_NAME_LENGTH, &id); 2532 if (status < B_OK) 2533 return status; 2534 2535 Vnode vnode(volume,id); 2536 Inode *attribute; 2537 if ((status = vnode.Get(&attribute)) == B_OK) { 2538 *_type = attribute->Type(); 2539 *_length = attribute->Size(); 2540 *_id = id; 2541 } 2542 2543 return status; 2544 } 2545 2546 2547 void 2548 AttributeIterator::Update(uint16 index, int8 change) 2549 { 2550 // fCurrentSmallData points already to the next item 2551 if (index < fCurrentSmallData) 2552 fCurrentSmallData += change; 2553 } 2554 2555