1 /* 2 * Copyright 2001-2007, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 //! inode access functions 7 8 9 #include "Debug.h" 10 #include "Inode.h" 11 #include "BPlusTree.h" 12 #include "Index.h" 13 14 15 class InodeAllocator { 16 public: 17 InodeAllocator(Transaction &transaction); 18 ~InodeAllocator(); 19 20 status_t New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode); 21 status_t CreateTree(); 22 status_t Keep(); 23 24 private: 25 Transaction *fTransaction; 26 block_run fRun; 27 Inode *fInode; 28 }; 29 30 31 InodeAllocator::InodeAllocator(Transaction &transaction) 32 : 33 fTransaction(&transaction), 34 fInode(NULL) 35 { 36 } 37 38 39 InodeAllocator::~InodeAllocator() 40 { 41 if (fTransaction != NULL) { 42 Volume *volume = fTransaction->GetVolume(); 43 44 if (fInode != NULL) { 45 fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 46 // this unblocks any pending bfs_read_vnode() calls 47 fInode->Free(*fTransaction); 48 remove_vnode(volume->ID(), fInode->ID()); 49 } else 50 volume->Free(*fTransaction, fRun); 51 } 52 53 delete fInode; 54 } 55 56 57 status_t 58 InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode) 59 { 60 Volume *volume = fTransaction->GetVolume(); 61 62 status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode, fRun); 63 if (status < B_OK) { 64 // don't free the space in the destructor, because 65 // the allocation failed 66 fTransaction = NULL; 67 RETURN_ERROR(status); 68 } 69 70 run = fRun; 71 fInode = new Inode(volume, *fTransaction, volume->ToVnode(run), mode, run); 72 if (fInode == NULL) 73 RETURN_ERROR(B_NO_MEMORY); 74 75 if (volume->ID() >= 0) { 76 status = new_vnode(volume->ID(), fInode->ID(), fInode); 77 if (status < B_OK) { 78 delete fInode; 79 fInode = NULL; 80 RETURN_ERROR(status); 81 } 82 } 83 84 *_inode = fInode; 85 return B_OK; 86 } 87 88 89 status_t 90 InodeAllocator::CreateTree() 91 { 92 Volume *volume = fTransaction->GetVolume(); 93 94 // force S_STR_INDEX to be set, if no type is set 95 if ((fInode->Mode() & S_INDEX_TYPES) == 0) 96 fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX); 97 98 BPlusTree *tree = fInode->fTree = new BPlusTree(*fTransaction, fInode); 99 if (tree == NULL || tree->InitCheck() < B_OK) 100 return B_ERROR; 101 102 if (fInode->IsRegularNode()) { 103 if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK 104 || tree->Insert(*fTransaction, "..", volume->ToVnode(fInode->Parent())) < B_OK) 105 return B_ERROR; 106 } 107 return B_OK; 108 } 109 110 111 status_t 112 InodeAllocator::Keep() 113 { 114 ASSERT(fInode != NULL && fTransaction != NULL); 115 Volume *volume = fTransaction->GetVolume(); 116 117 status_t status = fInode->WriteBack(*fTransaction); 118 if (status < B_OK) { 119 FATAL(("writing new inode %Ld failed!\n", fInode->ID())); 120 return status; 121 } 122 123 if (!fInode->IsSymLink() && volume->ID() >= 0) 124 status = publish_vnode(volume->ID(), fInode->ID(), fInode); 125 126 fTransaction = NULL; 127 fInode = NULL; 128 129 return status; 130 } 131 132 133 // #pragma mark - 134 135 136 status_t 137 bfs_inode::InitCheck(Volume *volume) 138 { 139 if (Magic1() != INODE_MAGIC1 140 || !(Flags() & INODE_IN_USE) 141 || inode_num.Length() != 1 142 // matches inode size? 143 || (uint32)InodeSize() != volume->InodeSize() 144 // parent resides on disk? 145 || parent.AllocationGroup() > int32(volume->AllocationGroups()) 146 || parent.AllocationGroup() < 0 147 || parent.Start() > (1L << volume->AllocationGroupShift()) 148 || parent.Length() != 1 149 // attributes, too? 150 || attributes.AllocationGroup() > int32(volume->AllocationGroups()) 151 || attributes.AllocationGroup() < 0 152 || attributes.Start() > (1L << volume->AllocationGroupShift())) 153 RETURN_ERROR(B_BAD_DATA); 154 155 if (Flags() & INODE_DELETED) 156 return B_NOT_ALLOWED; 157 158 // ToDo: Add some tests to check the integrity of the other stuff here, 159 // especially for the data_stream! 160 161 return B_OK; 162 } 163 164 165 // #pragma mark - Inode 166 167 168 Inode::Inode(Volume *volume, vnode_id id) 169 : 170 fVolume(volume), 171 fID(id), 172 fTree(NULL), 173 fAttributes(NULL), 174 fCache(NULL) 175 { 176 PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume, id, this)); 177 178 NodeGetter node(volume, this); 179 memcpy(&fNode, node.Node(), sizeof(bfs_inode)); 180 181 char lockName[B_OS_NAME_LENGTH]; 182 snprintf(lockName, sizeof(lockName), "bfs inode %d.%d", 183 (int)BlockRun().AllocationGroup(), BlockRun().Start()); 184 fLock.Initialize(lockName); 185 186 // these two will help to maintain the indices 187 fOldSize = Size(); 188 fOldLastModified = LastModified(); 189 190 if (IsContainer()) 191 fTree = new BPlusTree(this); 192 if (IsFile() || IsAttribute()) 193 SetFileCache(file_cache_create(fVolume->ID(), ID(), Size(), fVolume->Device())); 194 } 195 196 197 Inode::Inode(Volume *volume, Transaction &transaction, vnode_id id, mode_t mode, block_run &run) 198 : 199 fVolume(volume), 200 fID(id), 201 fTree(NULL), 202 fAttributes(NULL), 203 fCache(NULL) 204 { 205 PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n", 206 volume, &transaction, id, this)); 207 208 char lockName[B_OS_NAME_LENGTH]; 209 snprintf(lockName, sizeof(lockName), "bfs inode+%d.%d", 210 (int)run.AllocationGroup(), run.Start()); 211 fLock.Initialize(lockName); 212 213 NodeGetter node(volume, transaction, this, true); 214 memset(&fNode, 0, sizeof(bfs_inode)); 215 216 // Initialize the bfs_inode structure -- it's not written back to disk 217 // here, because it will be done so already when the inode could be 218 // created completely. 219 220 Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1); 221 Node().inode_num = run; 222 Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode); 223 Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 224 225 Node().create_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) << INODE_TIME_SHIFT); 226 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64(Node().create_time 227 | (volume->GetUniqueID() & INODE_TIME_MASK)); 228 // we use Volume::GetUniqueID() to avoid having too many duplicates in the 229 // last_modified index 230 231 Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize()); 232 233 // these two will help to maintain the indices 234 fOldSize = Size(); 235 fOldLastModified = LastModified(); 236 } 237 238 239 Inode::~Inode() 240 { 241 PRINT(("Inode::~Inode() @ %p\n", this)); 242 243 file_cache_delete(FileCache()); 244 delete fTree; 245 } 246 247 248 status_t 249 Inode::InitCheck(bool checkNode) 250 { 251 // test inode magic and flags 252 if (checkNode) { 253 status_t status = Node().InitCheck(fVolume); 254 if (status == B_BUSY) 255 return B_BUSY; 256 257 if (status < B_OK) { 258 FATAL(("inode at block %Ld corrupt!\n", BlockNumber())); 259 RETURN_ERROR(B_BAD_DATA); 260 } 261 } 262 263 if (IsContainer()) { 264 // inodes that have a 265 if (fTree == NULL) 266 RETURN_ERROR(B_NO_MEMORY); 267 268 status_t status = fTree->InitCheck(); 269 if (status < B_OK) { 270 FATAL(("inode tree at block %Ld corrupt!\n", BlockNumber())); 271 RETURN_ERROR(B_BAD_DATA); 272 } 273 } 274 275 // it's more important to know that the inode is corrupt 276 // so we check for the lock not until here 277 return fLock.InitCheck(); 278 } 279 280 281 status_t 282 Inode::WriteBack(Transaction &transaction) 283 { 284 NodeGetter node(fVolume, transaction, this); 285 if (node.WritableNode() == NULL) 286 return B_IO_ERROR; 287 288 memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode)); 289 return B_OK; 290 } 291 292 293 status_t 294 Inode::CheckPermissions(int accessMode) const 295 { 296 uid_t user = geteuid(); 297 gid_t group = getegid(); 298 299 // you never have write access to a read-only volume 300 if (accessMode & W_OK && fVolume->IsReadOnly()) 301 return B_READ_ONLY_DEVICE; 302 303 // root users always have full access (but they can't execute anything) 304 if (user == 0 && !((accessMode & X_OK) && (Mode() & S_IXUSR) == 0)) 305 return B_OK; 306 307 // shift mode bits, to check directly against accessMode 308 mode_t mode = Mode(); 309 if (user == (uid_t)fNode.UserID()) 310 mode >>= 6; 311 else if (group == (gid_t)fNode.GroupID()) 312 mode >>= 3; 313 314 if (accessMode & ~(mode & S_IRWXO)) 315 return B_NOT_ALLOWED; 316 317 return B_OK; 318 } 319 320 321 // #pragma mark - attributes 322 323 324 void 325 Inode::_AddIterator(AttributeIterator *iterator) 326 { 327 if (fSmallDataLock.Lock() < B_OK) 328 return; 329 330 fIterators.Add(iterator); 331 332 fSmallDataLock.Unlock(); 333 } 334 335 336 void 337 Inode::_RemoveIterator(AttributeIterator *iterator) 338 { 339 if (fSmallDataLock.Lock() < B_OK) 340 return; 341 342 fIterators.Remove(iterator); 343 344 fSmallDataLock.Unlock(); 345 } 346 347 348 /*! 349 Tries to free up "bytes" space in the small_data section by moving 350 attributes to real files. Used for system attributes like the name. 351 You need to hold the fSmallDataLock when you call this method 352 */ 353 status_t 354 Inode::_MakeSpaceForSmallData(Transaction &transaction, bfs_inode *node, 355 const char *name, int32 bytes) 356 { 357 ASSERT(fSmallDataLock.IsLocked()); 358 359 while (bytes > 0) { 360 small_data *item = node->SmallDataStart(), *max = NULL; 361 int32 index = 0, maxIndex = 0; 362 for (; !item->IsLast(node); item = item->Next(), index++) { 363 // should not remove those 364 if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name())) 365 continue; 366 367 if (max == NULL || max->Size() < item->Size()) { 368 maxIndex = index; 369 max = item; 370 } 371 372 // remove the first one large enough to free the needed amount of bytes 373 if (bytes < (int32)item->Size()) 374 break; 375 } 376 377 if (item->IsLast(node) || (int32)item->Size() < bytes) 378 return B_ERROR; 379 380 bytes -= max->Size(); 381 382 // Move the attribute to a real attribute file 383 // Luckily, this doesn't cause any index updates 384 385 Inode *attribute; 386 status_t status = CreateAttribute(transaction, item->Name(), item->Type(), 387 &attribute); 388 if (status < B_OK) 389 RETURN_ERROR(status); 390 391 size_t length = item->DataSize(); 392 status = attribute->WriteAt(transaction, 0, item->Data(), &length); 393 394 ReleaseAttribute(attribute); 395 396 if (status < B_OK) { 397 Vnode vnode(fVolume, Attributes()); 398 Inode *attributes; 399 if (vnode.Get(&attributes) < B_OK 400 || attributes->Remove(transaction, name) < B_OK) { 401 FATAL(("Could not remove newly created attribute!\n")); 402 } 403 404 RETURN_ERROR(status); 405 } 406 407 _RemoveSmallData(node, max, maxIndex); 408 } 409 return B_OK; 410 } 411 412 413 /*! 414 Private function which removes the given attribute from the small_data 415 section. 416 You need to hold the fSmallDataLock when you call this method 417 */ 418 status_t 419 Inode::_RemoveSmallData(bfs_inode *node, small_data *item, int32 index) 420 { 421 ASSERT(fSmallDataLock.IsLocked()); 422 423 small_data *next = item->Next(); 424 if (!next->IsLast(node)) { 425 // find the last attribute 426 small_data *last = next; 427 while (!last->IsLast(node)) 428 last = last->Next(); 429 430 int32 size = (uint8 *)last - (uint8 *)next; 431 if (size < 0 || size > (uint8 *)node + fVolume->BlockSize() - (uint8 *)next) 432 return B_BAD_DATA; 433 434 memmove(item, next, size); 435 436 // Move the "last" one to its new location and 437 // correctly terminate the small_data section 438 last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item)); 439 memset(last, 0, (uint8 *)node + fVolume->BlockSize() - (uint8 *)last); 440 } else 441 memset(item, 0, item->Size()); 442 443 // update all current iterators 444 AttributeIterator *iterator = NULL; 445 while ((iterator = fIterators.Next(iterator)) != NULL) { 446 iterator->Update(index, -1); 447 } 448 449 return B_OK; 450 } 451 452 453 //! Removes the given attribute from the small_data section. 454 status_t 455 Inode::_RemoveSmallData(Transaction &transaction, NodeGetter &nodeGetter, 456 const char *name) 457 { 458 if (name == NULL) 459 return B_BAD_VALUE; 460 461 bfs_inode *node = nodeGetter.WritableNode(); 462 SimpleLocker locker(fSmallDataLock); 463 464 // search for the small_data item 465 466 small_data *item = node->SmallDataStart(); 467 int32 index = 0; 468 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 469 item = item->Next(); 470 index++; 471 } 472 473 if (item->IsLast(node)) 474 return B_ENTRY_NOT_FOUND; 475 476 nodeGetter.MakeWritable(transaction); 477 status_t status = _RemoveSmallData(node, item, index); 478 if (status == B_OK) 479 status = WriteBack(transaction); 480 481 return status; 482 } 483 484 485 /*! 486 Try to place the given attribute in the small_data section - if the 487 new attribute is too big to fit in that section, it returns B_DEVICE_FULL. 488 In that case, the attribute should be written to a real attribute file; 489 it's the caller's responsibility to remove any existing attributes in the small 490 data section if that's the case. 491 492 Note that you need to write back the inode yourself after having called that 493 method - it's a bad API decision that it needs a transaction but enforces you 494 to write back the inode all by yourself, but it's just more efficient in most 495 cases... 496 */ 497 status_t 498 Inode::_AddSmallData(Transaction &transaction, NodeGetter &nodeGetter, 499 const char *name, uint32 type, const uint8 *data, size_t length, bool force) 500 { 501 bfs_inode *node = nodeGetter.WritableNode(); 502 503 if (node == NULL || name == NULL || data == NULL) 504 return B_BAD_VALUE; 505 506 // reject any requests that can't fit into the small_data section 507 uint32 nameLength = strlen(name); 508 uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1; 509 if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode)) 510 return B_DEVICE_FULL; 511 512 nodeGetter.MakeWritable(transaction); 513 SimpleLocker locker(fSmallDataLock); 514 515 // Find the last item or one with the same name we have to add 516 small_data *item = node->SmallDataStart(); 517 int32 index = 0; 518 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 519 item = item->Next(); 520 index++; 521 } 522 523 // is the attribute already in the small_data section? 524 // then just replace the data part of that one 525 if (!item->IsLast(node)) { 526 // find last attribute 527 small_data *last = item; 528 while (!last->IsLast(node)) 529 last = last->Next(); 530 531 // try to change the attributes value 532 if (item->data_size > length 533 || force 534 || ((uint8 *)last + length - item->DataSize()) <= ((uint8 *)node + fVolume->InodeSize())) { 535 // make room for the new attribute if needed (and we are forced to do so) 536 if (force 537 && ((uint8 *)last + length - item->DataSize()) > ((uint8 *)node + fVolume->InodeSize())) { 538 // We also take the free space at the end of the small_data section 539 // into account, and request only what's really needed 540 uint32 needed = length - item->DataSize() - 541 (uint32)((uint8 *)node + fVolume->InodeSize() - (uint8 *)last); 542 543 if (_MakeSpaceForSmallData(transaction, node, name, needed) < B_OK) 544 return B_ERROR; 545 546 // reset our pointers 547 item = node->SmallDataStart(); 548 index = 0; 549 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 550 item = item->Next(); 551 index++; 552 } 553 554 last = item; 555 while (!last->IsLast(node)) 556 last = last->Next(); 557 } 558 559 // Normally, we can just overwrite the attribute data as the size 560 // is specified by the type and does not change that often 561 if (length != item->DataSize()) { 562 // move the attributes after the current one 563 small_data *next = item->Next(); 564 if (!next->IsLast(node)) 565 memmove((uint8 *)item + spaceNeeded, next, (uint8 *)last - (uint8 *)next); 566 567 // Move the "last" one to its new location and 568 // correctly terminate the small_data section 569 last = (small_data *)((uint8 *)last - ((uint8 *)next - ((uint8 *)item + spaceNeeded))); 570 if ((uint8 *)last < (uint8 *)node + fVolume->BlockSize()) 571 memset(last, 0, (uint8 *)node + fVolume->BlockSize() - (uint8 *)last); 572 573 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 574 } 575 576 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 577 memcpy(item->Data(), data, length); 578 item->Data()[length] = '\0'; 579 580 return B_OK; 581 } 582 583 return B_DEVICE_FULL; 584 } 585 586 // try to add the new attribute! 587 588 if ((uint8 *)item + spaceNeeded > (uint8 *)node + fVolume->InodeSize()) { 589 // there is not enough space for it! 590 if (!force) 591 return B_DEVICE_FULL; 592 593 // make room for the new attribute 594 if (_MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK) 595 return B_ERROR; 596 597 // get new last item! 598 item = node->SmallDataStart(); 599 index = 0; 600 while (!item->IsLast(node)) { 601 item = item->Next(); 602 index++; 603 } 604 } 605 606 memset(item, 0, spaceNeeded); 607 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 608 item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength); 609 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 610 strcpy(item->Name(), name); 611 memcpy(item->Data(), data, length); 612 613 // correctly terminate the small_data section 614 item = item->Next(); 615 if (!item->IsLast(node)) 616 memset(item, 0, (uint8 *)node + fVolume->InodeSize() - (uint8 *)item); 617 618 // update all current iterators 619 AttributeIterator *iterator = NULL; 620 while ((iterator = fIterators.Next(iterator)) != NULL) { 621 iterator->Update(index, 1); 622 } 623 624 return B_OK; 625 } 626 627 628 /*! 629 Iterates through the small_data section of an inode. 630 To start at the beginning of this section, you let smallData 631 point to NULL, like: 632 small_data *data = NULL; 633 while (inode->GetNextSmallData(&data) { ... } 634 635 This function is reentrant and doesn't allocate any memory; 636 you can safely stop calling it at any point (you don't need 637 to iterate through the whole list). 638 You need to hold the fSmallDataLock when you call this method 639 */ 640 status_t 641 Inode::_GetNextSmallData(bfs_inode *node, small_data **_smallData) const 642 { 643 if (node == NULL) 644 RETURN_ERROR(B_BAD_VALUE); 645 646 ASSERT(fSmallDataLock.IsLocked()); 647 648 small_data *data = *_smallData; 649 650 // begin from the start? 651 if (data == NULL) 652 data = node->SmallDataStart(); 653 else 654 data = data->Next(); 655 656 // is already last item? 657 if (data->IsLast(node)) 658 return B_ENTRY_NOT_FOUND; 659 660 *_smallData = data; 661 662 return B_OK; 663 } 664 665 666 /*! 667 Finds the attribute "name" in the small data section, and 668 returns a pointer to it (or NULL if it doesn't exist). 669 You need to hold the fSmallDataLock when you call this method 670 */ 671 small_data * 672 Inode::FindSmallData(const bfs_inode *node, const char *name) const 673 { 674 ASSERT(fSmallDataLock.IsLocked()); 675 676 small_data *smallData = NULL; 677 while (_GetNextSmallData(const_cast<bfs_inode *>(node), &smallData) == B_OK) { 678 if (!strcmp(smallData->Name(), name)) 679 return smallData; 680 } 681 return NULL; 682 } 683 684 685 /*! 686 Returns a pointer to the node's name if present in the small data 687 section, NULL otherwise. 688 You need to hold the fSmallDataLock when you call this method 689 */ 690 const char * 691 Inode::Name(const bfs_inode *node) const 692 { 693 ASSERT(fSmallDataLock.IsLocked()); 694 695 small_data *smallData = NULL; 696 while (_GetNextSmallData((bfs_inode *)node, &smallData) == B_OK) { 697 if (*smallData->Name() == FILE_NAME_NAME 698 && smallData->NameSize() == FILE_NAME_NAME_LENGTH) 699 return (const char *)smallData->Data(); 700 } 701 return NULL; 702 } 703 704 705 /*! 706 Copies the node's name into the provided buffer. 707 The buffer should be B_FILE_NAME_LENGTH bytes large. 708 */ 709 status_t 710 Inode::GetName(char *buffer, size_t size) const 711 { 712 NodeGetter node(fVolume, this); 713 SimpleLocker locker(fSmallDataLock); 714 715 const char *name = Name(node.Node()); 716 if (name == NULL) 717 return B_ENTRY_NOT_FOUND; 718 719 strlcpy(buffer, name, size); 720 return B_OK; 721 } 722 723 724 /*! 725 Changes or set the name of a file: in the inode small_data section only, it 726 doesn't change it in the parent directory's b+tree. 727 Note that you need to write back the inode yourself after having called 728 that method. It suffers from the same API decision as AddSmallData() does 729 (and for the same reason). 730 */ 731 status_t 732 Inode::SetName(Transaction &transaction, const char *name) 733 { 734 if (name == NULL || *name == '\0') 735 return B_BAD_VALUE; 736 737 NodeGetter node(fVolume, transaction, this); 738 const char nameTag[2] = {FILE_NAME_NAME, 0}; 739 740 return _AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 741 (uint8 *)name, strlen(name), true); 742 } 743 744 745 status_t 746 Inode::_RemoveAttribute(Transaction &transaction, const char *name, 747 bool hasIndex, Index *index) 748 { 749 // remove the attribute file if it exists 750 Vnode vnode(fVolume, Attributes()); 751 Inode *attributes; 752 status_t status = vnode.Get(&attributes); 753 if (status < B_OK) 754 return status; 755 756 // update index 757 if (index != NULL) { 758 Inode *attribute; 759 if ((hasIndex || fVolume->CheckForLiveQuery(name)) 760 && GetAttribute(name, &attribute) == B_OK) { 761 uint8 data[BPLUSTREE_MAX_KEY_LENGTH]; 762 size_t length = BPLUSTREE_MAX_KEY_LENGTH; 763 if (attribute->ReadAt(0, data, &length) == B_OK) { 764 index->Update(transaction, name, attribute->Type(), data, 765 length, NULL, 0, this); 766 } 767 768 ReleaseAttribute(attribute); 769 } 770 } 771 772 if ((status = attributes->Remove(transaction, name)) < B_OK) 773 return status; 774 775 if (attributes->IsEmpty()) { 776 // remove attribute directory (don't fail if that can't be done) 777 if (remove_vnode(fVolume->ID(), attributes->ID()) == B_OK) { 778 // update the inode, so that no one will ever doubt it's deleted :-) 779 attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 780 if (attributes->WriteBack(transaction) == B_OK) { 781 Attributes().SetTo(0, 0, 0); 782 WriteBack(transaction); 783 } else 784 unremove_vnode(fVolume->ID(), attributes->ID()); 785 } 786 } 787 788 return status; 789 } 790 791 792 /*! 793 Reads data from the specified attribute. 794 This is a high-level attribute function that understands attributes 795 in the small_data section as well as real attribute files. 796 */ 797 status_t 798 Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, 799 size_t *_length) 800 { 801 if (pos < 0) 802 pos = 0; 803 804 // search in the small_data section (which has to be locked first) 805 { 806 NodeGetter node(fVolume, this); 807 SimpleLocker locker(fSmallDataLock); 808 809 small_data *smallData = FindSmallData(node.Node(), name); 810 if (smallData != NULL) { 811 size_t length = *_length; 812 if (pos >= smallData->data_size) { 813 *_length = 0; 814 return B_OK; 815 } 816 if (length + pos > smallData->DataSize()) 817 length = smallData->DataSize() - pos; 818 819 memcpy(buffer, smallData->Data() + pos, length); 820 *_length = length; 821 return B_OK; 822 } 823 } 824 825 // search in the attribute directory 826 Inode *attribute; 827 status_t status = GetAttribute(name, &attribute); 828 if (status == B_OK) { 829 if (attribute->Lock().Lock() == B_OK) { 830 status = attribute->ReadAt(pos, (uint8 *)buffer, _length); 831 attribute->Lock().Unlock(); 832 } else 833 status = B_ERROR; 834 835 ReleaseAttribute(attribute); 836 } 837 838 RETURN_ERROR(status); 839 } 840 841 842 /*! 843 Writes data to the specified attribute. 844 This is a high-level attribute function that understands attributes 845 in the small_data section as well as real attribute files. 846 */ 847 status_t 848 Inode::WriteAttribute(Transaction &transaction, const char *name, int32 type, 849 off_t pos, const uint8 *buffer, size_t *_length) 850 { 851 // needed to maintain the index 852 uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL; 853 size_t oldLength = 0; 854 855 // TODO: we actually depend on that the contents of "buffer" are constant. 856 // If they get changed during the write (hey, user programs), we may mess 857 // up our index trees! 858 859 Index index(fVolume); 860 index.SetTo(name); 861 862 Inode *attribute = NULL; 863 status_t status = B_OK; 864 865 if (GetAttribute(name, &attribute) < B_OK) { 866 // save the old attribute data 867 NodeGetter node(fVolume, transaction, this); 868 fSmallDataLock.Lock(); 869 870 small_data *smallData = FindSmallData(node.Node(), name); 871 if (smallData != NULL) { 872 oldLength = smallData->DataSize(); 873 if (oldLength > 0) { 874 if (oldLength > BPLUSTREE_MAX_KEY_LENGTH) 875 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 876 memcpy(oldData = oldBuffer, smallData->Data(), oldLength); 877 } 878 } 879 fSmallDataLock.Unlock(); 880 881 // if the attribute doesn't exist yet (as a file), try to put it in the 882 // small_data section first - if that fails (due to insufficent space), 883 // create a real attribute file 884 status = _AddSmallData(transaction, node, name, type, buffer, *_length); 885 if (status == B_DEVICE_FULL) { 886 if (smallData != NULL) { 887 // remove the old attribute from the small data section - there 888 // is no space left for the new data 889 status = _RemoveSmallData(transaction, node, name); 890 } else 891 status = B_OK; 892 893 if (status == B_OK) 894 status = CreateAttribute(transaction, name, type, &attribute); 895 if (status < B_OK) 896 RETURN_ERROR(status); 897 } else if (status == B_OK) 898 status = WriteBack(transaction); 899 } 900 901 if (attribute != NULL) { 902 if (attribute->Lock().LockWrite() == B_OK) { 903 // save the old attribute data (if this fails, oldLength will reflect it) 904 if (fVolume->CheckForLiveQuery(name) && attribute->Size() > 0) { 905 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 906 if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK) 907 oldData = oldBuffer; 908 } 909 910 // check if the data fits into the small_data section again 911 NodeGetter node(fVolume, transaction, this); 912 status = _AddSmallData(transaction, node, name, type, buffer, *_length); 913 if (status == B_OK) { 914 // it does - remove its file 915 status = _RemoveAttribute(transaction, name, false, NULL); 916 } else { 917 status = attribute->WriteAt(transaction, pos, buffer, _length); 918 if (status == B_OK) { 919 // The attribute type might have been changed - we need to adopt 920 // the new one - the attribute's inode will be written back on close 921 attribute->Node().type = HOST_ENDIAN_TO_BFS_INT32(type); 922 } 923 924 attribute->Lock().UnlockWrite(); 925 } 926 } else 927 status = B_ERROR; 928 929 ReleaseAttribute(attribute); 930 } 931 932 // TODO: find a better way than this "pos" thing (the begin of the old key 933 // must be copied to the start of the new one for a comparison) 934 if (status == B_OK && pos == 0) { 935 // index only the first BPLUSTREE_MAX_KEY_LENGTH bytes 936 uint16 length = *_length; 937 if (length > BPLUSTREE_MAX_KEY_LENGTH) 938 length = BPLUSTREE_MAX_KEY_LENGTH; 939 940 // Update index. Note, Index::Update() may be called even if initializing 941 // the index failed - it will just update the live queries in this case 942 if (pos < length || pos < oldLength) { 943 index.Update(transaction, name, type, oldData, oldLength, buffer, 944 length, this); 945 } 946 } 947 return status; 948 } 949 950 951 /*! 952 Removes the specified attribute from the inode. 953 This is a high-level attribute function that understands attributes 954 in the small_data section as well as real attribute files. 955 */ 956 status_t 957 Inode::RemoveAttribute(Transaction &transaction, const char *name) 958 { 959 Index index(fVolume); 960 bool hasIndex = index.SetTo(name) == B_OK; 961 NodeGetter node(fVolume, this); 962 963 // update index for attributes in the small_data section 964 { 965 fSmallDataLock.Lock(); 966 967 small_data *smallData = FindSmallData(node.Node(), name); 968 if (smallData != NULL) { 969 uint32 length = smallData->DataSize(); 970 if (length > BPLUSTREE_MAX_KEY_LENGTH) 971 length = BPLUSTREE_MAX_KEY_LENGTH; 972 index.Update(transaction, name, smallData->Type(), smallData->Data(), 973 length, NULL, 0, this); 974 } 975 fSmallDataLock.Unlock(); 976 } 977 978 status_t status = _RemoveSmallData(transaction, node, name); 979 if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) { 980 // remove the attribute file if it exists 981 status = _RemoveAttribute(transaction, name, hasIndex, &index); 982 } 983 return status; 984 } 985 986 987 status_t 988 Inode::GetAttribute(const char *name, Inode **_attribute) 989 { 990 // does this inode even have attributes? 991 if (Attributes().IsZero()) 992 return B_ENTRY_NOT_FOUND; 993 994 Vnode vnode(fVolume, Attributes()); 995 Inode *attributes; 996 if (vnode.Get(&attributes) < B_OK) { 997 FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", name)); 998 return B_ERROR; 999 } 1000 1001 BPlusTree *tree; 1002 status_t status = attributes->GetTree(&tree); 1003 if (status == B_OK) { 1004 vnode_id id; 1005 status = tree->Find((uint8 *)name, (uint16)strlen(name), &id); 1006 if (status == B_OK) { 1007 Vnode vnode(fVolume, id); 1008 Inode *inode; 1009 // Check if the attribute is really an attribute 1010 if (vnode.Get(&inode) < B_OK || !inode->IsAttribute()) 1011 return B_ERROR; 1012 1013 *_attribute = inode; 1014 vnode.Keep(); 1015 return B_OK; 1016 } 1017 } 1018 return status; 1019 } 1020 1021 1022 void 1023 Inode::ReleaseAttribute(Inode *attribute) 1024 { 1025 if (attribute == NULL) 1026 return; 1027 1028 put_vnode(fVolume->ID(), attribute->ID()); 1029 } 1030 1031 1032 status_t 1033 Inode::CreateAttribute(Transaction &transaction, const char *name, uint32 type, 1034 Inode **attribute) 1035 { 1036 // do we need to create the attribute directory first? 1037 if (Attributes().IsZero()) { 1038 status_t status = Inode::Create(transaction, this, NULL, 1039 S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL); 1040 if (status < B_OK) 1041 RETURN_ERROR(status); 1042 } 1043 Vnode vnode(fVolume, Attributes()); 1044 Inode *attributes; 1045 if (vnode.Get(&attributes) < B_OK) 1046 return B_ERROR; 1047 1048 // Inode::Create() locks the inode for us 1049 return Inode::Create(transaction, attributes, name, 1050 S_ATTR | S_FILE | 0666, 0, type, NULL, attribute); 1051 } 1052 1053 1054 // #pragma mark - directory tree 1055 1056 1057 /*! 1058 Gives the caller direct access to the b+tree for a given directory. 1059 The tree is no longer created on demand, but when the inode is first 1060 created. That will report any potential errors upfront, saves locking, 1061 and should work as good (though a bit slower). 1062 */ 1063 status_t 1064 Inode::GetTree(BPlusTree **tree) 1065 { 1066 if (fTree) { 1067 *tree = fTree; 1068 return B_OK; 1069 } 1070 1071 RETURN_ERROR(B_BAD_VALUE); 1072 } 1073 1074 1075 bool 1076 Inode::IsEmpty() 1077 { 1078 BPlusTree *tree; 1079 status_t status = GetTree(&tree); 1080 if (status < B_OK) 1081 return status; 1082 1083 TreeIterator iterator(tree); 1084 1085 // index and attribute directories are really empty when they are 1086 // empty - directories for standard files always contain ".", and 1087 // "..", so we need to ignore those two 1088 1089 uint32 count = 0; 1090 char name[BPLUSTREE_MAX_KEY_LENGTH]; 1091 uint16 length; 1092 vnode_id id; 1093 while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, 1094 &id) == B_OK) { 1095 if (Mode() & (S_ATTR_DIR | S_INDEX_DIR)) 1096 return false; 1097 1098 if (++count > 2 || strcmp(".", name) && strcmp("..", name)) 1099 return false; 1100 } 1101 return true; 1102 } 1103 1104 1105 // #pragma mark - data stream 1106 1107 1108 /*! 1109 Finds the block_run where "pos" is located in the data_stream of 1110 the inode. 1111 If successful, "offset" will then be set to the file offset 1112 of the block_run returned; so "pos - offset" is for the block_run 1113 what "pos" is for the whole stream. 1114 The caller has to make sure that "pos" is inside the stream. 1115 */ 1116 status_t 1117 Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset) 1118 { 1119 data_stream *data = &Node().data; 1120 1121 // find matching block run 1122 1123 if (data->MaxDirectRange() > 0 && pos >= data->MaxDirectRange()) { 1124 if (data->MaxDoubleIndirectRange() > 0 && pos >= data->MaxIndirectRange()) { 1125 // access to double indirect blocks 1126 1127 CachedBlock cached(fVolume); 1128 1129 off_t start = pos - data->MaxIndirectRange(); 1130 int32 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT 1131 + cached.BlockShift())) 1132 * (fVolume->BlockSize() / sizeof(block_run)); 1133 int32 directSize = NUM_ARRAY_BLOCKS << cached.BlockShift(); 1134 int32 index = start / indirectSize; 1135 int32 runsPerBlock = cached.BlockSize() / sizeof(block_run); 1136 1137 block_run *indirect = (block_run *)cached.SetTo( 1138 fVolume->ToBlock(data->double_indirect) + index / runsPerBlock); 1139 if (indirect == NULL) 1140 RETURN_ERROR(B_ERROR); 1141 1142 //printf("\tstart = %Ld, indirectSize = %ld, directSize = %ld, index = %ld\n",start,indirectSize,directSize,index); 1143 //printf("\tlook for indirect block at %ld,%d\n",indirect[index].allocation_group,indirect[index].start); 1144 1145 int32 current = (start % indirectSize) / directSize; 1146 1147 indirect = (block_run *)cached.SetTo( 1148 fVolume->ToBlock(indirect[index % runsPerBlock]) 1149 + current / runsPerBlock); 1150 if (indirect == NULL) 1151 RETURN_ERROR(B_ERROR); 1152 1153 run = indirect[current % runsPerBlock]; 1154 offset = data->MaxIndirectRange() + (index * indirectSize) 1155 + (current * directSize); 1156 //printf("\tfCurrent = %ld, fRunFileOffset = %Ld, fRunBlockEnd = %Ld, fRun = %ld,%d\n",fCurrent,fRunFileOffset,fRunBlockEnd,fRun.allocation_group,fRun.start); 1157 } else { 1158 // access to indirect blocks 1159 1160 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1161 off_t runBlockEnd = data->MaxDirectRange(); 1162 1163 CachedBlock cached(fVolume); 1164 off_t block = fVolume->ToBlock(data->indirect); 1165 1166 for (int32 i = 0; i < data->indirect.Length(); i++) { 1167 block_run *indirect = (block_run *)cached.SetTo(block + i); 1168 if (indirect == NULL) 1169 RETURN_ERROR(B_IO_ERROR); 1170 1171 int32 current = -1; 1172 while (++current < runsPerBlock) { 1173 if (indirect[current].IsZero()) 1174 break; 1175 1176 runBlockEnd += indirect[current].Length() 1177 << cached.BlockShift(); 1178 if (runBlockEnd > pos) { 1179 run = indirect[current]; 1180 offset = runBlockEnd - (run.Length() 1181 << cached.BlockShift()); 1182 //printf("reading from indirect block: %ld,%d\n",fRun.allocation_group,fRun.start); 1183 //printf("### indirect-run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset); 1184 return fVolume->ValidateBlockRun(run); 1185 } 1186 } 1187 } 1188 RETURN_ERROR(B_ERROR); 1189 } 1190 } else { 1191 // access from direct blocks 1192 1193 off_t runBlockEnd = 0LL; 1194 int32 current = -1; 1195 1196 while (++current < NUM_DIRECT_BLOCKS) { 1197 if (data->direct[current].IsZero()) 1198 break; 1199 1200 runBlockEnd += data->direct[current].Length() 1201 << fVolume->BlockShift(); 1202 if (runBlockEnd > pos) { 1203 run = data->direct[current]; 1204 offset = runBlockEnd - (run.Length() << fVolume->BlockShift()); 1205 //printf("### run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset); 1206 return fVolume->ValidateBlockRun(run); 1207 } 1208 } 1209 1210 //PRINT(("FindBlockRun() failed in direct range: size = %Ld, pos = %Ld\n",data->size,pos)); 1211 return B_ENTRY_NOT_FOUND; 1212 } 1213 return fVolume->ValidateBlockRun(run); 1214 } 1215 1216 1217 status_t 1218 Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length) 1219 { 1220 #if 0 1221 // call the right ReadAt() method, depending on the inode flags 1222 1223 if (Flags() & INODE_LOGGED) 1224 return ((Stream<Access::Logged> *)this)->ReadAt(pos, buffer, _length); 1225 1226 return ((Stream<Access::Cached> *)this)->ReadAt(pos, buffer, _length); 1227 #endif 1228 size_t length = *_length; 1229 1230 // set/check boundaries for pos/length 1231 if (pos < 0) 1232 return B_BAD_VALUE; 1233 1234 if (pos >= Size() || length == 0) { 1235 *_length = 0; 1236 return B_NO_ERROR; 1237 } 1238 1239 return file_cache_read(FileCache(), pos, buffer, _length); 1240 } 1241 1242 1243 status_t 1244 Inode::WriteAt(Transaction &transaction, off_t pos, const uint8 *buffer, 1245 size_t *_length) 1246 { 1247 // update the last modification time in memory, it will be written 1248 // back to the inode, and the index when the file is closed 1249 // ToDo: should update the internal last modified time only at this point! 1250 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) 1251 << INODE_TIME_SHIFT); 1252 1253 // TODO: support INODE_LOGGED! 1254 #if 0 1255 if (Flags() & INODE_LOGGED) 1256 return ((Stream<Access::Logged> *)this)->WriteAt(transaction, pos, buffer, _length); 1257 1258 return ((Stream<Access::Cached> *)this)->WriteAt(transaction, pos, buffer, _length); 1259 #endif 1260 size_t length = *_length; 1261 1262 // set/check boundaries for pos/length 1263 if (pos < 0) 1264 return B_BAD_VALUE; 1265 1266 if (pos + length > Size()) { 1267 off_t oldSize = Size(); 1268 1269 // the transaction doesn't have to be started already 1270 // ToDo: what's that INODE_NO_TRANSACTION flag good for again? 1271 if ((Flags() & INODE_NO_TRANSACTION) == 0 1272 && !transaction.IsStarted()) 1273 transaction.Start(fVolume, BlockNumber()); 1274 1275 // let's grow the data stream to the size needed 1276 status_t status = SetFileSize(transaction, pos + length); 1277 if (status < B_OK) { 1278 *_length = 0; 1279 RETURN_ERROR(status); 1280 } 1281 // If the position of the write was beyond the file size, we 1282 // have to fill the gap between that position and the old file 1283 // size with zeros. 1284 FillGapWithZeros(oldSize, pos); 1285 1286 // we need to write back the inode here because it has to 1287 // go into this transaction (we cannot wait until the file 1288 // is closed) 1289 status = WriteBack(transaction); 1290 if (status < B_OK) 1291 return status; 1292 } 1293 1294 // If we don't want to write anything, we can now return (we may 1295 // just have changed the file size using the position parameter) 1296 if (length == 0) 1297 return B_OK; 1298 1299 return file_cache_write(FileCache(), pos, buffer, _length); 1300 } 1301 1302 1303 /*! 1304 Fills the gap between the old file size and the new file size 1305 with zeros. 1306 It's more or less a copy of Inode::WriteAt() but it can handle 1307 length differences of more than just 4 GB, and it never uses 1308 the log, even if the INODE_LOGGED flag is set. 1309 */ 1310 status_t 1311 Inode::FillGapWithZeros(off_t pos, off_t newSize) 1312 { 1313 // ToDo: we currently do anything here, same as original BFS! 1314 //if (pos >= newSize) 1315 return B_OK; 1316 #if 0 1317 block_run run; 1318 off_t offset; 1319 if (FindBlockRun(pos, run, offset) < B_OK) 1320 RETURN_ERROR(B_BAD_VALUE); 1321 1322 off_t length = newSize - pos; 1323 uint32 bytesWritten = 0; 1324 uint32 blockSize = fVolume->BlockSize(); 1325 uint32 blockShift = fVolume->BlockShift(); 1326 uint8 *block; 1327 1328 // the first block_run we write could not be aligned to the block_size boundary 1329 // (write partial block at the beginning) 1330 1331 // pos % block_size == (pos - offset) % block_size, offset % block_size == 0 1332 if (pos % blockSize != 0) { 1333 run.start += (pos - offset) / blockSize; 1334 run.length -= (pos - offset) / blockSize; 1335 1336 CachedBlock cached(fVolume,run); 1337 if ((block = cached.Block()) == NULL) 1338 RETURN_ERROR(B_BAD_VALUE); 1339 1340 bytesWritten = blockSize - (pos % blockSize); 1341 if (length < bytesWritten) 1342 bytesWritten = length; 1343 1344 memset(block + (pos % blockSize), 0, bytesWritten); 1345 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1346 RETURN_ERROR(B_IO_ERROR); 1347 1348 pos += bytesWritten; 1349 1350 length -= bytesWritten; 1351 if (length == 0) 1352 return B_OK; 1353 1354 if (FindBlockRun(pos, run, offset) < B_OK) 1355 RETURN_ERROR(B_BAD_VALUE); 1356 } 1357 1358 while (length > 0) { 1359 // offset is the offset to the current pos in the block_run 1360 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift)); 1361 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift)); 1362 1363 CachedBlock cached(fVolume); 1364 off_t blockNumber = fVolume->ToBlock(run); 1365 for (int32 i = 0; i < run.Length(); i++) { 1366 if ((block = cached.SetTo(blockNumber + i, true)) == NULL) 1367 RETURN_ERROR(B_IO_ERROR); 1368 1369 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1370 RETURN_ERROR(B_IO_ERROR); 1371 } 1372 1373 int32 bytes = run.Length() << blockShift; 1374 length -= bytes; 1375 bytesWritten += bytes; 1376 1377 // since we don't respect a last partial block, length can be lower 1378 if (length <= 0) 1379 break; 1380 1381 pos += bytes; 1382 1383 if (FindBlockRun(pos, run, offset) < B_OK) 1384 RETURN_ERROR(B_BAD_VALUE); 1385 } 1386 return B_OK; 1387 #endif 1388 } 1389 1390 1391 /*! 1392 Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing 1393 the indirect and double indirect range uses this method. 1394 The allocated block_run is saved in "run" 1395 */ 1396 status_t 1397 Inode::_AllocateBlockArray(Transaction &transaction, block_run &run) 1398 { 1399 if (!run.IsZero()) 1400 return B_BAD_VALUE; 1401 1402 status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, 1403 run, NUM_ARRAY_BLOCKS); 1404 if (status < B_OK) 1405 return status; 1406 1407 // make sure those blocks are empty 1408 CachedBlock cached(fVolume); 1409 off_t block = fVolume->ToBlock(run); 1410 1411 for (int32 i = 0; i < run.Length(); i++) { 1412 block_run *runs = (block_run *)cached.SetToWritable(transaction, 1413 block + i, true); 1414 if (runs == NULL) 1415 return B_IO_ERROR; 1416 } 1417 return B_OK; 1418 } 1419 1420 1421 status_t 1422 Inode::_GrowStream(Transaction &transaction, off_t size) 1423 { 1424 data_stream *data = &Node().data; 1425 1426 // is the data stream already large enough to hold the new size? 1427 // (can be the case with preallocated blocks) 1428 if (size < data->MaxDirectRange() 1429 || size < data->MaxIndirectRange() 1430 || size < data->MaxDoubleIndirectRange()) { 1431 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1432 return B_OK; 1433 } 1434 1435 // how many bytes are still needed? (unused ranges are always zero) 1436 uint16 minimum = 1; 1437 off_t bytes; 1438 if (data->Size() < data->MaxDoubleIndirectRange()) { 1439 bytes = size - data->MaxDoubleIndirectRange(); 1440 // the double indirect range can only handle multiple of NUM_ARRAY_BLOCKS 1441 minimum = NUM_ARRAY_BLOCKS; 1442 } else if (data->Size() < data->MaxIndirectRange()) 1443 bytes = size - data->MaxIndirectRange(); 1444 else if (data->Size() < data->MaxDirectRange()) 1445 bytes = size - data->MaxDirectRange(); 1446 else 1447 bytes = size - data->Size(); 1448 1449 // do we have enough free blocks on the disk? 1450 off_t blocksRequested = (bytes + fVolume->BlockSize() - 1) 1451 >> fVolume->BlockShift(); 1452 if (blocksRequested > fVolume->FreeBlocks()) 1453 return B_DEVICE_FULL; 1454 1455 off_t blocksNeeded = blocksRequested; 1456 // because of preallocations and partial allocations, the number of 1457 // blocks we need to allocate may be different from the one we request 1458 // from the block allocator 1459 1460 // Should we preallocate some blocks (currently, always 64k)? 1461 // Attributes, attribute directories, and long symlinks usually won't get 1462 // that big, and should stay close to the inode - preallocating could be 1463 // counterproductive. 1464 // Also, if free disk space is tight, we probably don't want to do this as well. 1465 if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink() 1466 && blocksRequested < (65536 >> fVolume->BlockShift()) 1467 && fVolume->FreeBlocks() > 128) 1468 blocksRequested = 65536 >> fVolume->BlockShift(); 1469 1470 while (blocksNeeded > 0) { 1471 // the requested blocks do not need to be returned with a 1472 // single allocation, so we need to iterate until we have 1473 // enough blocks allocated 1474 block_run run; 1475 status_t status = fVolume->Allocate(transaction, this, blocksRequested, 1476 run, minimum); 1477 if (status < B_OK) 1478 return status; 1479 1480 // okay, we have the needed blocks, so just distribute them to the 1481 // different ranges of the stream (direct, indirect & double indirect) 1482 1483 // TODO: if anything goes wrong here, we probably want to free the 1484 // blocks that couldn't be distributed into the stream! 1485 1486 blocksNeeded -= run.Length(); 1487 // don't preallocate if the first allocation was already too small 1488 blocksRequested = blocksNeeded; 1489 if (minimum > 1) { 1490 // make sure that "blocks" is a multiple of minimum 1491 blocksRequested = (blocksRequested + minimum - 1) & ~(minimum - 1); 1492 } 1493 1494 // Direct block range 1495 1496 if (data->Size() <= data->MaxDirectRange()) { 1497 // let's try to put them into the direct block range 1498 int32 free = 0; 1499 for (; free < NUM_DIRECT_BLOCKS; free++) 1500 if (data->direct[free].IsZero()) 1501 break; 1502 1503 if (free < NUM_DIRECT_BLOCKS) { 1504 // can we merge the last allocated run with the new one? 1505 int32 last = free - 1; 1506 if (free > 0 && data->direct[last].MergeableWith(run)) { 1507 data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16( 1508 data->direct[last].Length() + run.Length()); 1509 } else 1510 data->direct[free] = run; 1511 1512 data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64( 1513 data->MaxDirectRange() + run.Length() * fVolume->BlockSize()); 1514 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 1515 ? data->max_direct_range : size); 1516 continue; 1517 } 1518 } 1519 1520 // Indirect block range 1521 1522 if (data->Size() <= data->MaxIndirectRange() 1523 || !data->MaxIndirectRange()) { 1524 CachedBlock cached(fVolume); 1525 block_run *runs = NULL; 1526 uint32 free = 0; 1527 off_t block; 1528 1529 // if there is no indirect block yet, create one 1530 if (data->indirect.IsZero()) { 1531 status = _AllocateBlockArray(transaction, data->indirect); 1532 if (status < B_OK) 1533 return status; 1534 1535 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1536 data->MaxDirectRange()); 1537 // insert the block_run in the first block 1538 runs = (block_run *)cached.SetTo(data->indirect); 1539 } else { 1540 uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run); 1541 block = fVolume->ToBlock(data->indirect); 1542 1543 // search first empty entry 1544 int32 i = 0; 1545 for (; i < data->indirect.Length(); i++) { 1546 if ((runs = (block_run *)cached.SetTo(block + i)) == NULL) 1547 return B_IO_ERROR; 1548 1549 for (free = 0; free < numberOfRuns; free++) 1550 if (runs[free].IsZero()) 1551 break; 1552 1553 if (free < numberOfRuns) 1554 break; 1555 } 1556 if (i == data->indirect.Length()) 1557 runs = NULL; 1558 } 1559 1560 if (runs != NULL) { 1561 // try to insert the run to the last one - note that this 1562 // doesn't take block borders into account, so it could be 1563 // further optimized 1564 cached.MakeWritable(transaction); 1565 1566 int32 last = free - 1; 1567 if (free > 0 && runs[last].MergeableWith(run)) { 1568 runs[last].length = HOST_ENDIAN_TO_BFS_INT16( 1569 runs[last].Length() + run.Length()); 1570 } else 1571 runs[free] = run; 1572 1573 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1574 data->MaxIndirectRange() 1575 + (run.Length() << fVolume->BlockShift())); 1576 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 1577 ? data->MaxIndirectRange() : size); 1578 continue; 1579 } 1580 } 1581 1582 // Double indirect block range 1583 1584 if (data->Size() <= data->MaxDoubleIndirectRange() 1585 || !data->max_double_indirect_range) { 1586 while ((run.Length() % NUM_ARRAY_BLOCKS) != 0) { 1587 // The number of allocated blocks isn't a multiple of 1588 // NUM_ARRAY_BLOCKS, so we have to change this. This can happen 1589 // the first time the stream grows into the double 1590 // indirect range. 1591 // First, free the remaining blocks that don't fit into a multiple 1592 // of NUM_ARRAY_BLOCKS 1593 int32 rest = run.Length() % NUM_ARRAY_BLOCKS; 1594 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest); 1595 1596 status = fVolume->Free(transaction, 1597 block_run::Run(run.AllocationGroup(), 1598 run.Start() + run.Length(), rest)); 1599 if (status < B_OK) 1600 return status; 1601 1602 blocksNeeded += rest; 1603 blocksRequested = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) 1604 & ~(NUM_ARRAY_BLOCKS - 1); 1605 minimum = NUM_ARRAY_BLOCKS; 1606 // we make sure here that we have at minimum 1607 // NUM_ARRAY_BLOCKS allocated, so if the allocation 1608 // succeeds, we don't run into an endless loop 1609 1610 // Are there any blocks left in the run? If not, allocate a new one 1611 if (run.length == 0) 1612 continue; 1613 } 1614 1615 // if there is no double indirect block yet, create one 1616 if (data->double_indirect.IsZero()) { 1617 status = _AllocateBlockArray(transaction, data->double_indirect); 1618 if (status < B_OK) 1619 return status; 1620 1621 data->max_double_indirect_range = data->max_indirect_range; 1622 } 1623 1624 // calculate the index where to insert the new blocks 1625 1626 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1627 int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) << fVolume->BlockShift()) 1628 * runsPerBlock; 1629 int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift(); 1630 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1631 1632 off_t start = data->MaxDoubleIndirectRange() - data->MaxIndirectRange(); 1633 int32 indirectIndex = start / indirectSize; 1634 int32 index = start / directSize; 1635 1636 // distribute the blocks to the array and allocate 1637 // new array blocks when needed 1638 1639 CachedBlock cached(fVolume); 1640 CachedBlock cachedDirect(fVolume); 1641 block_run *array = NULL; 1642 uint32 runLength = run.Length(); 1643 1644 // TODO: the following code is commented - it could be used to 1645 // preallocate all needed block arrays to see in advance if the 1646 // allocation will succeed. 1647 // I will probably remove it later, because it's no perfect solution 1648 // either: if the allocation was broken up before (blocksNeeded != 0), 1649 // it doesn't guarantee anything. 1650 // And since failing in this case is not that common, it doesn't have 1651 // to be optimized in that way. 1652 // Anyway, I wanted to have it in CVS - all those lines, and they will 1653 // be removed soon :-) 1654 /* 1655 // allocate new block arrays if needed 1656 1657 off_t block = -1; 1658 1659 for (int32 i = 0;i < needed;i++) { 1660 // get the block to insert the run into 1661 block = fVolume->ToBlock(data->double_indirect) + i + indirectIndex / runsPerBlock; 1662 if (cached.BlockNumber() != block) 1663 array = (block_run *)cached.SetTo(block); 1664 1665 if (array == NULL) 1666 return B_ERROR; 1667 1668 status = AllocateBlockArray(transaction, array[i + indirectIndex % runsPerBlock]); 1669 if (status < B_OK) 1670 return status; 1671 } 1672 */ 1673 1674 while (run.length != 0) { 1675 // get the indirect array block 1676 if (array == NULL) { 1677 array = (block_run *)cached.SetTo(fVolume->ToBlock( 1678 data->double_indirect) + indirectIndex / runsPerBlock); 1679 if (array == NULL) 1680 return B_IO_ERROR; 1681 } 1682 1683 do { 1684 // do we need a new array block? 1685 if (array[indirectIndex % runsPerBlock].IsZero()) { 1686 status = _AllocateBlockArray(transaction, 1687 array[indirectIndex % runsPerBlock]); 1688 if (status < B_OK) 1689 return status; 1690 } 1691 1692 block_run *runs = (block_run *)cachedDirect.SetToWritable( 1693 transaction, fVolume->ToBlock(array[indirectIndex 1694 % runsPerBlock]) + index / runsPerBlock); 1695 if (runs == NULL) 1696 return B_IO_ERROR; 1697 1698 do { 1699 // insert the block_run into the array 1700 runs[index % runsPerBlock] = run; 1701 runs[index % runsPerBlock].length 1702 = HOST_ENDIAN_TO_BFS_INT16(NUM_ARRAY_BLOCKS); 1703 1704 // alter the remaining block_run 1705 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() 1706 + NUM_ARRAY_BLOCKS); 1707 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() 1708 - NUM_ARRAY_BLOCKS); 1709 } while ((++index % runsPerBlock) != 0 && run.length); 1710 } while ((index % runsPerArray) != 0 && run.length); 1711 1712 if (++indirectIndex % runsPerBlock == 0) { 1713 array = NULL; 1714 index = 0; 1715 } 1716 } 1717 1718 data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64( 1719 data->MaxDoubleIndirectRange() 1720 + (runLength << fVolume->BlockShift())); 1721 data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64( 1722 data->max_double_indirect_range) : size; 1723 1724 continue; 1725 } 1726 1727 RETURN_ERROR(EFBIG); 1728 } 1729 // update the size of the data stream 1730 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1731 1732 return B_OK; 1733 } 1734 1735 1736 status_t 1737 Inode::_FreeStaticStreamArray(Transaction &transaction, int32 level, 1738 block_run run, off_t size, off_t offset, off_t &max) 1739 { 1740 int32 indirectSize = 0; 1741 if (level == 0) 1742 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift())) 1743 * (fVolume->BlockSize() / sizeof(block_run)); 1744 else if (level == 1) 1745 indirectSize = 4 << fVolume->BlockShift(); 1746 1747 off_t start; 1748 if (size > offset) 1749 start = size - offset; 1750 else 1751 start = 0; 1752 1753 int32 index = start / indirectSize; 1754 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1755 1756 CachedBlock cached(fVolume); 1757 off_t blockNumber = fVolume->ToBlock(run); 1758 1759 // set the file offset to the current block run 1760 offset += (off_t)index * indirectSize; 1761 1762 for (int32 i = index / runsPerBlock; i < run.Length(); i++) { 1763 block_run *array = (block_run *)cached.SetToWritable(transaction, 1764 blockNumber + i); 1765 if (array == NULL) 1766 RETURN_ERROR(B_ERROR); 1767 1768 for (index = index % runsPerBlock; index < runsPerBlock; index++) { 1769 if (array[index].IsZero()) { 1770 // we also want to break out of the outer loop 1771 i = run.Length(); 1772 break; 1773 } 1774 1775 status_t status = B_OK; 1776 if (level == 0) { 1777 status = _FreeStaticStreamArray(transaction, 1, array[index], 1778 size, offset, max); 1779 } else if (offset >= size) 1780 status = fVolume->Free(transaction, array[index]); 1781 else 1782 max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize); 1783 1784 if (status < B_OK) 1785 RETURN_ERROR(status); 1786 1787 if (offset >= size) 1788 array[index].SetTo(0, 0, 0); 1789 1790 offset += indirectSize; 1791 } 1792 index = 0; 1793 } 1794 return B_OK; 1795 } 1796 1797 1798 /*! 1799 Frees all block_runs in the array which come after the specified size. 1800 It also trims the last block_run that contain the size. 1801 "offset" and "max" are maintained until the last block_run that doesn't 1802 have to be freed - after this, the values won't be correct anymore, but 1803 will still assure correct function for all subsequent calls. 1804 "max" is considered to be in file system byte order. 1805 */ 1806 status_t 1807 Inode::_FreeStreamArray(Transaction &transaction, block_run *array, 1808 uint32 arrayLength, off_t size, off_t &offset, off_t &max) 1809 { 1810 PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n", 1811 arrayLength, size, offset, max)); 1812 1813 off_t newOffset = offset; 1814 uint32 i = 0; 1815 for (; i < arrayLength; i++, offset = newOffset) { 1816 if (array[i].IsZero()) 1817 break; 1818 1819 newOffset += (off_t)array[i].Length() << fVolume->BlockShift(); 1820 if (newOffset <= size) 1821 continue; 1822 1823 block_run run = array[i]; 1824 1825 // determine the block_run to be freed 1826 if (newOffset > size && offset < size) { 1827 // free partial block_run (and update the original block_run) 1828 run.start = array[i].start 1829 + ((size - offset) >> fVolume->BlockShift()) + 1; 1830 array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start() 1831 - array[i].Start()); 1832 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() 1833 - array[i].Length()); 1834 1835 if (run.length == 0) 1836 continue; 1837 1838 // update maximum range 1839 max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length() 1840 << fVolume->BlockShift())); 1841 } else { 1842 // free the whole block_run 1843 array[i].SetTo(0, 0, 0); 1844 1845 if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset) 1846 max = HOST_ENDIAN_TO_BFS_INT64(offset); 1847 } 1848 1849 if (fVolume->Free(transaction, run) < B_OK) 1850 return B_IO_ERROR; 1851 } 1852 return B_OK; 1853 } 1854 1855 1856 status_t 1857 Inode::_ShrinkStream(Transaction &transaction, off_t size) 1858 { 1859 data_stream *data = &Node().data; 1860 status_t status; 1861 1862 if (data->MaxDoubleIndirectRange() > size) { 1863 off_t *maxDoubleIndirect = &data->max_double_indirect_range; 1864 // gcc 4 work-around: "error: cannot bind packed field 1865 // 'data->data_stream::max_double_indirect_range' to 'off_t&'" 1866 status = _FreeStaticStreamArray(transaction, 0, data->double_indirect, 1867 size, data->MaxIndirectRange(), *maxDoubleIndirect); 1868 if (status < B_OK) 1869 return status; 1870 1871 if (size <= data->MaxIndirectRange()) { 1872 fVolume->Free(transaction, data->double_indirect); 1873 data->double_indirect.SetTo(0, 0, 0); 1874 data->max_double_indirect_range = 0; 1875 } 1876 } 1877 1878 if (data->MaxIndirectRange() > size) { 1879 CachedBlock cached(fVolume); 1880 off_t block = fVolume->ToBlock(data->indirect); 1881 off_t offset = data->MaxDirectRange(); 1882 1883 for (int32 i = 0; i < data->indirect.Length(); i++) { 1884 block_run *array = (block_run *)cached.SetToWritable(transaction, 1885 block + i); 1886 if (array == NULL) 1887 break; 1888 1889 off_t *maxIndirect = &data->max_indirect_range; 1890 // gcc 4 work-around: "error: cannot bind packed field 1891 // 'data->data_stream::max_indirect_range' to 'off_t&'" 1892 if (_FreeStreamArray(transaction, array, fVolume->BlockSize() 1893 / sizeof(block_run), size, offset, *maxIndirect) != B_OK) 1894 return B_IO_ERROR; 1895 } 1896 if (data->max_direct_range == data->max_indirect_range) { 1897 fVolume->Free(transaction, data->indirect); 1898 data->indirect.SetTo(0, 0, 0); 1899 data->max_indirect_range = 0; 1900 } 1901 } 1902 1903 if (data->MaxDirectRange() > size) { 1904 off_t offset = 0; 1905 off_t *maxDirect = &data->max_direct_range; 1906 // gcc 4 work-around: "error: cannot bind packed field 1907 // 'data->data_stream::max_direct_range' to 'off_t&'" 1908 status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, 1909 size, offset, *maxDirect); 1910 if (status < B_OK) 1911 return status; 1912 } 1913 1914 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1915 return B_OK; 1916 } 1917 1918 1919 status_t 1920 Inode::SetFileSize(Transaction &transaction, off_t size) 1921 { 1922 if (size < 0) 1923 return B_BAD_VALUE; 1924 1925 off_t oldSize = Size(); 1926 1927 if (size == oldSize) 1928 return B_OK; 1929 1930 // should the data stream grow or shrink? 1931 status_t status; 1932 if (size > oldSize) { 1933 status = _GrowStream(transaction, size); 1934 if (status < B_OK) { 1935 // if the growing of the stream fails, the whole operation 1936 // fails, so we should shrink the stream to its former size 1937 _ShrinkStream(transaction, oldSize); 1938 } 1939 } else 1940 status = _ShrinkStream(transaction, size); 1941 1942 if (status < B_OK) 1943 return status; 1944 1945 file_cache_set_size(FileCache(), size); 1946 return WriteBack(transaction); 1947 } 1948 1949 1950 status_t 1951 Inode::Append(Transaction &transaction, off_t bytes) 1952 { 1953 return SetFileSize(transaction, Size() + bytes); 1954 } 1955 1956 1957 /*! 1958 Checks wether or not this inode's data stream needs to be trimmed 1959 because of an earlier preallocation. 1960 Returns true if there are any blocks to be trimmed. 1961 */ 1962 bool 1963 Inode::NeedsTrimming() 1964 { 1965 // We never trim preallocated index blocks to make them grow as smooth as possible. 1966 // There are only few indices anyway, so this doesn't hurt 1967 // Also, if an inode is already in deleted state, we don't bother trimming it 1968 if (IsIndex() || IsDeleted()) 1969 return false; 1970 1971 off_t roundedSize = (Size() + fVolume->BlockSize() - 1) 1972 & ~(fVolume->BlockSize() - 1); 1973 1974 return Node().data.MaxDirectRange() > roundedSize 1975 || Node().data.MaxIndirectRange() > roundedSize 1976 || Node().data.MaxDoubleIndirectRange() > roundedSize; 1977 } 1978 1979 1980 status_t 1981 Inode::TrimPreallocation(Transaction &transaction) 1982 { 1983 status_t status = _ShrinkStream(transaction, Size()); 1984 if (status < B_OK) 1985 return status; 1986 1987 return WriteBack(transaction); 1988 } 1989 1990 1991 //! Frees the file's data stream and removes all attributes 1992 status_t 1993 Inode::Free(Transaction &transaction) 1994 { 1995 FUNCTION(); 1996 1997 // Perhaps there should be an implementation of Inode::ShrinkStream() that 1998 // just frees the data_stream, but doesn't change the inode (since it is 1999 // freed anyway) - that would make an undelete command possible 2000 status_t status = SetFileSize(transaction, 0); 2001 if (status < B_OK) 2002 return status; 2003 2004 // Free all attributes, and remove their indices 2005 { 2006 // We have to limit the scope of AttributeIterator, so that its 2007 // destructor is not called after the inode is deleted 2008 AttributeIterator iterator(this); 2009 2010 char name[B_FILE_NAME_LENGTH]; 2011 uint32 type; 2012 size_t length; 2013 vnode_id id; 2014 while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK) { 2015 RemoveAttribute(transaction, name); 2016 } 2017 } 2018 2019 if (WriteBack(transaction) < B_OK) 2020 return B_IO_ERROR; 2021 2022 return fVolume->Free(transaction, BlockRun()); 2023 } 2024 2025 2026 //! Synchronizes (writes back to disk) the file stream of the inode. 2027 status_t 2028 Inode::Sync() 2029 { 2030 if (FileCache()) 2031 return file_cache_sync(FileCache()); 2032 2033 // We may also want to flush the attribute's data stream to 2034 // disk here... (do we?) 2035 2036 if (IsSymLink() && (Flags() & INODE_LONG_SYMLINK) == 0) 2037 return B_OK; 2038 2039 data_stream *data = &Node().data; 2040 status_t status = B_OK; 2041 2042 // flush direct range 2043 2044 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 2045 if (data->direct[i].IsZero()) 2046 return B_OK; 2047 2048 status = block_cache_sync_etc(fVolume->BlockCache(), 2049 fVolume->ToBlock(data->direct[i]), data->direct[i].Length()); 2050 if (status != B_OK) 2051 return status; 2052 } 2053 2054 // flush indirect range 2055 2056 if (data->max_indirect_range == 0) 2057 return B_OK; 2058 2059 CachedBlock cached(fVolume); 2060 off_t block = fVolume->ToBlock(data->indirect); 2061 int32 count = fVolume->BlockSize() / sizeof(block_run); 2062 2063 for (int32 j = 0; j < data->indirect.Length(); j++) { 2064 block_run *runs = (block_run *)cached.SetTo(block + j); 2065 if (runs == NULL) 2066 break; 2067 2068 for (int32 i = 0; i < count; i++) { 2069 if (runs[i].IsZero()) 2070 return B_OK; 2071 2072 status = block_cache_sync_etc(fVolume->BlockCache(), 2073 fVolume->ToBlock(runs[i]), runs[i].Length()); 2074 if (status != B_OK) 2075 return status; 2076 } 2077 } 2078 2079 // flush double indirect range 2080 2081 if (data->max_double_indirect_range == 0) 2082 return B_OK; 2083 2084 off_t indirectBlock = fVolume->ToBlock(data->double_indirect); 2085 2086 for (int32 l = 0; l < data->double_indirect.Length(); l++) { 2087 block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l); 2088 if (indirectRuns == NULL) 2089 return B_FILE_ERROR; 2090 2091 CachedBlock directCached(fVolume); 2092 2093 for (int32 k = 0; k < count; k++) { 2094 if (indirectRuns[k].IsZero()) 2095 return B_OK; 2096 2097 block = fVolume->ToBlock(indirectRuns[k]); 2098 for (int32 j = 0; j < indirectRuns[k].Length(); j++) { 2099 block_run *runs = (block_run *)directCached.SetTo(block + j); 2100 if (runs == NULL) 2101 return B_FILE_ERROR; 2102 2103 for (int32 i = 0; i < count; i++) { 2104 if (runs[i].IsZero()) 2105 return B_OK; 2106 2107 // TODO: combine single block_runs to bigger ones when 2108 // they are adjacent 2109 status = block_cache_sync_etc(fVolume->BlockCache(), 2110 fVolume->ToBlock(runs[i]), runs[i].Length()); 2111 if (status != B_OK) 2112 return status; 2113 } 2114 } 2115 } 2116 } 2117 return B_OK; 2118 } 2119 2120 2121 // #pragma mark - creation/deletion 2122 2123 2124 status_t 2125 Inode::Remove(Transaction &transaction, const char *name, off_t *_id, 2126 bool isDirectory) 2127 { 2128 BPlusTree *tree; 2129 if (GetTree(&tree) != B_OK) 2130 RETURN_ERROR(B_BAD_VALUE); 2131 2132 RecursiveLocker locker(fVolume->Lock()); 2133 2134 // does the file even exist? 2135 off_t id; 2136 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK) 2137 return B_ENTRY_NOT_FOUND; 2138 2139 if (_id) 2140 *_id = id; 2141 2142 Vnode vnode(fVolume, id); 2143 Inode *inode; 2144 status_t status = vnode.Get(&inode); 2145 if (status < B_OK) { 2146 REPORT_ERROR(status); 2147 return B_ENTRY_NOT_FOUND; 2148 } 2149 2150 // Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR 2151 // bit is set for indices in BFS, not for attribute directories) - but you 2152 // should really be able to do whatever you want with your indices 2153 // without having to remove all files first :) 2154 if (!inode->IsIndex()) { 2155 // if it's not of the correct type, don't delete it! 2156 if (inode->IsContainer() != isDirectory) 2157 return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY; 2158 2159 // only delete empty directories 2160 if (isDirectory && !inode->IsEmpty()) 2161 return B_DIRECTORY_NOT_EMPTY; 2162 } 2163 2164 // remove_vnode() allows the inode to be accessed until the last put_vnode() 2165 status = remove_vnode(fVolume->ID(), id); 2166 if (status != B_OK) 2167 return status; 2168 2169 if (tree->Remove(transaction, name, id) < B_OK) { 2170 unremove_vnode(fVolume->ID(), id); 2171 RETURN_ERROR(B_ERROR); 2172 } 2173 2174 #ifdef DEBUG 2175 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) == B_OK) { 2176 DIE(("deleted entry still there")); 2177 } 2178 #endif 2179 2180 // update the inode, so that no one will ever doubt it's deleted :-) 2181 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 2182 inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 2183 2184 // In balance to the Inode::Create() method, the main indices 2185 // are updated here (name, size, & last_modified) 2186 2187 Index index(fVolume); 2188 if (inode->IsRegularNode()) { 2189 index.RemoveName(transaction, name, inode); 2190 // If removing from the index fails, it is not regarded as a 2191 // fatal error and will not be reported back! 2192 // Deleted inodes won't be visible in queries anyway. 2193 } 2194 2195 if (inode->IsFile() || inode->IsSymLink()) { 2196 if (inode->IsFile()) 2197 index.RemoveSize(transaction, inode); 2198 index.RemoveLastModified(transaction, inode); 2199 } 2200 2201 return inode->WriteBack(transaction); 2202 } 2203 2204 2205 /*! 2206 Creates the inode with the specified parent directory, and automatically 2207 adds the created inode to that parent directory. If an attribute directory 2208 is created, it will also automatically added to the parent inode as such. 2209 However, the indices root node, and the regular root node won't be added 2210 to the super block. 2211 It will also create the initial B+tree for the inode if it's a directory 2212 of any kind. 2213 If the "_id" or "_inode" variable is given and non-NULL to store the inode's 2214 ID, the inode stays locked - you have to call put_vnode() if you don't use it 2215 anymore. 2216 */ 2217 status_t 2218 Inode::Create(Transaction &transaction, Inode *parent, const char *name, 2219 int32 mode, int openMode, uint32 type, off_t *_id, Inode **_inode) 2220 { 2221 FUNCTION_START(("name = %s, mode = %ld\n", name, mode)); 2222 2223 block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0); 2224 Volume *volume = transaction.GetVolume(); 2225 BPlusTree *tree = NULL; 2226 2227 if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) { 2228 // check if the file already exists in the directory 2229 if (parent->GetTree(&tree) != B_OK) 2230 RETURN_ERROR(B_BAD_VALUE); 2231 } 2232 2233 WriteLocked locker(parent != NULL ? &parent->Lock() : NULL); 2234 // the parent directory is locked during the whole inode creation 2235 2236 if (tree != NULL) { 2237 // does the file already exist? 2238 off_t offset; 2239 if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) { 2240 // return if the file should be a directory/link or opened in exclusive mode 2241 if (S_ISDIR(mode) || S_ISLNK(mode) || openMode & O_EXCL) 2242 return B_FILE_EXISTS; 2243 2244 Vnode vnode(volume, offset); 2245 Inode *inode; 2246 status_t status = vnode.Get(&inode); 2247 if (status < B_OK) { 2248 REPORT_ERROR(status); 2249 return B_ENTRY_NOT_FOUND; 2250 } 2251 2252 // if it's a directory, bail out! 2253 if (inode->IsDirectory()) 2254 return B_IS_A_DIRECTORY; 2255 2256 // we want to open the file, so we should have the rights to do so 2257 if (inode->CheckPermissions(openModeToAccess(openMode)) != B_OK) 2258 return B_NOT_ALLOWED; 2259 2260 if (openMode & O_TRUNC) { 2261 // we need write access in order to truncate the file 2262 status = inode->CheckPermissions(W_OK); 2263 if (status != B_OK) 2264 return status; 2265 2266 // truncate the existing file 2267 WriteLocked locked(inode->Lock()); 2268 2269 status_t status = inode->SetFileSize(transaction, 0); 2270 if (status >= B_OK) 2271 status = inode->WriteBack(transaction); 2272 2273 if (status < B_OK) 2274 return status; 2275 } 2276 2277 if (_id) 2278 *_id = offset; 2279 if (_inode) 2280 *_inode = inode; 2281 2282 // only keep the vnode in memory if the _id or _inode pointer is provided 2283 if (_id != NULL || _inode != NULL) 2284 vnode.Keep(); 2285 2286 return B_OK; 2287 } 2288 } else if (parent && (mode & S_ATTR_DIR) == 0) 2289 return B_BAD_VALUE; 2290 2291 status_t status; 2292 2293 // do we have the power to create new files at all? 2294 if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK) 2295 RETURN_ERROR(status); 2296 2297 // allocate space for the new inode 2298 InodeAllocator allocator(transaction); 2299 block_run run; 2300 Inode *inode; 2301 status = allocator.New(&parentRun, mode, run, &inode); 2302 if (status < B_OK) 2303 return status; 2304 2305 // Initialize the parts of the bfs_inode structure that 2306 // InodeAllocator::New() hasn't touched yet 2307 2308 bfs_inode *node = &inode->Node(); 2309 2310 if (parent == NULL) { 2311 // we set the parent to itself in this case 2312 // (only happens for the root and indices node) 2313 node->parent = run; 2314 } else 2315 node->parent = parentRun; 2316 2317 node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid()); 2318 node->gid = HOST_ENDIAN_TO_BFS_INT32(parent 2319 ? parent->Node().GroupID() : getegid()); 2320 // the group ID is inherited from the parent, if available 2321 2322 node->type = HOST_ENDIAN_TO_BFS_INT32(type); 2323 2324 inode->WriteBack(transaction); 2325 // make sure the initialized node is available to others 2326 2327 // only add the name to regular files, directories, or symlinks 2328 // don't add it to attributes, or indices 2329 if (tree && inode->IsRegularNode() && inode->SetName(transaction, name) < B_OK) 2330 return B_ERROR; 2331 2332 // Initialize b+tree if it's a directory (and add "." & ".." if it's 2333 // a standard directory for files - not for attributes or indices) 2334 if (inode->IsContainer()) { 2335 status = allocator.CreateTree(); 2336 if (status < B_OK) 2337 return status; 2338 } 2339 2340 // Add a link to the inode from the parent, depending on its type 2341 // (the vnode is not published yet, so it is safe to make the inode 2342 // accessable to the file system here) 2343 if (tree) { 2344 status = tree->Insert(transaction, name, inode->ID()); 2345 } else if (parent && (mode & S_ATTR_DIR) != 0) { 2346 parent->Attributes() = run; 2347 status = parent->WriteBack(transaction); 2348 } 2349 2350 // Note, we only care if the inode could be made accessable for the 2351 // two cases above; the root node or the indices root node must 2352 // handle this case on their own (or other cases where "parent" is 2353 // NULL) 2354 if (status < B_OK) 2355 RETURN_ERROR(status); 2356 2357 // Update the main indices (name, size & last_modified) 2358 // (live queries might want to access us after this) 2359 2360 Index index(volume); 2361 if (inode->IsRegularNode() && name != NULL) { 2362 // the name index only contains regular files 2363 // (but not the root node where name == NULL) 2364 status = index.InsertName(transaction, name, inode); 2365 if (status < B_OK && status != B_BAD_INDEX) { 2366 // We have to remove the node from the parent at this point, 2367 // because the InodeAllocator destructor can't handle this 2368 // case (and if it fails, we can't do anything about it...) 2369 if (tree) 2370 tree->Remove(transaction, name, inode->ID()); 2371 else if (parent != NULL && (mode & S_ATTR_DIR) != 0) 2372 parent->Node().attributes.SetTo(0, 0, 0); 2373 2374 RETURN_ERROR(status); 2375 } 2376 } 2377 2378 inode->UpdateOldLastModified(); 2379 2380 // The "size" & "last_modified" indices don't contain directories 2381 if (inode->IsFile() || inode->IsSymLink()) { 2382 // if adding to these indices fails, the inode creation will not be harmed; 2383 // they are considered less important than the "name" index 2384 if (inode->IsFile()) 2385 index.InsertSize(transaction, inode); 2386 index.InsertLastModified(transaction, inode); 2387 } 2388 2389 // Everything worked well until this point, we have a fully 2390 // initialized inode, and we want to keep it 2391 allocator.Keep(); 2392 2393 if (inode->IsFile() || inode->IsAttribute()) { 2394 inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(), 2395 inode->Size(), volume->Device())); 2396 } 2397 2398 if (_id != NULL) 2399 *_id = inode->ID(); 2400 if (_inode != NULL) 2401 *_inode = inode; 2402 2403 // if either _id or _inode is passed, we will keep the inode locked 2404 if (_id == NULL && _inode == NULL) 2405 put_vnode(volume->ID(), inode->ID()); 2406 2407 return B_OK; 2408 } 2409 2410 2411 // #pragma mark - AttributeIterator 2412 2413 2414 AttributeIterator::AttributeIterator(Inode *inode) 2415 : 2416 fCurrentSmallData(0), 2417 fInode(inode), 2418 fAttributes(NULL), 2419 fIterator(NULL), 2420 fBuffer(NULL) 2421 { 2422 inode->_AddIterator(this); 2423 } 2424 2425 2426 AttributeIterator::~AttributeIterator() 2427 { 2428 if (fAttributes) 2429 put_vnode(fAttributes->GetVolume()->ID(), fAttributes->ID()); 2430 2431 delete fIterator; 2432 fInode->_RemoveIterator(this); 2433 } 2434 2435 2436 status_t 2437 AttributeIterator::Rewind() 2438 { 2439 fCurrentSmallData = 0; 2440 2441 if (fIterator != NULL) 2442 fIterator->Rewind(); 2443 2444 return B_OK; 2445 } 2446 2447 2448 status_t 2449 AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, 2450 vnode_id *_id) 2451 { 2452 // read attributes out of the small data section 2453 2454 if (fCurrentSmallData >= 0) { 2455 NodeGetter nodeGetter(fInode->GetVolume(), fInode); 2456 const bfs_inode *node = nodeGetter.Node(); 2457 const small_data *item = ((bfs_inode *)node)->SmallDataStart(); 2458 2459 fInode->SmallDataLock().Lock(); 2460 2461 int32 i = 0; 2462 for (;;item = item->Next()) { 2463 if (item->IsLast(node)) 2464 break; 2465 2466 if (item->NameSize() == FILE_NAME_NAME_LENGTH 2467 && *item->Name() == FILE_NAME_NAME) 2468 continue; 2469 2470 if (i++ == fCurrentSmallData) 2471 break; 2472 } 2473 2474 if (!item->IsLast(node)) { 2475 strncpy(name, item->Name(), B_FILE_NAME_LENGTH); 2476 *_type = item->Type(); 2477 *_length = item->NameSize(); 2478 *_id = (vnode_id)fCurrentSmallData; 2479 2480 fCurrentSmallData = i; 2481 } 2482 else { 2483 // stop traversing the small_data section 2484 fCurrentSmallData = -1; 2485 } 2486 2487 fInode->SmallDataLock().Unlock(); 2488 2489 if (fCurrentSmallData != -1) 2490 return B_OK; 2491 } 2492 2493 // read attributes out of the attribute directory 2494 2495 if (fInode->Attributes().IsZero()) 2496 return B_ENTRY_NOT_FOUND; 2497 2498 Volume *volume = fInode->GetVolume(); 2499 2500 // if you haven't yet access to the attributes directory, get it 2501 if (fAttributes == NULL) { 2502 if (get_vnode(volume->ID(), volume->ToVnode(fInode->Attributes()), 2503 (void **)&fAttributes) != B_OK) { 2504 FATAL(("get_vnode() failed in AttributeIterator::GetNext(vnode_id" 2505 " = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2506 return B_ENTRY_NOT_FOUND; 2507 } 2508 2509 BPlusTree *tree; 2510 if (fAttributes->GetTree(&tree) < B_OK 2511 || (fIterator = new TreeIterator(tree)) == NULL) { 2512 FATAL(("could not get tree in AttributeIterator::GetNext(vnode_id" 2513 " = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2514 return B_ENTRY_NOT_FOUND; 2515 } 2516 } 2517 2518 uint16 length; 2519 vnode_id id; 2520 status_t status = fIterator->GetNextEntry(name, &length, 2521 B_FILE_NAME_LENGTH, &id); 2522 if (status < B_OK) 2523 return status; 2524 2525 Vnode vnode(volume,id); 2526 Inode *attribute; 2527 if ((status = vnode.Get(&attribute)) == B_OK) { 2528 *_type = attribute->Type(); 2529 *_length = attribute->Size(); 2530 *_id = id; 2531 } 2532 2533 return status; 2534 } 2535 2536 2537 void 2538 AttributeIterator::Update(uint16 index, int8 change) 2539 { 2540 // fCurrentSmallData points already to the next item 2541 if (index < fCurrentSmallData) 2542 fCurrentSmallData += change; 2543 } 2544 2545