1 /* Inode - inode access functions 2 * 3 * Copyright 2001-2006, Axel Dörfler, axeld@pinc-software.de. 4 * This file may be used under the terms of the MIT License. 5 */ 6 7 8 #include "Debug.h" 9 #include "Inode.h" 10 #include "BPlusTree.h" 11 #include "Index.h" 12 13 #include <fs_cache.h> 14 #include <util/kernel_cpp.h> 15 16 #include <string.h> 17 #include <stdio.h> 18 19 20 class InodeAllocator { 21 public: 22 InodeAllocator(Transaction &transaction); 23 ~InodeAllocator(); 24 25 status_t New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode); 26 status_t CreateTree(); 27 status_t Keep(); 28 29 private: 30 Transaction *fTransaction; 31 block_run fRun; 32 Inode *fInode; 33 }; 34 35 36 InodeAllocator::InodeAllocator(Transaction &transaction) 37 : 38 fTransaction(&transaction), 39 fInode(NULL) 40 { 41 } 42 43 44 InodeAllocator::~InodeAllocator() 45 { 46 if (fTransaction != NULL) { 47 Volume *volume = fTransaction->GetVolume(); 48 49 if (fInode != NULL) { 50 fInode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 51 // this unblocks any pending bfs_read_vnode() calls 52 fInode->Free(*fTransaction); 53 remove_vnode(volume->ID(), fInode->ID()); 54 } else 55 volume->Free(*fTransaction, fRun); 56 } 57 58 delete fInode; 59 } 60 61 62 status_t 63 InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode) 64 { 65 Volume *volume = fTransaction->GetVolume(); 66 67 status_t status = volume->AllocateForInode(*fTransaction, parentRun, mode, fRun); 68 if (status < B_OK) { 69 // don't free the space in the destructor, because 70 // the allocation failed 71 fTransaction = NULL; 72 RETURN_ERROR(status); 73 } 74 75 run = fRun; 76 fInode = new Inode(volume, *fTransaction, volume->ToVnode(run), mode, run); 77 if (fInode == NULL) 78 RETURN_ERROR(B_NO_MEMORY); 79 80 status = new_vnode(volume->ID(), fInode->ID(), fInode); 81 if (status < B_OK) { 82 delete fInode; 83 fInode = NULL; 84 RETURN_ERROR(status); 85 } 86 87 *_inode = fInode; 88 return B_OK; 89 } 90 91 92 status_t 93 InodeAllocator::CreateTree() 94 { 95 Volume *volume = fTransaction->GetVolume(); 96 97 // force S_STR_INDEX to be set, if no type is set 98 if ((fInode->Mode() & S_INDEX_TYPES) == 0) 99 fInode->Node().mode |= HOST_ENDIAN_TO_BFS_INT32(S_STR_INDEX); 100 101 BPlusTree *tree = fInode->fTree = new BPlusTree(*fTransaction, fInode); 102 if (tree == NULL || tree->InitCheck() < B_OK) 103 return B_ERROR; 104 105 if (fInode->IsRegularNode()) { 106 if (tree->Insert(*fTransaction, ".", fInode->ID()) < B_OK 107 || tree->Insert(*fTransaction, "..", volume->ToVnode(fInode->Parent())) < B_OK) 108 return B_ERROR; 109 } 110 return B_OK; 111 } 112 113 114 status_t 115 InodeAllocator::Keep() 116 { 117 ASSERT(fInode != NULL && fTransaction != NULL); 118 Volume *volume = fTransaction->GetVolume(); 119 120 status_t status = fInode->WriteBack(*fTransaction); 121 if (status < B_OK) { 122 FATAL(("writing new inode %Ld failed!\n", fInode->ID())); 123 return status; 124 } 125 126 if (!fInode->IsSymLink()) 127 status = publish_vnode(volume->ID(), fInode->ID(), fInode); 128 129 fTransaction = NULL; 130 fInode = NULL; 131 132 return status; 133 } 134 135 136 // #pragma mark - 137 138 139 status_t 140 bfs_inode::InitCheck(Volume *volume) 141 { 142 if (Magic1() != INODE_MAGIC1 143 || !(Flags() & INODE_IN_USE) 144 || inode_num.Length() != 1 145 // matches inode size? 146 || (uint32)InodeSize() != volume->InodeSize() 147 // parent resides on disk? 148 || parent.AllocationGroup() > int32(volume->AllocationGroups()) 149 || parent.AllocationGroup() < 0 150 || parent.Start() > (1L << volume->AllocationGroupShift()) 151 || parent.Length() != 1 152 // attributes, too? 153 || attributes.AllocationGroup() > int32(volume->AllocationGroups()) 154 || attributes.AllocationGroup() < 0 155 || attributes.Start() > (1L << volume->AllocationGroupShift())) 156 RETURN_ERROR(B_BAD_DATA); 157 158 if (Flags() & INODE_DELETED) 159 return B_NOT_ALLOWED; 160 161 // ToDo: Add some tests to check the integrity of the other stuff here, 162 // especially for the data_stream! 163 164 return B_OK; 165 } 166 167 168 // #pragma mark - 169 170 171 Inode::Inode(Volume *volume, vnode_id id) 172 : 173 fVolume(volume), 174 fID(id), 175 fTree(NULL), 176 fAttributes(NULL), 177 fCache(NULL) 178 { 179 PRINT(("Inode::Inode(volume = %p, id = %Ld) @ %p\n", volume, id, this)); 180 181 NodeGetter node(volume, this); 182 memcpy(&fNode, node.Node(), sizeof(bfs_inode)); 183 184 char lockName[B_OS_NAME_LENGTH]; 185 snprintf(lockName, sizeof(lockName), "bfs inode %ld.%d", 186 BlockRun().AllocationGroup(), BlockRun().Start()); 187 fLock.Initialize(lockName); 188 189 // these two will help to maintain the indices 190 fOldSize = Size(); 191 fOldLastModified = LastModified(); 192 193 if (IsContainer()) 194 fTree = new BPlusTree(this); 195 if (IsFile() || IsAttribute()) 196 SetFileCache(file_cache_create(fVolume->ID(), ID(), Size(), fVolume->Device())); 197 } 198 199 200 Inode::Inode(Volume *volume, Transaction &transaction, vnode_id id, mode_t mode, block_run &run) 201 : 202 fVolume(volume), 203 fID(id), 204 fTree(NULL), 205 fAttributes(NULL), 206 fCache(NULL) 207 { 208 PRINT(("Inode::Inode(volume = %p, transaction = %p, id = %Ld) @ %p\n", 209 volume, &transaction, id, this)); 210 211 char lockName[B_OS_NAME_LENGTH]; 212 snprintf(lockName, sizeof(lockName), "bfs inode+%ld.%d", 213 run.AllocationGroup(), run.Start()); 214 fLock.Initialize(lockName); 215 216 NodeGetter node(volume, transaction, this, true); 217 memset(&fNode, 0, sizeof(bfs_inode)); 218 219 // Initialize the bfs_inode structure -- it's not written back to disk 220 // here, because it will be done so already when the inode could be 221 // created completely. 222 223 Node().magic1 = HOST_ENDIAN_TO_BFS_INT32(INODE_MAGIC1); 224 Node().inode_num = run; 225 Node().mode = HOST_ENDIAN_TO_BFS_INT32(mode); 226 Node().flags = HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 227 228 Node().create_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) << INODE_TIME_SHIFT); 229 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64(Node().create_time 230 | (volume->GetUniqueID() & INODE_TIME_MASK)); 231 // we use Volume::GetUniqueID() to avoid having too many duplicates in the 232 // last_modified index 233 234 Node().inode_size = HOST_ENDIAN_TO_BFS_INT32(volume->InodeSize()); 235 236 // these two will help to maintain the indices 237 fOldSize = Size(); 238 fOldLastModified = LastModified(); 239 } 240 241 242 Inode::~Inode() 243 { 244 PRINT(("Inode::~Inode() @ %p\n", this)); 245 246 file_cache_delete(FileCache()); 247 delete fTree; 248 } 249 250 251 status_t 252 Inode::InitCheck(bool checkNode) 253 { 254 // test inode magic and flags 255 if (checkNode) { 256 status_t status = Node().InitCheck(fVolume); 257 if (status == B_BUSY) 258 return B_BUSY; 259 260 if (status < B_OK) { 261 FATAL(("inode at block %Ld corrupt!\n", BlockNumber())); 262 RETURN_ERROR(B_BAD_DATA); 263 } 264 } 265 266 if (IsContainer()) { 267 // inodes that have a 268 if (fTree == NULL) 269 RETURN_ERROR(B_NO_MEMORY); 270 271 status_t status = fTree->InitCheck(); 272 if (status < B_OK) { 273 FATAL(("inode tree at block %Ld corrupt!\n", BlockNumber())); 274 RETURN_ERROR(B_BAD_DATA); 275 } 276 } 277 278 // it's more important to know that the inode is corrupt 279 // so we check for the lock not until here 280 return fLock.InitCheck(); 281 } 282 283 284 status_t 285 Inode::WriteBack(Transaction &transaction) 286 { 287 NodeGetter node(fVolume, transaction, this); 288 if (node.WritableNode() == NULL) 289 return B_IO_ERROR; 290 291 memcpy(node.WritableNode(), &Node(), sizeof(bfs_inode)); 292 return B_OK; 293 } 294 295 296 status_t 297 Inode::CheckPermissions(int accessMode) const 298 { 299 uid_t user = geteuid(); 300 gid_t group = getegid(); 301 302 // you never have write access to a read-only volume 303 if (accessMode & W_OK && fVolume->IsReadOnly()) 304 return B_READ_ONLY_DEVICE; 305 306 // root users always have full access (but they can't execute anything) 307 if (user == 0 && !((accessMode & X_OK) && (Mode() & S_IXUSR) == 0)) 308 return B_OK; 309 310 // shift mode bits, to check directly against accessMode 311 mode_t mode = Mode(); 312 if (user == (uid_t)fNode.UserID()) 313 mode >>= 6; 314 else if (group == (gid_t)fNode.GroupID()) 315 mode >>= 3; 316 317 if (accessMode & ~(mode & S_IRWXO)) 318 return B_NOT_ALLOWED; 319 320 return B_OK; 321 } 322 323 324 // #pragma mark - 325 326 327 void 328 Inode::_AddIterator(AttributeIterator *iterator) 329 { 330 if (fSmallDataLock.Lock() < B_OK) 331 return; 332 333 fIterators.Add(iterator); 334 335 fSmallDataLock.Unlock(); 336 } 337 338 339 void 340 Inode::_RemoveIterator(AttributeIterator *iterator) 341 { 342 if (fSmallDataLock.Lock() < B_OK) 343 return; 344 345 fIterators.Remove(iterator); 346 347 fSmallDataLock.Unlock(); 348 } 349 350 351 /** Tries to free up "bytes" space in the small_data section by moving 352 * attributes to real files. Used for system attributes like the name. 353 * You need to hold the fSmallDataLock when you call this method 354 */ 355 356 status_t 357 Inode::MakeSpaceForSmallData(Transaction &transaction, bfs_inode *node, const char *name, 358 int32 bytes) 359 { 360 ASSERT(fSmallDataLock.IsLocked()); 361 362 while (bytes > 0) { 363 small_data *item = node->SmallDataStart(), *max = NULL; 364 int32 index = 0, maxIndex = 0; 365 for (; !item->IsLast(node); item = item->Next(), index++) { 366 // should not remove those 367 if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name())) 368 continue; 369 370 if (max == NULL || max->Size() < item->Size()) { 371 maxIndex = index; 372 max = item; 373 } 374 375 // remove the first one large enough to free the needed amount of bytes 376 if (bytes < (int32)item->Size()) 377 break; 378 } 379 380 if (item->IsLast(node) || (int32)item->Size() < bytes) 381 return B_ERROR; 382 383 bytes -= max->Size(); 384 385 // Move the attribute to a real attribute file 386 // Luckily, this doesn't cause any index updates 387 388 Inode *attribute; 389 status_t status = CreateAttribute(transaction, item->Name(), item->Type(), 390 &attribute); 391 if (status < B_OK) 392 RETURN_ERROR(status); 393 394 size_t length = item->DataSize(); 395 status = attribute->WriteAt(transaction, 0, item->Data(), &length); 396 397 ReleaseAttribute(attribute); 398 399 if (status < B_OK) { 400 Vnode vnode(fVolume, Attributes()); 401 Inode *attributes; 402 if (vnode.Get(&attributes) < B_OK 403 || attributes->Remove(transaction, name) < B_OK) { 404 FATAL(("Could not remove newly created attribute!\n")); 405 } 406 407 RETURN_ERROR(status); 408 } 409 410 _RemoveSmallData(node, max, maxIndex); 411 } 412 return B_OK; 413 } 414 415 416 /** Private function which removes the given attribute from the small_data 417 * section. 418 * You need to hold the fSmallDataLock when you call this method 419 */ 420 421 status_t 422 Inode::_RemoveSmallData(bfs_inode *node, small_data *item, int32 index) 423 { 424 ASSERT(fSmallDataLock.IsLocked()); 425 426 small_data *next = item->Next(); 427 if (!next->IsLast(node)) { 428 // find the last attribute 429 small_data *last = next; 430 while (!last->IsLast(node)) 431 last = last->Next(); 432 433 int32 size = (uint8 *)last - (uint8 *)next; 434 if (size < 0 || size > (uint8 *)node + fVolume->BlockSize() - (uint8 *)next) 435 return B_BAD_DATA; 436 437 memmove(item, next, size); 438 439 // Move the "last" one to its new location and 440 // correctly terminate the small_data section 441 last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item)); 442 memset(last, 0, (uint8 *)node + fVolume->BlockSize() - (uint8 *)last); 443 } else 444 memset(item, 0, item->Size()); 445 446 // update all current iterators 447 AttributeIterator *iterator = NULL; 448 while ((iterator = fIterators.Next(iterator)) != NULL) 449 iterator->Update(index, -1); 450 451 return B_OK; 452 } 453 454 455 /** Removes the given attribute from the small_data section. 456 * Note that you need to write back the inode yourself after having called 457 * that method. 458 */ 459 460 status_t 461 Inode::RemoveSmallData(Transaction &transaction, NodeGetter &nodeGetter, const char *name) 462 { 463 if (name == NULL) 464 return B_BAD_VALUE; 465 466 bfs_inode *node = nodeGetter.WritableNode(); 467 SimpleLocker locker(fSmallDataLock); 468 469 // search for the small_data item 470 471 small_data *item = node->SmallDataStart(); 472 int32 index = 0; 473 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 474 item = item->Next(); 475 index++; 476 } 477 478 if (item->IsLast(node)) 479 return B_ENTRY_NOT_FOUND; 480 481 nodeGetter.MakeWritable(transaction); 482 return _RemoveSmallData(node, item, index); 483 } 484 485 486 /** Try to place the given attribute in the small_data section - if the 487 * new attribute is too big to fit in that section, it returns B_DEVICE_FULL. 488 * In that case, the attribute should be written to a real attribute file; 489 * if the attribute was already part of the small_data section, but the new 490 * one wouldn't fit, the old one is automatically removed from the small_data 491 * section. 492 * Note that you need to write back the inode yourself after having called that 493 * method - it's a bad API decision that it needs a transaction but enforces you 494 * to write back the inode all by yourself, but it's just more efficient in most 495 * cases... 496 */ 497 498 status_t 499 Inode::AddSmallData(Transaction &transaction, NodeGetter &nodeGetter, const char *name, 500 uint32 type, const uint8 *data, size_t length, bool force) 501 { 502 bfs_inode *node = nodeGetter.WritableNode(); 503 504 if (node == NULL || name == NULL || data == NULL) 505 return B_BAD_VALUE; 506 507 // reject any requests that can't fit into the small_data section 508 uint32 nameLength = strlen(name); 509 uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1; 510 if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode)) 511 return B_DEVICE_FULL; 512 513 nodeGetter.MakeWritable(transaction); 514 SimpleLocker locker(fSmallDataLock); 515 516 // Find the last item or one with the same name we have to add 517 small_data *item = node->SmallDataStart(); 518 int32 index = 0; 519 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 520 item = item->Next(); 521 index++; 522 } 523 524 // is the attribute already in the small_data section? 525 // then just replace the data part of that one 526 if (!item->IsLast(node)) { 527 // find last attribute 528 small_data *last = item; 529 while (!last->IsLast(node)) 530 last = last->Next(); 531 532 // try to change the attributes value 533 if (item->data_size > length 534 || force 535 || ((uint8 *)last + length - item->DataSize()) <= ((uint8 *)node + fVolume->InodeSize())) { 536 // make room for the new attribute if needed (and we are forced to do so) 537 if (force 538 && ((uint8 *)last + length - item->DataSize()) > ((uint8 *)node + fVolume->InodeSize())) { 539 // We also take the free space at the end of the small_data section 540 // into account, and request only what's really needed 541 uint32 needed = length - item->DataSize() - 542 (uint32)((uint8 *)node + fVolume->InodeSize() - (uint8 *)last); 543 544 if (MakeSpaceForSmallData(transaction, node, name, needed) < B_OK) 545 return B_ERROR; 546 547 // reset our pointers 548 item = node->SmallDataStart(); 549 index = 0; 550 while (!item->IsLast(node) && strcmp(item->Name(), name)) { 551 item = item->Next(); 552 index++; 553 } 554 555 last = item; 556 while (!last->IsLast(node)) 557 last = last->Next(); 558 } 559 560 // Normally, we can just overwrite the attribute data as the size 561 // is specified by the type and does not change that often 562 if (length != item->DataSize()) { 563 // move the attributes after the current one 564 small_data *next = item->Next(); 565 if (!next->IsLast(node)) 566 memmove((uint8 *)item + spaceNeeded, next, (uint8 *)last - (uint8 *)next); 567 568 // Move the "last" one to its new location and 569 // correctly terminate the small_data section 570 last = (small_data *)((uint8 *)last - ((uint8 *)next - ((uint8 *)item + spaceNeeded))); 571 if ((uint8 *)last < (uint8 *)node + fVolume->BlockSize()) 572 memset(last, 0, (uint8 *)node + fVolume->BlockSize() - (uint8 *)last); 573 574 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 575 } 576 577 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 578 memcpy(item->Data(), data, length); 579 item->Data()[length] = '\0'; 580 581 return B_OK; 582 } 583 584 // Could not replace the old attribute, so remove it to let 585 // let the calling function create an attribute file for it 586 if (_RemoveSmallData(node, item, index) < B_OK) 587 return B_ERROR; 588 589 return B_DEVICE_FULL; 590 } 591 592 // try to add the new attribute! 593 594 if ((uint8 *)item + spaceNeeded > (uint8 *)node + fVolume->InodeSize()) { 595 // there is not enough space for it! 596 if (!force) 597 return B_DEVICE_FULL; 598 599 // make room for the new attribute 600 if (MakeSpaceForSmallData(transaction, node, name, spaceNeeded) < B_OK) 601 return B_ERROR; 602 603 // get new last item! 604 item = node->SmallDataStart(); 605 index = 0; 606 while (!item->IsLast(node)) { 607 item = item->Next(); 608 index++; 609 } 610 } 611 612 memset(item, 0, spaceNeeded); 613 item->type = HOST_ENDIAN_TO_BFS_INT32(type); 614 item->name_size = HOST_ENDIAN_TO_BFS_INT16(nameLength); 615 item->data_size = HOST_ENDIAN_TO_BFS_INT16(length); 616 strcpy(item->Name(), name); 617 memcpy(item->Data(), data, length); 618 619 // correctly terminate the small_data section 620 item = item->Next(); 621 if (!item->IsLast(node)) 622 memset(item, 0, (uint8 *)node + fVolume->InodeSize() - (uint8 *)item); 623 624 // update all current iterators 625 AttributeIterator *iterator = NULL; 626 while ((iterator = fIterators.Next(iterator)) != NULL) 627 iterator->Update(index, 1); 628 629 return B_OK; 630 } 631 632 633 /** Iterates through the small_data section of an inode. 634 * To start at the beginning of this section, you let smallData 635 * point to NULL, like: 636 * small_data *data = NULL; 637 * while (inode->GetNextSmallData(&data) { ... } 638 * 639 * This function is reentrant and doesn't allocate any memory; 640 * you can safely stop calling it at any point (you don't need 641 * to iterate through the whole list). 642 * You need to hold the fSmallDataLock when you call this method 643 */ 644 645 status_t 646 Inode::GetNextSmallData(bfs_inode *node, small_data **_smallData) const 647 { 648 if (node == NULL) 649 RETURN_ERROR(B_BAD_VALUE); 650 651 ASSERT(fSmallDataLock.IsLocked()); 652 653 small_data *data = *_smallData; 654 655 // begin from the start? 656 if (data == NULL) 657 data = node->SmallDataStart(); 658 else 659 data = data->Next(); 660 661 // is already last item? 662 if (data->IsLast(node)) 663 return B_ENTRY_NOT_FOUND; 664 665 *_smallData = data; 666 667 return B_OK; 668 } 669 670 671 /** Finds the attribute "name" in the small data section, and 672 * returns a pointer to it (or NULL if it doesn't exist). 673 * You need to hold the fSmallDataLock when you call this method 674 */ 675 676 small_data * 677 Inode::FindSmallData(const bfs_inode *node, const char *name) const 678 { 679 ASSERT(fSmallDataLock.IsLocked()); 680 681 small_data *smallData = NULL; 682 while (GetNextSmallData(const_cast<bfs_inode *>(node), &smallData) == B_OK) { 683 if (!strcmp(smallData->Name(), name)) 684 return smallData; 685 } 686 return NULL; 687 } 688 689 690 /** Returns a pointer to the node's name if present in the small data 691 * section, NULL otherwise. 692 * You need to hold the fSmallDataLock when you call this method 693 */ 694 695 const char * 696 Inode::Name(const bfs_inode *node) const 697 { 698 ASSERT(fSmallDataLock.IsLocked()); 699 700 small_data *smallData = NULL; 701 while (GetNextSmallData((bfs_inode *)node, &smallData) == B_OK) { 702 if (*smallData->Name() == FILE_NAME_NAME && smallData->NameSize() == FILE_NAME_NAME_LENGTH) 703 return (const char *)smallData->Data(); 704 } 705 return NULL; 706 } 707 708 709 /** Copies the node's name into the provided buffer. 710 * The buffer should be B_FILE_NAME_LENGTH bytes large. 711 */ 712 713 status_t 714 Inode::GetName(char *buffer, size_t size) const 715 { 716 NodeGetter node(fVolume, this); 717 SimpleLocker locker(fSmallDataLock); 718 719 const char *name = Name(node.Node()); 720 if (name == NULL) 721 return B_ENTRY_NOT_FOUND; 722 723 strlcpy(buffer, name, size); 724 return B_OK; 725 } 726 727 728 /** Changes or set the name of a file: in the inode small_data section only, it 729 * doesn't change it in the parent directory's b+tree. 730 * Note that you need to write back the inode yourself after having called 731 * that method. It suffers from the same API decision as AddSmallData() does 732 * (and for the same reason). 733 */ 734 735 status_t 736 Inode::SetName(Transaction &transaction, const char *name) 737 { 738 if (name == NULL || *name == '\0') 739 return B_BAD_VALUE; 740 741 NodeGetter node(fVolume, transaction, this); 742 const char nameTag[2] = {FILE_NAME_NAME, 0}; 743 744 return AddSmallData(transaction, node, nameTag, FILE_NAME_TYPE, 745 (uint8 *)name, strlen(name), true); 746 } 747 748 749 /** Reads data from the specified attribute. 750 * This is a high-level attribute function that understands attributes 751 * in the small_data section as well as real attribute files. 752 */ 753 754 status_t 755 Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length) 756 { 757 if (pos < 0) 758 pos = 0; 759 760 // search in the small_data section (which has to be locked first) 761 { 762 NodeGetter node(fVolume, this); 763 SimpleLocker locker(fSmallDataLock); 764 765 small_data *smallData = FindSmallData(node.Node(), name); 766 if (smallData != NULL) { 767 size_t length = *_length; 768 if (pos >= smallData->data_size) { 769 *_length = 0; 770 return B_OK; 771 } 772 if (length + pos > smallData->DataSize()) 773 length = smallData->DataSize() - pos; 774 775 memcpy(buffer, smallData->Data() + pos, length); 776 *_length = length; 777 return B_OK; 778 } 779 } 780 781 // search in the attribute directory 782 Inode *attribute; 783 status_t status = GetAttribute(name, &attribute); 784 if (status == B_OK) { 785 if (attribute->Lock().Lock() == B_OK) { 786 status = attribute->ReadAt(pos, (uint8 *)buffer, _length); 787 attribute->Lock().Unlock(); 788 } else 789 status = B_ERROR; 790 791 ReleaseAttribute(attribute); 792 } 793 794 RETURN_ERROR(status); 795 } 796 797 798 /** Writes data to the specified attribute. 799 * This is a high-level attribute function that understands attributes 800 * in the small_data section as well as real attribute files. 801 */ 802 803 status_t 804 Inode::WriteAttribute(Transaction &transaction, const char *name, int32 type, off_t pos, 805 const uint8 *buffer, size_t *_length) 806 { 807 // needed to maintain the index 808 uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL; 809 size_t oldLength = 0; 810 811 // ToDo: we actually depend on that the contents of "buffer" are constant. 812 // If they get changed during the write (hey, user programs), we may mess 813 // up our index trees! 814 815 Index index(fVolume); 816 index.SetTo(name); 817 818 Inode *attribute = NULL; 819 status_t status = B_OK; 820 821 if (GetAttribute(name, &attribute) < B_OK) { 822 // save the old attribute data 823 NodeGetter node(fVolume, transaction, this); 824 fSmallDataLock.Lock(); 825 826 small_data *smallData = FindSmallData(node.Node(), name); 827 if (smallData != NULL) { 828 oldLength = smallData->DataSize(); 829 if (oldLength > BPLUSTREE_MAX_KEY_LENGTH) 830 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 831 memcpy(oldData = oldBuffer, smallData->Data(), oldLength); 832 } 833 fSmallDataLock.Unlock(); 834 835 // if the attribute doesn't exist yet (as a file), try to put it in the 836 // small_data section first - if that fails (due to insufficent space), 837 // create a real attribute file 838 status = AddSmallData(transaction, node, name, type, buffer, *_length); 839 if (status == B_DEVICE_FULL) { 840 status = CreateAttribute(transaction, name, type, &attribute); 841 if (status < B_OK) 842 RETURN_ERROR(status); 843 } else if (status == B_OK) 844 status = WriteBack(transaction); 845 } 846 847 if (attribute != NULL) { 848 if (attribute->Lock().LockWrite() == B_OK) { 849 // save the old attribute data (if this fails, oldLength will reflect it) 850 if (fVolume->CheckForLiveQuery(name)) { 851 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 852 if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK) 853 oldData = oldBuffer; 854 } 855 // ToDo: check if the data fits in the inode now and delete the attribute file if so 856 status = attribute->WriteAt(transaction, pos, buffer, _length); 857 858 attribute->Lock().UnlockWrite(); 859 } else 860 status = B_ERROR; 861 862 ReleaseAttribute(attribute); 863 } 864 865 // ToDo: find a better way than this "pos" thing (the begin of the old key 866 // must be copied to the start of the new one for a comparison) 867 if (status == B_OK && pos == 0) { 868 // index only the first BPLUSTREE_MAX_KEY_LENGTH bytes 869 uint16 length = *_length; 870 if (length > BPLUSTREE_MAX_KEY_LENGTH) 871 length = BPLUSTREE_MAX_KEY_LENGTH; 872 873 // Update index. Note, Index::Update() may be called even if initializing 874 // the index failed - it will just update the live queries in this case 875 if (pos < length || pos < oldLength) 876 index.Update(transaction, name, type, oldData, oldLength, buffer, length, this); 877 } 878 return status; 879 } 880 881 882 /** Removes the specified attribute from the inode. 883 * This is a high-level attribute function that understands attributes 884 * in the small_data section as well as real attribute files. 885 */ 886 887 status_t 888 Inode::RemoveAttribute(Transaction &transaction, const char *name) 889 { 890 Index index(fVolume); 891 bool hasIndex = index.SetTo(name) == B_OK; 892 NodeGetter node(fVolume, this); 893 894 // update index for attributes in the small_data section 895 { 896 fSmallDataLock.Lock(); 897 898 small_data *smallData = FindSmallData(node.Node(), name); 899 if (smallData != NULL) { 900 uint32 length = smallData->DataSize(); 901 if (length > BPLUSTREE_MAX_KEY_LENGTH) 902 length = BPLUSTREE_MAX_KEY_LENGTH; 903 index.Update(transaction, name, smallData->Type(), smallData->Data(), 904 length, NULL, 0, this); 905 } 906 fSmallDataLock.Unlock(); 907 } 908 909 status_t status = RemoveSmallData(transaction, node, name); 910 if (status == B_OK) { 911 WriteBack(transaction); 912 } else if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) { 913 // remove the attribute file if it exists 914 Vnode vnode(fVolume, Attributes()); 915 Inode *attributes; 916 if ((status = vnode.Get(&attributes)) < B_OK) 917 return status; 918 919 // update index 920 Inode *attribute; 921 if ((hasIndex || fVolume->CheckForLiveQuery(name)) 922 && GetAttribute(name, &attribute) == B_OK) { 923 uint8 data[BPLUSTREE_MAX_KEY_LENGTH]; 924 size_t length = BPLUSTREE_MAX_KEY_LENGTH; 925 if (attribute->ReadAt(0, data, &length) == B_OK) 926 index.Update(transaction, name, attribute->Type(), data, length, NULL, 0, this); 927 928 ReleaseAttribute(attribute); 929 } 930 931 if ((status = attributes->Remove(transaction, name)) < B_OK) 932 return status; 933 934 if (attributes->IsEmpty()) { 935 // remove attribute directory (don't fail if that can't be done) 936 if (remove_vnode(fVolume->ID(), attributes->ID()) == B_OK) { 937 // update the inode, so that no one will ever doubt it's deleted :-) 938 attributes->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 939 if (attributes->WriteBack(transaction) == B_OK) { 940 Attributes().SetTo(0, 0, 0); 941 WriteBack(transaction); 942 } else 943 unremove_vnode(fVolume->ID(), attributes->ID()); 944 } 945 } 946 } 947 return status; 948 } 949 950 951 status_t 952 Inode::GetAttribute(const char *name, Inode **attribute) 953 { 954 // does this inode even have attributes? 955 if (Attributes().IsZero()) 956 return B_ENTRY_NOT_FOUND; 957 958 Vnode vnode(fVolume, Attributes()); 959 Inode *attributes; 960 if (vnode.Get(&attributes) < B_OK) { 961 FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", name)); 962 return B_ERROR; 963 } 964 965 BPlusTree *tree; 966 status_t status = attributes->GetTree(&tree); 967 if (status == B_OK) { 968 vnode_id id; 969 if ((status = tree->Find((uint8 *)name, (uint16)strlen(name), &id)) == B_OK) { 970 Vnode vnode(fVolume, id); 971 // Check if the attribute is really an attribute 972 if (vnode.Get(attribute) < B_OK 973 || !(*attribute)->IsAttribute()) 974 return B_ERROR; 975 976 vnode.Keep(); 977 return B_OK; 978 } 979 } 980 return status; 981 } 982 983 984 void 985 Inode::ReleaseAttribute(Inode *attribute) 986 { 987 if (attribute == NULL) 988 return; 989 990 put_vnode(fVolume->ID(), attribute->ID()); 991 } 992 993 994 status_t 995 Inode::CreateAttribute(Transaction &transaction, const char *name, uint32 type, 996 Inode **attribute) 997 { 998 // do we need to create the attribute directory first? 999 if (Attributes().IsZero()) { 1000 status_t status = Inode::Create(transaction, this, NULL, 1001 S_ATTR_DIR | S_DIRECTORY | 0666, 0, 0, NULL); 1002 if (status < B_OK) 1003 RETURN_ERROR(status); 1004 } 1005 Vnode vnode(fVolume, Attributes()); 1006 Inode *attributes; 1007 if (vnode.Get(&attributes) < B_OK) 1008 return B_ERROR; 1009 1010 // Inode::Create() locks the inode for us 1011 return Inode::Create(transaction, attributes, name, 1012 S_ATTR | S_FILE | 0666, 0, type, NULL, attribute); 1013 } 1014 1015 1016 // #pragma mark - 1017 1018 1019 /** Gives the caller direct access to the b+tree for a given directory. 1020 * The tree is no longer created on demand, but when the inode is first 1021 * created. That will report any potential errors upfront, saves locking, 1022 * and should work as good (though a bit slower). 1023 */ 1024 1025 status_t 1026 Inode::GetTree(BPlusTree **tree) 1027 { 1028 if (fTree) { 1029 *tree = fTree; 1030 return B_OK; 1031 } 1032 1033 RETURN_ERROR(B_BAD_VALUE); 1034 } 1035 1036 1037 bool 1038 Inode::IsEmpty() 1039 { 1040 BPlusTree *tree; 1041 status_t status = GetTree(&tree); 1042 if (status < B_OK) 1043 return status; 1044 1045 TreeIterator iterator(tree); 1046 1047 // index and attribute directories are really empty when they are 1048 // empty - directories for standard files always contain ".", and 1049 // "..", so we need to ignore those two 1050 1051 uint32 count = 0; 1052 char name[BPLUSTREE_MAX_KEY_LENGTH]; 1053 uint16 length; 1054 vnode_id id; 1055 while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id) == B_OK) { 1056 if (Mode() & (S_ATTR_DIR | S_INDEX_DIR)) 1057 return false; 1058 1059 if (++count > 2 || strcmp(".", name) && strcmp("..", name)) 1060 return false; 1061 } 1062 return true; 1063 } 1064 1065 1066 /** Finds the block_run where "pos" is located in the data_stream of 1067 * the inode. 1068 * If successful, "offset" will then be set to the file offset 1069 * of the block_run returned; so "pos - offset" is for the block_run 1070 * what "pos" is for the whole stream. 1071 * The caller has to make sure that "pos" is inside the stream. 1072 */ 1073 1074 status_t 1075 Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset) 1076 { 1077 data_stream *data = &Node().data; 1078 1079 // find matching block run 1080 1081 if (data->MaxDirectRange() > 0 && pos >= data->MaxDirectRange()) { 1082 if (data->MaxDoubleIndirectRange() > 0 && pos >= data->MaxIndirectRange()) { 1083 // access to double indirect blocks 1084 1085 CachedBlock cached(fVolume); 1086 1087 off_t start = pos - data->MaxIndirectRange(); 1088 int32 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + cached.BlockShift())) 1089 * (fVolume->BlockSize() / sizeof(block_run)); 1090 int32 directSize = NUM_ARRAY_BLOCKS << cached.BlockShift(); 1091 int32 index = start / indirectSize; 1092 int32 runsPerBlock = cached.BlockSize() / sizeof(block_run); 1093 1094 block_run *indirect = (block_run *)cached.SetTo( 1095 fVolume->ToBlock(data->double_indirect) + index / runsPerBlock); 1096 if (indirect == NULL) 1097 RETURN_ERROR(B_ERROR); 1098 1099 //printf("\tstart = %Ld, indirectSize = %ld, directSize = %ld, index = %ld\n",start,indirectSize,directSize,index); 1100 //printf("\tlook for indirect block at %ld,%d\n",indirect[index].allocation_group,indirect[index].start); 1101 1102 int32 current = (start % indirectSize) / directSize; 1103 1104 indirect = (block_run *)cached.SetTo( 1105 fVolume->ToBlock(indirect[index % runsPerBlock]) + current / runsPerBlock); 1106 if (indirect == NULL) 1107 RETURN_ERROR(B_ERROR); 1108 1109 run = indirect[current % runsPerBlock]; 1110 offset = data->MaxIndirectRange() + (index * indirectSize) + (current * directSize); 1111 //printf("\tfCurrent = %ld, fRunFileOffset = %Ld, fRunBlockEnd = %Ld, fRun = %ld,%d\n",fCurrent,fRunFileOffset,fRunBlockEnd,fRun.allocation_group,fRun.start); 1112 } else { 1113 // access to indirect blocks 1114 1115 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1116 off_t runBlockEnd = data->MaxDirectRange(); 1117 1118 CachedBlock cached(fVolume); 1119 off_t block = fVolume->ToBlock(data->indirect); 1120 1121 for (int32 i = 0; i < data->indirect.Length(); i++) { 1122 block_run *indirect = (block_run *)cached.SetTo(block + i); 1123 if (indirect == NULL) 1124 RETURN_ERROR(B_IO_ERROR); 1125 1126 int32 current = -1; 1127 while (++current < runsPerBlock) { 1128 if (indirect[current].IsZero()) 1129 break; 1130 1131 runBlockEnd += indirect[current].Length() << cached.BlockShift(); 1132 if (runBlockEnd > pos) { 1133 run = indirect[current]; 1134 offset = runBlockEnd - (run.Length() << cached.BlockShift()); 1135 //printf("reading from indirect block: %ld,%d\n",fRun.allocation_group,fRun.start); 1136 //printf("### indirect-run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset); 1137 return fVolume->ValidateBlockRun(run); 1138 } 1139 } 1140 } 1141 RETURN_ERROR(B_ERROR); 1142 } 1143 } else { 1144 // access from direct blocks 1145 1146 off_t runBlockEnd = 0LL; 1147 int32 current = -1; 1148 1149 while (++current < NUM_DIRECT_BLOCKS) { 1150 if (data->direct[current].IsZero()) 1151 break; 1152 1153 runBlockEnd += data->direct[current].Length() << fVolume->BlockShift(); 1154 if (runBlockEnd > pos) { 1155 run = data->direct[current]; 1156 offset = runBlockEnd - (run.Length() << fVolume->BlockShift()); 1157 //printf("### run[%ld] = (%ld,%d,%d), offset = %Ld\n",fCurrent,fRun.allocation_group,fRun.start,fRun.Length(),fRunFileOffset); 1158 return fVolume->ValidateBlockRun(run); 1159 } 1160 } 1161 1162 //PRINT(("FindBlockRun() failed in direct range: size = %Ld, pos = %Ld\n",data->size,pos)); 1163 return B_ENTRY_NOT_FOUND; 1164 } 1165 return fVolume->ValidateBlockRun(run); 1166 } 1167 1168 1169 status_t 1170 Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length) 1171 { 1172 #if 0 1173 // call the right ReadAt() method, depending on the inode flags 1174 1175 if (Flags() & INODE_LOGGED) 1176 return ((Stream<Access::Logged> *)this)->ReadAt(pos, buffer, _length); 1177 1178 return ((Stream<Access::Cached> *)this)->ReadAt(pos, buffer, _length); 1179 #endif 1180 size_t length = *_length; 1181 1182 // set/check boundaries for pos/length 1183 if (pos < 0) 1184 return B_BAD_VALUE; 1185 1186 if (pos >= Size() || length == 0) { 1187 *_length = 0; 1188 return B_NO_ERROR; 1189 } 1190 1191 return file_cache_read(FileCache(), pos, buffer, _length); 1192 } 1193 1194 1195 status_t 1196 Inode::WriteAt(Transaction &transaction, off_t pos, const uint8 *buffer, size_t *_length) 1197 { 1198 // update the last modification time in memory, it will be written 1199 // back to the inode, and the index when the file is closed 1200 // ToDo: should update the internal last modified time only at this point! 1201 Node().last_modified_time = HOST_ENDIAN_TO_BFS_INT64((bigtime_t)time(NULL) << INODE_TIME_SHIFT); 1202 1203 // ToDo: support INODE_LOGGED! 1204 #if 0 1205 if (Flags() & INODE_LOGGED) 1206 return ((Stream<Access::Logged> *)this)->WriteAt(transaction, pos, buffer, _length); 1207 1208 return ((Stream<Access::Cached> *)this)->WriteAt(transaction, pos, buffer, _length); 1209 #endif 1210 size_t length = *_length; 1211 1212 // set/check boundaries for pos/length 1213 if (pos < 0) 1214 return B_BAD_VALUE; 1215 1216 if (pos + length > Size()) { 1217 off_t oldSize = Size(); 1218 1219 // the transaction doesn't have to be started already 1220 // ToDo: what's that INODE_NO_TRANSACTION flag good for again? 1221 if ((Flags() & INODE_NO_TRANSACTION) == 0 1222 && !transaction.IsStarted()) 1223 transaction.Start(fVolume, BlockNumber()); 1224 1225 // let's grow the data stream to the size needed 1226 status_t status = SetFileSize(transaction, pos + length); 1227 if (status < B_OK) { 1228 *_length = 0; 1229 RETURN_ERROR(status); 1230 } 1231 // If the position of the write was beyond the file size, we 1232 // have to fill the gap between that position and the old file 1233 // size with zeros. 1234 FillGapWithZeros(oldSize, pos); 1235 1236 // we need to write back the inode here because it has to 1237 // go into this transaction (we cannot wait until the file 1238 // is closed) 1239 status = WriteBack(transaction); 1240 if (status < B_OK) 1241 return status; 1242 } 1243 1244 // If we don't want to write anything, we can now return (we may 1245 // just have changed the file size using the position parameter) 1246 if (length == 0) 1247 return B_OK; 1248 1249 return file_cache_write(FileCache(), pos, buffer, _length); 1250 } 1251 1252 1253 /** Fills the gap between the old file size and the new file size 1254 * with zeros. 1255 * It's more or less a copy of Inode::WriteAt() but it can handle 1256 * length differences of more than just 4 GB, and it never uses 1257 * the log, even if the INODE_LOGGED flag is set. 1258 */ 1259 1260 status_t 1261 Inode::FillGapWithZeros(off_t pos, off_t newSize) 1262 { 1263 // ToDo: we currently do anything here, same as original BFS! 1264 //if (pos >= newSize) 1265 return B_OK; 1266 #if 0 1267 block_run run; 1268 off_t offset; 1269 if (FindBlockRun(pos, run, offset) < B_OK) 1270 RETURN_ERROR(B_BAD_VALUE); 1271 1272 off_t length = newSize - pos; 1273 uint32 bytesWritten = 0; 1274 uint32 blockSize = fVolume->BlockSize(); 1275 uint32 blockShift = fVolume->BlockShift(); 1276 uint8 *block; 1277 1278 // the first block_run we write could not be aligned to the block_size boundary 1279 // (write partial block at the beginning) 1280 1281 // pos % block_size == (pos - offset) % block_size, offset % block_size == 0 1282 if (pos % blockSize != 0) { 1283 run.start += (pos - offset) / blockSize; 1284 run.length -= (pos - offset) / blockSize; 1285 1286 CachedBlock cached(fVolume,run); 1287 if ((block = cached.Block()) == NULL) 1288 RETURN_ERROR(B_BAD_VALUE); 1289 1290 bytesWritten = blockSize - (pos % blockSize); 1291 if (length < bytesWritten) 1292 bytesWritten = length; 1293 1294 memset(block + (pos % blockSize), 0, bytesWritten); 1295 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1296 RETURN_ERROR(B_IO_ERROR); 1297 1298 pos += bytesWritten; 1299 1300 length -= bytesWritten; 1301 if (length == 0) 1302 return B_OK; 1303 1304 if (FindBlockRun(pos, run, offset) < B_OK) 1305 RETURN_ERROR(B_BAD_VALUE); 1306 } 1307 1308 while (length > 0) { 1309 // offset is the offset to the current pos in the block_run 1310 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + ((pos - offset) >> blockShift)); 1311 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - ((pos - offset) >> blockShift)); 1312 1313 CachedBlock cached(fVolume); 1314 off_t blockNumber = fVolume->ToBlock(run); 1315 for (int32 i = 0; i < run.Length(); i++) { 1316 if ((block = cached.SetTo(blockNumber + i, true)) == NULL) 1317 RETURN_ERROR(B_IO_ERROR); 1318 1319 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1320 RETURN_ERROR(B_IO_ERROR); 1321 } 1322 1323 int32 bytes = run.Length() << blockShift; 1324 length -= bytes; 1325 bytesWritten += bytes; 1326 1327 // since we don't respect a last partial block, length can be lower 1328 if (length <= 0) 1329 break; 1330 1331 pos += bytes; 1332 1333 if (FindBlockRun(pos, run, offset) < B_OK) 1334 RETURN_ERROR(B_BAD_VALUE); 1335 } 1336 return B_OK; 1337 #endif 1338 } 1339 1340 1341 /** Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing 1342 * the indirect and double indirect range uses this method. 1343 * The allocated block_run is saved in "run" 1344 */ 1345 1346 status_t 1347 Inode::_AllocateBlockArray(Transaction &transaction, block_run &run) 1348 { 1349 if (!run.IsZero()) 1350 return B_BAD_VALUE; 1351 1352 status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, run, 1353 NUM_ARRAY_BLOCKS); 1354 if (status < B_OK) 1355 return status; 1356 1357 // make sure those blocks are empty 1358 CachedBlock cached(fVolume); 1359 off_t block = fVolume->ToBlock(run); 1360 1361 for (int32 i = 0; i < run.Length(); i++) { 1362 block_run *runs = (block_run *)cached.SetToWritable(transaction, block + i, true); 1363 if (runs == NULL) 1364 return B_IO_ERROR; 1365 } 1366 return B_OK; 1367 } 1368 1369 1370 status_t 1371 Inode::_GrowStream(Transaction &transaction, off_t size) 1372 { 1373 data_stream *data = &Node().data; 1374 1375 // is the data stream already large enough to hold the new size? 1376 // (can be the case with preallocated blocks) 1377 if (size < data->MaxDirectRange() 1378 || size < data->MaxIndirectRange() 1379 || size < data->MaxDoubleIndirectRange()) { 1380 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1381 return B_OK; 1382 } 1383 1384 // how many bytes are still needed? (unused ranges are always zero) 1385 uint16 minimum = 1; 1386 off_t bytes; 1387 if (data->Size() < data->MaxDoubleIndirectRange()) { 1388 bytes = size - data->MaxDoubleIndirectRange(); 1389 // the double indirect range can only handle multiple of NUM_ARRAY_BLOCKS 1390 minimum = NUM_ARRAY_BLOCKS; 1391 } else if (data->Size() < data->MaxIndirectRange()) 1392 bytes = size - data->MaxIndirectRange(); 1393 else if (data->Size() < data->MaxDirectRange()) 1394 bytes = size - data->MaxDirectRange(); 1395 else 1396 bytes = size - data->Size(); 1397 1398 // do we have enough free blocks on the disk? 1399 off_t blocksRequested = (bytes + fVolume->BlockSize() - 1) >> fVolume->BlockShift(); 1400 if (blocksRequested > fVolume->FreeBlocks()) 1401 return B_DEVICE_FULL; 1402 1403 off_t blocksNeeded = blocksRequested; 1404 // because of preallocations and partial allocations, the number of 1405 // blocks we need to allocate may be different from the one we request 1406 // from the block allocator 1407 1408 // Should we preallocate some blocks (currently, always 64k)? 1409 // Attributes, attribute directories, and long symlinks usually won't get that big, 1410 // and should stay close to the inode - preallocating could be counterproductive. 1411 // Also, if free disk space is tight, we probably don't want to do this as well. 1412 if (!IsAttribute() && !IsAttributeDirectory() && !IsSymLink() 1413 && blocksRequested < (65536 >> fVolume->BlockShift()) 1414 && fVolume->FreeBlocks() > 128) 1415 blocksRequested = 65536 >> fVolume->BlockShift(); 1416 1417 while (blocksNeeded > 0) { 1418 // the requested blocks do not need to be returned with a 1419 // single allocation, so we need to iterate until we have 1420 // enough blocks allocated 1421 block_run run; 1422 status_t status = fVolume->Allocate(transaction, this, blocksRequested, run, minimum); 1423 if (status < B_OK) 1424 return status; 1425 1426 // okay, we have the needed blocks, so just distribute them to the 1427 // different ranges of the stream (direct, indirect & double indirect) 1428 1429 // ToDo: if anything goes wrong here, we probably want to free the 1430 // blocks that couldn't be distributed into the stream! 1431 1432 blocksNeeded -= run.Length(); 1433 // don't preallocate if the first allocation was already too small 1434 blocksRequested = blocksNeeded; 1435 if (minimum > 1) { 1436 // make sure that "blocks" is a multiple of minimum 1437 blocksRequested = (blocksRequested + minimum - 1) & ~(minimum - 1); 1438 } 1439 1440 // Direct block range 1441 1442 if (data->Size() <= data->MaxDirectRange()) { 1443 // let's try to put them into the direct block range 1444 int32 free = 0; 1445 for (; free < NUM_DIRECT_BLOCKS; free++) 1446 if (data->direct[free].IsZero()) 1447 break; 1448 1449 if (free < NUM_DIRECT_BLOCKS) { 1450 // can we merge the last allocated run with the new one? 1451 int32 last = free - 1; 1452 if (free > 0 && data->direct[last].MergeableWith(run)) 1453 data->direct[last].length = HOST_ENDIAN_TO_BFS_INT16(data->direct[last].Length() + run.Length()); 1454 else 1455 data->direct[free] = run; 1456 1457 data->max_direct_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDirectRange() + run.Length() * fVolume->BlockSize()); 1458 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 ? data->max_direct_range : size); 1459 continue; 1460 } 1461 } 1462 1463 // Indirect block range 1464 1465 if (data->Size() <= data->MaxIndirectRange() || !data->MaxIndirectRange()) { 1466 CachedBlock cached(fVolume); 1467 block_run *runs = NULL; 1468 uint32 free = 0; 1469 off_t block; 1470 1471 // if there is no indirect block yet, create one 1472 if (data->indirect.IsZero()) { 1473 status = _AllocateBlockArray(transaction, data->indirect); 1474 if (status < B_OK) 1475 return status; 1476 1477 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDirectRange()); 1478 // insert the block_run in the first block 1479 runs = (block_run *)cached.SetTo(data->indirect); 1480 } else { 1481 uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run); 1482 block = fVolume->ToBlock(data->indirect); 1483 1484 // search first empty entry 1485 int32 i = 0; 1486 for (; i < data->indirect.Length(); i++) { 1487 if ((runs = (block_run *)cached.SetTo(block + i)) == NULL) 1488 return B_IO_ERROR; 1489 1490 for (free = 0; free < numberOfRuns; free++) 1491 if (runs[free].IsZero()) 1492 break; 1493 1494 if (free < numberOfRuns) 1495 break; 1496 } 1497 if (i == data->indirect.Length()) 1498 runs = NULL; 1499 } 1500 1501 if (runs != NULL) { 1502 // try to insert the run to the last one - note that this doesn't 1503 // take block borders into account, so it could be further optimized 1504 cached.MakeWritable(transaction); 1505 1506 int32 last = free - 1; 1507 if (free > 0 && runs[last].MergeableWith(run)) 1508 runs[last].length = HOST_ENDIAN_TO_BFS_INT16(runs[last].Length() + run.Length()); 1509 else 1510 runs[free] = run; 1511 1512 data->max_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxIndirectRange() + (run.Length() << fVolume->BlockShift())); 1513 data->size = HOST_ENDIAN_TO_BFS_INT64(blocksNeeded > 0 ? data->MaxIndirectRange() : size); 1514 continue; 1515 } 1516 } 1517 1518 // Double indirect block range 1519 1520 if (data->Size() <= data->MaxDoubleIndirectRange() || !data->max_double_indirect_range) { 1521 while ((run.Length() % NUM_ARRAY_BLOCKS) != 0) { 1522 // The number of allocated blocks isn't a multiple of NUM_ARRAY_BLOCKS, 1523 // so we have to change this. This can happen the first time the stream 1524 // grows into the double indirect range. 1525 // First, free the remaining blocks that don't fit into a multiple 1526 // of NUM_ARRAY_BLOCKS 1527 int32 rest = run.Length() % NUM_ARRAY_BLOCKS; 1528 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - rest); 1529 1530 status = fVolume->Free(transaction, block_run::Run(run.AllocationGroup(), 1531 run.Start() + run.Length(), rest)); 1532 if (status < B_OK) 1533 return status; 1534 1535 blocksNeeded += rest; 1536 blocksRequested = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) & ~(NUM_ARRAY_BLOCKS - 1); 1537 minimum = NUM_ARRAY_BLOCKS; 1538 // we make sure here that we have at minimum NUM_ARRAY_BLOCKS allocated, 1539 // so if the allocation succeeds, we don't run into an endless loop 1540 1541 // Are there any blocks left in the run? If not, allocate a new one 1542 if (run.length == 0) 1543 continue; 1544 } 1545 1546 // if there is no double indirect block yet, create one 1547 if (data->double_indirect.IsZero()) { 1548 status = _AllocateBlockArray(transaction, data->double_indirect); 1549 if (status < B_OK) 1550 return status; 1551 1552 data->max_double_indirect_range = data->max_indirect_range; 1553 } 1554 1555 // calculate the index where to insert the new blocks 1556 1557 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1558 int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) << fVolume->BlockShift()) 1559 * runsPerBlock; 1560 int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift(); 1561 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1562 1563 off_t start = data->MaxDoubleIndirectRange() - data->MaxIndirectRange(); 1564 int32 indirectIndex = start / indirectSize; 1565 int32 index = start / directSize; 1566 1567 // distribute the blocks to the array and allocate 1568 // new array blocks when needed 1569 1570 CachedBlock cached(fVolume); 1571 CachedBlock cachedDirect(fVolume); 1572 block_run *array = NULL; 1573 uint32 runLength = run.Length(); 1574 1575 // ToDo: the following code is commented - it could be used to 1576 // preallocate all needed block arrays to see in advance if the 1577 // allocation will succeed. 1578 // I will probably remove it later, because it's no perfect solution 1579 // either: if the allocation was broken up before (blocksNeeded != 0), 1580 // it doesn't guarantee anything. 1581 // And since failing in this case is not that common, it doesn't have 1582 // to be optimized in that way. 1583 // Anyway, I wanted to have it in CVS - all those lines, and they will 1584 // be removed soon :-) 1585 /* 1586 // allocate new block arrays if needed 1587 1588 off_t block = -1; 1589 1590 for (int32 i = 0;i < needed;i++) { 1591 // get the block to insert the run into 1592 block = fVolume->ToBlock(data->double_indirect) + i + indirectIndex / runsPerBlock; 1593 if (cached.BlockNumber() != block) 1594 array = (block_run *)cached.SetTo(block); 1595 1596 if (array == NULL) 1597 return B_ERROR; 1598 1599 status = AllocateBlockArray(transaction, array[i + indirectIndex % runsPerBlock]); 1600 if (status < B_OK) 1601 return status; 1602 } 1603 */ 1604 1605 while (run.length != 0) { 1606 // get the indirect array block 1607 if (array == NULL) { 1608 array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) 1609 + indirectIndex / runsPerBlock); 1610 if (array == NULL) 1611 return B_IO_ERROR; 1612 } 1613 1614 do { 1615 // do we need a new array block? 1616 if (array[indirectIndex % runsPerBlock].IsZero()) { 1617 status = _AllocateBlockArray(transaction, 1618 array[indirectIndex % runsPerBlock]); 1619 if (status < B_OK) 1620 return status; 1621 } 1622 1623 block_run *runs = (block_run *)cachedDirect.SetToWritable(transaction, 1624 fVolume->ToBlock(array[indirectIndex % runsPerBlock]) 1625 + index / runsPerBlock); 1626 if (runs == NULL) 1627 return B_IO_ERROR; 1628 1629 do { 1630 // insert the block_run into the array 1631 runs[index % runsPerBlock] = run; 1632 runs[index % runsPerBlock].length = HOST_ENDIAN_TO_BFS_INT16(NUM_ARRAY_BLOCKS); 1633 1634 // alter the remaining block_run 1635 run.start = HOST_ENDIAN_TO_BFS_INT16(run.Start() + NUM_ARRAY_BLOCKS); 1636 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - NUM_ARRAY_BLOCKS); 1637 } while ((++index % runsPerBlock) != 0 && run.length); 1638 } while ((index % runsPerArray) != 0 && run.length); 1639 1640 if (++indirectIndex % runsPerBlock == 0) { 1641 array = NULL; 1642 index = 0; 1643 } 1644 } 1645 1646 data->max_double_indirect_range = HOST_ENDIAN_TO_BFS_INT64(data->MaxDoubleIndirectRange() + (runLength << fVolume->BlockShift())); 1647 data->size = blocksNeeded > 0 ? HOST_ENDIAN_TO_BFS_INT64(data->max_double_indirect_range) : size; 1648 1649 continue; 1650 } 1651 1652 RETURN_ERROR(EFBIG); 1653 } 1654 // update the size of the data stream 1655 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1656 1657 return B_OK; 1658 } 1659 1660 1661 status_t 1662 Inode::_FreeStaticStreamArray(Transaction &transaction, int32 level, block_run run, 1663 off_t size, off_t offset, off_t &max) 1664 { 1665 int32 indirectSize = 0; 1666 if (level == 0) 1667 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift())) 1668 * (fVolume->BlockSize() / sizeof(block_run)); 1669 else if (level == 1) 1670 indirectSize = 4 << fVolume->BlockShift(); 1671 1672 off_t start; 1673 if (size > offset) 1674 start = size - offset; 1675 else 1676 start = 0; 1677 1678 int32 index = start / indirectSize; 1679 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1680 1681 CachedBlock cached(fVolume); 1682 off_t blockNumber = fVolume->ToBlock(run); 1683 1684 // set the file offset to the current block run 1685 offset += (off_t)index * indirectSize; 1686 1687 for (int32 i = index / runsPerBlock; i < run.Length(); i++) { 1688 block_run *array = (block_run *)cached.SetToWritable(transaction, blockNumber + i); 1689 if (array == NULL) 1690 RETURN_ERROR(B_ERROR); 1691 1692 for (index = index % runsPerBlock; index < runsPerBlock; index++) { 1693 if (array[index].IsZero()) { 1694 // we also want to break out of the outer loop 1695 i = run.Length(); 1696 break; 1697 } 1698 1699 status_t status = B_OK; 1700 if (level == 0) { 1701 status = _FreeStaticStreamArray(transaction, 1, array[index], size, 1702 offset, max); 1703 } else if (offset >= size) 1704 status = fVolume->Free(transaction, array[index]); 1705 else 1706 max = HOST_ENDIAN_TO_BFS_INT64(offset + indirectSize); 1707 1708 if (status < B_OK) 1709 RETURN_ERROR(status); 1710 1711 if (offset >= size) 1712 array[index].SetTo(0, 0, 0); 1713 1714 offset += indirectSize; 1715 } 1716 index = 0; 1717 } 1718 return B_OK; 1719 } 1720 1721 1722 /** Frees all block_runs in the array which come after the specified size. 1723 * It also trims the last block_run that contain the size. 1724 * "offset" and "max" are maintained until the last block_run that doesn't 1725 * have to be freed - after this, the values won't be correct anymore, but 1726 * will still assure correct function for all subsequent calls. 1727 * "max" is considered to be in file system byte order. 1728 */ 1729 1730 status_t 1731 Inode::_FreeStreamArray(Transaction &transaction, block_run *array, uint32 arrayLength, 1732 off_t size, off_t &offset, off_t &max) 1733 { 1734 PRINT(("FreeStreamArray: arrayLength %lu, size %Ld, offset %Ld, max %Ld\n", 1735 arrayLength, size, offset, max)); 1736 1737 off_t newOffset = offset; 1738 uint32 i = 0; 1739 for (; i < arrayLength; i++, offset = newOffset) { 1740 if (array[i].IsZero()) 1741 break; 1742 1743 newOffset += (off_t)array[i].Length() << fVolume->BlockShift(); 1744 if (newOffset <= size) 1745 continue; 1746 1747 block_run run = array[i]; 1748 1749 // determine the block_run to be freed 1750 if (newOffset > size && offset < size) { 1751 // free partial block_run (and update the original block_run) 1752 run.start = array[i].start + ((size - offset) >> fVolume->BlockShift()) + 1; 1753 array[i].length = HOST_ENDIAN_TO_BFS_INT16(run.Start() - array[i].Start()); 1754 run.length = HOST_ENDIAN_TO_BFS_INT16(run.Length() - array[i].Length()); 1755 1756 if (run.length == 0) 1757 continue; 1758 1759 // update maximum range 1760 max = HOST_ENDIAN_TO_BFS_INT64(offset + ((off_t)array[i].Length() << fVolume->BlockShift())); 1761 } else { 1762 // free the whole block_run 1763 array[i].SetTo(0, 0, 0); 1764 1765 if ((off_t)BFS_ENDIAN_TO_HOST_INT64(max) > offset) 1766 max = HOST_ENDIAN_TO_BFS_INT64(offset); 1767 } 1768 1769 if (fVolume->Free(transaction, run) < B_OK) 1770 return B_IO_ERROR; 1771 } 1772 return B_OK; 1773 } 1774 1775 1776 status_t 1777 Inode::_ShrinkStream(Transaction &transaction, off_t size) 1778 { 1779 data_stream *data = &Node().data; 1780 status_t status; 1781 1782 if (data->MaxDoubleIndirectRange() > size) { 1783 off_t *maxDoubleIndirect = &data->max_double_indirect_range; 1784 // gcc 4 work-around: "error: cannot bind packed field 1785 // 'data->data_stream::max_double_indirect_range' to 'off_t&'" 1786 status = _FreeStaticStreamArray(transaction, 0, data->double_indirect, size, 1787 data->MaxIndirectRange(), *maxDoubleIndirect); 1788 if (status < B_OK) 1789 return status; 1790 1791 if (size <= data->MaxIndirectRange()) { 1792 fVolume->Free(transaction, data->double_indirect); 1793 data->double_indirect.SetTo(0, 0, 0); 1794 data->max_double_indirect_range = 0; 1795 } 1796 } 1797 1798 if (data->MaxIndirectRange() > size) { 1799 CachedBlock cached(fVolume); 1800 off_t block = fVolume->ToBlock(data->indirect); 1801 off_t offset = data->MaxDirectRange(); 1802 1803 for (int32 i = 0; i < data->indirect.Length(); i++) { 1804 block_run *array = (block_run *)cached.SetToWritable(transaction, block + i); 1805 if (array == NULL) 1806 break; 1807 1808 off_t *maxIndirect = &data->max_indirect_range; 1809 // gcc 4 work-around: "error: cannot bind packed field 1810 // 'data->data_stream::max_indirect_range' to 'off_t&'" 1811 if (_FreeStreamArray(transaction, array, fVolume->BlockSize() / sizeof(block_run), 1812 size, offset, *maxIndirect) != B_OK) 1813 return B_IO_ERROR; 1814 } 1815 if (data->max_direct_range == data->max_indirect_range) { 1816 fVolume->Free(transaction, data->indirect); 1817 data->indirect.SetTo(0, 0, 0); 1818 data->max_indirect_range = 0; 1819 } 1820 } 1821 1822 if (data->MaxDirectRange() > size) { 1823 off_t offset = 0; 1824 off_t *maxDirect = &data->max_direct_range; 1825 // gcc 4 work-around: "error: cannot bind packed field 1826 // 'data->data_stream::max_direct_range' to 'off_t&'" 1827 status = _FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, 1828 size, offset, *maxDirect); 1829 if (status < B_OK) 1830 return status; 1831 } 1832 1833 data->size = HOST_ENDIAN_TO_BFS_INT64(size); 1834 return B_OK; 1835 } 1836 1837 1838 status_t 1839 Inode::SetFileSize(Transaction &transaction, off_t size) 1840 { 1841 if (size < 0) 1842 return B_BAD_VALUE; 1843 1844 off_t oldSize = Size(); 1845 1846 if (size == oldSize) 1847 return B_OK; 1848 1849 // should the data stream grow or shrink? 1850 status_t status; 1851 if (size > oldSize) { 1852 status = _GrowStream(transaction, size); 1853 if (status < B_OK) { 1854 // if the growing of the stream fails, the whole operation 1855 // fails, so we should shrink the stream to its former size 1856 _ShrinkStream(transaction, oldSize); 1857 } 1858 } else 1859 status = _ShrinkStream(transaction, size); 1860 1861 if (status < B_OK) 1862 return status; 1863 1864 file_cache_set_size(FileCache(), size); 1865 return WriteBack(transaction); 1866 } 1867 1868 1869 status_t 1870 Inode::Append(Transaction &transaction, off_t bytes) 1871 { 1872 return SetFileSize(transaction, Size() + bytes); 1873 } 1874 1875 1876 /** Checks wether or not this inode's data stream needs to be trimmed 1877 * because of an earlier preallocation. 1878 * Returns true if there are any blocks to be trimmed. 1879 */ 1880 1881 bool 1882 Inode::NeedsTrimming() 1883 { 1884 // We never trim preallocated index blocks to make them grow as smooth as possible. 1885 // There are only few indices anyway, so this doesn't hurt 1886 // Also, if an inode is already in deleted state, we don't bother trimming it 1887 if (IsIndex() || IsDeleted()) 1888 return false; 1889 1890 off_t roundedSize = (Size() + fVolume->BlockSize() - 1) & ~(fVolume->BlockSize() - 1); 1891 1892 return Node().data.MaxDirectRange() > roundedSize 1893 || Node().data.MaxIndirectRange() > roundedSize 1894 || Node().data.MaxDoubleIndirectRange() > roundedSize; 1895 } 1896 1897 1898 status_t 1899 Inode::TrimPreallocation(Transaction &transaction) 1900 { 1901 status_t status = _ShrinkStream(transaction, Size()); 1902 if (status < B_OK) 1903 return status; 1904 1905 return WriteBack(transaction); 1906 } 1907 1908 1909 status_t 1910 Inode::Free(Transaction &transaction) 1911 { 1912 FUNCTION(); 1913 1914 // Perhaps there should be an implementation of Inode::ShrinkStream() that 1915 // just frees the data_stream, but doesn't change the inode (since it is 1916 // freed anyway) - that would make an undelete command possible 1917 status_t status = SetFileSize(transaction, 0); 1918 if (status < B_OK) 1919 return status; 1920 1921 // Free all attributes, and remove their indices 1922 { 1923 // We have to limit the scope of AttributeIterator, so that its 1924 // destructor is not called after the inode is deleted 1925 AttributeIterator iterator(this); 1926 1927 char name[B_FILE_NAME_LENGTH]; 1928 uint32 type; 1929 size_t length; 1930 vnode_id id; 1931 while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK) 1932 RemoveAttribute(transaction, name); 1933 } 1934 1935 if (WriteBack(transaction) < B_OK) 1936 return B_IO_ERROR; 1937 1938 return fVolume->Free(transaction, BlockRun()); 1939 } 1940 1941 1942 status_t 1943 Inode::Sync() 1944 { 1945 if (FileCache()) 1946 return file_cache_sync(FileCache()); 1947 1948 // We may also want to flush the attribute's data stream to 1949 // disk here... (do we?) 1950 1951 data_stream *data = &Node().data; 1952 status_t status = B_OK; 1953 1954 // flush direct range 1955 1956 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1957 if (data->direct[i].IsZero()) 1958 return B_OK; 1959 1960 // status = flush_blocks(fVolume->Device(), fVolume->ToBlock(data->direct[i]), 1961 // data->direct[i].Length()); 1962 if (status != B_OK) 1963 return status; 1964 } 1965 1966 // flush indirect range 1967 1968 if (data->max_indirect_range == 0) 1969 return B_OK; 1970 1971 CachedBlock cached(fVolume); 1972 off_t block = fVolume->ToBlock(data->indirect); 1973 int32 count = fVolume->BlockSize() / sizeof(block_run); 1974 1975 for (int32 j = 0; j < data->indirect.Length(); j++) { 1976 block_run *runs = (block_run *)cached.SetTo(block + j); 1977 if (runs == NULL) 1978 break; 1979 1980 for (int32 i = 0; i < count; i++) { 1981 if (runs[i].IsZero()) 1982 return B_OK; 1983 1984 // status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), runs[i].Length()); 1985 if (status != B_OK) 1986 return status; 1987 } 1988 } 1989 1990 // flush double indirect range 1991 1992 if (data->max_double_indirect_range == 0) 1993 return B_OK; 1994 1995 off_t indirectBlock = fVolume->ToBlock(data->double_indirect); 1996 1997 for (int32 l = 0; l < data->double_indirect.Length(); l++) { 1998 block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l); 1999 if (indirectRuns == NULL) 2000 return B_FILE_ERROR; 2001 2002 CachedBlock directCached(fVolume); 2003 2004 for (int32 k = 0; k < count; k++) { 2005 if (indirectRuns[k].IsZero()) 2006 return B_OK; 2007 2008 block = fVolume->ToBlock(indirectRuns[k]); 2009 for (int32 j = 0; j < indirectRuns[k].Length(); j++) { 2010 block_run *runs = (block_run *)directCached.SetTo(block + j); 2011 if (runs == NULL) 2012 return B_FILE_ERROR; 2013 2014 for (int32 i = 0; i < count; i++) { 2015 if (runs[i].IsZero()) 2016 return B_OK; 2017 2018 // ToDo: combine single block_runs to bigger ones when 2019 // they are adjacent 2020 // status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), 2021 // runs[i].Length()); 2022 if (status != B_OK) 2023 return status; 2024 } 2025 } 2026 } 2027 } 2028 return B_OK; 2029 } 2030 2031 2032 status_t 2033 Inode::Remove(Transaction &transaction, const char *name, off_t *_id, bool isDirectory) 2034 { 2035 BPlusTree *tree; 2036 if (GetTree(&tree) != B_OK) 2037 RETURN_ERROR(B_BAD_VALUE); 2038 2039 RecursiveLocker locker(fVolume->Lock()); 2040 2041 // does the file even exist? 2042 off_t id; 2043 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK) 2044 return B_ENTRY_NOT_FOUND; 2045 2046 if (_id) 2047 *_id = id; 2048 2049 Vnode vnode(fVolume, id); 2050 Inode *inode; 2051 status_t status = vnode.Get(&inode); 2052 if (status < B_OK) { 2053 REPORT_ERROR(status); 2054 return B_ENTRY_NOT_FOUND; 2055 } 2056 2057 // Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR 2058 // bit is set for indices in BFS, not for attribute directories) - but you 2059 // should really be able to do whatever you want with your indices 2060 // without having to remove all files first :) 2061 if (!inode->IsIndex()) { 2062 // if it's not of the correct type, don't delete it! 2063 if (inode->IsContainer() != isDirectory) 2064 return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY; 2065 2066 // only delete empty directories 2067 if (isDirectory && !inode->IsEmpty()) 2068 return B_DIRECTORY_NOT_EMPTY; 2069 } 2070 2071 // remove_vnode() allows the inode to be accessed until the last put_vnode() 2072 status = remove_vnode(fVolume->ID(), id); 2073 if (status != B_OK) 2074 return status; 2075 2076 if (tree->Remove(transaction, name, id) < B_OK) { 2077 unremove_vnode(fVolume->ID(), id); 2078 RETURN_ERROR(B_ERROR); 2079 } 2080 2081 #ifdef DEBUG 2082 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) == B_OK) { 2083 DIE(("deleted entry still there")); 2084 } 2085 #endif 2086 2087 // update the inode, so that no one will ever doubt it's deleted :-) 2088 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DELETED); 2089 inode->Node().flags &= ~HOST_ENDIAN_TO_BFS_INT32(INODE_IN_USE); 2090 2091 // In balance to the Inode::Create() method, the main indices 2092 // are updated here (name, size, & last_modified) 2093 2094 Index index(fVolume); 2095 if (inode->IsRegularNode()) { 2096 index.RemoveName(transaction, name, inode); 2097 // If removing from the index fails, it is not regarded as a 2098 // fatal error and will not be reported back! 2099 // Deleted inodes won't be visible in queries anyway. 2100 } 2101 2102 if (inode->IsFile() || inode->IsSymLink()) { 2103 if (inode->IsFile()) 2104 index.RemoveSize(transaction, inode); 2105 index.RemoveLastModified(transaction, inode); 2106 } 2107 2108 return inode->WriteBack(transaction); 2109 } 2110 2111 2112 /** Creates the inode with the specified parent directory, and automatically 2113 * adds the created inode to that parent directory. If an attribute directory 2114 * is created, it will also automatically added to the parent inode as such. 2115 * However, the indices root node, and the regular root node won't be added 2116 * to the super block. 2117 * It will also create the initial B+tree for the inode if it's a directory 2118 * of any kind. 2119 * If the "_id" or "_inode" variable is given and non-NULL to store the inode's 2120 * ID, the inode stays locked - you have to call put_vnode() if you don't use it 2121 * anymore. 2122 */ 2123 2124 status_t 2125 Inode::Create(Transaction &transaction, Inode *parent, const char *name, int32 mode, 2126 int openMode, uint32 type, off_t *_id, Inode **_inode) 2127 { 2128 FUNCTION_START(("name = %s, mode = %ld\n", name, mode)); 2129 2130 block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0); 2131 Volume *volume = transaction.GetVolume(); 2132 BPlusTree *tree = NULL; 2133 2134 if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) { 2135 // check if the file already exists in the directory 2136 if (parent->GetTree(&tree) != B_OK) 2137 RETURN_ERROR(B_BAD_VALUE); 2138 } 2139 2140 WriteLocked locker(parent != NULL ? &parent->Lock() : NULL); 2141 // the parent directory is locked during the whole inode creation 2142 2143 if (tree != NULL) { 2144 // does the file already exist? 2145 off_t offset; 2146 if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) { 2147 // return if the file should be a directory/link or opened in exclusive mode 2148 if (S_ISDIR(mode) || S_ISLNK(mode) || openMode & O_EXCL) 2149 return B_FILE_EXISTS; 2150 2151 Vnode vnode(volume, offset); 2152 Inode *inode; 2153 status_t status = vnode.Get(&inode); 2154 if (status < B_OK) { 2155 REPORT_ERROR(status); 2156 return B_ENTRY_NOT_FOUND; 2157 } 2158 2159 // if it's a directory, bail out! 2160 if (inode->IsDirectory()) 2161 return B_IS_A_DIRECTORY; 2162 2163 // we want to open the file, so we should have the rights to do so 2164 if (inode->CheckPermissions(openModeToAccess(openMode)) != B_OK) 2165 return B_NOT_ALLOWED; 2166 2167 if (openMode & O_TRUNC) { 2168 // we need write access in order to truncate the file 2169 status = inode->CheckPermissions(W_OK); 2170 if (status != B_OK) 2171 return status; 2172 2173 // truncate the existing file 2174 WriteLocked locked(inode->Lock()); 2175 2176 status_t status = inode->SetFileSize(transaction, 0); 2177 if (status >= B_OK) 2178 status = inode->WriteBack(transaction); 2179 2180 if (status < B_OK) 2181 return status; 2182 } 2183 2184 if (_id) 2185 *_id = offset; 2186 if (_inode) 2187 *_inode = inode; 2188 2189 // only keep the vnode in memory if the _id or _inode pointer is provided 2190 if (_id != NULL || _inode != NULL) 2191 vnode.Keep(); 2192 2193 return B_OK; 2194 } 2195 } else if (parent && (mode & S_ATTR_DIR) == 0) 2196 return B_BAD_VALUE; 2197 2198 status_t status; 2199 2200 // do we have the power to create new files at all? 2201 if (parent != NULL && (status = parent->CheckPermissions(W_OK)) != B_OK) 2202 RETURN_ERROR(status); 2203 2204 // allocate space for the new inode 2205 InodeAllocator allocator(transaction); 2206 block_run run; 2207 Inode *inode; 2208 status = allocator.New(&parentRun, mode, run, &inode); 2209 if (status < B_OK) 2210 return status; 2211 2212 // Initialize the parts of the bfs_inode structure that 2213 // InodeAllocator::New() hasn't touched yet 2214 2215 bfs_inode *node = &inode->Node(); 2216 2217 if (parent == NULL) { 2218 // we set the parent to itself in this case 2219 // (only happens for the root and indices node) 2220 node->parent = run; 2221 } else 2222 node->parent = parentRun; 2223 2224 node->uid = HOST_ENDIAN_TO_BFS_INT32(geteuid()); 2225 node->gid = HOST_ENDIAN_TO_BFS_INT32(parent ? parent->Node().GroupID() : getegid()); 2226 // the group ID is inherited from the parent, if available 2227 2228 node->type = HOST_ENDIAN_TO_BFS_INT32(type); 2229 2230 // only add the name to regular files, directories, or symlinks 2231 // don't add it to attributes, or indices 2232 if (tree && inode->IsRegularNode() && inode->SetName(transaction, name) < B_OK) 2233 return B_ERROR; 2234 2235 // Initialize b+tree if it's a directory (and add "." & ".." if it's 2236 // a standard directory for files - not for attributes or indices) 2237 if (inode->IsContainer()) { 2238 status = allocator.CreateTree(); 2239 if (status < B_OK) 2240 return status; 2241 } 2242 2243 // Add a link to the inode from the parent, depending on its type 2244 // (the vnode is not published yet, so it is safe to make the inode 2245 // accessable to the file system here) 2246 if (tree) { 2247 status = tree->Insert(transaction, name, inode->ID()); 2248 } else if (parent && (mode & S_ATTR_DIR) != 0) { 2249 parent->Attributes() = run; 2250 status = parent->WriteBack(transaction); 2251 } 2252 2253 // Note, we only care if the inode could be made accessable for the 2254 // two cases above; the root node or the indices root node must 2255 // handle this case on their own (or other cases where "parent" is 2256 // NULL) 2257 if (status < B_OK) 2258 RETURN_ERROR(status); 2259 2260 // Update the main indices (name, size & last_modified) 2261 // (live queries might want to access us after this) 2262 2263 Index index(volume); 2264 if (inode->IsRegularNode() && name != NULL) { 2265 // the name index only contains regular files 2266 // (but not the root node where name == NULL) 2267 status = index.InsertName(transaction, name, inode); 2268 if (status < B_OK && status != B_BAD_INDEX) { 2269 // We have to remove the node from the parent at this point, 2270 // because the InodeAllocator destructor can't handle this 2271 // case (and if it fails, we can't do anything about it...) 2272 if (tree) 2273 tree->Remove(transaction, name, inode->ID()); 2274 else if (parent != NULL && (mode & S_ATTR_DIR) != 0) 2275 parent->Node().attributes.SetTo(0, 0, 0); 2276 2277 RETURN_ERROR(status); 2278 } 2279 } 2280 2281 inode->UpdateOldLastModified(); 2282 2283 // The "size" & "last_modified" indices don't contain directories 2284 if (inode->IsFile() || inode->IsSymLink()) { 2285 // if adding to these indices fails, the inode creation will not be harmed; 2286 // they are considered less important than the "name" index 2287 if (inode->IsFile()) 2288 index.InsertSize(transaction, inode); 2289 index.InsertLastModified(transaction, inode); 2290 } 2291 2292 // Everything worked well until this point, we have a fully 2293 // initialized inode, and we want to keep it 2294 allocator.Keep(); 2295 2296 if (inode->IsFile() || inode->IsAttribute()) 2297 inode->SetFileCache(file_cache_create(volume->ID(), inode->ID(), inode->Size(), volume->Device())); 2298 2299 if (_id != NULL) 2300 *_id = inode->ID(); 2301 if (_inode != NULL) 2302 *_inode = inode; 2303 2304 // if either _id or _inode is passed, we will keep the inode locked 2305 if (_id == NULL && _inode == NULL) 2306 put_vnode(volume->ID(), inode->ID()); 2307 2308 return B_OK; 2309 } 2310 2311 2312 // #pragma mark - 2313 2314 2315 AttributeIterator::AttributeIterator(Inode *inode) 2316 : 2317 fCurrentSmallData(0), 2318 fInode(inode), 2319 fAttributes(NULL), 2320 fIterator(NULL), 2321 fBuffer(NULL) 2322 { 2323 inode->_AddIterator(this); 2324 } 2325 2326 2327 AttributeIterator::~AttributeIterator() 2328 { 2329 if (fAttributes) 2330 put_vnode(fAttributes->GetVolume()->ID(), fAttributes->ID()); 2331 2332 delete fIterator; 2333 fInode->_RemoveIterator(this); 2334 } 2335 2336 2337 status_t 2338 AttributeIterator::Rewind() 2339 { 2340 fCurrentSmallData = 0; 2341 2342 if (fIterator != NULL) 2343 fIterator->Rewind(); 2344 2345 return B_OK; 2346 } 2347 2348 2349 status_t 2350 AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, vnode_id *_id) 2351 { 2352 // read attributes out of the small data section 2353 2354 if (fCurrentSmallData >= 0) { 2355 NodeGetter nodeGetter(fInode->GetVolume(), fInode); 2356 const bfs_inode *node = nodeGetter.Node(); 2357 const small_data *item = ((bfs_inode *)node)->SmallDataStart(); 2358 2359 fInode->SmallDataLock().Lock(); 2360 2361 int32 i = 0; 2362 for (;;item = item->Next()) { 2363 if (item->IsLast(node)) 2364 break; 2365 2366 if (item->NameSize() == FILE_NAME_NAME_LENGTH 2367 && *item->Name() == FILE_NAME_NAME) 2368 continue; 2369 2370 if (i++ == fCurrentSmallData) 2371 break; 2372 } 2373 2374 if (!item->IsLast(node)) { 2375 strncpy(name, item->Name(), B_FILE_NAME_LENGTH); 2376 *_type = item->Type(); 2377 *_length = item->NameSize(); 2378 *_id = (vnode_id)fCurrentSmallData; 2379 2380 fCurrentSmallData = i; 2381 } 2382 else { 2383 // stop traversing the small_data section 2384 fCurrentSmallData = -1; 2385 } 2386 2387 fInode->SmallDataLock().Unlock(); 2388 2389 if (fCurrentSmallData != -1) 2390 return B_OK; 2391 } 2392 2393 // read attributes out of the attribute directory 2394 2395 if (fInode->Attributes().IsZero()) 2396 return B_ENTRY_NOT_FOUND; 2397 2398 Volume *volume = fInode->GetVolume(); 2399 2400 // if you haven't yet access to the attributes directory, get it 2401 if (fAttributes == NULL) { 2402 if (get_vnode(volume->ID(), volume->ToVnode(fInode->Attributes()), 2403 (void **)&fAttributes) != B_OK) { 2404 FATAL(("get_vnode() failed in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2405 return B_ENTRY_NOT_FOUND; 2406 } 2407 2408 BPlusTree *tree; 2409 if (fAttributes->GetTree(&tree) < B_OK 2410 || (fIterator = new TreeIterator(tree)) == NULL) { 2411 FATAL(("could not get tree in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2412 return B_ENTRY_NOT_FOUND; 2413 } 2414 } 2415 2416 uint16 length; 2417 vnode_id id; 2418 status_t status = fIterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id); 2419 if (status < B_OK) 2420 return status; 2421 2422 Vnode vnode(volume,id); 2423 Inode *attribute; 2424 if ((status = vnode.Get(&attribute)) == B_OK) { 2425 *_type = attribute->Type(); 2426 *_length = attribute->Size(); 2427 *_id = id; 2428 } 2429 2430 return status; 2431 } 2432 2433 2434 void 2435 AttributeIterator::Update(uint16 index, int8 change) 2436 { 2437 // fCurrentSmallData points already to the next item 2438 if (index < fCurrentSmallData) 2439 fCurrentSmallData += change; 2440 } 2441 2442