1 /* Inode - inode access functions 2 ** 3 ** Initial version by Axel Dörfler, axeld@pinc-software.de 4 ** This file may be used under the terms of the OpenBeOS License. 5 */ 6 7 8 #include "Debug.h" 9 #include "cpp.h" 10 #include "Inode.h" 11 #include "BPlusTree.h" 12 #include "Stream.h" 13 #include "Index.h" 14 15 #include <string.h> 16 17 18 class InodeAllocator { 19 public: 20 InodeAllocator(Transaction *transaction); 21 ~InodeAllocator(); 22 23 status_t New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode); 24 status_t CreateTree(); 25 status_t Keep(); 26 27 private: 28 Transaction *fTransaction; 29 block_run fRun; 30 Inode *fInode; 31 }; 32 33 34 InodeAllocator::InodeAllocator(Transaction *transaction) 35 : 36 fTransaction(transaction), 37 fInode(NULL) 38 { 39 } 40 41 42 InodeAllocator::~InodeAllocator() 43 { 44 if (fTransaction != NULL) { 45 if (fInode != NULL) { 46 fInode->Node()->flags &= ~INODE_IN_USE; 47 fInode->Free(fTransaction); 48 } else 49 fTransaction->GetVolume()->Free(fTransaction, fRun); 50 } 51 52 delete fInode; 53 } 54 55 56 status_t 57 InodeAllocator::New(block_run *parentRun, mode_t mode, block_run &run, Inode **_inode) 58 { 59 Volume *volume = fTransaction->GetVolume(); 60 61 status_t status = volume->AllocateForInode(fTransaction, parentRun, mode, fRun); 62 if (status < B_OK) { 63 // don't free the space in the destructor, because 64 // the allocation failed 65 fTransaction = NULL; 66 RETURN_ERROR(status); 67 } 68 69 run = fRun; 70 fInode = new Inode(volume, volume->ToVnode(run), true); 71 if (fInode == NULL) 72 RETURN_ERROR(B_NO_MEMORY); 73 74 // initialize the on-disk bfs_inode structure 75 76 bfs_inode *node = fInode->Node(); 77 78 node->magic1 = INODE_MAGIC1; 79 node->inode_num = run; 80 node->mode = mode; 81 node->flags = INODE_IN_USE | INODE_NOT_READY; 82 // INODE_NOT_READY prevents the inode from being opened - it is 83 // cleared in InodeAllocator::Keep() 84 85 node->create_time = (bigtime_t)time(NULL) << INODE_TIME_SHIFT; 86 node->last_modified_time = node->create_time | (volume->GetUniqueID() & INODE_TIME_MASK); 87 // we use Volume::GetUniqueID() to avoid having too many duplicates in the 88 // last_modified index 89 90 node->inode_size = volume->InodeSize(); 91 92 *_inode = fInode; 93 return B_OK; 94 } 95 96 97 status_t 98 InodeAllocator::CreateTree() 99 { 100 Volume *volume = fTransaction->GetVolume(); 101 102 // force S_STR_INDEX to be set, if no type is set 103 if ((fInode->Mode() & S_INDEX_TYPES) == 0) 104 fInode->Node()->mode |= S_STR_INDEX; 105 106 BPlusTree *tree = fInode->fTree = new BPlusTree(fTransaction, fInode); 107 if (tree == NULL || tree->InitCheck() < B_OK) 108 return B_ERROR; 109 110 if (fInode->IsRegularNode()) { 111 if (tree->Insert(fTransaction, ".", fInode->ID()) < B_OK 112 || tree->Insert(fTransaction, "..", volume->ToVnode(fInode->Parent())) < B_OK) 113 return B_ERROR; 114 } 115 return B_OK; 116 } 117 118 119 status_t 120 InodeAllocator::Keep() 121 { 122 ASSERT(fInode != NULL && fTransaction != NULL); 123 124 Volume *volume = fTransaction->GetVolume(); 125 fInode->Node()->flags &= ~INODE_NOT_READY; 126 status_t status = fInode->WriteBack(fTransaction); 127 128 fTransaction = NULL; 129 fInode = NULL; 130 131 return status; 132 } 133 134 135 // #pragma mark - 136 137 138 Inode::Inode(Volume *volume, vnode_id id, bool empty, uint8 reenter) 139 : CachedBlock(volume, volume->VnodeToBlock(id), empty), 140 fTree(NULL), 141 fLock("bfs inode") 142 { 143 Node()->flags &= INODE_PERMANENT_FLAGS; 144 145 // these two will help to maintain the indices 146 fOldSize = Size(); 147 fOldLastModified = LastModified(); 148 } 149 150 151 Inode::~Inode() 152 { 153 delete fTree; 154 } 155 156 157 status_t 158 Inode::InitCheck() 159 { 160 if (!Node()) 161 RETURN_ERROR(B_IO_ERROR); 162 163 // test inode magic and flags 164 if (Node()->magic1 != INODE_MAGIC1 165 || !(Node()->flags & INODE_IN_USE) 166 || Node()->inode_num.length != 1 167 // matches inode size? 168 || Node()->inode_size != fVolume->InodeSize() 169 // parent resides on disk? 170 || Node()->parent.allocation_group > fVolume->AllocationGroups() 171 || Node()->parent.allocation_group < 0 172 || Node()->parent.start > (1L << fVolume->AllocationGroupShift()) 173 || Node()->parent.length != 1 174 // attributes, too? 175 || Node()->attributes.allocation_group > fVolume->AllocationGroups() 176 || Node()->attributes.allocation_group < 0 177 || Node()->attributes.start > (1L << fVolume->AllocationGroupShift())) { 178 FATAL(("inode at block %Ld corrupt!\n", fBlockNumber)); 179 RETURN_ERROR(B_BAD_DATA); 180 } 181 182 if (Flags() & INODE_NOT_READY) 183 return B_BUSY; 184 185 // ToDo: Add some tests to check the integrity of the other stuff here, 186 // especially for the data_stream! 187 188 // it's more important to know that the inode is corrupt 189 // so we check for the lock not until here 190 return fLock.InitCheck(); 191 } 192 193 194 status_t 195 Inode::CheckPermissions(int accessMode) const 196 { 197 uid_t user = geteuid(); 198 gid_t group = getegid(); 199 200 // you never have write access to a read-only volume 201 if (accessMode & W_OK && fVolume->IsReadOnly()) 202 return B_READ_ONLY_DEVICE; 203 204 // root users always have full access (but they can't execute anything) 205 if (user == 0 && !((accessMode & X_OK) && (Mode() & S_IXUSR) == 0)) 206 return B_OK; 207 208 // shift mode bits, to check directly against accessMode 209 mode_t mode = Mode(); 210 if (user == Node()->uid) 211 mode >>= 6; 212 else if (group == Node()->gid) 213 mode >>= 3; 214 215 if (accessMode & ~(mode & S_IRWXO)) 216 return B_NOT_ALLOWED; 217 218 return B_OK; 219 } 220 221 222 // #pragma mark - 223 224 225 void 226 Inode::AddIterator(AttributeIterator *iterator) 227 { 228 if (fSmallDataLock.Lock() < B_OK) 229 return; 230 231 fIterators.Add(iterator); 232 233 fSmallDataLock.Unlock(); 234 } 235 236 237 void 238 Inode::RemoveIterator(AttributeIterator *iterator) 239 { 240 if (fSmallDataLock.Lock() < B_OK) 241 return; 242 243 fIterators.Remove(iterator); 244 245 fSmallDataLock.Unlock(); 246 } 247 248 249 /** Tries to free up "bytes" space in the small_data section by moving 250 * attributes to real files. Used for system attributes like the name. 251 * You need to hold the fSmallDataLock when you call this method 252 */ 253 254 status_t 255 Inode::MakeSpaceForSmallData(Transaction *transaction, const char *name, int32 bytes) 256 { 257 ASSERT(fSmallDataLock.IsLocked()); 258 259 while (bytes > 0) { 260 small_data *item = Node()->small_data_start, *max = NULL; 261 int32 index = 0, maxIndex = 0; 262 for (; !item->IsLast(Node()); item = item->Next(), index++) { 263 // should not remove those 264 if (*item->Name() == FILE_NAME_NAME || !strcmp(name, item->Name())) 265 continue; 266 267 if (max == NULL || max->Size() < item->Size()) { 268 maxIndex = index; 269 max = item; 270 } 271 272 // remove the first one large enough to free the needed amount of bytes 273 if (bytes < item->Size()) 274 break; 275 } 276 277 if (item->IsLast(Node()) || item->Size() < bytes) 278 return B_ERROR; 279 280 bytes -= max->Size(); 281 282 // Move the attribute to a real attribute file 283 // Luckily, this doesn't cause any index updates 284 285 Inode *attribute; 286 status_t status = CreateAttribute(transaction, item->Name(), item->type, &attribute); 287 if (status < B_OK) 288 RETURN_ERROR(status); 289 290 size_t length = item->data_size; 291 status = attribute->WriteAt(transaction, 0, item->Data(), &length); 292 293 ReleaseAttribute(attribute); 294 295 if (status < B_OK) { 296 Vnode vnode(fVolume,Attributes()); 297 Inode *attributes; 298 if (vnode.Get(&attributes) < B_OK 299 || attributes->Remove(transaction, name) < B_OK) { 300 FATAL(("Could not remove newly created attribute!\n")); 301 } 302 303 RETURN_ERROR(status); 304 } 305 306 RemoveSmallData(max, maxIndex); 307 } 308 return B_OK; 309 } 310 311 312 /** Private function which removes the given attribute from the small_data 313 * section. 314 * You need to hold the fSmallDataLock when you call this method 315 */ 316 317 status_t 318 Inode::RemoveSmallData(small_data *item, int32 index) 319 { 320 ASSERT(fSmallDataLock.IsLocked()); 321 322 small_data *next = item->Next(); 323 if (!next->IsLast(Node())) { 324 // find the last attribute 325 small_data *last = next; 326 while (!last->IsLast(Node())) 327 last = last->Next(); 328 329 int32 size = (uint8 *)last - (uint8 *)next; 330 if (size < 0 || size > (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)next) 331 return B_BAD_DATA; 332 333 memmove(item, next, size); 334 335 // Move the "last" one to its new location and 336 // correctly terminate the small_data section 337 last = (small_data *)((uint8 *)last - ((uint8 *)next - (uint8 *)item)); 338 memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last); 339 } else 340 memset(item, 0, item->Size()); 341 342 // update all current iterators 343 AttributeIterator *iterator = NULL; 344 while ((iterator = fIterators.Next(iterator)) != NULL) 345 iterator->Update(index, -1); 346 347 return B_OK; 348 } 349 350 351 /** Removes the given attribute from the small_data section. 352 * Note that you need to write back the inode yourself after having called 353 * that method. 354 */ 355 356 status_t 357 Inode::RemoveSmallData(Transaction *transaction, const char *name) 358 { 359 if (name == NULL) 360 return B_BAD_VALUE; 361 362 SimpleLocker locker(fSmallDataLock); 363 364 // search for the small_data item 365 366 small_data *item = Node()->small_data_start; 367 int32 index = 0; 368 while (!item->IsLast(Node()) && strcmp(item->Name(), name)) { 369 item = item->Next(); 370 index++; 371 } 372 373 if (item->IsLast(Node())) 374 return B_ENTRY_NOT_FOUND; 375 376 return RemoveSmallData(item, index); 377 } 378 379 380 /** Try to place the given attribute in the small_data section - if the 381 * new attribute is too big to fit in that section, it returns B_DEVICE_FULL. 382 * In that case, the attribute should be written to a real attribute file; 383 * if the attribute was already part of the small_data section, but the new 384 * one wouldn't fit, the old one is automatically removed from the small_data 385 * section. 386 * Note that you need to write back the inode yourself after having called that 387 * method - it's a bad API decision that it needs a transaction but enforces you 388 * to write back the inode all by yourself, but it's just more efficient in most 389 * cases... 390 */ 391 392 status_t 393 Inode::AddSmallData(Transaction *transaction, const char *name, uint32 type, 394 const uint8 *data, size_t length, bool force) 395 { 396 if (name == NULL || data == NULL || type == 0) 397 return B_BAD_VALUE; 398 399 // reject any requests that can't fit into the small_data section 400 uint32 nameLength = strlen(name); 401 uint32 spaceNeeded = sizeof(small_data) + nameLength + 3 + length + 1; 402 if (spaceNeeded > fVolume->InodeSize() - sizeof(bfs_inode)) 403 return B_DEVICE_FULL; 404 405 SimpleLocker locker(fSmallDataLock); 406 407 small_data *item = Node()->small_data_start; 408 int32 index = 0; 409 while (!item->IsLast(Node()) && strcmp(item->Name(), name)) { 410 item = item->Next(); 411 index++; 412 } 413 414 // is the attribute already in the small_data section? 415 // then just replace the data part of that one 416 if (!item->IsLast(Node())) { 417 // find last attribute 418 small_data *last = item; 419 while (!last->IsLast(Node())) 420 last = last->Next(); 421 422 // try to change the attributes value 423 if (item->data_size > length 424 || force 425 || ((uint8 *)last + length - item->data_size) <= ((uint8 *)Node() + fVolume->InodeSize())) { 426 // make room for the new attribute if needed (and we are forced to do so) 427 if (force 428 && ((uint8 *)last + length - item->data_size) > ((uint8 *)Node() + fVolume->InodeSize())) { 429 // We also take the free space at the end of the small_data section 430 // into account, and request only what's really needed 431 uint32 needed = length - item->data_size - 432 (uint32)((uint8 *)Node() + fVolume->InodeSize() - (uint8 *)last); 433 434 if (MakeSpaceForSmallData(transaction, name, needed) < B_OK) 435 return B_ERROR; 436 437 // reset our pointers 438 item = Node()->small_data_start; 439 index = 0; 440 while (!item->IsLast(Node()) && strcmp(item->Name(), name)) { 441 item = item->Next(); 442 index++; 443 } 444 445 last = item; 446 while (!last->IsLast(Node())) 447 last = last->Next(); 448 } 449 450 // move the attributes after the current one 451 small_data *next = item->Next(); 452 if (!next->IsLast(Node())) 453 memmove((uint8 *)item + spaceNeeded, next, (uint8 *)last - (uint8 *)next); 454 455 // Move the "last" one to its new location and 456 // correctly terminate the small_data section 457 last = (small_data *)((uint8 *)last - ((uint8 *)next - ((uint8 *)item + spaceNeeded))); 458 if ((uint8 *)last < (uint8 *)Node() + fVolume->BlockSize()) 459 memset(last, 0, (uint8 *)Node() + fVolume->BlockSize() - (uint8 *)last); 460 461 item->type = type; 462 item->data_size = length; 463 memcpy(item->Data(), data, length); 464 item->Data()[length] = '\0'; 465 466 return B_OK; 467 } 468 469 // Could not replace the old attribute, so remove it to let 470 // let the calling function create an attribute file for it 471 if (RemoveSmallData(item, index) < B_OK) 472 return B_ERROR; 473 474 return B_DEVICE_FULL; 475 } 476 477 // try to add the new attribute! 478 479 if ((uint8 *)item + spaceNeeded > (uint8 *)Node() + fVolume->InodeSize()) { 480 // there is not enough space for it! 481 if (!force) 482 return B_DEVICE_FULL; 483 484 // make room for the new attribute 485 if (MakeSpaceForSmallData(transaction, name, spaceNeeded) < B_OK) 486 return B_ERROR; 487 488 // get new last item! 489 item = Node()->small_data_start; 490 index = 0; 491 while (!item->IsLast(Node())) { 492 item = item->Next(); 493 index++; 494 } 495 } 496 497 memset(item, 0, spaceNeeded); 498 item->type = type; 499 item->name_size = nameLength; 500 item->data_size = length; 501 strcpy(item->Name(), name); 502 memcpy(item->Data(), data, length); 503 504 // correctly terminate the small_data section 505 item = item->Next(); 506 if (!item->IsLast(Node())) 507 memset(item,0,(uint8 *)Node() + fVolume->InodeSize() - (uint8 *)item); 508 509 // update all current iterators 510 AttributeIterator *iterator = NULL; 511 while ((iterator = fIterators.Next(iterator)) != NULL) 512 iterator->Update(index,1); 513 514 return B_OK; 515 } 516 517 518 /** Iterates through the small_data section of an inode. 519 * To start at the beginning of this section, you let smallData 520 * point to NULL, like: 521 * small_data *data = NULL; 522 * while (inode->GetNextSmallData(&data) { ... } 523 * 524 * This function is reentrant and doesn't allocate any memory; 525 * you can safely stop calling it at any point (you don't need 526 * to iterate through the whole list). 527 * You need to hold the fSmallDataLock when you call this method 528 */ 529 530 status_t 531 Inode::GetNextSmallData(small_data **_smallData) const 532 { 533 if (!Node()) 534 RETURN_ERROR(B_ERROR); 535 536 ASSERT(fSmallDataLock.IsLocked()); 537 538 small_data *data = *_smallData; 539 540 // begin from the start? 541 if (data == NULL) 542 data = Node()->small_data_start; 543 else 544 data = data->Next(); 545 546 // is already last item? 547 if (data->IsLast(Node())) 548 return B_ENTRY_NOT_FOUND; 549 550 *_smallData = data; 551 552 return B_OK; 553 } 554 555 556 /** Finds the attribute "name" in the small data section, and 557 * returns a pointer to it (or NULL if it doesn't exist). 558 * You need to hold the fSmallDataLock when you call this method 559 */ 560 561 small_data * 562 Inode::FindSmallData(const char *name) const 563 { 564 ASSERT(fSmallDataLock.IsLocked()); 565 566 small_data *smallData = NULL; 567 while (GetNextSmallData(&smallData) == B_OK) { 568 if (!strcmp(smallData->Name(), name)) 569 return smallData; 570 } 571 return NULL; 572 } 573 574 575 /** Returns a pointer to the node's name if present in the small data 576 * section, NULL otherwise. 577 * You need to hold the fSmallDataLock when you call this method 578 */ 579 580 const char * 581 Inode::Name() const 582 { 583 ASSERT(fSmallDataLock.IsLocked()); 584 585 small_data *smallData = NULL; 586 while (GetNextSmallData(&smallData) == B_OK) { 587 if (*smallData->Name() == FILE_NAME_NAME && smallData->name_size == FILE_NAME_NAME_LENGTH) 588 return (const char *)smallData->Data(); 589 } 590 return NULL; 591 } 592 593 594 /** Copies the node's name into the provided buffer. 595 * The buffer must be B_FILE_NAME_LENGTH bytes large. 596 */ 597 598 status_t 599 Inode::GetName(char *buffer) const 600 { 601 SimpleLocker locker(fSmallDataLock); 602 603 const char *name = Name(); 604 if (name == NULL) 605 return B_ENTRY_NOT_FOUND; 606 607 strlcpy(buffer, name, B_FILE_NAME_LENGTH); 608 return B_OK; 609 } 610 611 612 /** Changes or set the name of a file: in the inode small_data section only, it 613 * doesn't change it in the parent directory's b+tree. 614 * Note that you need to write back the inode yourself after having called 615 * that method. It suffers from the same API decision as AddSmallData() does 616 * (and for the same reason). 617 */ 618 619 status_t 620 Inode::SetName(Transaction *transaction, const char *name) 621 { 622 if (name == NULL || *name == '\0') 623 return B_BAD_VALUE; 624 625 const char nameTag[2] = {FILE_NAME_NAME, 0}; 626 627 return AddSmallData(transaction, nameTag, FILE_NAME_TYPE, (uint8 *)name, strlen(name), true); 628 } 629 630 631 /** Reads data from the specified attribute. 632 * This is a high-level attribute function that understands attributes 633 * in the small_data section as well as real attribute files. 634 */ 635 636 status_t 637 Inode::ReadAttribute(const char *name, int32 type, off_t pos, uint8 *buffer, size_t *_length) 638 { 639 if (pos < 0) 640 pos = 0; 641 642 // search in the small_data section (which has to be locked first) 643 { 644 SimpleLocker locker(fSmallDataLock); 645 646 small_data *smallData = FindSmallData(name); 647 if (smallData != NULL) { 648 size_t length = *_length; 649 if (pos >= smallData->data_size) { 650 *_length = 0; 651 return B_OK; 652 } 653 if (length + pos > smallData->data_size) 654 length = smallData->data_size - pos; 655 656 memcpy(buffer, smallData->Data() + pos, length); 657 *_length = length; 658 return B_OK; 659 } 660 } 661 662 // search in the attribute directory 663 Inode *attribute; 664 status_t status = GetAttribute(name, &attribute); 665 if (status == B_OK) { 666 if (attribute->Lock().Lock() == B_OK) { 667 status = attribute->ReadAt(pos, (uint8 *)buffer, _length); 668 attribute->Lock().Unlock(); 669 } else 670 status = B_ERROR; 671 672 ReleaseAttribute(attribute); 673 } 674 675 RETURN_ERROR(status); 676 } 677 678 679 /** Writes data to the specified attribute. 680 * This is a high-level attribute function that understands attributes 681 * in the small_data section as well as real attribute files. 682 */ 683 684 status_t 685 Inode::WriteAttribute(Transaction *transaction, const char *name, int32 type, off_t pos, 686 const uint8 *buffer, size_t *_length) 687 { 688 // needed to maintain the index 689 uint8 oldBuffer[BPLUSTREE_MAX_KEY_LENGTH], *oldData = NULL; 690 size_t oldLength = 0; 691 692 // ToDo: we actually depend on that the contents of "buffer" are constant 693 // if they get changed during the write (hey, user programs), we may mess 694 // up our index trees! 695 696 Index index(fVolume); 697 bool hasIndex = index.SetTo(name) == B_OK; 698 699 Inode *attribute = NULL; 700 status_t status; 701 if (GetAttribute(name, &attribute) < B_OK) { 702 // save the old attribute data 703 if (hasIndex) { 704 fSmallDataLock.Lock(); 705 706 small_data *smallData = FindSmallData(name); 707 if (smallData != NULL) { 708 oldLength = smallData->data_size; 709 if (oldLength > BPLUSTREE_MAX_KEY_LENGTH) 710 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 711 memcpy(oldData = oldBuffer, smallData->Data(), oldLength); 712 } 713 fSmallDataLock.Unlock(); 714 } 715 716 // if the attribute doesn't exist yet (as a file), try to put it in the 717 // small_data section first - if that fails (due to insufficent space), 718 // create a real attribute file 719 status = AddSmallData(transaction, name, type, buffer, *_length); 720 if (status == B_DEVICE_FULL) { 721 status = CreateAttribute(transaction, name, type, &attribute); 722 if (status < B_OK) 723 RETURN_ERROR(status); 724 } else if (status == B_OK) 725 status = WriteBack(transaction); 726 } 727 728 if (attribute != NULL) { 729 if (attribute->Lock().LockWrite() == B_OK) { 730 // save the old attribute data (if this fails, oldLength will reflect it) 731 if (hasIndex) { 732 oldLength = BPLUSTREE_MAX_KEY_LENGTH; 733 if (attribute->ReadAt(0, oldBuffer, &oldLength) == B_OK) 734 oldData = oldBuffer; 735 } 736 // ToDo: check if the data fits in the inode now and delete the file if so 737 status = attribute->WriteAt(transaction, pos, buffer, _length); 738 739 attribute->Lock().UnlockWrite(); 740 } else 741 status = B_ERROR; 742 743 ReleaseAttribute(attribute); 744 } 745 746 if (status == B_OK) { 747 // ToDo: find a better way for that "pos" thing... 748 // Update index 749 if (hasIndex && pos == 0) { 750 // index only the first BPLUSTREE_MAX_KEY_LENGTH bytes 751 uint16 length = *_length; 752 if (length > BPLUSTREE_MAX_KEY_LENGTH) 753 length = BPLUSTREE_MAX_KEY_LENGTH; 754 755 index.Update(transaction, name, 0, oldData, oldLength, buffer, length, this); 756 } 757 } 758 return status; 759 } 760 761 762 /** Removes the specified attribute from the inode. 763 * This is a high-level attribute function that understands attributes 764 * in the small_data section as well as real attribute files. 765 */ 766 767 status_t 768 Inode::RemoveAttribute(Transaction *transaction, const char *name) 769 { 770 Index index(fVolume); 771 bool hasIndex = index.SetTo(name) == B_OK; 772 773 // update index for attributes in the small_data section 774 if (hasIndex) { 775 fSmallDataLock.Lock(); 776 777 small_data *smallData = FindSmallData(name); 778 if (smallData != NULL) { 779 uint32 length = smallData->data_size; 780 if (length > BPLUSTREE_MAX_KEY_LENGTH) 781 length = BPLUSTREE_MAX_KEY_LENGTH; 782 index.Update(transaction, name, 0, smallData->Data(), length, NULL, 0, this); 783 } 784 fSmallDataLock.Unlock(); 785 } 786 787 status_t status = RemoveSmallData(transaction, name); 788 if (status == B_OK) { 789 status = WriteBack(transaction); 790 } else if (status == B_ENTRY_NOT_FOUND && !Attributes().IsZero()) { 791 // remove the attribute file if it exists 792 Vnode vnode(fVolume, Attributes()); 793 Inode *attributes; 794 if ((status = vnode.Get(&attributes)) < B_OK) 795 return status; 796 797 // update index 798 Inode *attribute; 799 if (hasIndex && GetAttribute(name, &attribute) == B_OK) { 800 uint8 data[BPLUSTREE_MAX_KEY_LENGTH]; 801 size_t length = BPLUSTREE_MAX_KEY_LENGTH; 802 if (attribute->ReadAt(0, data, &length) == B_OK) 803 index.Update(transaction, name, 0, data, length, NULL, 0, this); 804 805 ReleaseAttribute(attribute); 806 } 807 808 if ((status = attributes->Remove(transaction, name)) < B_OK) 809 return status; 810 811 if (attributes->IsEmpty()) { 812 // remove attribute directory (don't fail if that can't be done) 813 if (remove_vnode(fVolume->ID(), attributes->ID()) == B_OK) { 814 // update the inode, so that no one will ever doubt it's deleted :-) 815 attributes->Node()->flags |= INODE_DELETED; 816 if (attributes->WriteBack(transaction) == B_OK) { 817 Attributes().SetTo(0, 0, 0); 818 WriteBack(transaction); 819 } else 820 unremove_vnode(fVolume->ID(), attributes->ID()); 821 } 822 } 823 } 824 return status; 825 } 826 827 828 status_t 829 Inode::GetAttribute(const char *name, Inode **attribute) 830 { 831 // does this inode even have attributes? 832 if (Attributes().IsZero()) 833 return B_ENTRY_NOT_FOUND; 834 835 Vnode vnode(fVolume, Attributes()); 836 Inode *attributes; 837 if (vnode.Get(&attributes) < B_OK) { 838 FATAL(("get_vnode() failed in Inode::GetAttribute(name = \"%s\")\n", name)); 839 return B_ERROR; 840 } 841 842 BPlusTree *tree; 843 status_t status = attributes->GetTree(&tree); 844 if (status == B_OK) { 845 vnode_id id; 846 if ((status = tree->Find((uint8 *)name, (uint16)strlen(name), &id)) == B_OK) { 847 Vnode vnode(fVolume, id); 848 // Check if the attribute is really an attribute 849 if (vnode.Get(attribute) < B_OK 850 || !(*attribute)->IsAttribute()) 851 return B_ERROR; 852 853 vnode.Keep(); 854 return B_OK; 855 } 856 } 857 return status; 858 } 859 860 861 void 862 Inode::ReleaseAttribute(Inode *attribute) 863 { 864 if (attribute == NULL) 865 return; 866 867 put_vnode(fVolume->ID(), attribute->ID()); 868 } 869 870 871 status_t 872 Inode::CreateAttribute(Transaction *transaction, const char *name, uint32 type, Inode **attribute) 873 { 874 // do we need to create the attribute directory first? 875 if (Attributes().IsZero()) { 876 status_t status = Inode::Create(transaction, this, NULL, S_ATTR_DIR | 0666, 0, 0, NULL); 877 if (status < B_OK) 878 RETURN_ERROR(status); 879 } 880 Vnode vnode(fVolume, Attributes()); 881 Inode *attributes; 882 if (vnode.Get(&attributes) < B_OK) 883 return B_ERROR; 884 885 // Inode::Create() locks the inode for us 886 return Inode::Create(transaction, attributes, name, S_ATTR | 0666, 0, type, NULL, attribute); 887 } 888 889 890 // #pragma mark - 891 892 893 /** Gives the caller direct access to the b+tree for a given directory. 894 * The tree is created on demand, but lasts until the inode is 895 * deleted. 896 */ 897 898 status_t 899 Inode::GetTree(BPlusTree **tree) 900 { 901 if (fTree) { 902 *tree = fTree; 903 return B_OK; 904 } 905 906 if (IsContainer()) { 907 fTree = new BPlusTree(this); 908 if (!fTree) 909 RETURN_ERROR(B_NO_MEMORY); 910 911 *tree = fTree; 912 status_t status = fTree->InitCheck(); 913 if (status < B_OK) { 914 delete fTree; 915 fTree = NULL; 916 } 917 RETURN_ERROR(status); 918 } 919 RETURN_ERROR(B_BAD_VALUE); 920 } 921 922 923 bool 924 Inode::IsEmpty() 925 { 926 BPlusTree *tree; 927 status_t status = GetTree(&tree); 928 if (status < B_OK) 929 return status; 930 931 TreeIterator iterator(tree); 932 933 // index and attribute directories are really empty when they are 934 // empty - directories for standard files always contain ".", and 935 // "..", so we need to ignore those two 936 937 uint32 count = 0; 938 char name[BPLUSTREE_MAX_KEY_LENGTH]; 939 uint16 length; 940 vnode_id id; 941 while (iterator.GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id) == B_OK) { 942 if (Mode() & (S_ATTR_DIR | S_INDEX_DIR)) 943 return false; 944 945 if (++count > 2 || strcmp(".", name) && strcmp("..", name)) 946 return false; 947 } 948 return true; 949 } 950 951 952 /** Finds the block_run where "pos" is located in the data_stream of 953 * the inode. 954 * If successful, "offset" will then be set to the file offset 955 * of the block_run returned; so "pos - offset" is for the block_run 956 * what "pos" is for the whole stream. 957 * The caller has to make sure that "pos" is inside the stream. 958 */ 959 960 status_t 961 Inode::FindBlockRun(off_t pos, block_run &run, off_t &offset) 962 { 963 // The BPlusTree class will call this function, we'll provide 964 // standard cached access only from here 965 return ((Stream<Access::Cached> *)this)->FindBlockRun(pos, run, offset); 966 } 967 968 969 status_t 970 Inode::ReadAt(off_t pos, uint8 *buffer, size_t *_length) 971 { 972 // call the right ReadAt() method, depending on the inode flags 973 974 if (Flags() & INODE_NO_CACHE) 975 return ((Stream<Access::Uncached> *)this)->ReadAt(pos, buffer, _length); 976 977 if (Flags() & INODE_LOGGED) 978 return ((Stream<Access::Logged> *)this)->ReadAt(pos, buffer, _length); 979 980 return ((Stream<Access::Cached> *)this)->ReadAt(pos, buffer, _length); 981 } 982 983 984 status_t 985 Inode::WriteAt(Transaction *transaction, off_t pos, const uint8 *buffer, size_t *_length) 986 { 987 // call the right WriteAt() method, depending on the inode flags 988 989 // update the last modification time in memory, it will be written 990 // back to the inode, and the index when the file is closed 991 // ToDo: should update the internal last modified time only at this point! 992 Node()->last_modified_time = (bigtime_t)time(NULL) << INODE_TIME_SHIFT; 993 994 if (Flags() & INODE_NO_CACHE) 995 return ((Stream<Access::Uncached> *)this)->WriteAt(transaction, pos, buffer, _length); 996 997 if (Flags() & INODE_LOGGED) 998 return ((Stream<Access::Logged> *)this)->WriteAt(transaction, pos, buffer, _length); 999 1000 return ((Stream<Access::Cached> *)this)->WriteAt(transaction, pos, buffer, _length); 1001 } 1002 1003 1004 /** Fills the gap between the old file size and the new file size 1005 * with zeros. 1006 * It's more or less a copy of Inode::WriteAt() but it can handle 1007 * length differences of more than just 4 GB, and it never uses 1008 * the log, even if the INODE_LOGGED flag is set. 1009 */ 1010 1011 status_t 1012 Inode::FillGapWithZeros(off_t pos, off_t newSize) 1013 { 1014 // ToDo: we currently do anything here, same as original BFS! 1015 //if (pos >= newSize) 1016 return B_OK; 1017 1018 block_run run; 1019 off_t offset; 1020 if (FindBlockRun(pos, run, offset) < B_OK) 1021 RETURN_ERROR(B_BAD_VALUE); 1022 1023 off_t length = newSize - pos; 1024 uint32 bytesWritten = 0; 1025 uint32 blockSize = fVolume->BlockSize(); 1026 uint32 blockShift = fVolume->BlockShift(); 1027 uint8 *block; 1028 1029 // the first block_run we write could not be aligned to the block_size boundary 1030 // (write partial block at the beginning) 1031 1032 // pos % block_size == (pos - offset) % block_size, offset % block_size == 0 1033 if (pos % blockSize != 0) { 1034 run.start += (pos - offset) / blockSize; 1035 run.length -= (pos - offset) / blockSize; 1036 1037 CachedBlock cached(fVolume,run); 1038 if ((block = cached.Block()) == NULL) 1039 RETURN_ERROR(B_BAD_VALUE); 1040 1041 bytesWritten = blockSize - (pos % blockSize); 1042 if (length < bytesWritten) 1043 bytesWritten = length; 1044 1045 memset(block + (pos % blockSize), 0, bytesWritten); 1046 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1047 RETURN_ERROR(B_IO_ERROR); 1048 1049 pos += bytesWritten; 1050 1051 length -= bytesWritten; 1052 if (length == 0) 1053 return B_OK; 1054 1055 if (FindBlockRun(pos, run, offset) < B_OK) 1056 RETURN_ERROR(B_BAD_VALUE); 1057 } 1058 1059 while (length > 0) { 1060 // offset is the offset to the current pos in the block_run 1061 run.start += (pos - offset) >> blockShift; 1062 run.length -= (pos - offset) >> blockShift; 1063 1064 CachedBlock cached(fVolume); 1065 off_t blockNumber = fVolume->ToBlock(run); 1066 for (int32 i = 0; i < run.length; i++) { 1067 if ((block = cached.SetTo(blockNumber + i, true)) == NULL) 1068 RETURN_ERROR(B_IO_ERROR); 1069 1070 if (fVolume->WriteBlocks(cached.BlockNumber(), block, 1) < B_OK) 1071 RETURN_ERROR(B_IO_ERROR); 1072 } 1073 1074 int32 bytes = run.length << blockShift; 1075 length -= bytes; 1076 bytesWritten += bytes; 1077 1078 // since we don't respect a last partial block, length can be lower 1079 if (length <= 0) 1080 break; 1081 1082 pos += bytes; 1083 1084 if (FindBlockRun(pos, run, offset) < B_OK) 1085 RETURN_ERROR(B_BAD_VALUE); 1086 } 1087 return B_OK; 1088 } 1089 1090 1091 /** Allocates NUM_ARRAY_BLOCKS blocks, and clears their contents. Growing 1092 * the indirect and double indirect range uses this method. 1093 * The allocated block_run is saved in "run" 1094 */ 1095 1096 status_t 1097 Inode::AllocateBlockArray(Transaction *transaction, block_run &run) 1098 { 1099 if (!run.IsZero()) 1100 return B_BAD_VALUE; 1101 1102 status_t status = fVolume->Allocate(transaction, this, NUM_ARRAY_BLOCKS, run, NUM_ARRAY_BLOCKS); 1103 if (status < B_OK) 1104 return status; 1105 1106 // make sure those blocks are empty 1107 CachedBlock cached(fVolume); 1108 off_t block = fVolume->ToBlock(run); 1109 1110 for (int32 i = 0;i < run.length;i++) { 1111 block_run *runs = (block_run *)cached.SetTo(block + i, true); 1112 if (runs == NULL) 1113 return B_IO_ERROR; 1114 1115 if (cached.WriteBack(transaction) < B_OK) 1116 return B_IO_ERROR; 1117 } 1118 return B_OK; 1119 } 1120 1121 1122 status_t 1123 Inode::GrowStream(Transaction *transaction, off_t size) 1124 { 1125 data_stream *data = &Node()->data; 1126 1127 // is the data stream already large enough to hold the new size? 1128 // (can be the case with preallocated blocks) 1129 if (size < data->max_direct_range 1130 || size < data->max_indirect_range 1131 || size < data->max_double_indirect_range) { 1132 data->size = size; 1133 return B_OK; 1134 } 1135 1136 // how many bytes are still needed? (unused ranges are always zero) 1137 uint16 minimum = 1; 1138 off_t bytes; 1139 if (data->size < data->max_double_indirect_range) { 1140 bytes = size - data->max_double_indirect_range; 1141 // the double indirect range can only handle multiple of NUM_ARRAY_BLOCKS 1142 minimum = NUM_ARRAY_BLOCKS; 1143 } else if (data->size < data->max_indirect_range) 1144 bytes = size - data->max_indirect_range; 1145 else if (data->size < data->max_direct_range) 1146 bytes = size - data->max_direct_range; 1147 else 1148 bytes = size - data->size; 1149 1150 // do we have enough free blocks on the disk? 1151 off_t blocks = (bytes + fVolume->BlockSize() - 1) / fVolume->BlockSize(); 1152 if (blocks > fVolume->FreeBlocks()) 1153 return B_DEVICE_FULL; 1154 1155 // should we preallocate some blocks (currently, always 64k)? 1156 off_t blocksNeeded = blocks; 1157 if (blocks < (65536 >> fVolume->BlockShift()) && fVolume->FreeBlocks() > 128) 1158 blocks = 65536 >> fVolume->BlockShift(); 1159 1160 while (blocksNeeded > 0) { 1161 // the requested blocks do not need to be returned with a 1162 // single allocation, so we need to iterate until we have 1163 // enough blocks allocated 1164 block_run run; 1165 status_t status = fVolume->Allocate(transaction, this, blocks, run, minimum); 1166 if (status < B_OK) 1167 return status; 1168 1169 // okay, we have the needed blocks, so just distribute them to the 1170 // different ranges of the stream (direct, indirect & double indirect) 1171 1172 // ToDo: if anything goes wrong here, we probably want to free the 1173 // blocks that couldn't be distributed into the stream! 1174 1175 blocksNeeded -= run.length; 1176 // don't preallocate if the first allocation was already too small 1177 blocks = blocksNeeded; 1178 if (minimum > 1) { 1179 // make sure that "blocks" is a multiple of minimum 1180 blocks = (blocks + minimum - 1) & ~(minimum - 1); 1181 } 1182 1183 // Direct block range 1184 1185 if (data->size <= data->max_direct_range) { 1186 // let's try to put them into the direct block range 1187 int32 free = 0; 1188 for (;free < NUM_DIRECT_BLOCKS;free++) 1189 if (data->direct[free].IsZero()) 1190 break; 1191 1192 if (free < NUM_DIRECT_BLOCKS) { 1193 // can we merge the last allocated run with the new one? 1194 int32 last = free - 1; 1195 if (free > 0 && data->direct[last].MergeableWith(run)) 1196 data->direct[last].length += run.length; 1197 else 1198 data->direct[free] = run; 1199 1200 data->max_direct_range += run.length * fVolume->BlockSize(); 1201 data->size = blocksNeeded > 0 ? data->max_direct_range : size; 1202 continue; 1203 } 1204 } 1205 1206 // Indirect block range 1207 1208 if (data->size <= data->max_indirect_range || !data->max_indirect_range) { 1209 CachedBlock cached(fVolume); 1210 block_run *runs = NULL; 1211 int32 free = 0; 1212 off_t block; 1213 1214 // if there is no indirect block yet, create one 1215 if (data->indirect.IsZero()) { 1216 status = AllocateBlockArray(transaction, data->indirect); 1217 if (status < B_OK) 1218 return status; 1219 1220 data->max_indirect_range = data->max_direct_range; 1221 // insert the block_run in the first block 1222 runs = (block_run *)cached.SetTo(data->indirect); 1223 } else { 1224 uint32 numberOfRuns = fVolume->BlockSize() / sizeof(block_run); 1225 block = fVolume->ToBlock(data->indirect); 1226 1227 // search first empty entry 1228 int32 i = 0; 1229 for (;i < data->indirect.length;i++) { 1230 if ((runs = (block_run *)cached.SetTo(block + i)) == NULL) 1231 return B_IO_ERROR; 1232 1233 for (free = 0; free < numberOfRuns; free++) 1234 if (runs[free].IsZero()) 1235 break; 1236 1237 if (free < numberOfRuns) 1238 break; 1239 } 1240 if (i == data->indirect.length) 1241 runs = NULL; 1242 } 1243 1244 if (runs != NULL) { 1245 // try to insert the run to the last one - note that this doesn't 1246 // take block borders into account, so it could be further optimized 1247 int32 last = free - 1; 1248 if (free > 0 && runs[last].MergeableWith(run)) 1249 runs[last].length += run.length; 1250 else 1251 runs[free] = run; 1252 1253 data->max_indirect_range += run.length << fVolume->BlockShift(); 1254 data->size = blocksNeeded > 0 ? data->max_indirect_range : size; 1255 1256 cached.WriteBack(transaction); 1257 continue; 1258 } 1259 } 1260 1261 // Double indirect block range 1262 1263 if (data->size <= data->max_double_indirect_range || !data->max_double_indirect_range) { 1264 while ((run.length % NUM_ARRAY_BLOCKS) != 0) { 1265 // The number of allocated blocks isn't a multiple of NUM_ARRAY_BLOCKS, 1266 // so we have to change this. This can happen the first time the stream 1267 // grows into the double indirect range. 1268 // First, free the remaining blocks that don't fit into a multiple 1269 // of NUM_ARRAY_BLOCKS 1270 int32 rest = run.length % NUM_ARRAY_BLOCKS; 1271 run.length -= rest; 1272 1273 status = fVolume->Free(transaction, block_run::Run(run.allocation_group, 1274 run.start + run.length, rest)); 1275 if (status < B_OK) 1276 return status; 1277 1278 blocksNeeded += rest; 1279 blocks = (blocksNeeded + NUM_ARRAY_BLOCKS - 1) & ~(NUM_ARRAY_BLOCKS - 1); 1280 minimum = NUM_ARRAY_BLOCKS; 1281 // we make sure here that we have at minimum NUM_ARRAY_BLOCKS allocated, 1282 // so if the allocation succeeds, we don't run into an endless loop 1283 1284 // Are there any blocks left in the run? If not, allocate a new one 1285 if (run.length == 0) 1286 continue; 1287 } 1288 1289 // if there is no double indirect block yet, create one 1290 if (data->double_indirect.IsZero()) { 1291 status = AllocateBlockArray(transaction, data->double_indirect); 1292 if (status < B_OK) 1293 return status; 1294 1295 data->max_double_indirect_range = data->max_indirect_range; 1296 } 1297 1298 // calculate the index where to insert the new blocks 1299 1300 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1301 int32 indirectSize = ((1L << INDIRECT_BLOCKS_SHIFT) << fVolume->BlockShift()) 1302 * runsPerBlock; 1303 int32 directSize = NUM_ARRAY_BLOCKS << fVolume->BlockShift(); 1304 int32 runsPerArray = runsPerBlock << ARRAY_BLOCKS_SHIFT; 1305 1306 off_t start = data->max_double_indirect_range - data->max_indirect_range; 1307 int32 indirectIndex = start / indirectSize; 1308 int32 index = start / directSize; 1309 1310 // distribute the blocks to the array and allocate 1311 // new array blocks when needed 1312 1313 CachedBlock cached(fVolume); 1314 CachedBlock cachedDirect(fVolume); 1315 block_run *array = NULL; 1316 uint32 runLength = run.length; 1317 1318 // ToDo: the following code is commented - it could be used to 1319 // preallocate all needed block arrays to see in advance if the 1320 // allocation will succeed. 1321 // I will probably remove it later, because it's no perfect solution 1322 // either: if the allocation was broken up before (blocksNeeded != 0), 1323 // it doesn't guarantee anything. 1324 // And since failing in this case is not that common, it doesn't have 1325 // to be optimized in that way. 1326 // Anyway, I wanted to have it in CVS - all those lines, and they will 1327 // be removed soon :-) 1328 /* 1329 // allocate new block arrays if needed 1330 1331 off_t block = -1; 1332 1333 for (int32 i = 0;i < needed;i++) { 1334 // get the block to insert the run into 1335 block = fVolume->ToBlock(data->double_indirect) + i + indirectIndex / runsPerBlock; 1336 if (cached.BlockNumber() != block) 1337 array = (block_run *)cached.SetTo(block); 1338 1339 if (array == NULL) 1340 return B_ERROR; 1341 1342 status = AllocateBlockArray(transaction, array[i + indirectIndex % runsPerBlock]); 1343 if (status < B_OK) 1344 return status; 1345 } 1346 */ 1347 1348 while (run.length) { 1349 // get the indirect array block 1350 if (array == NULL) { 1351 if (cached.Block() != NULL 1352 && cached.WriteBack(transaction) < B_OK) 1353 return B_IO_ERROR; 1354 1355 array = (block_run *)cached.SetTo(fVolume->ToBlock(data->double_indirect) + 1356 indirectIndex / runsPerBlock); 1357 if (array == NULL) 1358 return B_IO_ERROR; 1359 } 1360 1361 do { 1362 // do we need a new array block? 1363 if (array[indirectIndex % runsPerBlock].IsZero()) { 1364 status = AllocateBlockArray(transaction, array[indirectIndex % runsPerBlock]); 1365 if (status < B_OK) 1366 return status; 1367 } 1368 1369 block_run *runs = (block_run *)cachedDirect.SetTo( 1370 fVolume->ToBlock(array[indirectIndex % runsPerBlock]) 1371 + index / runsPerBlock); 1372 if (runs == NULL) 1373 return B_IO_ERROR; 1374 1375 do { 1376 // insert the block_run into the array 1377 runs[index % runsPerBlock] = run; 1378 runs[index % runsPerBlock].length = NUM_ARRAY_BLOCKS; 1379 1380 // alter the remaining block_run 1381 run.start += NUM_ARRAY_BLOCKS; 1382 run.length -= NUM_ARRAY_BLOCKS; 1383 } while ((++index % runsPerBlock) != 0 && run.length); 1384 1385 if (cachedDirect.WriteBack(transaction) < B_OK) 1386 return B_IO_ERROR; 1387 } while ((index % runsPerArray) != 0 && run.length); 1388 1389 if (++indirectIndex % runsPerBlock == 0) { 1390 array = NULL; 1391 index = 0; 1392 } 1393 } 1394 1395 data->max_double_indirect_range += runLength << fVolume->BlockShift(); 1396 data->size = blocksNeeded > 0 ? data->max_double_indirect_range : size; 1397 1398 continue; 1399 } 1400 1401 RETURN_ERROR(EFBIG); 1402 } 1403 // update the size of the data stream 1404 data->size = size; 1405 1406 return B_OK; 1407 } 1408 1409 1410 status_t 1411 Inode::FreeStaticStreamArray(Transaction *transaction, int32 level, block_run run, 1412 off_t size, off_t offset, off_t &max) 1413 { 1414 int32 indirectSize; 1415 if (level == 0) 1416 indirectSize = (1L << (INDIRECT_BLOCKS_SHIFT + fVolume->BlockShift())) 1417 * (fVolume->BlockSize() / sizeof(block_run)); 1418 else if (level == 1) 1419 indirectSize = 4 << fVolume->BlockShift(); 1420 1421 off_t start; 1422 if (size > offset) 1423 start = size - offset; 1424 else 1425 start = 0; 1426 1427 int32 index = start / indirectSize; 1428 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1429 1430 CachedBlock cached(fVolume); 1431 off_t blockNumber = fVolume->ToBlock(run); 1432 1433 // set the file offset to the current block run 1434 offset += (off_t)index * indirectSize; 1435 1436 for (int32 i = index / runsPerBlock; i < run.length; i++) { 1437 block_run *array = (block_run *)cached.SetTo(blockNumber + i); 1438 if (array == NULL) 1439 RETURN_ERROR(B_ERROR); 1440 1441 for (index = index % runsPerBlock; index < runsPerBlock; index++) { 1442 if (array[index].IsZero()) { 1443 // we also want to break out of the outer loop 1444 i = run.length; 1445 break; 1446 } 1447 1448 status_t status = B_OK; 1449 if (level == 0) 1450 status = FreeStaticStreamArray(transaction, 1, array[index], size, offset, max); 1451 else if (offset >= size) 1452 status = fVolume->Free(transaction, array[index]); 1453 else 1454 max = offset + indirectSize; 1455 1456 if (status < B_OK) 1457 RETURN_ERROR(status); 1458 1459 if (offset >= size) 1460 array[index].SetTo(0, 0, 0); 1461 1462 offset += indirectSize; 1463 } 1464 index = 0; 1465 1466 cached.WriteBack(transaction); 1467 } 1468 return B_OK; 1469 } 1470 1471 1472 /** Frees all block_runs in the array which come after the specified size. 1473 * It also trims the last block_run that contain the size. 1474 * "offset" and "max" are maintained until the last block_run that doesn't 1475 * have to be freed - after this, the values won't be correct anymore, but 1476 * will still assure correct function for all subsequent calls. 1477 */ 1478 1479 status_t 1480 Inode::FreeStreamArray(Transaction *transaction, block_run *array, uint32 arrayLength, 1481 off_t size, off_t &offset, off_t &max) 1482 { 1483 off_t newOffset = offset; 1484 uint32 i = 0; 1485 for (; i < arrayLength; i++, offset = newOffset) { 1486 if (array[i].IsZero()) 1487 break; 1488 1489 newOffset += (off_t)array[i].length << fVolume->BlockShift(); 1490 if (newOffset <= size) 1491 continue; 1492 1493 block_run run = array[i]; 1494 1495 // determine the block_run to be freed 1496 if (newOffset > size && offset < size) { 1497 // free partial block_run (and update the original block_run) 1498 run.start = array[i].start + ((size - offset) >> fVolume->BlockShift()) + 1; 1499 array[i].length = run.start - array[i].start; 1500 run.length -= array[i].length; 1501 1502 if (run.length == 0) 1503 continue; 1504 1505 // update maximum range 1506 max = offset + ((off_t)array[i].length << fVolume->BlockShift()); 1507 } else { 1508 // free the whole block_run 1509 array[i].SetTo(0, 0, 0); 1510 1511 if (max > offset) 1512 max = offset; 1513 } 1514 1515 if (fVolume->Free(transaction, run) < B_OK) 1516 return B_IO_ERROR; 1517 } 1518 return B_OK; 1519 } 1520 1521 1522 status_t 1523 Inode::ShrinkStream(Transaction *transaction, off_t size) 1524 { 1525 data_stream *data = &Node()->data; 1526 1527 if (data->max_double_indirect_range > size) { 1528 FreeStaticStreamArray(transaction, 0, data->double_indirect, size, 1529 data->max_indirect_range, data->max_double_indirect_range); 1530 1531 if (size <= data->max_indirect_range) { 1532 fVolume->Free(transaction, data->double_indirect); 1533 data->double_indirect.SetTo(0, 0, 0); 1534 data->max_double_indirect_range = 0; 1535 } 1536 } 1537 if (data->max_indirect_range > size) { 1538 CachedBlock cached(fVolume); 1539 off_t block = fVolume->ToBlock(data->indirect); 1540 off_t offset = data->max_direct_range; 1541 1542 for (int32 i = 0; i < data->indirect.length; i++) { 1543 block_run *array = (block_run *)cached.SetTo(block + i); 1544 if (array == NULL) 1545 break; 1546 1547 if (FreeStreamArray(transaction, array, fVolume->BlockSize() / sizeof(block_run), 1548 size, offset, data->max_indirect_range) == B_OK) 1549 cached.WriteBack(transaction); 1550 } 1551 if (data->max_direct_range == data->max_indirect_range) { 1552 fVolume->Free(transaction, data->indirect); 1553 data->indirect.SetTo(0, 0, 0); 1554 data->max_indirect_range = 0; 1555 } 1556 } 1557 if (data->max_direct_range > size) { 1558 off_t offset = 0; 1559 FreeStreamArray(transaction, data->direct, NUM_DIRECT_BLOCKS, size, offset, 1560 data->max_direct_range); 1561 } 1562 1563 data->size = size; 1564 return B_OK; 1565 } 1566 1567 1568 status_t 1569 Inode::SetFileSize(Transaction *transaction, off_t size) 1570 { 1571 if (size < 0 1572 // uncached files can't be resized (Stream<Cache>::WriteAt() specifically 1573 // denies growing uncached files because of efficiency, so it had to be 1574 // adapted if this ever changes [which will probably happen in OpenBeOS]). 1575 || Flags() & INODE_NO_CACHE) 1576 return B_BAD_VALUE; 1577 1578 off_t oldSize = Node()->data.size; 1579 1580 if (size == oldSize) 1581 return B_OK; 1582 1583 // should the data stream grow or shrink? 1584 status_t status; 1585 if (size > oldSize) { 1586 status = GrowStream(transaction, size); 1587 if (status < B_OK) { 1588 // if the growing of the stream fails, the whole operation 1589 // fails, so we should shrink the stream to its former size 1590 ShrinkStream(transaction, oldSize); 1591 } 1592 } 1593 else 1594 status = ShrinkStream(transaction, size); 1595 1596 if (status < B_OK) 1597 return status; 1598 1599 return WriteBack(transaction); 1600 } 1601 1602 1603 status_t 1604 Inode::Append(Transaction *transaction, off_t bytes) 1605 { 1606 return SetFileSize(transaction, Size() + bytes); 1607 } 1608 1609 1610 status_t 1611 Inode::Trim(Transaction *transaction) 1612 { 1613 status_t status = ShrinkStream(transaction, Size()); 1614 if (status < B_OK) 1615 return status; 1616 1617 return WriteBack(transaction); 1618 } 1619 1620 1621 status_t 1622 Inode::Free(Transaction *transaction) 1623 { 1624 // Perhaps there should be an implementation of Inode::ShrinkStream() that 1625 // just frees the data_stream, but doesn't change the inode (since it is 1626 // freed anyway) - that would make an undelete command possible 1627 status_t status = SetFileSize(transaction, 0); 1628 if (status < B_OK) 1629 return status; 1630 1631 // Free all attributes, and remove their indices 1632 { 1633 // We have to limit the scope of AttributeIterator, so that its 1634 // destructor is not called after the inode is deleted 1635 AttributeIterator iterator(this); 1636 1637 char name[B_FILE_NAME_LENGTH]; 1638 uint32 type; 1639 size_t length; 1640 vnode_id id; 1641 while ((status = iterator.GetNext(name, &length, &type, &id)) == B_OK) 1642 RemoveAttribute(transaction, name); 1643 } 1644 1645 if (WriteBack(transaction) < B_OK) 1646 return B_ERROR; 1647 1648 return fVolume->Free(transaction, BlockRun()); 1649 } 1650 1651 1652 status_t 1653 Inode::Sync() 1654 { 1655 // We may also want to flush the attribute's data stream to 1656 // disk here... (do we?) 1657 1658 data_stream *data = &Node()->data; 1659 status_t status; 1660 1661 // flush direct range 1662 1663 for (int32 i = 0;i < NUM_DIRECT_BLOCKS;i++) { 1664 if (data->direct[i].IsZero()) 1665 return B_OK; 1666 1667 status = flush_blocks(fVolume->Device(), fVolume->ToBlock(data->direct[i]), 1668 data->direct[i].length); 1669 if (status != B_OK) 1670 return status; 1671 } 1672 1673 // flush indirect range 1674 1675 if (data->max_indirect_range == 0) 1676 return B_OK; 1677 1678 CachedBlock cached(fVolume); 1679 off_t block = fVolume->ToBlock(data->indirect); 1680 int32 count = fVolume->BlockSize() / sizeof(block_run); 1681 1682 for (int32 j = 0; j < data->indirect.length; j++) { 1683 block_run *runs = (block_run *)cached.SetTo(block + j); 1684 if (runs == NULL) 1685 break; 1686 1687 for (int32 i = 0; i < count; i++) { 1688 if (runs[i].IsZero()) 1689 return B_OK; 1690 1691 status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), runs[i].length); 1692 if (status != B_OK) 1693 return status; 1694 } 1695 } 1696 1697 // flush double indirect range 1698 1699 if (data->max_double_indirect_range == 0) 1700 return B_OK; 1701 1702 off_t indirectBlock = fVolume->ToBlock(data->double_indirect); 1703 1704 for (int32 l = 0; l < data->double_indirect.length; l++) { 1705 block_run *indirectRuns = (block_run *)cached.SetTo(indirectBlock + l); 1706 if (indirectRuns == NULL) 1707 return B_FILE_ERROR; 1708 1709 CachedBlock directCached(fVolume); 1710 1711 for (int32 k = 0; k < count; k++) { 1712 if (indirectRuns[k].IsZero()) 1713 return B_OK; 1714 1715 block = fVolume->ToBlock(indirectRuns[k]); 1716 for (int32 j = 0; j < indirectRuns[k].length; j++) { 1717 block_run *runs = (block_run *)directCached.SetTo(block + j); 1718 if (runs == NULL) 1719 return B_FILE_ERROR; 1720 1721 for (int32 i = 0; i < count; i++) { 1722 if (runs[i].IsZero()) 1723 return B_OK; 1724 1725 // ToDo: combine single block_runs to bigger ones when 1726 // they are adjacent 1727 status = flush_blocks(fVolume->Device(), fVolume->ToBlock(runs[i]), 1728 runs[i].length); 1729 if (status != B_OK) 1730 return status; 1731 } 1732 } 1733 } 1734 } 1735 return B_OK; 1736 } 1737 1738 1739 status_t 1740 Inode::Remove(Transaction *transaction, const char *name, off_t *_id, bool isDirectory) 1741 { 1742 BPlusTree *tree; 1743 if (GetTree(&tree) != B_OK) 1744 RETURN_ERROR(B_BAD_VALUE); 1745 1746 // does the file even exists? 1747 off_t id; 1748 if (tree->Find((uint8 *)name, (uint16)strlen(name), &id) < B_OK) 1749 return B_ENTRY_NOT_FOUND; 1750 1751 if (_id) 1752 *_id = id; 1753 1754 Vnode vnode(fVolume, id); 1755 Inode *inode; 1756 status_t status = vnode.Get(&inode); 1757 if (status < B_OK) { 1758 REPORT_ERROR(status); 1759 return B_ENTRY_NOT_FOUND; 1760 } 1761 1762 // You can't unlink a mounted or the VM file while it is being used - while 1763 // this is not really necessary, it copies the behaviour of the original BFS 1764 // and let you and me feel a little bit safer 1765 if (inode->Flags() & INODE_NO_CACHE) 1766 return B_NOT_ALLOWED; 1767 1768 // Inode::IsContainer() is true also for indices (furthermore, the S_IFDIR 1769 // bit is set for indices in BFS, not for attribute directories) - but you 1770 // should really be able to do whatever you want with your indices 1771 // without having to remove all files first :) 1772 if (!inode->IsIndex()) { 1773 // if it's not of the correct type, don't delete it! 1774 if (inode->IsContainer() != isDirectory) 1775 return isDirectory ? B_NOT_A_DIRECTORY : B_IS_A_DIRECTORY; 1776 1777 // only delete empty directories 1778 if (isDirectory && !inode->IsEmpty()) 1779 return B_DIRECTORY_NOT_EMPTY; 1780 } 1781 1782 // remove_vnode() allows the inode to be accessed until the last put_vnode() 1783 if (remove_vnode(fVolume->ID(), id) != B_OK) 1784 return B_ERROR; 1785 1786 if (tree->Remove(transaction, name, id) < B_OK) { 1787 unremove_vnode(fVolume->ID(), id); 1788 RETURN_ERROR(B_ERROR); 1789 } 1790 1791 // update the inode, so that no one will ever doubt it's deleted :-) 1792 inode->Node()->flags |= INODE_DELETED; 1793 1794 // In balance to the Inode::Create() method, the main indices 1795 // are updated here (name, size, & last_modified) 1796 1797 Index index(fVolume); 1798 if (inode->IsRegularNode()) { 1799 index.RemoveName(transaction, name, inode); 1800 // If removing from the index fails, it is not regarded as a 1801 // fatal error and will not be reported back! 1802 // Deleted inodes won't be visible in queries anyway. 1803 } 1804 1805 if ((inode->Mode() & (S_FILE | S_SYMLINK)) != 0) { 1806 if (inode->IsFile()) 1807 index.RemoveSize(transaction, inode); 1808 index.RemoveLastModified(transaction, inode); 1809 } 1810 1811 if (inode->WriteBack(transaction) < B_OK) 1812 return B_ERROR; 1813 1814 return B_OK; 1815 } 1816 1817 1818 /** Creates the inode with the specified parent directory, and automatically 1819 * adds the created inode to that parent directory. If an attribute directory 1820 * is created, it will also automatically added to the parent inode as such. 1821 * However, the indices root node, and the regular root node won't be added 1822 * to the super block. 1823 * It will also create the initial B+tree for the inode if it's a directory 1824 * of any kind. 1825 * If the "_id" or "_inode" variable is given and non-NULL to store the inode's 1826 * ID, the inode stays locked - you have to call put_vnode() if you don't use it 1827 * anymore. 1828 */ 1829 1830 status_t 1831 Inode::Create(Transaction *transaction, Inode *parent, const char *name, int32 mode, 1832 int omode, uint32 type, off_t *_id, Inode **_inode) 1833 { 1834 block_run parentRun = parent ? parent->BlockRun() : block_run::Run(0, 0, 0); 1835 Volume *volume = transaction->GetVolume(); 1836 BPlusTree *tree = NULL; 1837 1838 if (parent && (mode & S_ATTR_DIR) == 0 && parent->IsContainer()) { 1839 // check if the file already exists in the directory 1840 if (parent->GetTree(&tree) != B_OK) 1841 RETURN_ERROR(B_BAD_VALUE); 1842 1843 // does the file already exist? 1844 off_t offset; 1845 if (tree->Find((uint8 *)name, (uint16)strlen(name), &offset) == B_OK) { 1846 // return if the file should be a directory or opened in exclusive mode 1847 if (mode & S_DIRECTORY || omode & O_EXCL) 1848 return B_FILE_EXISTS; 1849 1850 Vnode vnode(volume, offset); 1851 Inode *inode; 1852 status_t status = vnode.Get(&inode); 1853 if (status < B_OK) { 1854 REPORT_ERROR(status); 1855 return B_ENTRY_NOT_FOUND; 1856 } 1857 1858 // if it's a directory, bail out! 1859 if (inode->IsDirectory()) 1860 return B_IS_A_DIRECTORY; 1861 1862 // if it is a mounted device or the VM file, we don't allow to delete it 1863 // while it is open and in use 1864 if (inode->Flags() & INODE_NO_CACHE) 1865 return B_NOT_ALLOWED; 1866 1867 // if omode & O_TRUNC, truncate the existing file 1868 if (omode & O_TRUNC) { 1869 WriteLocked locked(inode->Lock()); 1870 1871 status_t status = inode->SetFileSize(transaction, 0); 1872 if (status < B_OK) 1873 return status; 1874 } 1875 1876 if (_id) 1877 *_id = offset; 1878 if (_inode) 1879 *_inode = inode; 1880 1881 // only keep the vnode in memory if the _id or _inode pointer is provided 1882 if (_id != NULL || _inode != NULL) 1883 vnode.Keep(); 1884 1885 return B_OK; 1886 } 1887 } else if (parent && (mode & S_ATTR_DIR) == 0) 1888 return B_BAD_VALUE; 1889 1890 // allocate space for the new inode 1891 InodeAllocator allocator(transaction); 1892 block_run run; 1893 Inode *inode; 1894 status_t status = allocator.New(&parentRun, mode, run, &inode); 1895 if (status < B_OK) 1896 return status; 1897 1898 // Initialize the parts of the bfs_inode structure that 1899 // InodeAllocator::New() hasn't touched yet 1900 1901 bfs_inode *node = inode->Node(); 1902 1903 node->parent = parentRun; 1904 1905 node->uid = geteuid(); 1906 node->gid = parent ? parent->Node()->gid : getegid(); 1907 // the group ID is inherited from the parent, if available 1908 1909 node->type = type; 1910 1911 // only add the name to regular files, directories, or symlinks 1912 // don't add it to attributes, or indices 1913 if (tree && inode->IsRegularNode() && inode->SetName(transaction, name) < B_OK) 1914 return B_ERROR; 1915 1916 // Initialize b+tree if it's a directory (and add "." & ".." if it's 1917 // a standard directory for files - not for attributes or indices) 1918 if (inode->IsContainer()) { 1919 status = allocator.CreateTree(); 1920 if (status < B_OK) 1921 return status; 1922 } 1923 1924 // Add a link to the inode from the parent, depending on its type 1925 // (the INODE_NOT_READY flag is set, so it is safe to make the inode 1926 // accessable to the file system here) 1927 if (tree) { 1928 status = tree->Insert(transaction, name, inode->ID()); 1929 } else if (parent && (mode & S_ATTR_DIR) != 0) { 1930 parent->Attributes() = run; 1931 status = parent->WriteBack(transaction); 1932 } 1933 1934 // Note, we only care if the inode could be made accessable for the 1935 // two cases above; the root node or the indices root node must 1936 // handle this case on their own (or other cases where "parent" is 1937 // NULL) 1938 if (status < B_OK) 1939 RETURN_ERROR(status); 1940 1941 // Update the main indices (name, size & last_modified) 1942 // (live queries might want to access us after this) 1943 1944 Index index(volume); 1945 if (inode->IsRegularNode()) { 1946 // the name index only contains regular files 1947 status = index.InsertName(transaction, name, inode); 1948 if (status < B_OK && status != B_BAD_INDEX) { 1949 // We have to remove the node from the parent at this point, 1950 // because the InodeAllocator destructor can't handle this 1951 // case (and if it fails, we can't do anything about it...) 1952 if (tree) 1953 tree->Remove(transaction, name, inode->ID()); 1954 else 1955 parent->Node()->attributes.SetTo(0, 0, 0); 1956 1957 return status; 1958 } 1959 } 1960 1961 inode->UpdateOldLastModified(); 1962 1963 // The "size" & "last_modified" indices don't contain directories 1964 if ((mode & (S_FILE | S_SYMLINK)) != 0) { 1965 // if adding to these indices fails, the inode creation will not be harmed; 1966 // they are considered less important than the "name" index 1967 if (inode->IsFile()) 1968 index.InsertSize(transaction, inode); 1969 index.InsertLastModified(transaction, inode); 1970 } 1971 1972 if (new_vnode(volume->ID(), inode->ID(), inode) != B_OK) { 1973 // this is a really fatal error, and we can't recover from that 1974 DIE(("new_vnode() failed for inode!")); 1975 } 1976 1977 // Everything worked well until this point, we have a fully 1978 // initialized inode, and we want to keep it 1979 allocator.Keep(); 1980 1981 if (_id != NULL) 1982 *_id = inode->ID(); 1983 if (_inode != NULL) 1984 *_inode = inode; 1985 1986 // if either _id or _inode is passed, we will keep the inode locked 1987 if (_id == NULL && _inode == NULL) 1988 put_vnode(volume->ID(), inode->ID()); 1989 1990 return B_OK; 1991 } 1992 1993 1994 // #pragma mark - 1995 1996 1997 AttributeIterator::AttributeIterator(Inode *inode) 1998 : 1999 fCurrentSmallData(0), 2000 fInode(inode), 2001 fAttributes(NULL), 2002 fIterator(NULL), 2003 fBuffer(NULL) 2004 { 2005 inode->AddIterator(this); 2006 } 2007 2008 2009 AttributeIterator::~AttributeIterator() 2010 { 2011 if (fAttributes) 2012 put_vnode(fAttributes->GetVolume()->ID(), fAttributes->ID()); 2013 2014 delete fIterator; 2015 fInode->RemoveIterator(this); 2016 } 2017 2018 2019 status_t 2020 AttributeIterator::Rewind() 2021 { 2022 fCurrentSmallData = 0; 2023 2024 if (fIterator != NULL) 2025 fIterator->Rewind(); 2026 2027 return B_OK; 2028 } 2029 2030 2031 status_t 2032 AttributeIterator::GetNext(char *name, size_t *_length, uint32 *_type, vnode_id *_id) 2033 { 2034 // read attributes out of the small data section 2035 2036 if (fCurrentSmallData >= 0) { 2037 small_data *item = fInode->Node()->small_data_start; 2038 2039 fInode->SmallDataLock().Lock(); 2040 2041 int32 i = 0; 2042 for (;;item = item->Next()) { 2043 if (item->IsLast(fInode->Node())) 2044 break; 2045 2046 if (item->name_size == FILE_NAME_NAME_LENGTH 2047 && *item->Name() == FILE_NAME_NAME) 2048 continue; 2049 2050 if (i++ == fCurrentSmallData) 2051 break; 2052 } 2053 2054 if (!item->IsLast(fInode->Node())) { 2055 strncpy(name, item->Name(), B_FILE_NAME_LENGTH); 2056 *_type = item->type; 2057 *_length = item->name_size; 2058 *_id = (vnode_id)fCurrentSmallData; 2059 2060 fCurrentSmallData = i; 2061 } 2062 else { 2063 // stop traversing the small_data section 2064 fCurrentSmallData = -1; 2065 } 2066 2067 fInode->SmallDataLock().Unlock(); 2068 2069 if (fCurrentSmallData != -1) 2070 return B_OK; 2071 } 2072 2073 // read attributes out of the attribute directory 2074 2075 if (fInode->Attributes().IsZero()) 2076 return B_ENTRY_NOT_FOUND; 2077 2078 Volume *volume = fInode->GetVolume(); 2079 2080 // if you haven't yet access to the attributes directory, get it 2081 if (fAttributes == NULL) { 2082 if (get_vnode(volume->ID(), volume->ToVnode(fInode->Attributes()), 2083 (void **)&fAttributes) != 0 2084 || fAttributes == NULL) { 2085 FATAL(("get_vnode() failed in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2086 return B_ENTRY_NOT_FOUND; 2087 } 2088 2089 BPlusTree *tree; 2090 if (fAttributes->GetTree(&tree) < B_OK 2091 || (fIterator = new TreeIterator(tree)) == NULL) { 2092 FATAL(("could not get tree in AttributeIterator::GetNext(vnode_id = %Ld,name = \"%s\")\n",fInode->ID(),name)); 2093 return B_ENTRY_NOT_FOUND; 2094 } 2095 } 2096 2097 block_run run; 2098 uint16 length; 2099 vnode_id id; 2100 status_t status = fIterator->GetNextEntry(name, &length, B_FILE_NAME_LENGTH, &id); 2101 if (status < B_OK) 2102 return status; 2103 2104 Vnode vnode(volume,id); 2105 Inode *attribute; 2106 if ((status = vnode.Get(&attribute)) == B_OK) { 2107 *_type = attribute->Node()->type; 2108 *_length = attribute->Node()->data.size; 2109 *_id = id; 2110 } 2111 2112 return status; 2113 } 2114 2115 2116 void 2117 AttributeIterator::Update(uint16 index, int8 change) 2118 { 2119 // fCurrentSmallData points already to the next item 2120 if (index < fCurrentSmallData) 2121 fCurrentSmallData += change; 2122 } 2123 2124