1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 36 #include "WIndex.h" 37 38 #include <File.h> 39 #include <fs_attr.h> 40 #include <Message.h> 41 #include <Node.h> 42 43 #include <ctype.h> 44 #include <stdlib.h> 45 #include <stdio.h> 46 47 48 #define IVERSION 1 49 50 51 static int32 kCRCTable = 0; 52 53 54 int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2); 55 void gen_crc_table(); 56 unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr, 57 int data_blk_size); 58 59 60 FileEntry::FileEntry(void) 61 { 62 63 } 64 65 66 FileEntry::FileEntry(const char *entryString) 67 : 68 BString(entryString) 69 { 70 71 } 72 73 74 status_t 75 WIndex::SetTo(const char *dataPath, const char *indexPath) 76 { 77 BFile* dataFile; 78 BFile indexFile; 79 80 dataFile = new BFile(); 81 82 if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) { 83 return B_ERROR; 84 } else { 85 bool buildIndex = true; 86 SetTo(dataFile); 87 88 time_t mtime; 89 time_t modified; 90 91 dataFile->GetModificationTime(&mtime); 92 93 if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) { 94 attr_info info; 95 if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) { 96 uint32 version = 0; 97 indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0, 98 &version, 4); 99 if (IVERSION == version) { 100 if ((indexFile.GetAttrInfo("WINDEX:modified", &info) 101 == B_OK)) { 102 indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0, 103 &modified, 4); 104 105 if (mtime == modified) { 106 if (UnflattenIndex(&indexFile) == B_OK) 107 buildIndex = false; 108 } 109 } 110 } 111 } 112 indexFile.Unset(); 113 } 114 if (buildIndex) { 115 InitIndex(); 116 BuildIndex(); 117 if (indexFile.SetTo(indexPath, 118 B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) { 119 FlattenIndex(&indexFile); 120 indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0, 121 &mtime, 4); 122 uint32 version = IVERSION; 123 indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0, 124 &version, 4); 125 } 126 } 127 } 128 return B_OK; 129 } 130 131 132 FileEntry::~FileEntry(void) 133 { 134 135 } 136 137 138 WIndex::WIndex(int32 count) 139 { 140 fEntryList = NULL; 141 fDataFile = NULL; 142 fEntriesPerBlock = count; 143 fEntrySize = sizeof(WIndexEntry); 144 if (!atomic_or(&kCRCTable, 1)) 145 gen_crc_table(); 146 } 147 148 149 WIndex::WIndex(BPositionIO *dataFile, int32 count) 150 { 151 fEntryList = NULL; 152 fDataFile = dataFile; 153 fEntriesPerBlock = count; 154 fEntrySize = sizeof(WIndexEntry); 155 if (!atomic_or(&kCRCTable, 1)) 156 gen_crc_table(); 157 } 158 159 160 WIndex::~WIndex(void) 161 { 162 if (fEntryList) 163 free(fEntryList); 164 delete fDataFile; 165 } 166 167 168 status_t 169 WIndex::UnflattenIndex(BPositionIO *io) 170 { 171 if (fEntryList) 172 free(fEntryList); 173 WIndexHead head; 174 175 io->Seek(0, SEEK_SET); 176 io->Read(&head, sizeof(head)); 177 io->Seek(head.offset, SEEK_SET); 178 179 fEntrySize = head.entrySize; 180 fEntries = head.entries; 181 fMaxEntries = fEntriesPerBlock; 182 fBlockSize = fEntriesPerBlock * fEntrySize; 183 fBlocks = fEntries / fEntriesPerBlock + 1;; 184 fIsSorted = true; 185 186 int32 size = (head.entries + 1) * head.entrySize; 187 if (!(fEntryList = (uint8 *)malloc(size))) 188 return B_ERROR; 189 190 if (fEntries) 191 io->Read(fEntryList, size); 192 193 return B_OK; 194 } 195 196 197 status_t 198 WIndex::FlattenIndex(BPositionIO *io) 199 { 200 if (fEntries && !fIsSorted) 201 SortItems(); 202 WIndexHead head; 203 204 head.entries = fEntries; 205 head.entrySize = fEntrySize; 206 head.offset = sizeof(WIndexHead); 207 io->Seek(0, SEEK_SET); 208 io->Write(&head, sizeof(head)); 209 if (fEntries) 210 io->Write(fEntryList, head.entries * head.entrySize); 211 212 return B_OK; 213 } 214 215 216 int32 217 WIndex::Lookup(int32 key) 218 { 219 if (!fEntries) 220 return -1; 221 if (!fIsSorted) 222 SortItems(); 223 224 // Binary Search 225 int32 M, Lb, Ub; 226 Lb = 0; 227 Ub = fEntries - 1; 228 while (true) { 229 M = (Lb + Ub) / 2; 230 if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 231 Ub = M - 1; 232 else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 233 Lb = M + 1; 234 else 235 return M; 236 if (Lb > Ub) 237 return -1; 238 } 239 } 240 241 242 status_t 243 WIndex::AddItem(WIndexEntry *entry) 244 { 245 if (_BlockCheck() == B_ERROR) 246 return B_ERROR; 247 memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry, 248 fEntrySize); 249 fEntries++; 250 fIsSorted = false; 251 return B_OK; 252 } 253 254 255 void 256 WIndex::SortItems(void) 257 { 258 qsort(fEntryList, fEntries, fEntrySize, 259 (int(*)(const void *, const void *))cmp_i_entries); 260 261 fIsSorted = true; 262 } 263 264 265 status_t 266 WIndex::_BlockCheck(void) 267 { 268 if (fEntries < fMaxEntries) 269 return B_OK; 270 fBlocks = fEntries / fEntriesPerBlock + 1; 271 fEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks); 272 if (!fEntryList) 273 return B_ERROR; 274 return B_OK; 275 } 276 277 278 status_t 279 WIndex::InitIndex(void) 280 { 281 if (fEntryList) 282 free(fEntryList); 283 fIsSorted = 0; 284 fEntries = 0; 285 fMaxEntries = fEntriesPerBlock; 286 fBlockSize = fEntriesPerBlock * fEntrySize; 287 fBlocks = 1; 288 fEntryList = (uint8 *)malloc(fBlockSize); 289 if (!fEntryList) 290 return B_ERROR; 291 return B_OK; 292 } 293 294 295 int32 296 WIndex::GetKey(const char *s) 297 { 298 299 int32 key = 0; 300 /*int32 x; 301 int32 a = 84589; 302 int32 b = 45989; 303 int32 m = 217728; 304 while (*s) { 305 x = *s++ - 'a'; 306 307 key ^= (a * x + b) % m; 308 key <<= 1; 309 }*/ 310 311 key = update_crc(0, s, strlen(s)); 312 313 if (key < 0) // No negative values! 314 key = ~key; 315 316 return key; 317 } 318 319 320 int32 321 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2) 322 { 323 return e1->key - e2->key; 324 } 325 326 327 status_t 328 WIndex::SetTo(BPositionIO *dataFile) 329 { 330 fDataFile = dataFile; 331 return B_OK; 332 } 333 334 335 void 336 WIndex::Unset(void) 337 { 338 fDataFile = NULL; 339 } 340 341 342 int32 343 WIndex::FindFirst(const char *word) 344 { 345 if (!fEntries) 346 return -1; 347 348 int32 index; 349 char nword[256]; 350 int32 key; 351 352 NormalizeWord(word, nword); 353 key = GetKey(nword); 354 355 if ((index = Lookup(key)) < 0) 356 return -1; 357 // Find first instance of key 358 while ((ItemAt(index - 1))->key == key) 359 index--; 360 return index; 361 } 362 363 364 FileEntry* 365 WIndex::GetEntry(int32 index) 366 { 367 if ((index >= fEntries)||(index < 0)) 368 return NULL; 369 WIndexEntry *ientry; 370 FileEntry *dentry; 371 char *buffer; 372 373 dentry = new FileEntry(); 374 375 ientry = ItemAt(index); 376 377 int32 size; 378 379 fDataFile->Seek(ientry->offset, SEEK_SET); 380 buffer = dentry->LockBuffer(256); 381 fDataFile->Read(buffer, 256); 382 size = _GetEntrySize(ientry, buffer); 383 //buffer[256] = 0; 384 //printf("Entry: = %s\n", buffer); 385 dentry->UnlockBuffer(size); 386 return dentry; 387 } 388 389 390 size_t 391 WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData) 392 { 393 // eliminate unused parameter warning 394 (void)entry; 395 396 return strcspn(entryData, "\n\r"); 397 } 398 399 400 FileEntry* 401 WIndex::GetEntry(const char *word) 402 { 403 return GetEntry(FindFirst(word)); 404 } 405 406 407 char* 408 WIndex::NormalizeWord(const char *word, char *dest) 409 { 410 const char *src; 411 char *dst; 412 413 // remove dots and copy 414 src = word; 415 dst = dest; 416 while (*src) { 417 if (*src != '.') 418 *dst++ = *src; 419 src++; 420 } 421 *dst = 0; 422 423 // convert to lower-case 424 dst = dest; 425 while (*dst) 426 *dst++ = tolower(*dst); 427 return dest; 428 } 429 430 431 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ 432 /* the high-bit first (Big-Endian) bit ordering convention */ 433 /* */ 434 /* Synopsis: */ 435 /* gen_crc_table() -- generates a 256-word table containing all CRC */ 436 /* remainders for every possible 8-bit byte. It */ 437 /* must be executed (once) before any CRC updates. */ 438 /* */ 439 /* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ 440 /* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ 441 /* Returns the updated value of the CRC accumulator after */ 442 /* processing each byte in the addressed block of data. */ 443 /* */ 444 /* It is assumed that an unsigned long is at least 32 bits wide and */ 445 /* that the predefined type char occupies one 8-bit byte of storage. */ 446 /* */ 447 /* The generator polynomial used for this version of the package is */ 448 /* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ 449 /* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ 450 /* Other degree 32 polynomials may be substituted by re-defining the */ 451 /* symbol POLYNOMIAL below. Lower degree polynomials must first be */ 452 /* multiplied by an appropriate power of x. The representation used */ 453 /* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ 454 /* word and the coefficient of x^31 is stored in the most significant */ 455 /* bit. The CRC is to be appended to the data most significant byte */ 456 /* first. For those protocols in which bytes are transmitted MSB */ 457 /* first and in the same order as they are encountered in the block */ 458 /* this convention results in the CRC remainder being transmitted with */ 459 /* the coefficient of x^31 first and with that of x^0 last (just as */ 460 /* would be done by a hardware shift register mechanization). */ 461 /* */ 462 /* The table lookup technique was adapted from the algorithm described */ 463 /* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ 464 465 466 #define POLYNOMIAL 0x04c11db7L 467 468 469 static unsigned long crc_table[256]; 470 471 472 void 473 gen_crc_table() 474 { 475 // generate the table of CRC remainders for all possible bytes 476 477 register int i, j; 478 register unsigned long crc_accum; 479 480 for (i = 0; i < 256; i++) { 481 crc_accum = ((unsigned long) i << 24); 482 for (j = 0; j < 8; j++) { 483 if (crc_accum & 0x80000000L) 484 crc_accum = (crc_accum << 1) ^ POLYNOMIAL; 485 else 486 crc_accum = (crc_accum << 1); 487 } 488 crc_table[i] = crc_accum; 489 } 490 491 return; 492 } 493 494 495 unsigned long 496 update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size) 497 { 498 // update the CRC on the data block one byte at a time 499 500 register int i, j; 501 502 for (j = 0; j < data_blk_size; j++) { 503 i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff; 504 crc_accum = (crc_accum << 8) ^ crc_table[i]; 505 } 506 507 return crc_accum; 508 } 509 510