1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 36 #include "WIndex.h" 37 38 #include <File.h> 39 #include <fs_attr.h> 40 #include <Message.h> 41 #include <Node.h> 42 43 #include <ctype.h> 44 #include <stdlib.h> 45 #include <stdio.h> 46 47 48 #define IVERSION 1 49 50 51 static int32 kCRCTable = 0; 52 53 54 int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2); 55 void gen_crc_table(); 56 unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr, 57 int data_blk_size); 58 59 60 FileEntry::FileEntry(void) 61 { 62 63 } 64 65 66 FileEntry::FileEntry(const char *entryString) 67 : 68 BString(entryString) 69 { 70 71 } 72 73 74 status_t 75 WIndex::SetTo(const char *dataPath, const char *indexPath) 76 { 77 BFile* dataFile; 78 BFile indexFile; 79 80 dataFile = new BFile(); 81 82 if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) { 83 delete dataFile; 84 return B_ERROR; 85 } else { 86 bool buildIndex = true; 87 SetTo(dataFile); 88 89 time_t mtime; 90 time_t modified; 91 92 dataFile->GetModificationTime(&mtime); 93 94 if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) { 95 attr_info info; 96 if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) { 97 uint32 version = 0; 98 indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0, 99 &version, 4); 100 if (IVERSION == version) { 101 if ((indexFile.GetAttrInfo("WINDEX:modified", &info) 102 == B_OK)) { 103 indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0, 104 &modified, 4); 105 106 if (mtime == modified) { 107 if (UnflattenIndex(&indexFile) == B_OK) 108 buildIndex = false; 109 } 110 } 111 } 112 } 113 indexFile.Unset(); 114 } 115 if (buildIndex) { 116 InitIndex(); 117 BuildIndex(); 118 if (indexFile.SetTo(indexPath, 119 B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) { 120 FlattenIndex(&indexFile); 121 indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0, 122 &mtime, 4); 123 uint32 version = IVERSION; 124 indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0, 125 &version, 4); 126 } 127 } 128 } 129 return B_OK; 130 } 131 132 133 FileEntry::~FileEntry(void) 134 { 135 136 } 137 138 139 WIndex::WIndex(int32 count) 140 { 141 fEntryList = NULL; 142 fDataFile = NULL; 143 fEntriesPerBlock = count; 144 fEntrySize = sizeof(WIndexEntry); 145 if (!atomic_or(&kCRCTable, 1)) 146 gen_crc_table(); 147 } 148 149 150 WIndex::WIndex(BPositionIO *dataFile, int32 count) 151 { 152 fEntryList = NULL; 153 fDataFile = dataFile; 154 fEntriesPerBlock = count; 155 fEntrySize = sizeof(WIndexEntry); 156 if (!atomic_or(&kCRCTable, 1)) 157 gen_crc_table(); 158 } 159 160 161 WIndex::~WIndex(void) 162 { 163 if (fEntryList) 164 free(fEntryList); 165 delete fDataFile; 166 } 167 168 169 status_t 170 WIndex::UnflattenIndex(BPositionIO *io) 171 { 172 if (fEntryList) 173 free(fEntryList); 174 WIndexHead head; 175 176 io->Seek(0, SEEK_SET); 177 io->Read(&head, sizeof(head)); 178 io->Seek(head.offset, SEEK_SET); 179 180 fEntrySize = head.entrySize; 181 fEntries = head.entries; 182 fMaxEntries = fEntriesPerBlock; 183 fBlockSize = fEntriesPerBlock * fEntrySize; 184 fBlocks = fEntries / fEntriesPerBlock + 1;; 185 fIsSorted = true; 186 187 int32 size = (head.entries + 1) * head.entrySize; 188 if (!(fEntryList = (uint8 *)malloc(size))) 189 return B_ERROR; 190 191 if (fEntries) 192 io->Read(fEntryList, size); 193 194 return B_OK; 195 } 196 197 198 status_t 199 WIndex::FlattenIndex(BPositionIO *io) 200 { 201 if (fEntries && !fIsSorted) 202 SortItems(); 203 WIndexHead head; 204 205 head.entries = fEntries; 206 head.entrySize = fEntrySize; 207 head.offset = sizeof(WIndexHead); 208 io->Seek(0, SEEK_SET); 209 io->Write(&head, sizeof(head)); 210 if (fEntries) 211 io->Write(fEntryList, head.entries * head.entrySize); 212 213 return B_OK; 214 } 215 216 217 int32 218 WIndex::Lookup(int32 key) 219 { 220 if (!fEntries) 221 return -1; 222 if (!fIsSorted) 223 SortItems(); 224 225 // Binary Search 226 int32 M, Lb, Ub; 227 Lb = 0; 228 Ub = fEntries - 1; 229 while (true) { 230 M = (Lb + Ub) / 2; 231 if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 232 Ub = M - 1; 233 else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 234 Lb = M + 1; 235 else 236 return M; 237 if (Lb > Ub) 238 return -1; 239 } 240 } 241 242 243 status_t 244 WIndex::AddItem(WIndexEntry *entry) 245 { 246 if (_BlockCheck() == B_ERROR) 247 return B_ERROR; 248 memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry, 249 fEntrySize); 250 fEntries++; 251 fIsSorted = false; 252 return B_OK; 253 } 254 255 256 void 257 WIndex::SortItems(void) 258 { 259 qsort(fEntryList, fEntries, fEntrySize, 260 (int(*)(const void *, const void *))cmp_i_entries); 261 262 fIsSorted = true; 263 } 264 265 266 status_t 267 WIndex::_BlockCheck(void) 268 { 269 if (fEntries < fMaxEntries) 270 return B_OK; 271 fBlocks = fEntries / fEntriesPerBlock + 1; 272 uint8* tmpEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks); 273 if (!tmpEntryList) { 274 free(fEntryList); 275 fEntryList = NULL; 276 return B_ERROR; 277 } else { 278 fEntryList = tmpEntryList; 279 } 280 return B_OK; 281 } 282 283 284 status_t 285 WIndex::InitIndex(void) 286 { 287 if (fEntryList) 288 free(fEntryList); 289 fIsSorted = 0; 290 fEntries = 0; 291 fMaxEntries = fEntriesPerBlock; 292 fBlockSize = fEntriesPerBlock * fEntrySize; 293 fBlocks = 1; 294 fEntryList = (uint8 *)malloc(fBlockSize); 295 if (!fEntryList) 296 return B_ERROR; 297 return B_OK; 298 } 299 300 301 int32 302 WIndex::GetKey(const char *s) 303 { 304 305 int32 key = 0; 306 /*int32 x; 307 int32 a = 84589; 308 int32 b = 45989; 309 int32 m = 217728; 310 while (*s) { 311 x = *s++ - 'a'; 312 313 key ^= (a * x + b) % m; 314 key <<= 1; 315 }*/ 316 317 key = update_crc(0, s, strlen(s)); 318 319 if (key < 0) // No negative values! 320 key = ~key; 321 322 return key; 323 } 324 325 326 int32 327 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2) 328 { 329 return e1->key - e2->key; 330 } 331 332 333 status_t 334 WIndex::SetTo(BPositionIO *dataFile) 335 { 336 fDataFile = dataFile; 337 return B_OK; 338 } 339 340 341 void 342 WIndex::Unset(void) 343 { 344 fDataFile = NULL; 345 } 346 347 348 int32 349 WIndex::FindFirst(const char *word) 350 { 351 if (!fEntries) 352 return -1; 353 354 int32 index; 355 char nword[256]; 356 int32 key; 357 358 NormalizeWord(word, nword); 359 key = GetKey(nword); 360 361 if ((index = Lookup(key)) < 0) 362 return -1; 363 // Find first instance of key 364 while ((ItemAt(index - 1))->key == key) 365 index--; 366 return index; 367 } 368 369 370 FileEntry* 371 WIndex::GetEntry(int32 index) 372 { 373 if ((index >= fEntries)||(index < 0)) 374 return NULL; 375 WIndexEntry *ientry; 376 FileEntry *dentry; 377 char *buffer; 378 379 dentry = new FileEntry(); 380 381 ientry = ItemAt(index); 382 383 int32 size; 384 385 fDataFile->Seek(ientry->offset, SEEK_SET); 386 buffer = dentry->LockBuffer(256); 387 fDataFile->Read(buffer, 256); 388 size = _GetEntrySize(ientry, buffer); 389 //buffer[256] = 0; 390 //printf("Entry: = %s\n", buffer); 391 dentry->UnlockBuffer(size); 392 return dentry; 393 } 394 395 396 size_t 397 WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData) 398 { 399 // eliminate unused parameter warning 400 (void)entry; 401 402 return strcspn(entryData, "\n\r"); 403 } 404 405 406 FileEntry* 407 WIndex::GetEntry(const char *word) 408 { 409 return GetEntry(FindFirst(word)); 410 } 411 412 413 char* 414 WIndex::NormalizeWord(const char *word, char *dest) 415 { 416 const char *src; 417 char *dst; 418 419 // remove dots and copy 420 src = word; 421 dst = dest; 422 while (*src) { 423 if (*src != '.') 424 *dst++ = *src; 425 src++; 426 } 427 *dst = 0; 428 429 // convert to lower-case 430 for (dst = dest; *dst; dst++) 431 *dst = tolower(*dst); 432 return dest; 433 } 434 435 436 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ 437 /* the high-bit first (Big-Endian) bit ordering convention */ 438 /* */ 439 /* Synopsis: */ 440 /* gen_crc_table() -- generates a 256-word table containing all CRC */ 441 /* remainders for every possible 8-bit byte. It */ 442 /* must be executed (once) before any CRC updates. */ 443 /* */ 444 /* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ 445 /* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ 446 /* Returns the updated value of the CRC accumulator after */ 447 /* processing each byte in the addressed block of data. */ 448 /* */ 449 /* It is assumed that an unsigned long is at least 32 bits wide and */ 450 /* that the predefined type char occupies one 8-bit byte of storage. */ 451 /* */ 452 /* The generator polynomial used for this version of the package is */ 453 /* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ 454 /* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ 455 /* Other degree 32 polynomials may be substituted by re-defining the */ 456 /* symbol POLYNOMIAL below. Lower degree polynomials must first be */ 457 /* multiplied by an appropriate power of x. The representation used */ 458 /* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ 459 /* word and the coefficient of x^31 is stored in the most significant */ 460 /* bit. The CRC is to be appended to the data most significant byte */ 461 /* first. For those protocols in which bytes are transmitted MSB */ 462 /* first and in the same order as they are encountered in the block */ 463 /* this convention results in the CRC remainder being transmitted with */ 464 /* the coefficient of x^31 first and with that of x^0 last (just as */ 465 /* would be done by a hardware shift register mechanization). */ 466 /* */ 467 /* The table lookup technique was adapted from the algorithm described */ 468 /* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ 469 470 471 #define POLYNOMIAL 0x04c11db7L 472 473 474 static unsigned long crc_table[256]; 475 476 477 void 478 gen_crc_table() 479 { 480 // generate the table of CRC remainders for all possible bytes 481 482 int i, j; 483 unsigned long crc_accum; 484 485 for (i = 0; i < 256; i++) { 486 crc_accum = ((unsigned long) i << 24); 487 for (j = 0; j < 8; j++) { 488 if (crc_accum & 0x80000000L) 489 crc_accum = (crc_accum << 1) ^ POLYNOMIAL; 490 else 491 crc_accum = (crc_accum << 1); 492 } 493 crc_table[i] = crc_accum; 494 } 495 496 return; 497 } 498 499 500 unsigned long 501 update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size) 502 { 503 // update the CRC on the data block one byte at a time 504 505 int i, j; 506 507 for (j = 0; j < data_blk_size; j++) { 508 i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff; 509 crc_accum = (crc_accum << 8) ^ crc_table[i]; 510 } 511 512 return crc_accum; 513 } 514 515