1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 36 #include "WIndex.h" 37 38 #include <File.h> 39 #include <fs_attr.h> 40 #include <Message.h> 41 #include <Node.h> 42 43 #include <ctype.h> 44 #include <stdlib.h> 45 #include <stdio.h> 46 47 48 #define IVERSION 1 49 50 51 static int32 kCRCTable = 0; 52 53 54 int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2); 55 void gen_crc_table(); 56 unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr, 57 int data_blk_size); 58 59 60 FileEntry::FileEntry(void) 61 { 62 63 } 64 65 66 FileEntry::FileEntry(const char *entryString) 67 : 68 BString(entryString) 69 { 70 71 } 72 73 74 status_t 75 WIndex::SetTo(const char *dataPath, const char *indexPath) 76 { 77 BFile* dataFile; 78 BFile indexFile; 79 80 dataFile = new BFile(); 81 82 if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) { 83 return B_ERROR; 84 } else { 85 bool buildIndex = true; 86 SetTo(dataFile); 87 88 time_t mtime; 89 time_t modified; 90 91 dataFile->GetModificationTime(&mtime); 92 93 if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) { 94 attr_info info; 95 if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) { 96 uint32 version = 0; 97 indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0, 98 &version, 4); 99 if (IVERSION == version) { 100 if ((indexFile.GetAttrInfo("WINDEX:modified", &info) 101 == B_OK)) { 102 indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0, 103 &modified, 4); 104 105 if (mtime == modified) { 106 if (UnflattenIndex(&indexFile) == B_OK) 107 buildIndex = false; 108 } 109 } 110 } 111 } 112 indexFile.Unset(); 113 } 114 if (buildIndex) { 115 InitIndex(); 116 BuildIndex(); 117 if (indexFile.SetTo(indexPath, 118 B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) { 119 FlattenIndex(&indexFile); 120 indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0, 121 &mtime, 4); 122 uint32 version = IVERSION; 123 indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0, 124 &version, 4); 125 } 126 } 127 } 128 return B_OK; 129 } 130 131 132 FileEntry::~FileEntry(void) 133 { 134 135 } 136 137 138 WIndex::WIndex(int32 count) 139 { 140 fEntryList = NULL; 141 fDataFile = NULL; 142 fEntriesPerBlock = count; 143 fEntrySize = sizeof(WIndexEntry); 144 if (!atomic_or(&kCRCTable, 1)) 145 gen_crc_table(); 146 } 147 148 149 WIndex::WIndex(BPositionIO *dataFile, int32 count) 150 { 151 fEntryList = NULL; 152 fDataFile = dataFile; 153 fEntriesPerBlock = count; 154 fEntrySize = sizeof(WIndexEntry); 155 if (!atomic_or(&kCRCTable, 1)) 156 gen_crc_table(); 157 } 158 159 160 WIndex::~WIndex(void) 161 { 162 if (fEntryList) 163 free(fEntryList); 164 delete fDataFile; 165 } 166 167 168 status_t 169 WIndex::UnflattenIndex(BPositionIO *io) 170 { 171 if (fEntryList) 172 free(fEntryList); 173 WIndexHead head; 174 175 io->Seek(0, SEEK_SET); 176 io->Read(&head, sizeof(head)); 177 io->Seek(head.offset, SEEK_SET); 178 179 fEntrySize = head.entrySize; 180 fEntries = head.entries; 181 fMaxEntries = fEntriesPerBlock; 182 fBlockSize = fEntriesPerBlock * fEntrySize; 183 fBlocks = fEntries / fEntriesPerBlock + 1;; 184 fIsSorted = true; 185 186 int32 size = (head.entries + 1) * head.entrySize; 187 if (!(fEntryList = (uint8 *)malloc(size))) 188 return B_ERROR; 189 190 if (fEntries) 191 io->Read(fEntryList, size); 192 193 return B_OK; 194 } 195 196 197 status_t 198 WIndex::FlattenIndex(BPositionIO *io) 199 { 200 if (fEntries && !fIsSorted) 201 SortItems(); 202 WIndexHead head; 203 204 head.entries = fEntries; 205 head.entrySize = fEntrySize; 206 head.offset = sizeof(WIndexHead); 207 io->Seek(0, SEEK_SET); 208 io->Write(&head, sizeof(head)); 209 if (fEntries) 210 io->Write(fEntryList, head.entries * head.entrySize); 211 212 return B_OK; 213 } 214 215 216 int32 217 WIndex::Lookup(int32 key) 218 { 219 if (!fEntries) 220 return -1; 221 if (!fIsSorted) 222 SortItems(); 223 224 // Binary Search 225 int32 M, Lb, Ub; 226 Lb = 0; 227 Ub = fEntries - 1; 228 while (true) { 229 M = (Lb + Ub) / 2; 230 if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 231 Ub = M - 1; 232 else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key) 233 Lb = M + 1; 234 else 235 return M; 236 if (Lb > Ub) 237 return -1; 238 } 239 } 240 241 242 status_t 243 WIndex::AddItem(WIndexEntry *entry) 244 { 245 if (_BlockCheck() == B_ERROR) 246 return B_ERROR; 247 memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry, 248 fEntrySize); 249 fEntries++; 250 fIsSorted = false; 251 return B_OK; 252 } 253 254 255 void 256 WIndex::SortItems(void) 257 { 258 qsort(fEntryList, fEntries, fEntrySize, 259 (int(*)(const void *, const void *))cmp_i_entries); 260 261 fIsSorted = true; 262 } 263 264 265 status_t 266 WIndex::_BlockCheck(void) 267 { 268 if (fEntries < fMaxEntries) 269 return B_OK; 270 fBlocks = fEntries / fEntriesPerBlock + 1; 271 uint8* tmpEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks); 272 if (!tmpEntryList) { 273 free(fEntryList); 274 fEntryList = NULL; 275 return B_ERROR; 276 } else { 277 fEntryList = tmpEntryList; 278 } 279 return B_OK; 280 } 281 282 283 status_t 284 WIndex::InitIndex(void) 285 { 286 if (fEntryList) 287 free(fEntryList); 288 fIsSorted = 0; 289 fEntries = 0; 290 fMaxEntries = fEntriesPerBlock; 291 fBlockSize = fEntriesPerBlock * fEntrySize; 292 fBlocks = 1; 293 fEntryList = (uint8 *)malloc(fBlockSize); 294 if (!fEntryList) 295 return B_ERROR; 296 return B_OK; 297 } 298 299 300 int32 301 WIndex::GetKey(const char *s) 302 { 303 304 int32 key = 0; 305 /*int32 x; 306 int32 a = 84589; 307 int32 b = 45989; 308 int32 m = 217728; 309 while (*s) { 310 x = *s++ - 'a'; 311 312 key ^= (a * x + b) % m; 313 key <<= 1; 314 }*/ 315 316 key = update_crc(0, s, strlen(s)); 317 318 if (key < 0) // No negative values! 319 key = ~key; 320 321 return key; 322 } 323 324 325 int32 326 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2) 327 { 328 return e1->key - e2->key; 329 } 330 331 332 status_t 333 WIndex::SetTo(BPositionIO *dataFile) 334 { 335 fDataFile = dataFile; 336 return B_OK; 337 } 338 339 340 void 341 WIndex::Unset(void) 342 { 343 fDataFile = NULL; 344 } 345 346 347 int32 348 WIndex::FindFirst(const char *word) 349 { 350 if (!fEntries) 351 return -1; 352 353 int32 index; 354 char nword[256]; 355 int32 key; 356 357 NormalizeWord(word, nword); 358 key = GetKey(nword); 359 360 if ((index = Lookup(key)) < 0) 361 return -1; 362 // Find first instance of key 363 while ((ItemAt(index - 1))->key == key) 364 index--; 365 return index; 366 } 367 368 369 FileEntry* 370 WIndex::GetEntry(int32 index) 371 { 372 if ((index >= fEntries)||(index < 0)) 373 return NULL; 374 WIndexEntry *ientry; 375 FileEntry *dentry; 376 char *buffer; 377 378 dentry = new FileEntry(); 379 380 ientry = ItemAt(index); 381 382 int32 size; 383 384 fDataFile->Seek(ientry->offset, SEEK_SET); 385 buffer = dentry->LockBuffer(256); 386 fDataFile->Read(buffer, 256); 387 size = _GetEntrySize(ientry, buffer); 388 //buffer[256] = 0; 389 //printf("Entry: = %s\n", buffer); 390 dentry->UnlockBuffer(size); 391 return dentry; 392 } 393 394 395 size_t 396 WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData) 397 { 398 // eliminate unused parameter warning 399 (void)entry; 400 401 return strcspn(entryData, "\n\r"); 402 } 403 404 405 FileEntry* 406 WIndex::GetEntry(const char *word) 407 { 408 return GetEntry(FindFirst(word)); 409 } 410 411 412 char* 413 WIndex::NormalizeWord(const char *word, char *dest) 414 { 415 const char *src; 416 char *dst; 417 418 // remove dots and copy 419 src = word; 420 dst = dest; 421 while (*src) { 422 if (*src != '.') 423 *dst++ = *src; 424 src++; 425 } 426 *dst = 0; 427 428 // convert to lower-case 429 for (dst = dest; *dst; dst++) 430 *dst = tolower(*dst); 431 return dest; 432 } 433 434 435 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ 436 /* the high-bit first (Big-Endian) bit ordering convention */ 437 /* */ 438 /* Synopsis: */ 439 /* gen_crc_table() -- generates a 256-word table containing all CRC */ 440 /* remainders for every possible 8-bit byte. It */ 441 /* must be executed (once) before any CRC updates. */ 442 /* */ 443 /* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ 444 /* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ 445 /* Returns the updated value of the CRC accumulator after */ 446 /* processing each byte in the addressed block of data. */ 447 /* */ 448 /* It is assumed that an unsigned long is at least 32 bits wide and */ 449 /* that the predefined type char occupies one 8-bit byte of storage. */ 450 /* */ 451 /* The generator polynomial used for this version of the package is */ 452 /* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ 453 /* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ 454 /* Other degree 32 polynomials may be substituted by re-defining the */ 455 /* symbol POLYNOMIAL below. Lower degree polynomials must first be */ 456 /* multiplied by an appropriate power of x. The representation used */ 457 /* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ 458 /* word and the coefficient of x^31 is stored in the most significant */ 459 /* bit. The CRC is to be appended to the data most significant byte */ 460 /* first. For those protocols in which bytes are transmitted MSB */ 461 /* first and in the same order as they are encountered in the block */ 462 /* this convention results in the CRC remainder being transmitted with */ 463 /* the coefficient of x^31 first and with that of x^0 last (just as */ 464 /* would be done by a hardware shift register mechanization). */ 465 /* */ 466 /* The table lookup technique was adapted from the algorithm described */ 467 /* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ 468 469 470 #define POLYNOMIAL 0x04c11db7L 471 472 473 static unsigned long crc_table[256]; 474 475 476 void 477 gen_crc_table() 478 { 479 // generate the table of CRC remainders for all possible bytes 480 481 register int i, j; 482 register unsigned long crc_accum; 483 484 for (i = 0; i < 256; i++) { 485 crc_accum = ((unsigned long) i << 24); 486 for (j = 0; j < 8; j++) { 487 if (crc_accum & 0x80000000L) 488 crc_accum = (crc_accum << 1) ^ POLYNOMIAL; 489 else 490 crc_accum = (crc_accum << 1); 491 } 492 crc_table[i] = crc_accum; 493 } 494 495 return; 496 } 497 498 499 unsigned long 500 update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size) 501 { 502 // update the CRC on the data block one byte at a time 503 504 register int i, j; 505 506 for (j = 0; j < data_blk_size; j++) { 507 i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff; 508 crc_accum = (crc_accum << 8) ^ crc_table[i]; 509 } 510 511 return crc_accum; 512 } 513 514