1 /* 2 Open Tracker License 3 4 Terms and Conditions 5 6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy of 9 this software and associated documentation files (the "Software"), to deal in 10 the Software without restriction, including without limitation the rights to 11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 12 of the Software, and to permit persons to whom the Software is furnished to do 13 so, subject to the following conditions: 14 15 The above copyright notice and this permission notice applies to all licensees 16 and shall be included in all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION 23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 Except as contained in this notice, the name of Be Incorporated shall not be 26 used in advertising or otherwise to promote the sale, use or other dealings in 27 this Software without prior written authorization from Be Incorporated. 28 29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks 30 of Be Incorporated in the United States and other countries. Other brand product 31 names are registered trademarks or trademarks of their respective holders. 32 All rights reserved. 33 */ 34 35 #include <stdlib.h> 36 #include <stdio.h> 37 #include <ctype.h> 38 #include <File.h> 39 #include <Node.h> 40 #include <fs_attr.h> 41 #include <Message.h> 42 #include "WIndex.h" 43 44 #define IVERSION 1 45 46 static int32 kCRCTable = 0; 47 48 int32 cmp_i_entries( const WIndexEntry *e1, const WIndexEntry *e2 ); 49 void gen_crc_table(); 50 unsigned long update_crc( unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size ); 51 52 53 FileEntry::FileEntry( void ) 54 { 55 56 } 57 58 FileEntry::FileEntry( const char *entryStr ) 59 : BString( entryStr ) 60 { 61 62 } 63 64 status_t WIndex::SetTo( const char *dataPath, const char *indexPath ) 65 { 66 BFile *dataFile; 67 BFile indexFile; 68 69 dataFile = new BFile(); 70 71 if( dataFile->SetTo( dataPath, B_READ_ONLY ) != B_OK ) 72 return B_ERROR; 73 else 74 { 75 bool buildIndex = true; 76 SetTo( dataFile ); 77 78 time_t mtime; 79 time_t modified; 80 81 dataFile->GetModificationTime( &mtime ); 82 83 if( indexFile.SetTo( indexPath, B_READ_ONLY ) == B_OK ) 84 { 85 attr_info info; 86 if( (indexFile.GetAttrInfo( "WINDEX:version", &info ) == B_NO_ERROR) ) 87 { 88 uint32 version = 0; 89 indexFile.ReadAttr( "WINDEX:version", B_UINT32_TYPE, 0, &version, 4 ); 90 if( IVERSION == version ) 91 { 92 if( (indexFile.GetAttrInfo( "WINDEX:modified", &info ) == B_NO_ERROR) ) 93 { 94 indexFile.ReadAttr( "WINDEX:modified", B_UINT32_TYPE, 0, &modified, 4 ); 95 if( mtime == modified ) 96 { 97 if (UnflattenIndex( &indexFile ) == B_OK) 98 buildIndex = false; 99 } 100 } 101 } 102 } 103 indexFile.Unset(); 104 } 105 if( buildIndex ) 106 { 107 // printf( "Building Index...\n" ); 108 InitIndex(); 109 BuildIndex(); 110 if( indexFile.SetTo( indexPath, B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE ) == B_OK ) 111 { 112 FlattenIndex( &indexFile ); 113 indexFile.WriteAttr( "WINDEX:modified", B_UINT32_TYPE, 0, &mtime, 4 ); 114 uint32 version = IVERSION; 115 indexFile.WriteAttr( "WINDEX:version", B_UINT32_TYPE, 0, &version, 4 ); 116 } 117 } 118 } 119 return B_OK; 120 } 121 122 FileEntry::~FileEntry( void ) 123 { 124 125 } 126 127 WIndex::WIndex( int32 count ) 128 { 129 entryList = NULL; 130 dataFile = NULL; 131 ePerB = count; 132 entrySize = sizeof( WIndexEntry ); 133 if( !atomic_or( &kCRCTable, 1 ) ) 134 gen_crc_table(); 135 } 136 137 WIndex::WIndex( BPositionIO *dataFile, int32 count ) 138 { 139 entryList = NULL; 140 this->dataFile = dataFile; 141 ePerB = count; 142 entrySize = sizeof( WIndexEntry ); 143 if( !atomic_or( &kCRCTable, 1 ) ) 144 gen_crc_table(); 145 } 146 147 WIndex::~WIndex( void ) 148 { 149 if( entryList ) 150 free( entryList ); 151 delete dataFile; 152 } 153 154 status_t WIndex::UnflattenIndex( BPositionIO *io ) 155 { 156 if( entryList ) 157 free( entryList ); 158 WIndexHead head; 159 160 io->Seek( 0, SEEK_SET ); 161 io->Read( &head, sizeof( head ) ); 162 io->Seek( head.offset, SEEK_SET ); 163 164 entrySize = head.entrySize; 165 entries = head.entries; 166 maxEntries = ePerB; 167 blockSize = ePerB * entrySize; 168 blocks = entries/ePerB+1;; 169 isSorted = true; 170 171 int32 size = (head.entries+1) * head.entrySize; 172 if( !(entryList = (uint8 *)malloc( size )) ) 173 return B_ERROR; 174 175 if( entries ) 176 io->Read( entryList, size ); 177 178 return B_OK; 179 } 180 181 status_t WIndex::FlattenIndex( BPositionIO *io ) 182 { 183 if( entries && !isSorted ) 184 SortItems(); 185 WIndexHead head; 186 187 head.entries = entries; 188 head.entrySize = entrySize; 189 head.offset = sizeof( WIndexHead ); 190 io->Seek( 0, SEEK_SET ); 191 io->Write( &head, sizeof( head ) ); 192 if( entries ) 193 io->Write( entryList, head.entries * head.entrySize ); 194 195 return B_OK; 196 } 197 198 int32 WIndex::Lookup( int32 key ) 199 { 200 if( !entries ) 201 return -1; 202 if( !isSorted ) 203 SortItems(); 204 205 // Binary Search 206 int32 M, Lb, Ub; 207 Lb = 0; 208 Ub = entries-1; 209 while( true ) 210 { 211 M = (Lb + Ub)/2; 212 if( key < ((WIndexEntry *)(entryList+(M*entrySize)))->key ) 213 Ub = M - 1; 214 else if(key > ((WIndexEntry *)(entryList+(M*entrySize)))->key ) 215 Lb = M + 1; 216 else 217 return M; 218 if( Lb > Ub ) 219 return -1; 220 } 221 } 222 223 status_t WIndex::AddItem( WIndexEntry *entry ) 224 { 225 if( BlockCheck() == B_ERROR ) 226 return B_ERROR; 227 memcpy( ((WIndexEntry *)(entryList+(entries*entrySize))), entry, entrySize ); 228 entries++; 229 isSorted = false; 230 return B_OK; 231 } 232 233 void WIndex::SortItems( void ) 234 { 235 qsort( entryList, entries, entrySize, (int (*)(const void *, const void *))cmp_i_entries ); 236 isSorted = true; 237 //for( int32 i = 0; i < entries; i++ ) 238 // printf( "Key = %ld\n", entryList[i].key ); 239 } 240 241 status_t WIndex::BlockCheck( void ) 242 { 243 if( entries < maxEntries ) 244 return B_OK; 245 blocks = entries/ePerB+1; 246 entryList = (uint8 *)realloc( entryList, blockSize*blocks ); 247 if( !entryList ) 248 return B_ERROR; 249 return B_OK; 250 } 251 252 status_t WIndex::InitIndex( void ) 253 { 254 if( entryList ) 255 free( entryList ); 256 isSorted = 0; 257 entries = 0; 258 maxEntries = ePerB; 259 blockSize = ePerB * entrySize; 260 blocks = 1; 261 entryList = (uint8 *)malloc( blockSize ); 262 if( !entryList ) 263 return B_ERROR; 264 return B_OK; 265 } 266 267 int32 WIndex::GetKey( const char *s ) 268 { 269 270 int32 key = 0; 271 /*int32 x; 272 int32 a = 84589; 273 int32 b = 45989; 274 int32 m = 217728; 275 while( *s ) 276 { 277 x = *s++ - 'a'; 278 279 key ^= (a*x + b) % m; 280 key <<= 1; 281 }*/ 282 283 key = update_crc( 0, s, strlen(s) ); 284 285 if( key < 0 ) // No negavite values! 286 key = ~key; 287 288 return key; 289 } 290 291 int32 cmp_i_entries( const WIndexEntry *e1, const WIndexEntry *e2 ) 292 { 293 return e1->key - e2->key; 294 } 295 296 status_t WIndex::SetTo( BPositionIO *dataFile ) 297 { 298 this->dataFile = dataFile; 299 return B_OK; 300 } 301 302 void WIndex::Unset( void ) 303 { 304 dataFile = NULL; 305 } 306 307 int32 WIndex::FindFirst( const char *word ) 308 { 309 if( !entries ) 310 return -1; 311 312 int32 index; 313 char nword[256]; 314 int32 key; 315 316 NormalizeWord( word, nword ); 317 key = GetKey( nword ); 318 319 if( (index = Lookup( key )) < 0 ) 320 return -1; 321 // Find first instance of key 322 while( (ItemAt( index-1 ))->key == key ) 323 index--; 324 return index; 325 } 326 327 FileEntry *WIndex::GetEntry( int32 index ) 328 { 329 if( (index >= entries)||(index < 0) ) 330 return NULL; 331 WIndexEntry *ientry; 332 FileEntry *dentry; 333 char *buffer; 334 335 dentry = new FileEntry(); 336 337 ientry = ItemAt( index ); 338 339 int32 size; 340 341 dataFile->Seek( ientry->offset, SEEK_SET ); 342 buffer = dentry->LockBuffer( 256 ); 343 dataFile->Read( buffer, 256 ); 344 size = GetEntrySize( ientry, buffer ); 345 //buffer[256] = 0; 346 //printf( "Entry: = %s\n", buffer ); 347 dentry->UnlockBuffer( size ); 348 return dentry; 349 } 350 351 size_t WIndex::GetEntrySize( WIndexEntry *entry, const char *entryData ) 352 { 353 // eliminate unused parameter warning 354 (void)entry; 355 356 return strcspn( entryData, "\n\r" ); 357 } 358 359 FileEntry *WIndex::GetEntry( const char *word ) 360 { 361 return GetEntry( FindFirst( word ) ); 362 } 363 364 char *WIndex::NormalizeWord( const char *word, char *dest ) 365 { 366 const char *src; 367 char *dst; 368 369 // remove dots and copy 370 src = word; 371 dst = dest; 372 while( *src ) 373 { 374 if( *src != '.' ) 375 *dst++ = *src; 376 src++; 377 } 378 *dst = 0; 379 380 // convert to lower-case 381 dst = dest; 382 while( *dst ) 383 *dst++ = tolower( *dst ); 384 return dest; 385 } 386 387 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ 388 /* the high-bit first (Big-Endian) bit ordering convention */ 389 /* */ 390 /* Synopsis: */ 391 /* gen_crc_table() -- generates a 256-word table containing all CRC */ 392 /* remainders for every possible 8-bit byte. It */ 393 /* must be executed (once) before any CRC updates. */ 394 /* */ 395 /* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ 396 /* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ 397 /* Returns the updated value of the CRC accumulator after */ 398 /* processing each byte in the addressed block of data. */ 399 /* */ 400 /* It is assumed that an unsigned long is at least 32 bits wide and */ 401 /* that the predefined type char occupies one 8-bit byte of storage. */ 402 /* */ 403 /* The generator polynomial used for this version of the package is */ 404 /* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ 405 /* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ 406 /* Other degree 32 polynomials may be substituted by re-defining the */ 407 /* symbol POLYNOMIAL below. Lower degree polynomials must first be */ 408 /* multiplied by an appropriate power of x. The representation used */ 409 /* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ 410 /* word and the coefficient of x^31 is stored in the most significant */ 411 /* bit. The CRC is to be appended to the data most significant byte */ 412 /* first. For those protocols in which bytes are transmitted MSB */ 413 /* first and in the same order as they are encountered in the block */ 414 /* this convention results in the CRC remainder being transmitted with */ 415 /* the coefficient of x^31 first and with that of x^0 last (just as */ 416 /* would be done by a hardware shift register mechanization). */ 417 /* */ 418 /* The table lookup technique was adapted from the algorithm described */ 419 /* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ 420 421 #define POLYNOMIAL 0x04c11db7L 422 423 static unsigned long crc_table[256]; 424 425 void gen_crc_table() 426 /* generate the table of CRC remainders for all possible bytes */ 427 { register int i, j; register unsigned long crc_accum; 428 for ( i = 0; i < 256; i++ ) 429 { crc_accum = ( (unsigned long) i << 24 ); 430 for ( j = 0; j < 8; j++ ) 431 { if ( crc_accum & 0x80000000L ) 432 crc_accum = 433 ( crc_accum << 1 ) ^ POLYNOMIAL; 434 else 435 crc_accum = 436 ( crc_accum << 1 ); } 437 crc_table[i] = crc_accum; } 438 return; } 439 440 unsigned long update_crc( unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size ) 441 /* update the CRC on the data block one byte at a time */ 442 { register int i, j; 443 for ( j = 0; j < data_blk_size; j++ ) 444 { i = ( (int) ( crc_accum >> 24) ^ *data_blk_ptr++ ) & 0xff; 445 crc_accum = ( crc_accum << 8 ) ^ crc_table[i]; } 446 return crc_accum; } 447 448