/* Open Tracker License Terms and Conditions Copyright (c) 1991-2001, Be Incorporated. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice applies to all licensees and shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Except as contained in this notice, the name of Be Incorporated shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from Be Incorporated. BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks of Be Incorporated in the United States and other countries. Other brand product names are registered trademarks or trademarks of their respective holders. All rights reserved. */ #include #include #include #include #include #include #include #include "WIndex.h" #define IVERSION 1 static int32 kCRCTable = 0; int32 cmp_i_entries( const WIndexEntry *e1, const WIndexEntry *e2 ); void gen_crc_table(); unsigned long update_crc( unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size ); FileEntry::FileEntry( void ) { } FileEntry::FileEntry( const char *entryStr ) : BString( entryStr ) { } status_t WIndex::SetTo( const char *dataPath, const char *indexPath ) { BFile *dataFile; BFile indexFile; dataFile = new BFile(); if( dataFile->SetTo( dataPath, B_READ_ONLY ) != B_OK ) return B_ERROR; else { bool buildIndex = true; SetTo( dataFile ); time_t mtime; time_t modified; dataFile->GetModificationTime( &mtime ); if( indexFile.SetTo( indexPath, B_READ_ONLY ) == B_OK ) { attr_info info; if( (indexFile.GetAttrInfo( "WINDEX:version", &info ) == B_NO_ERROR) ) { uint32 version = 0; indexFile.ReadAttr( "WINDEX:version", B_UINT32_TYPE, 0, &version, 4 ); if( IVERSION == version ) { if( (indexFile.GetAttrInfo( "WINDEX:modified", &info ) == B_NO_ERROR) ) { indexFile.ReadAttr( "WINDEX:modified", B_UINT32_TYPE, 0, &modified, 4 ); if( mtime == modified ) { if (UnflattenIndex( &indexFile ) == B_OK) buildIndex = false; } } } } indexFile.Unset(); } if( buildIndex ) { // printf( "Building Index...\n" ); InitIndex(); BuildIndex(); if( indexFile.SetTo( indexPath, B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE ) == B_OK ) { FlattenIndex( &indexFile ); indexFile.WriteAttr( "WINDEX:modified", B_UINT32_TYPE, 0, &mtime, 4 ); uint32 version = IVERSION; indexFile.WriteAttr( "WINDEX:version", B_UINT32_TYPE, 0, &version, 4 ); } } } return B_OK; } FileEntry::~FileEntry( void ) { } WIndex::WIndex( int32 count ) { entryList = NULL; dataFile = NULL; ePerB = count; entrySize = sizeof( WIndexEntry ); if( !atomic_or( &kCRCTable, 1 ) ) gen_crc_table(); } WIndex::WIndex( BPositionIO *dataFile, int32 count ) { entryList = NULL; this->dataFile = dataFile; ePerB = count; entrySize = sizeof( WIndexEntry ); if( !atomic_or( &kCRCTable, 1 ) ) gen_crc_table(); } WIndex::~WIndex( void ) { if( entryList ) free( entryList ); delete dataFile; } status_t WIndex::UnflattenIndex( BPositionIO *io ) { if( entryList ) free( entryList ); WIndexHead head; io->Seek( 0, SEEK_SET ); io->Read( &head, sizeof( head ) ); io->Seek( head.offset, SEEK_SET ); entrySize = head.entrySize; entries = head.entries; maxEntries = ePerB; blockSize = ePerB * entrySize; blocks = entries/ePerB+1;; isSorted = true; int32 size = (head.entries+1) * head.entrySize; if( !(entryList = (uint8 *)malloc( size )) ) return B_ERROR; if( entries ) io->Read( entryList, size ); return B_OK; } status_t WIndex::FlattenIndex( BPositionIO *io ) { if( entries && !isSorted ) SortItems(); WIndexHead head; head.entries = entries; head.entrySize = entrySize; head.offset = sizeof( WIndexHead ); io->Seek( 0, SEEK_SET ); io->Write( &head, sizeof( head ) ); if( entries ) io->Write( entryList, head.entries * head.entrySize ); return B_OK; } int32 WIndex::Lookup( int32 key ) { if( !entries ) return -1; if( !isSorted ) SortItems(); // Binary Search int32 M, Lb, Ub; Lb = 0; Ub = entries-1; while( true ) { M = (Lb + Ub)/2; if( key < ((WIndexEntry *)(entryList+(M*entrySize)))->key ) Ub = M - 1; else if(key > ((WIndexEntry *)(entryList+(M*entrySize)))->key ) Lb = M + 1; else return M; if( Lb > Ub ) return -1; } } status_t WIndex::AddItem( WIndexEntry *entry ) { if( BlockCheck() == B_ERROR ) return B_ERROR; memcpy( ((WIndexEntry *)(entryList+(entries*entrySize))), entry, entrySize ); entries++; isSorted = false; return B_OK; } void WIndex::SortItems( void ) { qsort( entryList, entries, entrySize, (int (*)(const void *, const void *))cmp_i_entries ); isSorted = true; //for( int32 i = 0; i < entries; i++ ) // printf( "Key = %ld\n", entryList[i].key ); } status_t WIndex::BlockCheck( void ) { if( entries < maxEntries ) return B_OK; blocks = entries/ePerB+1; entryList = (uint8 *)realloc( entryList, blockSize*blocks ); if( !entryList ) return B_ERROR; return B_OK; } status_t WIndex::InitIndex( void ) { if( entryList ) free( entryList ); isSorted = 0; entries = 0; maxEntries = ePerB; blockSize = ePerB * entrySize; blocks = 1; entryList = (uint8 *)malloc( blockSize ); if( !entryList ) return B_ERROR; return B_OK; } int32 WIndex::GetKey( const char *s ) { int32 key = 0; /*int32 x; int32 a = 84589; int32 b = 45989; int32 m = 217728; while( *s ) { x = *s++ - 'a'; key ^= (a*x + b) % m; key <<= 1; }*/ key = update_crc( 0, s, strlen(s) ); if( key < 0 ) // No negavite values! key = ~key; return key; } int32 cmp_i_entries( const WIndexEntry *e1, const WIndexEntry *e2 ) { return e1->key - e2->key; } status_t WIndex::SetTo( BPositionIO *dataFile ) { this->dataFile = dataFile; return B_OK; } void WIndex::Unset( void ) { dataFile = NULL; } int32 WIndex::FindFirst( const char *word ) { if( !entries ) return -1; int32 index; char nword[256]; int32 key; NormalizeWord( word, nword ); key = GetKey( nword ); if( (index = Lookup( key )) < 0 ) return -1; // Find first instance of key while( (ItemAt( index-1 ))->key == key ) index--; return index; } FileEntry *WIndex::GetEntry( int32 index ) { if( (index >= entries)||(index < 0) ) return NULL; WIndexEntry *ientry; FileEntry *dentry; char *buffer; dentry = new FileEntry(); ientry = ItemAt( index ); int32 size; dataFile->Seek( ientry->offset, SEEK_SET ); buffer = dentry->LockBuffer( 256 ); dataFile->Read( buffer, 256 ); size = GetEntrySize( ientry, buffer ); //buffer[256] = 0; //printf( "Entry: = %s\n", buffer ); dentry->UnlockBuffer( size ); return dentry; } size_t WIndex::GetEntrySize( WIndexEntry *entry, const char *entryData ) { // eliminate unused parameter warning (void)entry; return strcspn( entryData, "\n\r" ); } FileEntry *WIndex::GetEntry( const char *word ) { return GetEntry( FindFirst( word ) ); } char *WIndex::NormalizeWord( const char *word, char *dest ) { const char *src; char *dst; // remove dots and copy src = word; dst = dest; while( *src ) { if( *src != '.' ) *dst++ = *src; src++; } *dst = 0; // convert to lower-case dst = dest; while( *dst ) *dst++ = tolower( *dst ); return dest; } /* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ /* the high-bit first (Big-Endian) bit ordering convention */ /* */ /* Synopsis: */ /* gen_crc_table() -- generates a 256-word table containing all CRC */ /* remainders for every possible 8-bit byte. It */ /* must be executed (once) before any CRC updates. */ /* */ /* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ /* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ /* Returns the updated value of the CRC accumulator after */ /* processing each byte in the addressed block of data. */ /* */ /* It is assumed that an unsigned long is at least 32 bits wide and */ /* that the predefined type char occupies one 8-bit byte of storage. */ /* */ /* The generator polynomial used for this version of the package is */ /* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ /* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ /* Other degree 32 polynomials may be substituted by re-defining the */ /* symbol POLYNOMIAL below. Lower degree polynomials must first be */ /* multiplied by an appropriate power of x. The representation used */ /* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ /* word and the coefficient of x^31 is stored in the most significant */ /* bit. The CRC is to be appended to the data most significant byte */ /* first. For those protocols in which bytes are transmitted MSB */ /* first and in the same order as they are encountered in the block */ /* this convention results in the CRC remainder being transmitted with */ /* the coefficient of x^31 first and with that of x^0 last (just as */ /* would be done by a hardware shift register mechanization). */ /* */ /* The table lookup technique was adapted from the algorithm described */ /* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ #define POLYNOMIAL 0x04c11db7L static unsigned long crc_table[256]; void gen_crc_table() /* generate the table of CRC remainders for all possible bytes */ { register int i, j; register unsigned long crc_accum; for ( i = 0; i < 256; i++ ) { crc_accum = ( (unsigned long) i << 24 ); for ( j = 0; j < 8; j++ ) { if ( crc_accum & 0x80000000L ) crc_accum = ( crc_accum << 1 ) ^ POLYNOMIAL; else crc_accum = ( crc_accum << 1 ); } crc_table[i] = crc_accum; } return; } unsigned long update_crc( unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size ) /* update the CRC on the data block one byte at a time */ { register int i, j; for ( j = 0; j < data_blk_size; j++ ) { i = ( (int) ( crc_accum >> 24) ^ *data_blk_ptr++ ) & 0xff; crc_accum = ( crc_accum << 8 ) ^ crc_table[i]; } return crc_accum; }