xref: /haiku/src/apps/mail/WIndex.cpp (revision 93a78ecaa45114d68952d08c4778f073515102f2)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <File.h>
39 #include <Node.h>
40 #include <fs_attr.h>
41 #include <Message.h>
42 #include "WIndex.h"
43 
44 #define IVERSION	1
45 
46 static int32 kCRCTable = 0;
47 
48 int32 cmp_i_entries( const WIndexEntry *e1, const WIndexEntry *e2 );
49 void gen_crc_table();
50 unsigned long update_crc( unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size );
51 
52 
53 FileEntry::FileEntry( void )
54 {
55 
56 }
57 
58 FileEntry::FileEntry( const char *entryStr )
59 	: BString( entryStr )
60 {
61 
62 }
63 
64 status_t WIndex::SetTo( const char *dataPath, const char *indexPath )
65 {
66 	BFile		*dataFile;
67 	BFile		indexFile;
68 
69 	dataFile = new BFile();
70 
71 	if( dataFile->SetTo( dataPath, B_READ_ONLY ) != B_OK )
72 		return B_ERROR;
73 	else
74 	{
75 		bool buildIndex = true;
76 		SetTo( dataFile );
77 
78 		time_t mtime;
79 		time_t modified;
80 
81 		dataFile->GetModificationTime( &mtime );
82 
83 		if( indexFile.SetTo( indexPath, B_READ_ONLY ) == B_OK )
84 		{
85 			attr_info info;
86 			if( (indexFile.GetAttrInfo( "WINDEX:version", &info ) == B_NO_ERROR) )
87 			{
88 				uint32 version = 0;
89 				indexFile.ReadAttr( "WINDEX:version", B_UINT32_TYPE, 0, &version, 4 );
90 				if( IVERSION == version )
91 				{
92 					if( (indexFile.GetAttrInfo( "WINDEX:modified", &info ) == B_NO_ERROR) )
93 					{
94 						indexFile.ReadAttr( "WINDEX:modified", B_UINT32_TYPE, 0, &modified, 4 );
95 						if( mtime == modified )
96 						{
97 							if (UnflattenIndex( &indexFile ) == B_OK)
98 								buildIndex = false;
99 						}
100 					}
101 				}
102 			}
103 			indexFile.Unset();
104 		}
105 		if( buildIndex )
106 		{
107 			// printf( "Building Index...\n" );
108 			InitIndex();
109 			BuildIndex();
110 			if( indexFile.SetTo( indexPath, B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE ) == B_OK )
111 			{
112 				FlattenIndex( &indexFile );
113 				indexFile.WriteAttr( "WINDEX:modified", B_UINT32_TYPE, 0, &mtime, 4 );
114 				uint32 version = IVERSION;
115 				indexFile.WriteAttr( "WINDEX:version", B_UINT32_TYPE, 0, &version, 4 );
116 			}
117 		}
118 	}
119 	return B_OK;
120 }
121 
122 FileEntry::~FileEntry( void )
123 {
124 
125 }
126 
127 WIndex::WIndex( int32 count )
128 {
129 	entryList = NULL;
130 	dataFile = NULL;
131 	ePerB = count;
132 	entrySize = sizeof( WIndexEntry );
133 	if( !atomic_or( &kCRCTable, 1 ) )
134 		gen_crc_table();
135 }
136 
137 WIndex::WIndex( BPositionIO *dataFile, int32 count )
138 {
139 	entryList = NULL;
140 	this->dataFile = dataFile;
141 	ePerB = count;
142 	entrySize = sizeof( WIndexEntry );
143 	if( !atomic_or( &kCRCTable, 1 ) )
144 		gen_crc_table();
145 }
146 
147 WIndex::~WIndex( void )
148 {
149 	if( entryList )
150 		free( entryList );
151 	delete dataFile;
152 }
153 
154 status_t WIndex::UnflattenIndex( BPositionIO *io )
155 {
156 	if( entryList )
157 		free( entryList );
158 	WIndexHead		head;
159 
160 	io->Seek( 0, SEEK_SET );
161 	io->Read( &head, sizeof( head ) );
162 	io->Seek( head.offset, SEEK_SET );
163 
164 	entrySize = head.entrySize;
165 	entries = head.entries;
166 	maxEntries = ePerB;
167 	blockSize = ePerB * entrySize;
168 	blocks = entries/ePerB+1;;
169 	isSorted = true;
170 
171 	int32 size = (head.entries+1) * head.entrySize;
172 	if( !(entryList = (uint8 *)malloc( size )) )
173 		return B_ERROR;
174 
175 	if( entries )
176 		io->Read( entryList, size );
177 
178 	return B_OK;
179 }
180 
181 status_t WIndex::FlattenIndex( BPositionIO *io )
182 {
183 	if( entries && !isSorted )
184 		SortItems();
185 	WIndexHead		head;
186 
187 	head.entries = entries;
188 	head.entrySize = entrySize;
189 	head.offset = sizeof( WIndexHead );
190 	io->Seek( 0, SEEK_SET );
191 	io->Write( &head, sizeof( head ) );
192 	if( entries )
193 		io->Write( entryList, head.entries * head.entrySize );
194 
195 	return B_OK;
196 }
197 
198 int32 WIndex::Lookup( int32 key )
199 {
200 	if( !entries )
201 		return -1;
202 	if( !isSorted )
203 		SortItems();
204 
205 	// Binary Search
206 	int32	M, Lb, Ub;
207 	Lb = 0;
208 	Ub = entries-1;
209 	while( true )
210 	{
211 		M = (Lb + Ub)/2;
212 		if( key < ((WIndexEntry *)(entryList+(M*entrySize)))->key )
213 			Ub = M - 1;
214 		else if(key > ((WIndexEntry *)(entryList+(M*entrySize)))->key )
215 			Lb = M + 1;
216 		else
217 			return M;
218 		if( Lb > Ub )
219      		return -1;
220 	}
221 }
222 
223 status_t WIndex::AddItem( WIndexEntry *entry )
224 {
225 	if( BlockCheck() == B_ERROR )
226 		return B_ERROR;
227 	memcpy( ((WIndexEntry *)(entryList+(entries*entrySize))), entry, entrySize );
228 	entries++;
229 	isSorted = false;
230 	return B_OK;
231 }
232 
233 void WIndex::SortItems( void )
234 {
235 	qsort( entryList, entries, entrySize, (int (*)(const void *, const void *))cmp_i_entries );
236 	isSorted = true;
237 	//for( int32 i = 0; i < entries; i++ )
238 	//	printf( "Key = %ld\n", entryList[i].key );
239 }
240 
241 status_t WIndex::BlockCheck( void )
242 {
243 	if( entries < maxEntries )
244 		return B_OK;
245 	blocks = entries/ePerB+1;
246 	entryList = (uint8 *)realloc( entryList, blockSize*blocks );
247 	if( !entryList )
248 		return B_ERROR;
249 	return B_OK;
250 }
251 
252 status_t WIndex::InitIndex( void )
253 {
254 	if( entryList )
255 		free( entryList );
256 	isSorted = 0;
257 	entries = 0;
258 	maxEntries = ePerB;
259 	blockSize = ePerB * entrySize;
260 	blocks = 1;
261 	entryList = (uint8 *)malloc( blockSize );
262 	if( !entryList )
263 		return B_ERROR;
264 	return B_OK;
265 }
266 
267 int32 WIndex::GetKey( const char *s )
268 {
269 
270 	int32	key = 0;
271 	/*int32	x;
272 	int32	a = 84589;
273 	int32	b = 45989;
274 	int32	m = 217728;
275 	while( *s )
276 	{
277 		x = *s++ - 'a';
278 
279 		key ^= (a*x + b) % m;
280 		key <<= 1;
281 	}*/
282 
283 	key = update_crc( 0, s, strlen(s) );
284 
285 	if( key < 0 ) // No negavite values!
286 		key = ~key;
287 
288 	return key;
289 }
290 
291 int32 cmp_i_entries( const WIndexEntry *e1, const WIndexEntry *e2 )
292 {
293 	return e1->key - e2->key;
294 }
295 
296 status_t WIndex::SetTo( BPositionIO *dataFile )
297 {
298 	this->dataFile = dataFile;
299 	return B_OK;
300 }
301 
302 void WIndex::Unset( void )
303 {
304 	dataFile = NULL;
305 }
306 
307 int32 WIndex::FindFirst( const char *word )
308 {
309 	if( !entries )
310 		return -1;
311 
312 	int32			index;
313 	char			nword[256];
314 	int32			key;
315 
316 	NormalizeWord( word, nword );
317 	key = GetKey( nword );
318 
319 	if( (index = Lookup( key )) < 0 )
320 		return -1;
321 	// Find first instance of key
322 	while( (ItemAt( index-1 ))->key == key )
323 		index--;
324 	return index;
325 }
326 
327 FileEntry *WIndex::GetEntry( int32 index )
328 {
329 	if( (index >= entries)||(index < 0) )
330 		return NULL;
331 	WIndexEntry		*ientry;
332 	FileEntry		*dentry;
333 	char			*buffer;
334 
335 	dentry = new FileEntry();
336 
337 	ientry = ItemAt( index );
338 
339 	int32 size;
340 
341 	dataFile->Seek( ientry->offset, SEEK_SET );
342 	buffer = dentry->LockBuffer( 256 );
343 	dataFile->Read( buffer, 256 );
344 	size = GetEntrySize( ientry, buffer );
345 	//buffer[256] = 0;
346 	//printf( "Entry: = %s\n", buffer );
347 	dentry->UnlockBuffer( size );
348 	return dentry;
349 }
350 
351 size_t WIndex::GetEntrySize( WIndexEntry *entry, const char *entryData )
352 {
353 	// eliminate unused parameter warning
354 	(void)entry;
355 
356 	return strcspn( entryData, "\n\r" );
357 }
358 
359 FileEntry *WIndex::GetEntry( const char *word )
360 {
361 	return GetEntry( FindFirst( word ) );
362 }
363 
364 char *WIndex::NormalizeWord( const char *word, char *dest )
365 {
366 	const char 	*src;
367 	char		*dst;
368 
369 	// remove dots and copy
370 	src = word;
371 	dst = dest;
372 	while( *src )
373 	{
374 		if( *src != '.' )
375 			*dst++ = *src;
376 		src++;
377 	}
378 	*dst = 0;
379 
380 	// convert to lower-case
381 	dst = dest;
382 	while( *dst )
383 		*dst++ = tolower( *dst );
384 	return dest;
385 }
386 
387 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using   */
388 /*             the high-bit first (Big-Endian) bit ordering convention  */
389 /*                                                                      */
390 /* Synopsis:                                                            */
391 /*  gen_crc_table() -- generates a 256-word table containing all CRC    */
392 /*                     remainders for every possible 8-bit byte.  It    */
393 /*                     must be executed (once) before any CRC updates.  */
394 /*                                                                      */
395 /*  unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size)         */
396 /*           unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */
397 /*           Returns the updated value of the CRC accumulator after     */
398 /*           processing each byte in the addressed block of data.       */
399 /*                                                                      */
400 /*  It is assumed that an unsigned long is at least 32 bits wide and    */
401 /*  that the predefined type char occupies one 8-bit byte of storage.   */
402 /*                                                                      */
403 /*  The generator polynomial used for this version of the package is    */
404 /*  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */
405 /*  as specified in the Autodin/Ethernet/ADCCP protocol standards.      */
406 /*  Other degree 32 polynomials may be substituted by re-defining the   */
407 /*  symbol POLYNOMIAL below.  Lower degree polynomials must first be    */
408 /*  multiplied by an appropriate power of x.  The representation used   */
409 /*  is that the coefficient of x^0 is stored in the LSB of the 32-bit   */
410 /*  word and the coefficient of x^31 is stored in the most significant  */
411 /*  bit.  The CRC is to be appended to the data most significant byte   */
412 /*  first.  For those protocols in which bytes are transmitted MSB      */
413 /*  first and in the same order as they are encountered in the block    */
414 /*  this convention results in the CRC remainder being transmitted with */
415 /*  the coefficient of x^31 first and with that of x^0 last (just as    */
416 /*  would be done by a hardware shift register mechanization).          */
417 /*                                                                      */
418 /*  The table lookup technique was adapted from the algorithm described */
419 /*  by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/
420 
421 #define POLYNOMIAL 0x04c11db7L
422 
423 static unsigned long crc_table[256];
424 
425 void gen_crc_table()
426  /* generate the table of CRC remainders for all possible bytes */
427  { register int i, j;  register unsigned long crc_accum;
428    for ( i = 0;  i < 256;  i++ )
429        { crc_accum = ( (unsigned long) i << 24 );
430          for ( j = 0;  j < 8;  j++ )
431               { if ( crc_accum & 0x80000000L )
432                    crc_accum =
433                      ( crc_accum << 1 ) ^ POLYNOMIAL;
434                 else
435                    crc_accum =
436                      ( crc_accum << 1 ); }
437          crc_table[i] = crc_accum; }
438    return; }
439 
440 unsigned long update_crc( unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size )
441  /* update the CRC on the data block one byte at a time */
442  { register int i, j;
443    for ( j = 0;  j < data_blk_size;  j++ )
444        { i = ( (int) ( crc_accum >> 24) ^ *data_blk_ptr++ ) & 0xff;
445          crc_accum = ( crc_accum << 8 ) ^ crc_table[i]; }
446    return crc_accum; }
447 
448