xref: /haiku/src/apps/mail/WIndex.cpp (revision da8162be21b36442f34a731873d2358a0d63c25a)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 
36 #include "WIndex.h"
37 
38 #include <File.h>
39 #include <fs_attr.h>
40 #include <Message.h>
41 #include <Node.h>
42 
43 #include <ctype.h>
44 #include <stdlib.h>
45 #include <stdio.h>
46 
47 
48 #define IVERSION	1
49 
50 
51 static int32 kCRCTable = 0;
52 
53 
54 int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2);
55 void gen_crc_table();
56 unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr,
57 	int data_blk_size);
58 
59 
FileEntry(void)60 FileEntry::FileEntry(void)
61 {
62 
63 }
64 
65 
FileEntry(const char * entryString)66 FileEntry::FileEntry(const char *entryString)
67 	:
68 	BString(entryString)
69 {
70 
71 }
72 
73 
74 status_t
SetTo(const char * dataPath,const char * indexPath)75 WIndex::SetTo(const char *dataPath, const char *indexPath)
76 {
77 	BFile* dataFile;
78 	BFile indexFile;
79 
80 	dataFile = new BFile();
81 
82 	if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) {
83 		delete dataFile;
84 		return B_ERROR;
85 	} else {
86 		bool buildIndex = true;
87 		SetTo(dataFile);
88 
89 		time_t mtime;
90 		time_t modified;
91 
92 		dataFile->GetModificationTime(&mtime);
93 
94 		if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) {
95 			attr_info info;
96 			if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) {
97 				uint32 version = 0;
98 				indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0,
99 					&version, 4);
100 				if (IVERSION == version) {
101 					if ((indexFile.GetAttrInfo("WINDEX:modified", &info)
102 						== B_OK)) {
103 						indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0,
104 							&modified, 4);
105 
106 						if (mtime == modified) {
107 							if (UnflattenIndex(&indexFile) == B_OK)
108 								buildIndex = false;
109 						}
110 					}
111 				}
112 			}
113 			indexFile.Unset();
114 		}
115 		if (buildIndex) {
116 			InitIndex();
117 			BuildIndex();
118 			if (indexFile.SetTo(indexPath,
119 				B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) {
120 				FlattenIndex(&indexFile);
121 				indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0,
122 					&mtime, 4);
123 				uint32 version = IVERSION;
124 				indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0,
125 					&version, 4);
126 			}
127 		}
128 	}
129 	return B_OK;
130 }
131 
132 
~FileEntry(void)133 FileEntry::~FileEntry(void)
134 {
135 
136 }
137 
138 
WIndex(int32 count)139 WIndex::WIndex(int32 count)
140 {
141 	fEntryList = NULL;
142 	fDataFile = NULL;
143 	fEntriesPerBlock = count;
144 	fEntrySize = sizeof(WIndexEntry);
145 	if (!atomic_or(&kCRCTable, 1))
146 		gen_crc_table();
147 }
148 
149 
WIndex(BPositionIO * dataFile,int32 count)150 WIndex::WIndex(BPositionIO *dataFile, int32 count)
151 {
152 	fEntryList = NULL;
153 	fDataFile = dataFile;
154 	fEntriesPerBlock = count;
155 	fEntrySize = sizeof(WIndexEntry);
156 	if (!atomic_or(&kCRCTable, 1))
157 		gen_crc_table();
158 }
159 
160 
~WIndex(void)161 WIndex::~WIndex(void)
162 {
163 	if (fEntryList)
164 		free(fEntryList);
165 	delete fDataFile;
166 }
167 
168 
169 status_t
UnflattenIndex(BPositionIO * io)170 WIndex::UnflattenIndex(BPositionIO *io)
171 {
172 	if (fEntryList)
173 		free(fEntryList);
174 	WIndexHead		head;
175 
176 	io->Seek(0, SEEK_SET);
177 	io->Read(&head, sizeof(head));
178 	io->Seek(head.offset, SEEK_SET);
179 
180 	fEntrySize = head.entrySize;
181 	fEntries = head.entries;
182 	fMaxEntries = fEntriesPerBlock;
183 	fBlockSize = fEntriesPerBlock * fEntrySize;
184 	fBlocks = fEntries / fEntriesPerBlock + 1;;
185 	fIsSorted = true;
186 
187 	int32 size = (head.entries + 1) * head.entrySize;
188 	if (!(fEntryList = (uint8 *)malloc(size)))
189 		return B_ERROR;
190 
191 	if (fEntries)
192 		io->Read(fEntryList, size);
193 
194 	return B_OK;
195 }
196 
197 
198 status_t
FlattenIndex(BPositionIO * io)199 WIndex::FlattenIndex(BPositionIO *io)
200 {
201 	if (fEntries && !fIsSorted)
202 		SortItems();
203 	WIndexHead		head;
204 
205 	head.entries = fEntries;
206 	head.entrySize = fEntrySize;
207 	head.offset = sizeof(WIndexHead);
208 	io->Seek(0, SEEK_SET);
209 	io->Write(&head, sizeof(head));
210 	if (fEntries)
211 		io->Write(fEntryList, head.entries * head.entrySize);
212 
213 	return B_OK;
214 }
215 
216 
217 int32
Lookup(int32 key)218 WIndex::Lookup(int32 key)
219 {
220 	if (!fEntries)
221 		return -1;
222 	if (!fIsSorted)
223 		SortItems();
224 
225 	// Binary Search
226 	int32	M, Lb, Ub;
227 	Lb = 0;
228 	Ub = fEntries - 1;
229 	while (true) {
230 		M = (Lb + Ub) / 2;
231 		if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
232 			Ub = M - 1;
233 		else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
234 			Lb = M + 1;
235 		else
236 			return M;
237 		if (Lb > Ub)
238 			return -1;
239 	}
240 }
241 
242 
243 status_t
AddItem(WIndexEntry * entry)244 WIndex::AddItem(WIndexEntry *entry)
245 {
246 	if (_BlockCheck() == B_ERROR)
247 		return B_ERROR;
248 	memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry,
249 		fEntrySize);
250 	fEntries++;
251 	fIsSorted = false;
252 	return B_OK;
253 }
254 
255 
256 void
SortItems(void)257 WIndex::SortItems(void)
258 {
259 	qsort(fEntryList, fEntries, fEntrySize,
260 		(int(*)(const void *, const void *))cmp_i_entries);
261 
262 	fIsSorted = true;
263 }
264 
265 
266 status_t
_BlockCheck(void)267 WIndex::_BlockCheck(void)
268 {
269 	if (fEntries < fMaxEntries)
270 		return B_OK;
271 	fBlocks = fEntries / fEntriesPerBlock + 1;
272 	uint8* tmpEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks);
273 	if (!tmpEntryList) {
274 		free(fEntryList);
275 		fEntryList = NULL;
276 		return B_ERROR;
277 	} else {
278 		fEntryList = tmpEntryList;
279 	}
280 	return B_OK;
281 }
282 
283 
284 status_t
InitIndex(void)285 WIndex::InitIndex(void)
286 {
287 	if (fEntryList)
288 		free(fEntryList);
289 	fIsSorted = 0;
290 	fEntries = 0;
291 	fMaxEntries = fEntriesPerBlock;
292 	fBlockSize = fEntriesPerBlock * fEntrySize;
293 	fBlocks = 1;
294 	fEntryList = (uint8 *)malloc(fBlockSize);
295 	if (!fEntryList)
296 		return B_ERROR;
297 	return B_OK;
298 }
299 
300 
301 int32
GetKey(const char * s)302 WIndex::GetKey(const char *s)
303 {
304 
305 	int32	key = 0;
306 	/*int32	x;
307 	int32	a = 84589;
308 	int32	b = 45989;
309 	int32	m = 217728;
310 	while (*s) {
311 		x = *s++ - 'a';
312 
313 		key ^= (a * x + b) % m;
314 		key <<= 1;
315 	}*/
316 
317 	key = update_crc(0, s, strlen(s));
318 
319 	if (key < 0) // No negative values!
320 		key = ~key;
321 
322 	return key;
323 }
324 
325 
326 int32
cmp_i_entries(const WIndexEntry * e1,const WIndexEntry * e2)327 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2)
328 {
329 	return e1->key - e2->key;
330 }
331 
332 
333 status_t
SetTo(BPositionIO * dataFile)334 WIndex::SetTo(BPositionIO *dataFile)
335 {
336 	fDataFile = dataFile;
337 	return B_OK;
338 }
339 
340 
341 void
Unset(void)342 WIndex::Unset(void)
343 {
344 	fDataFile = NULL;
345 }
346 
347 
348 int32
FindFirst(const char * word)349 WIndex::FindFirst(const char *word)
350 {
351 	if (!fEntries)
352 		return -1;
353 
354 	int32			index;
355 	char			nword[256];
356 	int32			key;
357 
358 	NormalizeWord(word, nword);
359 	key = GetKey(nword);
360 
361 	if ((index = Lookup(key)) < 0)
362 		return -1;
363 	// Find first instance of key
364 	while ((ItemAt(index - 1))->key == key)
365 		index--;
366 	return index;
367 }
368 
369 
370 FileEntry*
GetEntry(int32 index)371 WIndex::GetEntry(int32 index)
372 {
373 	if ((index >= fEntries)||(index < 0))
374 		return NULL;
375 	WIndexEntry		*ientry;
376 	FileEntry		*dentry;
377 	char			*buffer;
378 
379 	dentry = new FileEntry();
380 
381 	ientry = ItemAt(index);
382 
383 	int32 size;
384 
385 	fDataFile->Seek(ientry->offset, SEEK_SET);
386 	buffer = dentry->LockBuffer(256);
387 	fDataFile->Read(buffer, 256);
388 	size = _GetEntrySize(ientry, buffer);
389 	//buffer[256] = 0;
390 	//printf("Entry: = %s\n", buffer);
391 	dentry->UnlockBuffer(size);
392 	return dentry;
393 }
394 
395 
396 size_t
_GetEntrySize(WIndexEntry * entry,const char * entryData)397 WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData)
398 {
399 	// eliminate unused parameter warning
400 	(void)entry;
401 
402 	return strcspn(entryData, "\n\r");
403 }
404 
405 
406 FileEntry*
GetEntry(const char * word)407 WIndex::GetEntry(const char *word)
408 {
409 	return GetEntry(FindFirst(word));
410 }
411 
412 
413 char*
NormalizeWord(const char * word,char * dest)414 WIndex::NormalizeWord(const char *word, char *dest)
415 {
416 	const char 	*src;
417 	char		*dst;
418 
419 	// remove dots and copy
420 	src = word;
421 	dst = dest;
422 	while (*src) {
423 		if (*src != '.')
424 			*dst++ = *src;
425 		src++;
426 	}
427 	*dst = 0;
428 
429 	// convert to lower-case
430 	for (dst = dest; *dst; dst++)
431 		*dst = tolower(*dst);
432 	return dest;
433 }
434 
435 
436 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using   */
437 /*             the high-bit first (Big-Endian) bit ordering convention  */
438 /*                                                                      */
439 /* Synopsis:                                                            */
440 /*  gen_crc_table() -- generates a 256-word table containing all CRC    */
441 /*                     remainders for every possible 8-bit byte.  It    */
442 /*                     must be executed (once) before any CRC updates.  */
443 /*                                                                      */
444 /*  unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size)         */
445 /*           unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */
446 /*           Returns the updated value of the CRC accumulator after     */
447 /*           processing each byte in the addressed block of data.       */
448 /*                                                                      */
449 /*  It is assumed that an unsigned long is at least 32 bits wide and    */
450 /*  that the predefined type char occupies one 8-bit byte of storage.   */
451 /*                                                                      */
452 /*  The generator polynomial used for this version of the package is    */
453 /*  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */
454 /*  as specified in the Autodin/Ethernet/ADCCP protocol standards.      */
455 /*  Other degree 32 polynomials may be substituted by re-defining the   */
456 /*  symbol POLYNOMIAL below.  Lower degree polynomials must first be    */
457 /*  multiplied by an appropriate power of x.  The representation used   */
458 /*  is that the coefficient of x^0 is stored in the LSB of the 32-bit   */
459 /*  word and the coefficient of x^31 is stored in the most significant  */
460 /*  bit.  The CRC is to be appended to the data most significant byte   */
461 /*  first.  For those protocols in which bytes are transmitted MSB      */
462 /*  first and in the same order as they are encountered in the block    */
463 /*  this convention results in the CRC remainder being transmitted with */
464 /*  the coefficient of x^31 first and with that of x^0 last (just as    */
465 /*  would be done by a hardware shift register mechanization).          */
466 /*                                                                      */
467 /*  The table lookup technique was adapted from the algorithm described */
468 /*  by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/
469 
470 
471 #define POLYNOMIAL 0x04c11db7L
472 
473 
474 static unsigned long crc_table[256];
475 
476 
477 void
gen_crc_table()478 gen_crc_table()
479 {
480 	// generate the table of CRC remainders for all possible bytes
481 
482 	int i, j;
483 	unsigned long crc_accum;
484 
485 	for (i = 0;  i < 256;  i++) {
486 		crc_accum = ((unsigned long) i << 24);
487 		for (j = 0;  j < 8;  j++) {
488 			if (crc_accum & 0x80000000L)
489 				crc_accum = (crc_accum << 1) ^ POLYNOMIAL;
490 			else
491 				crc_accum = (crc_accum << 1);
492 		}
493 		crc_table[i] = crc_accum;
494 	}
495 
496 	return;
497 }
498 
499 
500 unsigned long
update_crc(unsigned long crc_accum,const char * data_blk_ptr,int data_blk_size)501 update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size)
502 {
503 	// update the CRC on the data block one byte at a time
504 
505 	int i, j;
506 
507 	for (j = 0;  j < data_blk_size;  j++) {
508 		i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff;
509 		crc_accum = (crc_accum << 8) ^ crc_table[i];
510 	}
511 
512 	return crc_accum;
513 }
514 
515