xref: /haiku/src/apps/mail/WIndex.cpp (revision 495060760727dd782c9f8a90db71e5d727f19748)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 
36 #include "WIndex.h"
37 
38 #include <File.h>
39 #include <fs_attr.h>
40 #include <Message.h>
41 #include <Node.h>
42 
43 #include <ctype.h>
44 #include <stdlib.h>
45 #include <stdio.h>
46 
47 
48 #define IVERSION	1
49 
50 
51 static int32 kCRCTable = 0;
52 
53 
54 int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2);
55 void gen_crc_table();
56 unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr,
57 	int data_blk_size);
58 
59 
60 FileEntry::FileEntry(void)
61 {
62 
63 }
64 
65 
66 FileEntry::FileEntry(const char *entryString)
67 	:
68 	BString(entryString)
69 {
70 
71 }
72 
73 
74 status_t
75 WIndex::SetTo(const char *dataPath, const char *indexPath)
76 {
77 	BFile* dataFile;
78 	BFile indexFile;
79 
80 	dataFile = new BFile();
81 
82 	if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) {
83 		return B_ERROR;
84 	} else {
85 		bool buildIndex = true;
86 		SetTo(dataFile);
87 
88 		time_t mtime;
89 		time_t modified;
90 
91 		dataFile->GetModificationTime(&mtime);
92 
93 		if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) {
94 			attr_info info;
95 			if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) {
96 				uint32 version = 0;
97 				indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0,
98 					&version, 4);
99 				if (IVERSION == version) {
100 					if ((indexFile.GetAttrInfo("WINDEX:modified", &info)
101 						== B_OK)) {
102 						indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0,
103 							&modified, 4);
104 
105 						if (mtime == modified) {
106 							if (UnflattenIndex(&indexFile) == B_OK)
107 								buildIndex = false;
108 						}
109 					}
110 				}
111 			}
112 			indexFile.Unset();
113 		}
114 		if (buildIndex) {
115 			InitIndex();
116 			BuildIndex();
117 			if (indexFile.SetTo(indexPath,
118 				B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) {
119 				FlattenIndex(&indexFile);
120 				indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0,
121 					&mtime, 4);
122 				uint32 version = IVERSION;
123 				indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0,
124 					&version, 4);
125 			}
126 		}
127 	}
128 	return B_OK;
129 }
130 
131 
132 FileEntry::~FileEntry(void)
133 {
134 
135 }
136 
137 
138 WIndex::WIndex(int32 count)
139 {
140 	fEntryList = NULL;
141 	fDataFile = NULL;
142 	fEntriesPerBlock = count;
143 	fEntrySize = sizeof(WIndexEntry);
144 	if (!atomic_or(&kCRCTable, 1))
145 		gen_crc_table();
146 }
147 
148 
149 WIndex::WIndex(BPositionIO *dataFile, int32 count)
150 {
151 	fEntryList = NULL;
152 	fDataFile = dataFile;
153 	fEntriesPerBlock = count;
154 	fEntrySize = sizeof(WIndexEntry);
155 	if (!atomic_or(&kCRCTable, 1))
156 		gen_crc_table();
157 }
158 
159 
160 WIndex::~WIndex(void)
161 {
162 	if (fEntryList)
163 		free(fEntryList);
164 	delete fDataFile;
165 }
166 
167 
168 status_t
169 WIndex::UnflattenIndex(BPositionIO *io)
170 {
171 	if (fEntryList)
172 		free(fEntryList);
173 	WIndexHead		head;
174 
175 	io->Seek(0, SEEK_SET);
176 	io->Read(&head, sizeof(head));
177 	io->Seek(head.offset, SEEK_SET);
178 
179 	fEntrySize = head.entrySize;
180 	fEntries = head.entries;
181 	fMaxEntries = fEntriesPerBlock;
182 	fBlockSize = fEntriesPerBlock * fEntrySize;
183 	fBlocks = fEntries / fEntriesPerBlock + 1;;
184 	fIsSorted = true;
185 
186 	int32 size = (head.entries + 1) * head.entrySize;
187 	if (!(fEntryList = (uint8 *)malloc(size)))
188 		return B_ERROR;
189 
190 	if (fEntries)
191 		io->Read(fEntryList, size);
192 
193 	return B_OK;
194 }
195 
196 
197 status_t
198 WIndex::FlattenIndex(BPositionIO *io)
199 {
200 	if (fEntries && !fIsSorted)
201 		SortItems();
202 	WIndexHead		head;
203 
204 	head.entries = fEntries;
205 	head.entrySize = fEntrySize;
206 	head.offset = sizeof(WIndexHead);
207 	io->Seek(0, SEEK_SET);
208 	io->Write(&head, sizeof(head));
209 	if (fEntries)
210 		io->Write(fEntryList, head.entries * head.entrySize);
211 
212 	return B_OK;
213 }
214 
215 
216 int32
217 WIndex::Lookup(int32 key)
218 {
219 	if (!fEntries)
220 		return -1;
221 	if (!fIsSorted)
222 		SortItems();
223 
224 	// Binary Search
225 	int32	M, Lb, Ub;
226 	Lb = 0;
227 	Ub = fEntries - 1;
228 	while (true) {
229 		M = (Lb + Ub) / 2;
230 		if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
231 			Ub = M - 1;
232 		else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
233 			Lb = M + 1;
234 		else
235 			return M;
236 		if (Lb > Ub)
237 			return -1;
238 	}
239 }
240 
241 
242 status_t
243 WIndex::AddItem(WIndexEntry *entry)
244 {
245 	if (_BlockCheck() == B_ERROR)
246 		return B_ERROR;
247 	memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry,
248 		fEntrySize);
249 	fEntries++;
250 	fIsSorted = false;
251 	return B_OK;
252 }
253 
254 
255 void
256 WIndex::SortItems(void)
257 {
258 	qsort(fEntryList, fEntries, fEntrySize,
259 		(int(*)(const void *, const void *))cmp_i_entries);
260 
261 	fIsSorted = true;
262 }
263 
264 
265 status_t
266 WIndex::_BlockCheck(void)
267 {
268 	if (fEntries < fMaxEntries)
269 		return B_OK;
270 	fBlocks = fEntries / fEntriesPerBlock + 1;
271 	uint8* tmpEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks);
272 	if (!tmpEntryList) {
273 		free(fEntryList);
274 		fEntryList = NULL;
275 		return B_ERROR;
276 	} else {
277 		fEntryList = tmpEntryList;
278 	}
279 	return B_OK;
280 }
281 
282 
283 status_t
284 WIndex::InitIndex(void)
285 {
286 	if (fEntryList)
287 		free(fEntryList);
288 	fIsSorted = 0;
289 	fEntries = 0;
290 	fMaxEntries = fEntriesPerBlock;
291 	fBlockSize = fEntriesPerBlock * fEntrySize;
292 	fBlocks = 1;
293 	fEntryList = (uint8 *)malloc(fBlockSize);
294 	if (!fEntryList)
295 		return B_ERROR;
296 	return B_OK;
297 }
298 
299 
300 int32
301 WIndex::GetKey(const char *s)
302 {
303 
304 	int32	key = 0;
305 	/*int32	x;
306 	int32	a = 84589;
307 	int32	b = 45989;
308 	int32	m = 217728;
309 	while (*s) {
310 		x = *s++ - 'a';
311 
312 		key ^= (a * x + b) % m;
313 		key <<= 1;
314 	}*/
315 
316 	key = update_crc(0, s, strlen(s));
317 
318 	if (key < 0) // No negative values!
319 		key = ~key;
320 
321 	return key;
322 }
323 
324 
325 int32
326 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2)
327 {
328 	return e1->key - e2->key;
329 }
330 
331 
332 status_t
333 WIndex::SetTo(BPositionIO *dataFile)
334 {
335 	fDataFile = dataFile;
336 	return B_OK;
337 }
338 
339 
340 void
341 WIndex::Unset(void)
342 {
343 	fDataFile = NULL;
344 }
345 
346 
347 int32
348 WIndex::FindFirst(const char *word)
349 {
350 	if (!fEntries)
351 		return -1;
352 
353 	int32			index;
354 	char			nword[256];
355 	int32			key;
356 
357 	NormalizeWord(word, nword);
358 	key = GetKey(nword);
359 
360 	if ((index = Lookup(key)) < 0)
361 		return -1;
362 	// Find first instance of key
363 	while ((ItemAt(index - 1))->key == key)
364 		index--;
365 	return index;
366 }
367 
368 
369 FileEntry*
370 WIndex::GetEntry(int32 index)
371 {
372 	if ((index >= fEntries)||(index < 0))
373 		return NULL;
374 	WIndexEntry		*ientry;
375 	FileEntry		*dentry;
376 	char			*buffer;
377 
378 	dentry = new FileEntry();
379 
380 	ientry = ItemAt(index);
381 
382 	int32 size;
383 
384 	fDataFile->Seek(ientry->offset, SEEK_SET);
385 	buffer = dentry->LockBuffer(256);
386 	fDataFile->Read(buffer, 256);
387 	size = _GetEntrySize(ientry, buffer);
388 	//buffer[256] = 0;
389 	//printf("Entry: = %s\n", buffer);
390 	dentry->UnlockBuffer(size);
391 	return dentry;
392 }
393 
394 
395 size_t
396 WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData)
397 {
398 	// eliminate unused parameter warning
399 	(void)entry;
400 
401 	return strcspn(entryData, "\n\r");
402 }
403 
404 
405 FileEntry*
406 WIndex::GetEntry(const char *word)
407 {
408 	return GetEntry(FindFirst(word));
409 }
410 
411 
412 char*
413 WIndex::NormalizeWord(const char *word, char *dest)
414 {
415 	const char 	*src;
416 	char		*dst;
417 
418 	// remove dots and copy
419 	src = word;
420 	dst = dest;
421 	while (*src) {
422 		if (*src != '.')
423 			*dst++ = *src;
424 		src++;
425 	}
426 	*dst = 0;
427 
428 	// convert to lower-case
429 	for (dst = dest; *dst; dst++)
430 		*dst = tolower(*dst);
431 	return dest;
432 }
433 
434 
435 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using   */
436 /*             the high-bit first (Big-Endian) bit ordering convention  */
437 /*                                                                      */
438 /* Synopsis:                                                            */
439 /*  gen_crc_table() -- generates a 256-word table containing all CRC    */
440 /*                     remainders for every possible 8-bit byte.  It    */
441 /*                     must be executed (once) before any CRC updates.  */
442 /*                                                                      */
443 /*  unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size)         */
444 /*           unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */
445 /*           Returns the updated value of the CRC accumulator after     */
446 /*           processing each byte in the addressed block of data.       */
447 /*                                                                      */
448 /*  It is assumed that an unsigned long is at least 32 bits wide and    */
449 /*  that the predefined type char occupies one 8-bit byte of storage.   */
450 /*                                                                      */
451 /*  The generator polynomial used for this version of the package is    */
452 /*  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */
453 /*  as specified in the Autodin/Ethernet/ADCCP protocol standards.      */
454 /*  Other degree 32 polynomials may be substituted by re-defining the   */
455 /*  symbol POLYNOMIAL below.  Lower degree polynomials must first be    */
456 /*  multiplied by an appropriate power of x.  The representation used   */
457 /*  is that the coefficient of x^0 is stored in the LSB of the 32-bit   */
458 /*  word and the coefficient of x^31 is stored in the most significant  */
459 /*  bit.  The CRC is to be appended to the data most significant byte   */
460 /*  first.  For those protocols in which bytes are transmitted MSB      */
461 /*  first and in the same order as they are encountered in the block    */
462 /*  this convention results in the CRC remainder being transmitted with */
463 /*  the coefficient of x^31 first and with that of x^0 last (just as    */
464 /*  would be done by a hardware shift register mechanization).          */
465 /*                                                                      */
466 /*  The table lookup technique was adapted from the algorithm described */
467 /*  by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/
468 
469 
470 #define POLYNOMIAL 0x04c11db7L
471 
472 
473 static unsigned long crc_table[256];
474 
475 
476 void
477 gen_crc_table()
478 {
479 	// generate the table of CRC remainders for all possible bytes
480 
481 	register int i, j;
482 	register unsigned long crc_accum;
483 
484 	for (i = 0;  i < 256;  i++) {
485 		crc_accum = ((unsigned long) i << 24);
486 		for (j = 0;  j < 8;  j++) {
487 			if (crc_accum & 0x80000000L)
488 				crc_accum = (crc_accum << 1) ^ POLYNOMIAL;
489 			else
490 				crc_accum = (crc_accum << 1);
491 		}
492 		crc_table[i] = crc_accum;
493 	}
494 
495 	return;
496 }
497 
498 
499 unsigned long
500 update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size)
501 {
502 	// update the CRC on the data block one byte at a time
503 
504 	register int i, j;
505 
506 	for (j = 0;  j < data_blk_size;  j++) {
507 		i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff;
508 		crc_accum = (crc_accum << 8) ^ crc_table[i];
509 	}
510 
511 	return crc_accum;
512 }
513 
514