xref: /haiku/src/apps/mail/WIndex.cpp (revision 62f5ba006a08b0df30631375878effaf67ae5dbc)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2001, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 
36 #include "WIndex.h"
37 
38 #include <File.h>
39 #include <fs_attr.h>
40 #include <Message.h>
41 #include <Node.h>
42 
43 #include <ctype.h>
44 #include <stdlib.h>
45 #include <stdio.h>
46 
47 
48 #define IVERSION	1
49 
50 
51 static int32 kCRCTable = 0;
52 
53 
54 int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2);
55 void gen_crc_table();
56 unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr,
57 	int data_blk_size);
58 
59 
60 FileEntry::FileEntry(void)
61 {
62 
63 }
64 
65 
66 FileEntry::FileEntry(const char *entryString)
67 	:
68 	BString(entryString)
69 {
70 
71 }
72 
73 
74 status_t
75 WIndex::SetTo(const char *dataPath, const char *indexPath)
76 {
77 	BFile* dataFile;
78 	BFile indexFile;
79 
80 	dataFile = new BFile();
81 
82 	if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) {
83 		return B_ERROR;
84 	} else {
85 		bool buildIndex = true;
86 		SetTo(dataFile);
87 
88 		time_t mtime;
89 		time_t modified;
90 
91 		dataFile->GetModificationTime(&mtime);
92 
93 		if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) {
94 			attr_info info;
95 			if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) {
96 				uint32 version = 0;
97 				indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0,
98 					&version, 4);
99 				if (IVERSION == version) {
100 					if ((indexFile.GetAttrInfo("WINDEX:modified", &info)
101 						== B_OK)) {
102 						indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0,
103 							&modified, 4);
104 
105 						if (mtime == modified) {
106 							if (UnflattenIndex(&indexFile) == B_OK)
107 								buildIndex = false;
108 						}
109 					}
110 				}
111 			}
112 			indexFile.Unset();
113 		}
114 		if (buildIndex) {
115 			InitIndex();
116 			BuildIndex();
117 			if (indexFile.SetTo(indexPath,
118 				B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) {
119 				FlattenIndex(&indexFile);
120 				indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0,
121 					&mtime, 4);
122 				uint32 version = IVERSION;
123 				indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0,
124 					&version, 4);
125 			}
126 		}
127 	}
128 	return B_OK;
129 }
130 
131 
132 FileEntry::~FileEntry(void)
133 {
134 
135 }
136 
137 
138 WIndex::WIndex(int32 count)
139 {
140 	fEntryList = NULL;
141 	fDataFile = NULL;
142 	fEntriesPerBlock = count;
143 	fEntrySize = sizeof(WIndexEntry);
144 	if (!atomic_or(&kCRCTable, 1))
145 		gen_crc_table();
146 }
147 
148 
149 WIndex::WIndex(BPositionIO *dataFile, int32 count)
150 {
151 	fEntryList = NULL;
152 	fDataFile = dataFile;
153 	fEntriesPerBlock = count;
154 	fEntrySize = sizeof(WIndexEntry);
155 	if (!atomic_or(&kCRCTable, 1))
156 		gen_crc_table();
157 }
158 
159 
160 WIndex::~WIndex(void)
161 {
162 	if (fEntryList)
163 		free(fEntryList);
164 	delete fDataFile;
165 }
166 
167 
168 status_t
169 WIndex::UnflattenIndex(BPositionIO *io)
170 {
171 	if (fEntryList)
172 		free(fEntryList);
173 	WIndexHead		head;
174 
175 	io->Seek(0, SEEK_SET);
176 	io->Read(&head, sizeof(head));
177 	io->Seek(head.offset, SEEK_SET);
178 
179 	fEntrySize = head.entrySize;
180 	fEntries = head.entries;
181 	fMaxEntries = fEntriesPerBlock;
182 	fBlockSize = fEntriesPerBlock * fEntrySize;
183 	fBlocks = fEntries / fEntriesPerBlock + 1;;
184 	fIsSorted = true;
185 
186 	int32 size = (head.entries + 1) * head.entrySize;
187 	if (!(fEntryList = (uint8 *)malloc(size)))
188 		return B_ERROR;
189 
190 	if (fEntries)
191 		io->Read(fEntryList, size);
192 
193 	return B_OK;
194 }
195 
196 
197 status_t
198 WIndex::FlattenIndex(BPositionIO *io)
199 {
200 	if (fEntries && !fIsSorted)
201 		SortItems();
202 	WIndexHead		head;
203 
204 	head.entries = fEntries;
205 	head.entrySize = fEntrySize;
206 	head.offset = sizeof(WIndexHead);
207 	io->Seek(0, SEEK_SET);
208 	io->Write(&head, sizeof(head));
209 	if (fEntries)
210 		io->Write(fEntryList, head.entries * head.entrySize);
211 
212 	return B_OK;
213 }
214 
215 
216 int32
217 WIndex::Lookup(int32 key)
218 {
219 	if (!fEntries)
220 		return -1;
221 	if (!fIsSorted)
222 		SortItems();
223 
224 	// Binary Search
225 	int32	M, Lb, Ub;
226 	Lb = 0;
227 	Ub = fEntries - 1;
228 	while (true) {
229 		M = (Lb + Ub) / 2;
230 		if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
231 			Ub = M - 1;
232 		else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
233 			Lb = M + 1;
234 		else
235 			return M;
236 		if (Lb > Ub)
237 			return -1;
238 	}
239 }
240 
241 
242 status_t
243 WIndex::AddItem(WIndexEntry *entry)
244 {
245 	if (_BlockCheck() == B_ERROR)
246 		return B_ERROR;
247 	memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry,
248 		fEntrySize);
249 	fEntries++;
250 	fIsSorted = false;
251 	return B_OK;
252 }
253 
254 
255 void
256 WIndex::SortItems(void)
257 {
258 	qsort(fEntryList, fEntries, fEntrySize,
259 		(int(*)(const void *, const void *))cmp_i_entries);
260 
261 	fIsSorted = true;
262 }
263 
264 
265 status_t
266 WIndex::_BlockCheck(void)
267 {
268 	if (fEntries < fMaxEntries)
269 		return B_OK;
270 	fBlocks = fEntries / fEntriesPerBlock + 1;
271 	fEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks);
272 	if (!fEntryList)
273 		return B_ERROR;
274 	return B_OK;
275 }
276 
277 
278 status_t
279 WIndex::InitIndex(void)
280 {
281 	if (fEntryList)
282 		free(fEntryList);
283 	fIsSorted = 0;
284 	fEntries = 0;
285 	fMaxEntries = fEntriesPerBlock;
286 	fBlockSize = fEntriesPerBlock * fEntrySize;
287 	fBlocks = 1;
288 	fEntryList = (uint8 *)malloc(fBlockSize);
289 	if (!fEntryList)
290 		return B_ERROR;
291 	return B_OK;
292 }
293 
294 
295 int32
296 WIndex::GetKey(const char *s)
297 {
298 
299 	int32	key = 0;
300 	/*int32	x;
301 	int32	a = 84589;
302 	int32	b = 45989;
303 	int32	m = 217728;
304 	while (*s) {
305 		x = *s++ - 'a';
306 
307 		key ^= (a * x + b) % m;
308 		key <<= 1;
309 	}*/
310 
311 	key = update_crc(0, s, strlen(s));
312 
313 	if (key < 0) // No negative values!
314 		key = ~key;
315 
316 	return key;
317 }
318 
319 
320 int32
321 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2)
322 {
323 	return e1->key - e2->key;
324 }
325 
326 
327 status_t
328 WIndex::SetTo(BPositionIO *dataFile)
329 {
330 	fDataFile = dataFile;
331 	return B_OK;
332 }
333 
334 
335 void
336 WIndex::Unset(void)
337 {
338 	fDataFile = NULL;
339 }
340 
341 
342 int32
343 WIndex::FindFirst(const char *word)
344 {
345 	if (!fEntries)
346 		return -1;
347 
348 	int32			index;
349 	char			nword[256];
350 	int32			key;
351 
352 	NormalizeWord(word, nword);
353 	key = GetKey(nword);
354 
355 	if ((index = Lookup(key)) < 0)
356 		return -1;
357 	// Find first instance of key
358 	while ((ItemAt(index - 1))->key == key)
359 		index--;
360 	return index;
361 }
362 
363 
364 FileEntry*
365 WIndex::GetEntry(int32 index)
366 {
367 	if ((index >= fEntries)||(index < 0))
368 		return NULL;
369 	WIndexEntry		*ientry;
370 	FileEntry		*dentry;
371 	char			*buffer;
372 
373 	dentry = new FileEntry();
374 
375 	ientry = ItemAt(index);
376 
377 	int32 size;
378 
379 	fDataFile->Seek(ientry->offset, SEEK_SET);
380 	buffer = dentry->LockBuffer(256);
381 	fDataFile->Read(buffer, 256);
382 	size = _GetEntrySize(ientry, buffer);
383 	//buffer[256] = 0;
384 	//printf("Entry: = %s\n", buffer);
385 	dentry->UnlockBuffer(size);
386 	return dentry;
387 }
388 
389 
390 size_t
391 WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData)
392 {
393 	// eliminate unused parameter warning
394 	(void)entry;
395 
396 	return strcspn(entryData, "\n\r");
397 }
398 
399 
400 FileEntry*
401 WIndex::GetEntry(const char *word)
402 {
403 	return GetEntry(FindFirst(word));
404 }
405 
406 
407 char*
408 WIndex::NormalizeWord(const char *word, char *dest)
409 {
410 	const char 	*src;
411 	char		*dst;
412 
413 	// remove dots and copy
414 	src = word;
415 	dst = dest;
416 	while (*src) {
417 		if (*src != '.')
418 			*dst++ = *src;
419 		src++;
420 	}
421 	*dst = 0;
422 
423 	// convert to lower-case
424 	dst = dest;
425 	while (*dst)
426 		*dst++ = tolower(*dst);
427 	return dest;
428 }
429 
430 
431 /* crc32h.c -- package to compute 32-bit CRC one byte at a time using   */
432 /*             the high-bit first (Big-Endian) bit ordering convention  */
433 /*                                                                      */
434 /* Synopsis:                                                            */
435 /*  gen_crc_table() -- generates a 256-word table containing all CRC    */
436 /*                     remainders for every possible 8-bit byte.  It    */
437 /*                     must be executed (once) before any CRC updates.  */
438 /*                                                                      */
439 /*  unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size)         */
440 /*           unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */
441 /*           Returns the updated value of the CRC accumulator after     */
442 /*           processing each byte in the addressed block of data.       */
443 /*                                                                      */
444 /*  It is assumed that an unsigned long is at least 32 bits wide and    */
445 /*  that the predefined type char occupies one 8-bit byte of storage.   */
446 /*                                                                      */
447 /*  The generator polynomial used for this version of the package is    */
448 /*  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */
449 /*  as specified in the Autodin/Ethernet/ADCCP protocol standards.      */
450 /*  Other degree 32 polynomials may be substituted by re-defining the   */
451 /*  symbol POLYNOMIAL below.  Lower degree polynomials must first be    */
452 /*  multiplied by an appropriate power of x.  The representation used   */
453 /*  is that the coefficient of x^0 is stored in the LSB of the 32-bit   */
454 /*  word and the coefficient of x^31 is stored in the most significant  */
455 /*  bit.  The CRC is to be appended to the data most significant byte   */
456 /*  first.  For those protocols in which bytes are transmitted MSB      */
457 /*  first and in the same order as they are encountered in the block    */
458 /*  this convention results in the CRC remainder being transmitted with */
459 /*  the coefficient of x^31 first and with that of x^0 last (just as    */
460 /*  would be done by a hardware shift register mechanization).          */
461 /*                                                                      */
462 /*  The table lookup technique was adapted from the algorithm described */
463 /*  by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/
464 
465 
466 #define POLYNOMIAL 0x04c11db7L
467 
468 
469 static unsigned long crc_table[256];
470 
471 
472 void
473 gen_crc_table()
474 {
475 	// generate the table of CRC remainders for all possible bytes
476 
477 	register int i, j;
478 	register unsigned long crc_accum;
479 
480 	for (i = 0;  i < 256;  i++) {
481 		crc_accum = ((unsigned long) i << 24);
482 		for (j = 0;  j < 8;  j++) {
483 			if (crc_accum & 0x80000000L)
484 				crc_accum = (crc_accum << 1) ^ POLYNOMIAL;
485 			else
486 				crc_accum = (crc_accum << 1);
487 		}
488 		crc_table[i] = crc_accum;
489 	}
490 
491 	return;
492 }
493 
494 
495 unsigned long
496 update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size)
497 {
498 	// update the CRC on the data block one byte at a time
499 
500 	register int i, j;
501 
502 	for (j = 0;  j < data_blk_size;  j++) {
503 		i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff;
504 		crc_accum = (crc_accum << 8) ^ crc_table[i];
505 	}
506 
507 	return crc_accum;
508 }
509 
510