xref: /haiku/src/add-ons/index_server/fulltext/CLuceneDataBase.cpp (revision bab64f65bb775dc23060e276f1f1c4498ab7af6c)
1 /*
2  * Copyright 2010, Haiku.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		based on previous work of Ankur Sethi
7  *		Clemens Zeidler <haiku@clemens-zeidler.de>
8  */
9 
10 #include "CLuceneDataBase.h"
11 
12 #include <Directory.h>
13 #include <File.h>
14 #include <TranslatorRoster.h>
15 
16 
// Debug tracing switch: while DEBUG_CLUCENE_DATABASE is defined, STRACE()
// forwards its printf-style arguments to printf() with an "FT: " prefix;
// otherwise STRACE() expands to an empty statement.
#define DEBUG_CLUCENE_DATABASE
#if defined(DEBUG_CLUCENE_DATABASE)
#	include <stdio.h>
#	define STRACE(x...) printf("FT: " x)
#else
#	define STRACE(x...) ;
#endif
24 
25 
26 using namespace lucene::document;
27 using namespace lucene::util;
28 
29 
30 const uint8 kCluceneTries = 10;
31 
32 
/*!	Converts the multibyte string \a str into a newly allocated wide string
	using mbstowcs(), i.e. according to the current C locale.

	\param str NUL-terminated multibyte string; must not be NULL.
	\return A NUL-terminated wide string that the caller has to release with
		delete[], or NULL if mbstowcs() reports an invalid multibyte
		sequence.
*/
wchar_t*
to_wchar(const char* str)
{
	// A multibyte string never decodes to more wide characters than it has
	// bytes, so strlen(str) characters plus one for the terminator always
	// suffice -- and, unlike a size derived from strlen() alone, this is
	// never zero for an empty input.
	const size_t capacity = strlen(str) + 1;
	wchar_t* wStr = new wchar_t[capacity];

	const size_t converted = mbstowcs(wStr, str, capacity);
	if (converted == static_cast<size_t>(-1)) {
		delete[] wStr;
		return NULL;
	}

	// converted <= strlen(str) < capacity, so this index is in bounds.
	// Terminate explicitly instead of relying on mbstowcs() having had room
	// left for the terminator.
	wStr[converted] = L'\0';
	return wStr;
}
44 
45 
CLuceneWriteDataBase(const BPath & databasePath)46 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
47 	:
48 	fDataBasePath(databasePath),
49 	fTempPath(databasePath),
50 	fIndexWriter(NULL)
51 {
52 	printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());
53 	create_directory(fDataBasePath.Path(), 0755);
54 
55 	fTempPath.Append("temp_file");
56 }
57 
58 
~CLuceneWriteDataBase()59 CLuceneWriteDataBase::~CLuceneWriteDataBase()
60 {
61 	// TODO: delete fTempPath file
62 }
63 
64 
65 status_t
InitCheck()66 CLuceneWriteDataBase::InitCheck()
67 {
68 
69 	return B_OK;
70 }
71 
72 
73 status_t
AddDocument(const entry_ref & ref)74 CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
75 {
76 	// check if already in the queue
77 	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
78 		if (fAddQueue.at(i) == ref)
79 			return B_OK;
80 	}
81 	fAddQueue.push_back(ref);
82 
83 	return B_OK;
84 }
85 
86 
87 status_t
RemoveDocument(const entry_ref & ref)88 CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
89 {
90 	// check if already in the queue
91 	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
92 		if (fDeleteQueue.at(i) == ref)
93 			return B_OK;
94 	}
95 	fDeleteQueue.push_back(ref);
96 	return B_OK;
97 }
98 
99 
100 status_t
Commit()101 CLuceneWriteDataBase::Commit()
102 {
103 	if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
104 		return B_OK;
105 	STRACE("Commit\n");
106 
107 	_RemoveDocuments(fAddQueue);
108 	_RemoveDocuments(fDeleteQueue);
109 	fDeleteQueue.clear();
110 
111 	if (fAddQueue.size() == 0)
112 		return B_OK;
113 
114 	fIndexWriter = _OpenIndexWriter();
115 	if (fIndexWriter == NULL)
116 		return B_ERROR;
117 
118 	status_t status = B_OK;
119 	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
120 		if (!_IndexDocument(fAddQueue.at(i))) {
121 			status = B_ERROR;
122 			break;
123 		}
124 	}
125 
126 	fAddQueue.clear();
127 	fIndexWriter->close();
128 	delete fIndexWriter;
129 	fIndexWriter = NULL;
130 
131 	return status;
132 }
133 
134 
135 IndexWriter*
_OpenIndexWriter()136 CLuceneWriteDataBase::_OpenIndexWriter()
137 {
138 	IndexWriter* writer = NULL;
139 	for (int i = 0; i < kCluceneTries; i++) {
140 		try {
141 			bool createIndex = true;
142 			if (IndexReader::indexExists(fDataBasePath.Path()))
143 				createIndex = false;
144 
145 			writer = new IndexWriter(fDataBasePath.Path(),
146 				&fStandardAnalyzer, createIndex);
147 			if (writer)
148 				break;
149 		} catch (CLuceneError &error) {
150 			STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
151 			delete writer;
152 			writer = NULL;
153 		}
154 	}
155 	return writer;
156 }
157 
158 
159 IndexReader*
_OpenIndexReader()160 CLuceneWriteDataBase::_OpenIndexReader()
161 {
162 	IndexReader* reader = NULL;
163 
164 	BEntry entry(fDataBasePath.Path(), NULL);
165 	if (!entry.Exists())
166 		return NULL;
167 
168 	for (int i = 0; i < kCluceneTries; i++) {
169 		try {
170 			if (!IndexReader::indexExists(fDataBasePath.Path()))
171 				return NULL;
172 
173 			reader = IndexReader::open(fDataBasePath.Path());
174 			if (reader)
175 				break;
176 		} catch (CLuceneError &error) {
177 			STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
178 			delete reader;
179 			reader = NULL;
180 		}
181 	}
182 
183 	return reader;
184 }
185 
186 
187 bool
_RemoveDocuments(std::vector<entry_ref> & docs)188 CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
189 {
190 	IndexReader *reader = NULL;
191 	reader = _OpenIndexReader();
192 	if (!reader)
193 		return false;
194 	bool status = false;
195 
196 	for (unsigned int i = 0; i < docs.size(); i++) {
197 		BPath path(&docs.at(i));
198 		wchar_t* wPath = to_wchar(path.Path());
199 		if (wPath == NULL)
200 			continue;
201 
202 		for (int i = 0; i < kCluceneTries; i++) {
203 			status = _RemoveDocument(wPath, reader);
204 			if (status)
205 				break;
206 			reader->close();
207 			delete reader;
208 			reader = _OpenIndexReader();
209 			if (!reader) {
210 				status = false;
211 				break;
212 			}
213 		}
214 		delete[] wPath;
215 
216 		if (!status)
217 			break;
218 	}
219 
220 	reader->close();
221 	delete reader;
222 
223 	return status;
224 }
225 
226 
227 bool
_RemoveDocument(wchar_t * wPath,IndexReader * reader)228 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
229 {
230 	try {
231 		Term term(_T("path"), wPath);
232 		reader->deleteDocuments(&term);
233 	} catch (CLuceneError &error) {
234 		STRACE("CLuceneError: deleteDocuments %s\n", error.what());
235 		return false;
236 	}
237 	return true;
238 }
239 
240 
241 bool
_IndexDocument(const entry_ref & ref)242 CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
243 {
244 	BPath path(&ref);
245 
246 	BFile inFile, outFile;
247 	inFile.SetTo(path.Path(), B_READ_ONLY);
248 	if (inFile.InitCheck() != B_OK) {
249 		STRACE("Can't open inFile %s\n", path.Path());
250 		return false;
251 	}
252 	outFile.SetTo(fTempPath.Path(),
253 		B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
254 	if (outFile.InitCheck() != B_OK) {
255 		STRACE("Can't open outFile %s\n", fTempPath.Path());
256 		return false;
257 	}
258 
259 	BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
260 	if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
261 		!= B_OK)
262 		return false;
263 
264 	inFile.Unset();
265 	outFile.Unset();
266 
267 	FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
268 	wchar_t* wPath = to_wchar(path.Path());
269 	if (wPath == NULL)
270 		return false;
271 
272 	Document *document = new Document;
273 	Field contentField(_T("contents"), fileReader,
274 		Field::STORE_NO | Field::INDEX_TOKENIZED);
275 	document->add(contentField);
276 	Field pathField(_T("path"), wPath,
277 		Field::STORE_YES | Field::INDEX_UNTOKENIZED);
278 	document->add(pathField);
279 
280 	bool status = true;
281 	for (int i = 0; i < kCluceneTries; i++) {
282 		try {
283 			fIndexWriter->addDocument(document);
284 			STRACE("document added, retries: %i\n", i);
285 			break;
286 		} catch (CLuceneError &error) {
287 			STRACE("CLuceneError addDocument %s\n", error.what());
288 			fIndexWriter->close();
289 			delete fIndexWriter;
290 			fIndexWriter = _OpenIndexWriter();
291 			if (fIndexWriter == NULL) {
292 				status = false;
293 				break;
294 			}
295 		}
296 	}
297 
298 	if (!status)
299 		delete document;
300 	delete[] wPath;
301 	return status;
302 }
303