xref: /haiku/src/add-ons/index_server/fulltext/CLuceneDataBase.cpp (revision 4466b89c65970de4c7236ac87faa2bee4589f413)
1 /*
2  * Copyright 2010, Haiku.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		based on previous work of Ankur Sethi
7  *		Clemens Zeidler <haiku@clemens-zeidler.de>
8  */
9 
10 #include "CLuceneDataBase.h"
11 
12 #include <Directory.h>
13 #include <File.h>
14 #include <TranslatorRoster.h>
15 
16 
// Debug tracing: while DEBUG_CLUCENE_DATABASE is defined, STRACE() prints its
// printf-style arguments to stdout, prefixed with "FT: ".
#define DEBUG_CLUCENE_DATABASE
#ifdef DEBUG_CLUCENE_DATABASE
#include <stdio.h>
#	define STRACE(x...) printf("FT: " x)
#else
	// Expands to a single statement that still needs the caller's trailing
	// semicolon, so "if (...) STRACE(...); else ..." keeps compiling when
	// tracing is disabled.
#	define STRACE(x...) do {} while (0)
#endif
24 
25 
26 using namespace lucene::document;
27 using namespace lucene::util;
28 
29 
30 const uint8 kCluceneTries = 10;
31 
32 
// Converts the multibyte string str (interpreted in the current C locale)
// into a newly allocated, NUL-terminated wide character string.
//
// Returns the converted string, which the caller owns and has to release
// with delete[], or NULL if str contains an invalid multibyte sequence.
wchar_t* to_wchar(const char *str)
{
	// A multibyte string never yields more wide characters than it has
	// bytes, so strlen() + 1 always leaves room for the terminating NUL,
	// including for the empty string.
	size_t size = strlen(str) + 1;
	wchar_t *wStr = new wchar_t[size];

	// mbstowcs() reports failure as (size_t)-1.
	if (mbstowcs(wStr, str, size) == (size_t)-1) {
		// Don't leak the buffer when the conversion fails.
		delete[] wStr;
		return NULL;
	}

	return wStr;
}
43 
44 
45 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
46 	:
47 	fDataBasePath(databasePath),
48 	fTempPath(databasePath),
49 	fIndexWriter(NULL)
50 {
51 	printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());
52 	create_directory(fDataBasePath.Path(), 0755);
53 
54 	fTempPath.Append("temp_file");
55 }
56 
57 
58 CLuceneWriteDataBase::~CLuceneWriteDataBase()
59 {
60 	// TODO: delete fTempPath file
61 }
62 
63 
64 status_t
65 CLuceneWriteDataBase::InitCheck()
66 {
67 
68 	return B_OK;
69 }
70 
71 
72 status_t
73 CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
74 {
75 	// check if already in the queue
76 	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
77 		if (fAddQueue.at(i) == ref)
78 			return B_OK;
79 	}
80 	fAddQueue.push_back(ref);
81 
82 	return B_OK;
83 }
84 
85 
86 status_t
87 CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
88 {
89 	// check if already in the queue
90 	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
91 		if (fDeleteQueue.at(i) == ref)
92 			return B_OK;
93 	}
94 	fDeleteQueue.push_back(ref);
95 	return B_OK;
96 }
97 
98 
99 status_t
100 CLuceneWriteDataBase::Commit()
101 {
102 	if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
103 		return B_OK;
104 	STRACE("Commit\n");
105 
106 	_RemoveDocuments(fAddQueue);
107 	_RemoveDocuments(fDeleteQueue);
108 	fDeleteQueue.clear();
109 
110 	if (fAddQueue.size() == 0)
111 		return B_OK;
112 
113 	fIndexWriter = _OpenIndexWriter();
114 	if (fIndexWriter == NULL)
115 		return B_ERROR;
116 
117 	status_t status = B_OK;
118 	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
119 		if (!_IndexDocument(fAddQueue.at(i))) {
120 			status = B_ERROR;
121 			break;
122 		}
123 	}
124 
125 	fAddQueue.clear();
126 	fIndexWriter->close();
127 	delete fIndexWriter;
128 	fIndexWriter = NULL;
129 
130 	return status;
131 }
132 
133 
134 IndexWriter*
135 CLuceneWriteDataBase::_OpenIndexWriter()
136 {
137 	IndexWriter* writer = NULL;
138 	for (int i = 0; i < kCluceneTries; i++) {
139 		try {
140 			bool createIndex = true;
141 			if (IndexReader::indexExists(fDataBasePath.Path()))
142 				createIndex = false;
143 
144 			writer = new IndexWriter(fDataBasePath.Path(),
145 				&fStandardAnalyzer, createIndex);
146 			if (writer)
147 				break;
148 		} catch (CLuceneError &error) {
149 			STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
150 			delete writer;
151 			writer = NULL;
152 		}
153 	}
154 	return writer;
155 }
156 
157 
158 IndexReader*
159 CLuceneWriteDataBase::_OpenIndexReader()
160 {
161 	IndexReader* reader = NULL;
162 
163 	BEntry entry(fDataBasePath.Path(), NULL);
164 	if (!entry.Exists())
165 		return NULL;
166 
167 	for (int i = 0; i < kCluceneTries; i++) {
168 		try {
169 			if (!IndexReader::indexExists(fDataBasePath.Path()))
170 				return NULL;
171 
172 			reader = IndexReader::open(fDataBasePath.Path());
173 			if (reader)
174 				break;
175 		} catch (CLuceneError &error) {
176 			STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
177 			delete reader;
178 			reader = NULL;
179 		}
180 	}
181 
182 	return reader;
183 }
184 
185 
186 bool
187 CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
188 {
189 	IndexReader *reader = NULL;
190 	reader = _OpenIndexReader();
191 	if (!reader)
192 		return false;
193 	bool status = false;
194 
195 	for (unsigned int i = 0; i < docs.size(); i++) {
196 		BPath path(&docs.at(i));
197 		wchar_t* wPath = to_wchar(path.Path());
198 		if (wPath == NULL)
199 			continue;
200 
201 		for (int i = 0; i < kCluceneTries; i++) {
202 			status = _RemoveDocument(wPath, reader);
203 			if (status)
204 				break;
205 			reader->close();
206 			delete reader;
207 			reader = _OpenIndexReader();
208 			if (!reader) {
209 				status = false;
210 				break;
211 			}
212 		}
213 		delete wPath;
214 
215 		if (!status)
216 			break;
217 	}
218 
219 	reader->close();
220 	delete reader;
221 
222 	return status;
223 }
224 
225 
226 bool
227 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
228 {
229 	try {
230 		Term term(_T("path"), wPath);
231 		reader->deleteDocuments(&term);
232 	} catch (CLuceneError &error) {
233 		STRACE("CLuceneError: deleteDocuments %s\n", error.what());
234 		return false;
235 	}
236 	return true;
237 }
238 
239 
240 bool
241 CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
242 {
243 	BPath path(&ref);
244 
245 	BFile inFile, outFile;
246 	inFile.SetTo(path.Path(), B_READ_ONLY);
247 	if (inFile.InitCheck() != B_OK) {
248 		STRACE("Can't open inFile %s\n", path.Path());
249 		return false;
250 	}
251 	outFile.SetTo(fTempPath.Path(),
252 		B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
253 	if (outFile.InitCheck() != B_OK) {
254 		STRACE("Can't open outFile %s\n", fTempPath.Path());
255 		return false;
256 	}
257 
258 	BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
259 	if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
260 		!= B_OK)
261 		return false;
262 
263 	inFile.Unset();
264 	outFile.Unset();
265 
266 	FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
267 	wchar_t* wPath = to_wchar(path.Path());
268 	if (wPath == NULL)
269 		return false;
270 
271 	Document *document = new Document;
272 	Field contentField(_T("contents"), fileReader,
273 		Field::STORE_NO | Field::INDEX_TOKENIZED);
274 	document->add(contentField);
275 	Field pathField(_T("path"), wPath,
276 		Field::STORE_YES | Field::INDEX_UNTOKENIZED);
277 	document->add(pathField);
278 
279 	bool status = true;
280 	for (int i = 0; i < kCluceneTries; i++) {
281 		try {
282 			fIndexWriter->addDocument(document);
283 			STRACE("document added, retries: %i\n", i);
284 			break;
285 		} catch (CLuceneError &error) {
286 			STRACE("CLuceneError addDocument %s\n", error.what());
287 			fIndexWriter->close();
288 			delete fIndexWriter;
289 			fIndexWriter = _OpenIndexWriter();
290 			if (fIndexWriter == NULL) {
291 				status = false;
292 				break;
293 			}
294 		}
295 	}
296 
297 	if (!status)
298 		delete document;
299 	delete wPath;
300 	return status;
301 }
302