1 /*
2 * Copyright 2010, Haiku.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 * based on previous work of Ankur Sethi
7 * Clemens Zeidler <haiku@clemens-zeidler.de>
8 */
9
10 #include "CLuceneDataBase.h"
11
12 #include <Directory.h>
13 #include <File.h>
14 #include <TranslatorRoster.h>
15
16
// Debug tracing: while DEBUG_CLUCENE_DATABASE is defined, STRACE() forwards
// its arguments to printf() with an "FT: " prefix; otherwise it expands to
// an empty statement.
#define DEBUG_CLUCENE_DATABASE
#ifdef DEBUG_CLUCENE_DATABASE
#include <stdio.h>
# define STRACE(x...) printf("FT: " x)
#else
# define STRACE(x...) ;
#endif
24
25
26 using namespace lucene::document;
27 using namespace lucene::util;
28
29
// Upper bound for the retry loops in this file (opening the index writer or
// reader, removing a document, adding a document).
const uint8 kCluceneTries = 10;
31
32
/*!	Converts the multibyte string \a str into a newly allocated,
	null-terminated wide character string by means of mbstowcs().

	\param str The null-terminated multibyte string to convert.
	\return The converted string, which the caller has to release with
		delete[], or NULL if \a str is NULL or mbstowcs() rejects it.
*/
wchar_t*
to_wchar(const char* str)
{
	if (str == NULL)
		return NULL;

	// Every wide character consumes at least one input byte, so strlen()
	// elements are enough for the text; the additional element holds the
	// terminating null (previously an empty input yielded a zero-sized,
	// unterminated buffer).
	size_t size = strlen(str) + 1;
	wchar_t* wStr = new wchar_t[size];

	// mbstowcs() signals an invalid multibyte sequence with (size_t)-1.
	size_t converted = mbstowcs(wStr, str, size);
	if (converted == (size_t)-1) {
		delete[] wStr;
		return NULL;
	}

	// Terminate explicitly instead of relying on mbstowcs() having had room.
	wStr[converted] = L'\0';
	return wStr;
}
44
45
CLuceneWriteDataBase(const BPath & databasePath)46 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
47 :
48 fDataBasePath(databasePath),
49 fTempPath(databasePath),
50 fIndexWriter(NULL)
51 {
52 printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());
53 create_directory(fDataBasePath.Path(), 0755);
54
55 fTempPath.Append("temp_file");
56 }
57
58
/*!	Performs no explicit cleanup at the moment; in particular the temporary
	file at fTempPath is not removed (see the TODO below).
*/
CLuceneWriteDataBase::~CLuceneWriteDataBase()
{
	// TODO: delete fTempPath file
}
63
64
65 status_t
InitCheck()66 CLuceneWriteDataBase::InitCheck()
67 {
68
69 return B_OK;
70 }
71
72
73 status_t
AddDocument(const entry_ref & ref)74 CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
75 {
76 // check if already in the queue
77 for (unsigned int i = 0; i < fAddQueue.size(); i++) {
78 if (fAddQueue.at(i) == ref)
79 return B_OK;
80 }
81 fAddQueue.push_back(ref);
82
83 return B_OK;
84 }
85
86
87 status_t
RemoveDocument(const entry_ref & ref)88 CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
89 {
90 // check if already in the queue
91 for (unsigned int i = 0; i < fAddQueue.size(); i++) {
92 if (fDeleteQueue.at(i) == ref)
93 return B_OK;
94 }
95 fDeleteQueue.push_back(ref);
96 return B_OK;
97 }
98
99
100 status_t
Commit()101 CLuceneWriteDataBase::Commit()
102 {
103 if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
104 return B_OK;
105 STRACE("Commit\n");
106
107 _RemoveDocuments(fAddQueue);
108 _RemoveDocuments(fDeleteQueue);
109 fDeleteQueue.clear();
110
111 if (fAddQueue.size() == 0)
112 return B_OK;
113
114 fIndexWriter = _OpenIndexWriter();
115 if (fIndexWriter == NULL)
116 return B_ERROR;
117
118 status_t status = B_OK;
119 for (unsigned int i = 0; i < fAddQueue.size(); i++) {
120 if (!_IndexDocument(fAddQueue.at(i))) {
121 status = B_ERROR;
122 break;
123 }
124 }
125
126 fAddQueue.clear();
127 fIndexWriter->close();
128 delete fIndexWriter;
129 fIndexWriter = NULL;
130
131 return status;
132 }
133
134
135 IndexWriter*
_OpenIndexWriter()136 CLuceneWriteDataBase::_OpenIndexWriter()
137 {
138 IndexWriter* writer = NULL;
139 for (int i = 0; i < kCluceneTries; i++) {
140 try {
141 bool createIndex = true;
142 if (IndexReader::indexExists(fDataBasePath.Path()))
143 createIndex = false;
144
145 writer = new IndexWriter(fDataBasePath.Path(),
146 &fStandardAnalyzer, createIndex);
147 if (writer)
148 break;
149 } catch (CLuceneError &error) {
150 STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
151 delete writer;
152 writer = NULL;
153 }
154 }
155 return writer;
156 }
157
158
159 IndexReader*
_OpenIndexReader()160 CLuceneWriteDataBase::_OpenIndexReader()
161 {
162 IndexReader* reader = NULL;
163
164 BEntry entry(fDataBasePath.Path(), NULL);
165 if (!entry.Exists())
166 return NULL;
167
168 for (int i = 0; i < kCluceneTries; i++) {
169 try {
170 if (!IndexReader::indexExists(fDataBasePath.Path()))
171 return NULL;
172
173 reader = IndexReader::open(fDataBasePath.Path());
174 if (reader)
175 break;
176 } catch (CLuceneError &error) {
177 STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
178 delete reader;
179 reader = NULL;
180 }
181 }
182
183 return reader;
184 }
185
186
187 bool
_RemoveDocuments(std::vector<entry_ref> & docs)188 CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
189 {
190 IndexReader *reader = NULL;
191 reader = _OpenIndexReader();
192 if (!reader)
193 return false;
194 bool status = false;
195
196 for (unsigned int i = 0; i < docs.size(); i++) {
197 BPath path(&docs.at(i));
198 wchar_t* wPath = to_wchar(path.Path());
199 if (wPath == NULL)
200 continue;
201
202 for (int i = 0; i < kCluceneTries; i++) {
203 status = _RemoveDocument(wPath, reader);
204 if (status)
205 break;
206 reader->close();
207 delete reader;
208 reader = _OpenIndexReader();
209 if (!reader) {
210 status = false;
211 break;
212 }
213 }
214 delete[] wPath;
215
216 if (!status)
217 break;
218 }
219
220 reader->close();
221 delete reader;
222
223 return status;
224 }
225
226
227 bool
_RemoveDocument(wchar_t * wPath,IndexReader * reader)228 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
229 {
230 try {
231 Term term(_T("path"), wPath);
232 reader->deleteDocuments(&term);
233 } catch (CLuceneError &error) {
234 STRACE("CLuceneError: deleteDocuments %s\n", error.what());
235 return false;
236 }
237 return true;
238 }
239
240
241 bool
_IndexDocument(const entry_ref & ref)242 CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
243 {
244 BPath path(&ref);
245
246 BFile inFile, outFile;
247 inFile.SetTo(path.Path(), B_READ_ONLY);
248 if (inFile.InitCheck() != B_OK) {
249 STRACE("Can't open inFile %s\n", path.Path());
250 return false;
251 }
252 outFile.SetTo(fTempPath.Path(),
253 B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
254 if (outFile.InitCheck() != B_OK) {
255 STRACE("Can't open outFile %s\n", fTempPath.Path());
256 return false;
257 }
258
259 BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
260 if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
261 != B_OK)
262 return false;
263
264 inFile.Unset();
265 outFile.Unset();
266
267 FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
268 wchar_t* wPath = to_wchar(path.Path());
269 if (wPath == NULL)
270 return false;
271
272 Document *document = new Document;
273 Field contentField(_T("contents"), fileReader,
274 Field::STORE_NO | Field::INDEX_TOKENIZED);
275 document->add(contentField);
276 Field pathField(_T("path"), wPath,
277 Field::STORE_YES | Field::INDEX_UNTOKENIZED);
278 document->add(pathField);
279
280 bool status = true;
281 for (int i = 0; i < kCluceneTries; i++) {
282 try {
283 fIndexWriter->addDocument(document);
284 STRACE("document added, retries: %i\n", i);
285 break;
286 } catch (CLuceneError &error) {
287 STRACE("CLuceneError addDocument %s\n", error.what());
288 fIndexWriter->close();
289 delete fIndexWriter;
290 fIndexWriter = _OpenIndexWriter();
291 if (fIndexWriter == NULL) {
292 status = false;
293 break;
294 }
295 }
296 }
297
298 if (!status)
299 delete document;
300 delete[] wPath;
301 return status;
302 }
303