1 /* 2 * Copyright 2010, Haiku. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * based on previous work of Ankur Sethi 7 * Clemens Zeidler <haiku@clemens-zeidler.de> 8 */ 9 10 #include "CLuceneDataBase.h" 11 12 #include <Directory.h> 13 #include <File.h> 14 #include <TranslatorRoster.h> 15 16 17 #define DEBUG_CLUCENE_DATABASE 18 #ifdef DEBUG_CLUCENE_DATABASE 19 #include <stdio.h> 20 # define STRACE(x...) printf("FT: " x) 21 #else 22 # define STRACE(x...) ; 23 #endif 24 25 26 using namespace lucene::document; 27 using namespace lucene::util; 28 29 30 const uint8 kCluceneTries = 10; 31 32 33 wchar_t* to_wchar(const char *str) 34 { 35 int size = strlen(str) * sizeof(wchar_t) ; 36 wchar_t *wStr = new wchar_t[size] ; 37 38 if (mbstowcs(wStr, str, size) == -1) { 39 delete[] wStr ; 40 return NULL ; 41 } else 42 return wStr ; 43 } 44 45 46 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath) 47 : 48 fDataBasePath(databasePath), 49 fTempPath(databasePath), 50 fIndexWriter(NULL) 51 { 52 printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path()); 53 create_directory(fDataBasePath.Path(), 0755); 54 55 fTempPath.Append("temp_file"); 56 } 57 58 59 CLuceneWriteDataBase::~CLuceneWriteDataBase() 60 { 61 // TODO: delete fTempPath file 62 } 63 64 65 status_t 66 CLuceneWriteDataBase::InitCheck() 67 { 68 69 return B_OK; 70 } 71 72 73 status_t 74 CLuceneWriteDataBase::AddDocument(const entry_ref& ref) 75 { 76 // check if already in the queue 77 for (unsigned int i = 0; i < fAddQueue.size(); i++) { 78 if (fAddQueue.at(i) == ref) 79 return B_OK; 80 } 81 fAddQueue.push_back(ref); 82 83 return B_OK; 84 } 85 86 87 status_t 88 CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref) 89 { 90 // check if already in the queue 91 for (unsigned int i = 0; i < fAddQueue.size(); i++) { 92 if (fDeleteQueue.at(i) == ref) 93 return B_OK; 94 } 95 fDeleteQueue.push_back(ref); 96 return B_OK; 97 } 98 99 100 status_t 101 CLuceneWriteDataBase::Commit() 102 { 103 if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0) 104 return B_OK; 105 STRACE("Commit\n"); 106 107 _RemoveDocuments(fAddQueue); 108 _RemoveDocuments(fDeleteQueue); 109 fDeleteQueue.clear(); 110 111 if (fAddQueue.size() == 0) 112 return B_OK; 113 114 fIndexWriter = _OpenIndexWriter(); 115 if (fIndexWriter == NULL) 116 return B_ERROR; 117 118 status_t status = B_OK; 119 for (unsigned int i = 0; i < fAddQueue.size(); i++) { 120 if (!_IndexDocument(fAddQueue.at(i))) { 121 status = B_ERROR; 122 break; 123 } 124 } 125 126 fAddQueue.clear(); 127 fIndexWriter->close(); 128 delete fIndexWriter; 129 fIndexWriter = NULL; 130 131 return status; 132 } 133 134 135 IndexWriter* 136 CLuceneWriteDataBase::_OpenIndexWriter() 137 { 138 IndexWriter* writer = NULL; 139 for (int i = 0; i < kCluceneTries; i++) { 140 try { 141 bool createIndex = true; 142 if (IndexReader::indexExists(fDataBasePath.Path())) 143 createIndex = false; 144 145 writer = new IndexWriter(fDataBasePath.Path(), 146 &fStandardAnalyzer, createIndex); 147 if (writer) 148 break; 149 } catch (CLuceneError &error) { 150 STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what()); 151 delete writer; 152 writer = NULL; 153 } 154 } 155 return writer; 156 } 157 158 159 IndexReader* 160 CLuceneWriteDataBase::_OpenIndexReader() 161 { 162 IndexReader* reader = NULL; 163 164 BEntry entry(fDataBasePath.Path(), NULL); 165 if (!entry.Exists()) 166 return NULL; 167 168 for (int i = 0; i < kCluceneTries; i++) { 169 try { 170 if (!IndexReader::indexExists(fDataBasePath.Path())) 171 return NULL; 172 173 reader = IndexReader::open(fDataBasePath.Path()); 174 if (reader) 175 break; 176 } catch (CLuceneError &error) { 177 STRACE("CLuceneError: _OpenIndexReader %s\n", error.what()); 178 delete reader; 179 reader = NULL; 180 } 181 } 182 183 return reader; 184 } 185 186 187 bool 188 CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs) 189 { 190 IndexReader *reader = NULL; 191 reader = _OpenIndexReader(); 192 if (!reader) 193 return false; 194 bool status = false; 195 196 for (unsigned int i = 0; i < docs.size(); i++) { 197 BPath path(&docs.at(i)); 198 wchar_t* wPath = to_wchar(path.Path()); 199 if (wPath == NULL) 200 continue; 201 202 for (int i = 0; i < kCluceneTries; i++) { 203 status = _RemoveDocument(wPath, reader); 204 if (status) 205 break; 206 reader->close(); 207 delete reader; 208 reader = _OpenIndexReader(); 209 if (!reader) { 210 status = false; 211 break; 212 } 213 } 214 delete[] wPath; 215 216 if (!status) 217 break; 218 } 219 220 reader->close(); 221 delete reader; 222 223 return status; 224 } 225 226 227 bool 228 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader) 229 { 230 try { 231 Term term(_T("path"), wPath); 232 reader->deleteDocuments(&term); 233 } catch (CLuceneError &error) { 234 STRACE("CLuceneError: deleteDocuments %s\n", error.what()); 235 return false; 236 } 237 return true; 238 } 239 240 241 bool 242 CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref) 243 { 244 BPath path(&ref); 245 246 BFile inFile, outFile; 247 inFile.SetTo(path.Path(), B_READ_ONLY); 248 if (inFile.InitCheck() != B_OK) { 249 STRACE("Can't open inFile %s\n", path.Path()); 250 return false; 251 } 252 outFile.SetTo(fTempPath.Path(), 253 B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE); 254 if (outFile.InitCheck() != B_OK) { 255 STRACE("Can't open outFile %s\n", fTempPath.Path()); 256 return false; 257 } 258 259 BTranslatorRoster* translatorRoster = BTranslatorRoster::Default(); 260 if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT') 261 != B_OK) 262 return false; 263 264 inFile.Unset(); 265 outFile.Unset(); 266 267 FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8"); 268 wchar_t* wPath = to_wchar(path.Path()); 269 if (wPath == NULL) 270 return false; 271 272 Document *document = new Document; 273 Field contentField(_T("contents"), fileReader, 274 Field::STORE_NO | Field::INDEX_TOKENIZED); 275 document->add(contentField); 276 Field pathField(_T("path"), wPath, 277 Field::STORE_YES | Field::INDEX_UNTOKENIZED); 278 document->add(pathField); 279 280 bool status = true; 281 for (int i = 0; i < kCluceneTries; i++) { 282 try { 283 fIndexWriter->addDocument(document); 284 STRACE("document added, retries: %i\n", i); 285 break; 286 } catch (CLuceneError &error) { 287 STRACE("CLuceneError addDocument %s\n", error.what()); 288 fIndexWriter->close(); 289 delete fIndexWriter; 290 fIndexWriter = _OpenIndexWriter(); 291 if (fIndexWriter == NULL) { 292 status = false; 293 break; 294 } 295 } 296 } 297 298 if (!status) 299 delete document; 300 delete[] wPath; 301 return status; 302 } 303