1 /* 2 * Copyright 2010, Haiku. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * based on previous work of Ankur Sethi 7 * Clemens Zeidler <haiku@clemens-zeidler.de> 8 */ 9 10 #include "CLuceneDataBase.h" 11 12 #include <Directory.h> 13 #include <File.h> 14 #include <TranslatorRoster.h> 15 16 17 #define DEBUG_CLUCENE_DATABASE 18 #ifdef DEBUG_CLUCENE_DATABASE 19 #include <stdio.h> 20 # define STRACE(x...) printf("FT: " x) 21 #else 22 # define STRACE(x...) ; 23 #endif 24 25 26 using namespace lucene::document; 27 using namespace lucene::util; 28 29 30 const uint8 kCluceneTries = 10; 31 32 33 wchar_t* to_wchar(const char *str) 34 { 35 int size = strlen(str) * sizeof(wchar_t) ; 36 wchar_t *wStr = new wchar_t[size] ; 37 38 if (mbstowcs(wStr, str, size) == -1) 39 return NULL ; 40 else 41 return wStr ; 42 } 43 44 45 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath) 46 : 47 fDataBasePath(databasePath), 48 fTempPath(databasePath), 49 fIndexWriter(NULL) 50 { 51 printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path()); 52 create_directory(fDataBasePath.Path(), 0755); 53 54 fTempPath.Append("temp_file"); 55 } 56 57 58 CLuceneWriteDataBase::~CLuceneWriteDataBase() 59 { 60 // TODO: delete fTempPath file 61 } 62 63 64 status_t 65 CLuceneWriteDataBase::InitCheck() 66 { 67 68 return B_OK; 69 } 70 71 72 status_t 73 CLuceneWriteDataBase::AddDocument(const entry_ref& ref) 74 { 75 // check if already in the queue 76 for (unsigned int i = 0; i < fAddQueue.size(); i++) { 77 if (fAddQueue.at(i) == ref) 78 return B_OK; 79 } 80 fAddQueue.push_back(ref); 81 82 return B_OK; 83 } 84 85 86 status_t 87 CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref) 88 { 89 // check if already in the queue 90 for (unsigned int i = 0; i < fAddQueue.size(); i++) { 91 if (fDeleteQueue.at(i) == ref) 92 return B_OK; 93 } 94 fDeleteQueue.push_back(ref); 95 return B_OK; 96 } 97 98 99 status_t 100 CLuceneWriteDataBase::Commit() 101 { 102 if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0) 103 return B_OK; 104 STRACE("Commit\n"); 105 106 _RemoveDocuments(fAddQueue); 107 _RemoveDocuments(fDeleteQueue); 108 fDeleteQueue.clear(); 109 110 if (fAddQueue.size() == 0) 111 return B_OK; 112 113 fIndexWriter = _OpenIndexWriter(); 114 if (fIndexWriter == NULL) 115 return B_ERROR; 116 117 status_t status = B_OK; 118 for (unsigned int i = 0; i < fAddQueue.size(); i++) { 119 if (!_IndexDocument(fAddQueue.at(i))) { 120 status = B_ERROR; 121 break; 122 } 123 } 124 125 fAddQueue.clear(); 126 fIndexWriter->close(); 127 delete fIndexWriter; 128 fIndexWriter = NULL; 129 130 return status; 131 } 132 133 134 IndexWriter* 135 CLuceneWriteDataBase::_OpenIndexWriter() 136 { 137 IndexWriter* writer = NULL; 138 for (int i = 0; i < kCluceneTries; i++) { 139 try { 140 bool createIndex = true; 141 if (IndexReader::indexExists(fDataBasePath.Path())) 142 createIndex = false; 143 144 writer = new IndexWriter(fDataBasePath.Path(), 145 &fStandardAnalyzer, createIndex); 146 if (writer) 147 break; 148 } catch (CLuceneError &error) { 149 STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what()); 150 delete writer; 151 writer = NULL; 152 } 153 } 154 return writer; 155 } 156 157 158 IndexReader* 159 CLuceneWriteDataBase::_OpenIndexReader() 160 { 161 IndexReader* reader = NULL; 162 163 BEntry entry(fDataBasePath.Path(), NULL); 164 if (!entry.Exists()) 165 return NULL; 166 167 for (int i = 0; i < kCluceneTries; i++) { 168 try { 169 if (!IndexReader::indexExists(fDataBasePath.Path())) 170 return NULL; 171 172 reader = IndexReader::open(fDataBasePath.Path()); 173 if (reader) 174 break; 175 } catch (CLuceneError &error) { 176 STRACE("CLuceneError: _OpenIndexReader %s\n", error.what()); 177 delete reader; 178 reader = NULL; 179 } 180 } 181 182 return reader; 183 } 184 185 186 bool 187 CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs) 188 { 189 IndexReader *reader = NULL; 190 reader = _OpenIndexReader(); 191 if (!reader) 192 return false; 193 bool status = false; 194 195 for (unsigned int i = 0; i < docs.size(); i++) { 196 BPath path(&docs.at(i)); 197 wchar_t* wPath = to_wchar(path.Path()); 198 if (wPath == NULL) 199 continue; 200 201 for (int i = 0; i < kCluceneTries; i++) { 202 status = _RemoveDocument(wPath, reader); 203 if (status) 204 break; 205 reader->close(); 206 delete reader; 207 reader = _OpenIndexReader(); 208 if (!reader) { 209 status = false; 210 break; 211 } 212 } 213 delete wPath; 214 215 if (!status) 216 break; 217 } 218 219 reader->close(); 220 delete reader; 221 222 return status; 223 } 224 225 226 bool 227 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader) 228 { 229 try { 230 Term term(_T("path"), wPath); 231 reader->deleteDocuments(&term); 232 } catch (CLuceneError &error) { 233 STRACE("CLuceneError: deleteDocuments %s\n", error.what()); 234 return false; 235 } 236 return true; 237 } 238 239 240 bool 241 CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref) 242 { 243 BPath path(&ref); 244 245 BFile inFile, outFile; 246 inFile.SetTo(path.Path(), B_READ_ONLY); 247 if (inFile.InitCheck() != B_OK) { 248 STRACE("Can't open inFile %s\n", path.Path()); 249 return false; 250 } 251 outFile.SetTo(fTempPath.Path(), 252 B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE); 253 if (outFile.InitCheck() != B_OK) { 254 STRACE("Can't open outFile %s\n", fTempPath.Path()); 255 return false; 256 } 257 258 BTranslatorRoster* translatorRoster = BTranslatorRoster::Default(); 259 if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT') 260 != B_OK) 261 return false; 262 263 inFile.Unset(); 264 outFile.Unset(); 265 266 FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8"); 267 wchar_t* wPath = to_wchar(path.Path()); 268 if (wPath == NULL) 269 return false; 270 271 Document *document = new Document; 272 Field contentField(_T("contents"), fileReader, 273 Field::STORE_NO | Field::INDEX_TOKENIZED); 274 document->add(contentField); 275 Field pathField(_T("path"), wPath, 276 Field::STORE_YES | Field::INDEX_UNTOKENIZED); 277 document->add(pathField); 278 279 bool status = true; 280 for (int i = 0; i < kCluceneTries; i++) { 281 try { 282 fIndexWriter->addDocument(document); 283 STRACE("document added, retries: %i\n", i); 284 break; 285 } catch (CLuceneError &error) { 286 STRACE("CLuceneError addDocument %s\n", error.what()); 287 fIndexWriter->close(); 288 delete fIndexWriter; 289 fIndexWriter = _OpenIndexWriter(); 290 if (fIndexWriter == NULL) { 291 status = false; 292 break; 293 } 294 } 295 } 296 297 if (!status) 298 delete document; 299 delete wPath; 300 return status; 301 } 302