/**
 * Example usage of the configurator pattern. Note that Lucene is not a database; it is an index store.
 * The data itself may or may not live in a database (it can just as well live on the file system), but it does not live in Lucene.
 * Lucene is Business Intelligence, and monkey work is jumping around like an idiot.
 * Mouse Business is serious stuff.. ;-)
 **/
package org.nanotek.lucene.tika; |
import org.nanotek.Kong; |
import org.apache.tika.Tika; |
import org.nanotek.util.Configurator; |
import org.apache.tika.io.TikaInputStream; |
import org.apache.lucene.document.Document; |
import org.nanotek.lucene.LuceneDocumentBuilder; |
import org.nanotek.lucene.TikaDocumentTransformer; |
public abstract class TikaBaseDocumentBuilder<S extends TikaInputStream, P extends Kong<String>> implements LuceneDocumentBuilder<S> , TikaDocumentTransformer<S,P,Document>{ |
private Tika tika; |
private String textFieldName = "TextData"; |
protected Configurator<S,P,Document> configurator; |
public TikaBaseDocumentBuilder() { |
initTika(); |
} |
@Override |
public Document buildDocument(S source) { |
return transform(source); |
} |
private void initTika() { |
tika = new Tika(); |
} |
public Tika getTika() { |
return tika; |
} |
public void setTika(Tika tika) { |
this.tika = tika; |
} |
public String getTextFieldName() { |
return textFieldName; |
} |
public void setTextFieldName(String textFieldName) { |
this.textFieldName = textFieldName; |
} |
@Override |
public abstract Document transform(S source,P parameters); |
// { |
// Document document = null; |
// String txtExtracted; |
// if (tika == null) |
// initTika(); |
// try { |
// txtExtracted = tika.parseToString(source); |
// document = new Document(); |
// Field fileLocationField = new TextField("file_location", source.toString() , Field.Store.YES); //$NON-NLS-1$ //$NON-NLS-2$ |
// document.add(fileLocationField); |
// Field wordField = new TextField(textFieldName, txtExtracted, Field.Store.YES); //$NON-NLS-1$ //$NON-NLS-2$ |
// document.add(wordField); |
// Field modifiedField = new LongField("modified" , new Date().getTime() , Field.Store.YES); //$NON-NLS-1$ |
// document.add(modifiedField); |
// } catch (IOException | TikaException e) { |
// throw new TransformerException(e); |
// } |
// return document; |
// } |
} |
Tuesday, October 1, 2013
A Document Builder Generic for Tika and Lucene -> Yankees to offer Girardi a new deal
Location:
Fawn Creek, KS, EUA