| /** |
| * Example usage of the configurator pattern. Lucene is not a database; it is an index store. |
| * The data lives in a database or elsewhere (for example the file system), but it does not live in Lucene. |
| * Lucene is Business Intelligence, and monkey work is jumping around like an idiot. |
| * Mouse Business is serious stuff.. ;-) |
| **/ |
| package org.nanotek.lucene.tika; |
| import org.nanotek.Kong; |
| import org.apache.tika.Tika; |
| import org.nanotek.util.Configurator; |
| import org.apache.tika.io.TikaInputStream; |
| import org.apache.lucene.document.Document; |
| import org.nanotek.lucene.LuceneDocumentBuilder; |
| import org.nanotek.lucene.TikaDocumentTransformer; |
| public abstract class TikaBaseDocumentBuilder<S extends TikaInputStream, P extends Kong<String>> implements LuceneDocumentBuilder<S> , TikaDocumentTransformer<S,P,Document>{ |
| private Tika tika; |
| private String textFieldName = "TextData"; |
| protected Configurator<S,P,Document> configurator; |
| public TikaBaseDocumentBuilder() { |
| initTika(); |
| } |
| @Override |
| public Document buildDocument(S source) { |
| return transform(source); |
| } |
| private void initTika() { |
| tika = new Tika(); |
| } |
| public Tika getTika() { |
| return tika; |
| } |
| public void setTika(Tika tika) { |
| this.tika = tika; |
| } |
| public String getTextFieldName() { |
| return textFieldName; |
| } |
| public void setTextFieldName(String textFieldName) { |
| this.textFieldName = textFieldName; |
| } |
| @Override |
| public abstract Document transform(S source,P parameters); |
| // { |
| // Document document = null; |
| // String txtExtracted; |
| // if (tika == null) |
| // initTika(); |
| // try { |
| // txtExtracted = tika.parseToString(source); |
| // document = new Document(); |
| // Field fileLocationField = new TextField("file_location", source.toString() , Field.Store.YES); //$NON-NLS-1$ //$NON-NLS-2$ |
| // document.add(fileLocationField); |
| // Field wordField = new TextField(textFieldName, txtExtracted, Field.Store.YES); //$NON-NLS-1$ //$NON-NLS-2$ |
| // document.add(wordField); |
| // Field modifiedField = new LongField("modified" , new Date().getTime() , Field.Store.YES); //$NON-NLS-1$ |
| // document.add(modifiedField); |
| // } catch (IOException | TikaException e) { |
| // throw new TransformerException(e); |
| // } |
| // return document; |
| // } |
| } |
Tuesday, October 1, 2013
A Generic Document Builder for Tika and Lucene -> Yankees to offer Girardi a new deal
Location:
Fawn Creek, KS, EUA
