/*
 * Copyright 2003-2004 Michael Franken, Zilverline.
 *
 * The contents of this file, or the files included with this file, are subject to
 * the current version of ZILVERLINE Collaborative Source License for the
 * Zilverline Search Engine (the "License"); You may not use this file except in
 * compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *     http://www.zilverline.org.
 *
 * See the License for the rights, obligations and
 * limitations governing use of the contents of the file.
 *
 * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
 * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
 * copyrights in the portions it created. All Rights Reserved.
 *
 */
package org.zilverline.extractors;

import java.io.File;

/**
 * Common contract for the family of extractors. An extractor pulls all
 * relevant information out of a {@link File} and hands it back as a
 * {@link ParsedFileInfo} object.
 *
 * <p>
 * Zilverline plugs extractors in through mappings from file extension to
 * implementing class. Such a plugin is a Java class that implements this
 * interface and that must be available on the classpath. For example, when
 * the mapping "pdf =&gt; org.zilverline.extractors.PDFExtractor" is
 * specified, org.zilverline.extractors.PDFExtractor has to be available;
 * otherwise an Exception will be raised and handled by zilverline.
 * </p>
 *
 * <p>
 * Right now the TEXT, HTML, WORD and PDF extractors can be used, each mapped
 * to the extensions of your choice. Wildcards are not supported, but several
 * extensions may be mapped to one Extractor. By default extensions are
 * treated case insensitively, but that can be changed. Note that you can use
 * an empty extension as well.
 * </p>
 *
 * @author Michael Franken
 * @version $Revision: 1.7 $
 *
 * @see org.zilverline.extractors.ParsedFileInfo
 */
public interface Extractor {

    /**
     * Extracts all relevant info from the given file and returns it as a
     * ParsedFileInfo object.
     *
     * @param f the File to extract content from
     *
     * @return the ParsedFileInfo object containing the relevant info of the
     *         provided file
     */
    ParsedFileInfo extractInfo(File f);
}
/*
 * Copyright 2003-2004 Michael Franken, Zilverline.
 *
 * The contents of this file, or the files included with this file, are subject to
 * the current version of ZILVERLINE Collaborative Source License for the
 * Zilverline Search Engine (the "License"); You may not use this file except in
 * compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *     http://www.zilverline.org.
 *
 * See the License for the rights, obligations and
 * limitations governing use of the contents of the file.
 *
 * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
 * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
 * copyrights in the portions it created. All Rights Reserved.
 *
 */
package org.zilverline.extractors;

import java.io.File;
import java.io.Reader;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Java bean for the Result of a Parsed File. Used in {@link Extractor}.
 *
 * <p>
 * All String properties start out as the empty string, the numeric
 * properties start out as 0, and the file and reader start out as
 * <code>null</code>. The setters store whatever they are given without
 * validation.
 * </p>
 *
 * @author Michael Franken
 * @version $Revision: 1.7 $
 */
public class ParsedFileInfo {
    /** logger for Commons logging. */
    private static final Log log = LogFactory.getLog(ParsedFileInfo.class);

    /** The file this info describes; <code>null</code> until set. */
    private File file;

    /** Reader associated with this info; <code>null</code> until set. */
    private Reader reader;

    private String author = "";

    private String title = "";

    private String summary = "";

    private long creationDate;

    private long modificationDate;

    private String type = "";

    private long size;

    /**
     * Returns the author.
     *
     * @return the author, the empty string if never set
     */
    public String getAuthor() {
        return author;
    }

    /**
     * Returns the creation date.
     *
     * @return the creation date as stored by {@link #setCreationDate(long)},
     *         0 if never set
     */
    public long getCreationDate() {
        return creationDate;
    }

    /**
     * Returns the modification date.
     *
     * @return the modification date as stored by
     *         {@link #setModificationDate(long)}, 0 if never set
     */
    public long getModificationDate() {
        return modificationDate;
    }

    /**
     * Returns the reader.
     *
     * @return the reader, <code>null</code> if never set
     */
    public Reader getReader() {
        return reader;
    }

    /**
     * Returns the summary.
     *
     * @return the summary, the empty string if never set
     */
    public String getSummary() {
        return summary;
    }

    /**
     * Returns the type.
     *
     * @return the type, the empty string if never set
     */
    public String getType() {
        return type;
    }

    /**
     * Sets the author.
     *
     * @param string the author
     */
    public void setAuthor(String string) {
        author = string;
    }

    /**
     * Sets the creation date.
     *
     * @param theDate the creation date
     */
    public void setCreationDate(long theDate) {
        creationDate = theDate;
    }

    /**
     * Sets the modification date.
     *
     * @param theDate the modification date
     */
    public void setModificationDate(long theDate) {
        modificationDate = theDate;
    }

    /**
     * Sets the reader.
     *
     * @param thisReader the reader
     */
    public void setReader(Reader thisReader) {
        this.reader = thisReader;
    }

    /**
     * Sets the summary.
     *
     * @param string the summary
     */
    public void setSummary(String string) {
        summary = string;
    }

    /**
     * Sets the type.
     *
     * @param string the type
     */
    public void setType(String string) {
        type = string;
    }

    /**
     * Returns the file.
     *
     * @return the file, <code>null</code> if never set
     */
    public File getFile() {
        return file;
    }

    /**
     * Returns the size.
     *
     * @return the size as stored by {@link #setSize(long)}, 0 if never set
     */
    public long getSize() {
        return size;
    }

    /**
     * Returns the title.
     *
     * @return the title, the empty string if never set
     */
    public String getTitle() {
        return title;
    }

    /**
     * Sets the file.
     *
     * @param thisFile the file
     */
    public void setFile(File thisFile) {
        this.file = thisFile;
    }

    /**
     * Sets the size.
     *
     * @param l the size
     */
    public void setSize(long l) {
        size = l;
    }

    /**
     * Sets the title.
     *
     * @param string the title
     */
    public void setTitle(String string) {
        title = string;
    }

    /**
     * Renders all properties except the reader as a single comma separated
     * line, starting with the name of the file.
     *
     * <p>
     * Safe to call on a bean whose file has not been set: the file name is
     * then rendered as <code>null</code>, the same way unset String
     * properties are rendered.
     * </p>
     *
     * @return string representation of this info
     */
    public String toString() {
        // The file is optional; guard against it so logging a half-filled
        // bean does not fail with a NullPointerException.
        String fileName = (file == null) ? null : file.getName();

        StringBuffer info = new StringBuffer();
        info.append("File: ").append(fileName);
        info.append(", type: ").append(type);
        info.append(", title: ").append(title);
        info.append(", author: ").append(author);
        info.append(", creationDate: ").append(creationDate);
        info.append(", modificationDate: ").append(modificationDate);
        info.append(", size: ").append(size);
        info.append(", summary: ").append(summary);

        return info.toString();
    }
}
| Extractors | snipsnap-search | mfranken |
Made with

AgileHolland member.