Coverage Report

Coverage Report - org.crosswire.jsword.index.lucene.LuceneIndex

Classes in this File

0/200

0/92

 /**
  * Distribution License:
  * JSword is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License, version 2.1 or later
  * as published by the Free Software Foundation. This program is distributed
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  * See the GNU Lesser General Public License for more details.
  *
  * The License is available on the internet at:
  *      http://www.gnu.org/copyleft/lgpl.html
  * or by writing to:
  *      Free Software Foundation, Inc.
  *      59 Temple Place - Suite 330
  *      Boston, MA 02111-1307, USA
  *
  * © CrossWire Bible Society, 2005 - 2016
  *
  */
 package org.crosswire.jsword.index.lucene;
 
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
 import org.crosswire.common.progress.JobManager;
 import org.crosswire.common.progress.Progress;
 import org.crosswire.common.util.FileUtil;
 import org.crosswire.common.util.IOUtil;
 import org.crosswire.common.util.NetUtil;
 import org.crosswire.common.util.Reporter;
 import org.crosswire.jsword.JSMsg;
 import org.crosswire.jsword.book.Book;
 import org.crosswire.jsword.book.BookData;
 import org.crosswire.jsword.book.BookException;
 import org.crosswire.jsword.book.FeatureType;
 import org.crosswire.jsword.book.OSISUtil;
 import org.crosswire.jsword.index.AbstractIndex;
 import org.crosswire.jsword.index.IndexPolicy;
 import org.crosswire.jsword.index.IndexStatus;
 import org.crosswire.jsword.index.lucene.analysis.LuceneAnalyzer;
 import org.crosswire.jsword.index.search.SearchModifier;
 import org.crosswire.jsword.passage.AbstractPassage;
 import org.crosswire.jsword.passage.Key;
 import org.crosswire.jsword.passage.NoSuchKeyException;
 import org.crosswire.jsword.passage.NoSuchVerseException;
 import org.crosswire.jsword.passage.PassageTally;
 import org.crosswire.jsword.passage.Verse;
 import org.crosswire.jsword.passage.VerseFactory;
 import org.crosswire.jsword.versification.Versification;
 import org.crosswire.jsword.versification.system.Versifications;
 import org.jdom2.Element;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
  * Implement the SearchEngine using Lucene as the search engine.
  * 
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
  * @author Joe Walker
  */
 public class LuceneIndex extends AbstractIndex implements Closeable {
     /*
      * The following fields are named the same as Sword in the hopes of sharing
      * indexes.
      */
     /**
      * The Lucene field holding the key of the indexed entry (its OSIS
      * reference). It is the only field whose value is stored in the index;
      * search results are converted back to keys from it.
      */
     public static final String FIELD_KEY = "key";

     /**
      * The Lucene field for the canonical text contents. It is also the
      * default field used when parsing a query.
      */
     public static final String FIELD_BODY = "content";

     /**
      * The Lucene field for the strong numbers
      */
     public static final String FIELD_STRONG = "strong";

     /**
      * The Lucene field for headings
      */
     public static final String FIELD_HEADING = "heading";

     /**
      * The Lucene field for cross references
      */
     public static final String FIELD_XREF = "xref";

     /**
      * The Lucene field for the notes
      */
     public static final String FIELD_NOTE = "note";

     /**
      * The Lucene field for morphology, combining the strong numbers with
      * the morphology codes.
      */
     public static final String FIELD_MORPHOLOGY = "morph";

     /**
      * The Lucene field for introductions: the canonical text of a verse 0
      * key is indexed here instead of in {@link #FIELD_BODY}.
      */
     public static final String FIELD_INTRO = "intro";

     /**
      * An estimate of the percent of time spent indexing.
      * The remaining time, if any, is spent doing cleanup.
      * Progress reported while indexing is scaled to this value.
      */
     private static final int WORK_ESTIMATE = 98;
 
     /**
      * Read an existing index and use it.
      * 
      * @param book the book
      * @param storage 
      * @throws BookException
      *             If we fail to read the index files
      */
     public LuceneIndex(Book book, URI storage) throws BookException {
         this.book = book;
 
         try {
             this.path = NetUtil.getAsFile(storage).getCanonicalPath();
         } catch (IOException ex) {
             // TRANSLATOR: Error condition: Could not initialize a search index.
             throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
         }
         initDirectoryAndSearcher();
     }
 
     /**
      * Generate an index to use, telling the job about progress as you go,
      * and then open it for searching.
      * 
      * <p>The index is written to a temporary location (the final path with
      * a '.' and {@code IndexStatus.CREATING} appended) and renamed to the
      * final location once writing has completed, unless the job reports
      * itself as finished by then. The temporary location is deleted both
      * before the build and afterwards. While building, the book's index
      * status is {@code IndexStatus.CREATING}; at the end it is set to
      * {@code IndexStatus.DONE} if the build ran through and the final path
      * exists, and to {@code IndexStatus.UNDONE} otherwise.</p>
      * 
      * @param book the book to index
      * @param storage the location at which the finished index is to reside
      * @param policy supplies the locking mode (serial or per book), the RAM
      *            buffer size of the writer and which optional fields are indexed
      * @throws BookException
      *             If the storage location cannot be resolved, an I/O error
      *             occurs while writing the index, or the finished index
      *             cannot be moved to its final location
      */
     public LuceneIndex(Book book, URI storage, IndexPolicy policy) throws BookException {

         this.book = book;
         File finalPath = null;
         try {
             finalPath = NetUtil.getAsFile(storage);
             this.path = finalPath.getCanonicalPath();
         } catch (IOException ex) {
             // TRANSLATOR: Error condition: Could not initialize a search index. Lucene is the name of the search technology being used.
             throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
         }

         // TRANSLATOR: Progress label indicating the start of indexing. {0} is a placeholder for the book's short name.
         String jobName = JSMsg.gettext("Creating index. Processing {0}", book.getInitials());
         Progress job = JobManager.createJob(String.format(Progress.CREATE_INDEX, book.getInitials()), jobName, Thread.currentThread());
         job.beginJob(jobName);

         // Stays UNDONE unless the build runs through and the final path exists.
         IndexStatus finalStatus = IndexStatus.UNDONE;

         // Keys whose content could not be read; reported to the user after the build.
         List<Key> errors = new ArrayList<Key>();
         // Build to another location and rename in the end.
         File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());

         // Ensure that the temp path is gone
         // It is not good for it to have been leftover from before.
         // NOTE(review): this cleanup runs before the lock below is taken - confirm
         // that no other thread can be building into the same temp path.
         if (tempPath.exists()) {
             FileUtil.delete(tempPath);
         }

         try {
             // When misconfigured, this can throw errors.
             Analyzer analyzer = new LuceneAnalyzer(book);

             // Lock on metadata to allow creation of multiple indexes, so long as they are on different books.
             // Otherwise lock on a single object to make this serial
             Object mutex = policy.isSerial() ? CREATING : book.getBookMetaData();
             synchronized (mutex) {

                 book.setIndexStatus(IndexStatus.CREATING);

                 IndexWriter writer = null;
                 try {
                     // Write the core index to disk.
                     final Directory destination = FSDirectory.open(new File(tempPath.getCanonicalPath()));
                     writer = new IndexWriter(destination, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
                     writer.setRAMBufferSizeMB(policy.getRAMBufferSize());

                     generateSearchIndexImpl(job, errors, writer, book.getGlobalKeyList(), 0, policy);

                 } finally {
                     // Close the writer even when indexing failed, before the
                     // temp directory is renamed or deleted.
                     if (writer != null) {
                         writer.close();
                     }
                 }

                 job.setCancelable(false);
                 // NOTE(review): presumably isFinished() is only true at this point
                 // when the job was cancelled - confirm against Progress.
                 if (!job.isFinished()) {
                     if (!tempPath.renameTo(finalPath)) {
                         // TRANSLATOR: The search index could not be moved to it's final location.
                         throw new BookException(JSMsg.gettext("Installation failed."));
                     }
                 }

                 if (finalPath.exists()) {
                     finalStatus = IndexStatus.DONE;
                 }

                 if (!errors.isEmpty()) {
                     // One key per line.
                     StringBuilder buf = new StringBuilder();
                     for (Key error : errors) {
                         buf.append(error);
                         buf.append('\n');
                     }
                     // TRANSLATOR: It is likely that one or more verses could not be indexed due to errors in those verses.
                     // This message gives a listing of them to the user.
                     Reporter.informUser(this, JSMsg.gettext("The following verses have errors and could not be indexed\n{0}", buf));
                 }
                 // Runs whether or not the rename happened; a failure to open is only logged.
                 initDirectoryAndSearcher();
             }
         } catch (IOException ex) {
             job.cancel();
             // TRANSLATOR: Common error condition: Some error happened while creating a search index.
             throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
         } finally {
             // Runs on success and on every failure path, outside the lock.
             book.setIndexStatus(finalStatus);
             job.done();
             // Ensure that the temp path is gone - errors can leave it there and cause further problems.
             if (tempPath.exists()) {
                 FileUtil.delete(tempPath);
             }
         }
     }
 
     /**
      * Initializes the directory and searcher.
      */
     private void initDirectoryAndSearcher() {
         try {
             directory = FSDirectory.open(new File(path));
             searcher = new IndexSearcher(directory, true);
         } catch (IOException ex) {
             log.warn("second load failure", ex);
         }
     }
 
     /* (non-Javadoc)
      * @see org.crosswire.jsword.index.Index#find(java.lang.String)
      */
     public Key find(String search) throws BookException {
         String v11nName = book.getBookMetaData().getProperty("Versification").toString();
         Versification v11n = Versifications.instance().getVersification(v11nName);
 
         SearchModifier modifier = getSearchModifier();
         Key results = null;
 
         if (search != null) {
             Throwable theCause = null;
             try {
                 Analyzer analyzer = new LuceneAnalyzer(book);
 
                 QueryParser parser = new QueryParser(Version.LUCENE_29, LuceneIndex.FIELD_BODY, analyzer);
                 parser.setAllowLeadingWildcard(true);
                 Query query = parser.parse(search);
                 log.info("ParsedQuery- {}", query.toString());
 
                 // For ranking we use a PassageTally
                 if (modifier != null && modifier.isRanked()) {
                     PassageTally tally = new PassageTally(v11n);
                     tally.raiseEventSuppresion();
                     tally.raiseNormalizeProtection();
                     results = tally;
 
                     TopScoreDocCollector collector = TopScoreDocCollector.create(modifier.getMaxResults(), false);
                     searcher.search(query, collector);
                     tally.setTotal(collector.getTotalHits());
                     ScoreDoc[] hits = collector.topDocs().scoreDocs;
                     for (int i = 0; i < hits.length; i++) {
                         int docId = hits[i].doc;
                         Document doc = searcher.doc(docId);
                         Key key = VerseFactory.fromString(v11n, doc.get(LuceneIndex.FIELD_KEY));
                         // PassageTally understands a score of 0 as the verse
                         // not participating
                         int score = (int) (hits[i].score * 100 + 1);
                         tally.add(key, score);
                     }
                     tally.lowerNormalizeProtection();
                     tally.lowerEventSuppressionAndTest();
                 } else {
                     results = book.createEmptyKeyList();
                     // If we have an abstract passage,
                     // make sure it does not try to fire change events.
                     AbstractPassage passage = null;
                     if (results instanceof AbstractPassage) {
                         passage = (AbstractPassage) results;
                         passage.raiseEventSuppresion();
                         passage.raiseNormalizeProtection();
                     }
                     searcher.search(query, new VerseCollector(v11n, searcher, results));
                     if (passage != null) {
                         passage.lowerNormalizeProtection();
                         passage.lowerEventSuppressionAndTest();
                     }
                 }
             } catch (IOException e) {
                 // The VerseCollector may throw IOExceptions that merely wrap a NoSuchVerseException
                 Throwable cause = e.getCause();
                 theCause = cause instanceof NoSuchVerseException ? cause : e;
             } catch (NoSuchVerseException e) {
                 theCause = e;
             } catch (ParseException e) {
                 theCause = e;
             }
 
             if (theCause != null) {
                 // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
                 throw new BookException(JSMsg.gettext("Search failed."), theCause);
             }
         }
 
         if (results == null) {
             if (modifier != null && modifier.isRanked()) {
                 results = new PassageTally(v11n);
             } else {
                 results = book.createEmptyKeyList();
             }
         }
         return results;
     }
 
     /* (non-Javadoc)
      * @see org.crosswire.jsword.index.Index#getKey(java.lang.String)
      */
     public Key getKey(String name) throws NoSuchKeyException {
         return book.getKey(name);
     }
 
     /* (non-Javadoc)
      * @see org.crosswire.jsword.index.Index#close()
      */
     public final void close() {
         IOUtil.close(searcher);
         searcher = null;
         IOUtil.close(directory);
         directory = null;
     }
 
     /**
      * Dig down into a Key indexing as we go.
      * @param policy 
      */
     private void generateSearchIndexImpl(Progress job, List<Key> errors, IndexWriter writer, Key key, int count, IndexPolicy policy) throws BookException, IOException {
         String v11nName = null;
         if (book.getBookMetaData().getProperty("Versification") != null) {
             v11nName = book.getBookMetaData().getProperty("Versification").toString();
         }
         Versification v11n = Versifications.instance().getVersification(v11nName);
         boolean includeStrongs = book.getBookMetaData().hasFeature(FeatureType.STRONGS_NUMBERS) && policy.isStrongsIndexed();
         boolean includeXrefs = book.getBookMetaData().hasFeature(FeatureType.SCRIPTURE_REFERENCES) && policy.isXrefIndexed();
         boolean includeNotes = book.getBookMetaData().hasFeature(FeatureType.FOOTNOTES) && policy.isNoteIndexed();
         boolean includeHeadings = book.getBookMetaData().hasFeature(FeatureType.HEADINGS) && policy.isTitleIndexed();
         boolean includeMorphology = book.getBookMetaData().hasFeature(FeatureType.MORPHOLOGY) && policy.isMorphIndexed();
 
         String oldRootName = "";
         int percent = 0;
         String rootName = "";
         BookData data = null;
         Element osis = null;
 
         // Set up for reuse.
         Document doc = new Document();
         Field keyField = new Field(FIELD_KEY, "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO);
         Field bodyField = new Field(FIELD_BODY, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
         Field introField = new Field(FIELD_INTRO, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
         Field strongField = new Field(FIELD_STRONG, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
         Field xrefField = new Field(FIELD_XREF, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
         Field noteField = new Field(FIELD_NOTE, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
         Field headingField = new Field(FIELD_HEADING, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
         Field morphologyField  = new Field(FIELD_MORPHOLOGY , "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
 
         int size = key.getCardinality();
         int subCount = count;
         log.debug("Number of keys = {}", Integer.toString(size));
         for (Key subkey : key) {
             // Bibles and verse based commentaries don't have keys with children.
             // However, tree keyed Books do. So we only index the leaf keys.
             // FIXME(DMS): Should not use recursion!!!!
             if (subkey.canHaveChildren()) {
                 generateSearchIndexImpl(job, errors, writer, subkey, subCount, policy);
                 continue;
             }
 
             data = new BookData(book, subkey);
             osis = null;
 
             try {
                 osis = data.getOsisFragment(false);
             } catch (BookException e) {
                 errors.add(subkey);
                 continue;
             }
 
             // Remove all fields from the document
             doc.getFields().clear();
 
             // Do the actual indexing
             // Always add the key
             keyField.setValue(subkey.getOsisRef());
             doc.add(keyField);
 
             if (subkey instanceof Verse && ((Verse) subkey).getVerse() == 0) {
                 addField(doc, introField, OSISUtil.getCanonicalText(osis));
             } else {
                 addField(doc, bodyField, OSISUtil.getCanonicalText(osis));
             }
 
             if (includeStrongs) {
                 addField(doc, strongField, OSISUtil.getStrongsNumbers(osis));
             }
 
             if (includeXrefs) {
                 // We pass book and key because the xref may not be valid and it needs to be reported.
                 addField(doc, xrefField, OSISUtil.getReferences(this.book, subkey, v11n, osis));
             }
 
             if (includeNotes) {
                 addField(doc, noteField, OSISUtil.getNotes(osis));
             }
 
             if (includeHeadings) {
                 String heading = OSISUtil.getHeadings(osis);
                 addField(doc, headingField, heading);
             }
 
             if (includeMorphology) {
                 addField(doc, morphologyField, OSISUtil.getMorphologiesWithStrong(osis));
             }
 
             // Add the document if we added more than just the key.
             if (doc.getFields().size() > 1) {
                 writer.addDocument(doc);
             }
 
             // report progress
             rootName = subkey.getRootName();
             if (!rootName.equals(oldRootName)) {
                 oldRootName = rootName;
                 // Note, this does not cause progress to be updated
                 // It will show up the next time progress is updated.
                 job.setSectionName(rootName);
             }
 
             subCount++;
             int oldPercent = percent;
             percent = WORK_ESTIMATE * subCount / size;
 
             // Only send out a max of 95 progress updates
             if (oldPercent != percent) {
                 job.setWork(percent);
             }
 
             // This could take a long time ...
             Thread.yield();
             if (Thread.currentThread().isInterrupted()) {
                 break;
             }
         }
     }
 
     /**
      * Add the text to the Field and put the Field in the document,
      * ignoring null and empty text.
      * 
      * @param doc The Document to which the Field should be added
      * @param field The Field to add
      * @param text The text for the field
      */
     private void addField(Document doc, Field field, String text) {
         if (text != null && text.length() > 0) {
             field.setValue(text);
             doc.add(field);
         }
     }
 
     /**
      * Could be null if the index has been closed down. This is helpful to third party applications which wish to have greater control over 
      * the underlying Lucene functionality.
      * 
      * Note: by using this method, you need to ensure you don't close the searcher while it is being used.
      * See {@link org.crosswire.jsword.index.IndexManager#closeAllIndexes()} for more information
      * @return the searcher
      */
     public Searcher getSearcher() {
         return searcher;
     }
 
     /**
      * The Book whose content is indexed and searched
      */
     private Book book;

     /**
      * The location of this index, as a canonical file path
      */
     private String path;

     /**
      * The Lucene directory for the path.
      * Null after close() and when the directory could not be opened.
      */
     private Directory directory;

     /**
      * The Lucene search engine, opened read-only on the directory.
      * Null after close() and when the index could not be opened.
      */
     private Searcher searcher;

     /**
      * A synchronization lock point to prevent us from doing 2 index runs at a
      * time. It is only used when the index policy asks for serial indexing;
      * otherwise the lock is taken on the book's metadata.
      */
     private static final Object CREATING = new Object();

     /**
      * The log stream
      */
     private static final Logger log = LoggerFactory.getLogger(LuceneIndex.class);
 }

1		/**
2		* Distribution License:
3		* JSword is free software; you can redistribute it and/or modify it under
4		* the terms of the GNU Lesser General Public License, version 2.1 or later
5		* as published by the Free Software Foundation. This program is distributed
6		* in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
7		* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
8		* See the GNU Lesser General Public License for more details.
9		*
10		* The License is available on the internet at:
11		* http://www.gnu.org/copyleft/lgpl.html
12		* or by writing to:
13		* Free Software Foundation, Inc.
14		* 59 Temple Place - Suite 330
15		* Boston, MA 02111-1307, USA
16		*
17		* © CrossWire Bible Society, 2005 - 2016
18		*
19		*/
20		package org.crosswire.jsword.index.lucene;
21
22		import java.io.Closeable;
23		import java.io.File;
24		import java.io.IOException;
25		import java.net.URI;
26		import java.util.ArrayList;
27		import java.util.List;
28
29		import org.apache.lucene.analysis.Analyzer;
30		import org.apache.lucene.document.Document;
31		import org.apache.lucene.document.Field;
32		import org.apache.lucene.index.IndexWriter;
33		import org.apache.lucene.queryParser.ParseException;
34		import org.apache.lucene.queryParser.QueryParser;
35		import org.apache.lucene.search.IndexSearcher;
36		import org.apache.lucene.search.Query;
37		import org.apache.lucene.search.ScoreDoc;
38		import org.apache.lucene.search.Searcher;
39		import org.apache.lucene.search.TopScoreDocCollector;
40		import org.apache.lucene.store.Directory;
41		import org.apache.lucene.store.FSDirectory;
42		import org.apache.lucene.util.Version;
43		import org.crosswire.common.progress.JobManager;
44		import org.crosswire.common.progress.Progress;
45		import org.crosswire.common.util.FileUtil;
46		import org.crosswire.common.util.IOUtil;
47		import org.crosswire.common.util.NetUtil;
48		import org.crosswire.common.util.Reporter;
49		import org.crosswire.jsword.JSMsg;
50		import org.crosswire.jsword.book.Book;
51		import org.crosswire.jsword.book.BookData;
52		import org.crosswire.jsword.book.BookException;
53		import org.crosswire.jsword.book.FeatureType;
54		import org.crosswire.jsword.book.OSISUtil;
55		import org.crosswire.jsword.index.AbstractIndex;
56		import org.crosswire.jsword.index.IndexPolicy;
57		import org.crosswire.jsword.index.IndexStatus;
58		import org.crosswire.jsword.index.lucene.analysis.LuceneAnalyzer;
59		import org.crosswire.jsword.index.search.SearchModifier;
60		import org.crosswire.jsword.passage.AbstractPassage;
61		import org.crosswire.jsword.passage.Key;
62		import org.crosswire.jsword.passage.NoSuchKeyException;
63		import org.crosswire.jsword.passage.NoSuchVerseException;
64		import org.crosswire.jsword.passage.PassageTally;
65		import org.crosswire.jsword.passage.Verse;
66		import org.crosswire.jsword.passage.VerseFactory;
67		import org.crosswire.jsword.versification.Versification;
68		import org.crosswire.jsword.versification.system.Versifications;
69		import org.jdom2.Element;
70		import org.slf4j.Logger;
71		import org.slf4j.LoggerFactory;
72
73		/**
74		* Implement the SearchEngine using Lucene as the search engine.
75		*
76		* @see gnu.lgpl.License The GNU Lesser General Public License for details.
77		* @author Joe Walker
78		*/
79		public class LuceneIndex extends AbstractIndex implements Closeable {
80		/*
81		* The following fields are named the same as Sword in the hopes of sharing
82		* indexes.
83		*/
84		/**
85		* The Lucene field for the osisID
86		*/
87		public static final String FIELD_KEY = "key";
88
89		/**
90		* The Lucene field for the text contents
91		*/
92		public static final String FIELD_BODY = "content";
93
94		/**
95		* The Lucene field for the strong numbers
96		*/
97		public static final String FIELD_STRONG = "strong";
98
99		/**
100		* The Lucene field for headings
101		*/
102		public static final String FIELD_HEADING = "heading";
103
104		/**
105		* The Lucene field for cross references
106		*/
107		public static final String FIELD_XREF = "xref";
108
109		/**
110		* The Lucene field for the notes
111		*/
112		public static final String FIELD_NOTE = "note";
113
114		/**
115		* Combines the strong numbers with the morphology field
116		*/
117		public static final String FIELD_MORPHOLOGY = "morph";
118
119		/**
120		* Combines the strong numbers with the morphology field
121		*/
122		public static final String FIELD_INTRO = "intro";
123
124		/**
125		* An estimate of the percent of time spent indexing.
126		* The remaining time, if any, is spent doing cleanup.
127		*/
128		private static final int WORK_ESTIMATE = 98;
129
130		/**
131		* Read an existing index and use it.
132		*
133		* @param book the book
134		* @param storage
135		* @throws BookException
136		* If we fail to read the index files
137		*/
138	0	public LuceneIndex(Book book, URI storage) throws BookException {
139	0	this.book = book;
140
141		try {
142	0	this.path = NetUtil.getAsFile(storage).getCanonicalPath();
143	0	} catch (IOException ex) {
144		// TRANSLATOR: Error condition: Could not initialize a search index.
145	0	throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
146	0	}
147	0	initDirectoryAndSearcher();
148	0	}
149
150		/**
151		* Generate an index to use, telling the job about progress as you go.
152		*
153		* @param book the book
154		* @param storage
155		* @param policy
156		* @throws BookException
157		* If we fail to read the index files
158		*/
159	0	public LuceneIndex(Book book, URI storage, IndexPolicy policy) throws BookException {
160
161	0	this.book = book;
162	0	File finalPath = null;
163		try {
164	0	finalPath = NetUtil.getAsFile(storage);
165	0	this.path = finalPath.getCanonicalPath();
166	0	} catch (IOException ex) {
167		// TRANSLATOR: Error condition: Could not initialize a search index. Lucene is the name of the search technology being used.
168	0	throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
169	0	}
170
171		// TRANSLATOR: Progress label indicating the start of indexing. {0} is a placeholder for the book's short name.
172	0	String jobName = JSMsg.gettext("Creating index. Processing {0}", book.getInitials());
173	0	Progress job = JobManager.createJob(String.format(Progress.CREATE_INDEX, book.getInitials()), jobName, Thread.currentThread());
174	0	job.beginJob(jobName);
175
176	0	IndexStatus finalStatus = IndexStatus.UNDONE;
177
178	0	List<Key> errors = new ArrayList<Key>();
179		// Build to another location and rename in the end.
180	0	File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());
181
182		// Ensure that the temp path is gone
183		// It is not good for it to have been leftover from before.
184	0	if (tempPath.exists()) {
185	0	FileUtil.delete(tempPath);
186		}
187
188		try {
189		// When misconfigured, this can throw errors.
190	0	Analyzer analyzer = new LuceneAnalyzer(book);
191
192		// Lock on metadata to allow creation of multiple indexes, so long as they are on different books.
193		// Otherwise lock on a single object to make this serial
194	0	Object mutex = policy.isSerial() ? CREATING : book.getBookMetaData();
195	0	synchronized (mutex) {
196
197	0	book.setIndexStatus(IndexStatus.CREATING);
198
199	0	IndexWriter writer = null;
200		try {
201		// Write the core index to disk.
202	0	final Directory destination = FSDirectory.open(new File(tempPath.getCanonicalPath()));
203	0	writer = new IndexWriter(destination, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
204	0	writer.setRAMBufferSizeMB(policy.getRAMBufferSize());
205
206	0	generateSearchIndexImpl(job, errors, writer, book.getGlobalKeyList(), 0, policy);
207
208		} finally {
209	0	if (writer != null) {
210	0	writer.close();
211		}
212		}
213
214	0	job.setCancelable(false);
215	0	if (!job.isFinished()) {
216	0	if (!tempPath.renameTo(finalPath)) {
217		// TRANSLATOR: The search index could not be moved to it's final location.
218	0	throw new BookException(JSMsg.gettext("Installation failed."));
219		}
220		}
221
222	0	if (finalPath.exists()) {
223	0	finalStatus = IndexStatus.DONE;
224		}
225
226	0	if (!errors.isEmpty()) {
227	0	StringBuilder buf = new StringBuilder();
228	0	for (Key error : errors) {
229	0	buf.append(error);
230	0	buf.append('\n');
231		}
232		// TRANSLATOR: It is likely that one or more verses could not be indexed due to errors in those verses.
233		// This message gives a listing of them to the user.
234	0	Reporter.informUser(this, JSMsg.gettext("The following verses have errors and could not be indexed\n{0}", buf));
235		}
236	0	initDirectoryAndSearcher();
237	0	}
238	0	} catch (IOException ex) {
239	0	job.cancel();
240		// TRANSLATOR: Common error condition: Some error happened while creating a search index.
241	0	throw new BookException(JSMsg.gettext("Failed to initialize Lucene search engine."), ex);
242		} finally {
243	0	book.setIndexStatus(finalStatus);
244	0	job.done();
245		// Ensure that the temp path is gone - errors can leave it there and cause further problems.
246	0	if (tempPath.exists()) {
247	0	FileUtil.delete(tempPath);
248		}
249		}
250	0	}
251
252		/**
253		* Initializes the directory and searcher.
254		*/
255		private void initDirectoryAndSearcher() {
256		try {
257	0	directory = FSDirectory.open(new File(path));
258	0	searcher = new IndexSearcher(directory, true);
259	0	} catch (IOException ex) {
260	0	log.warn("second load failure", ex);
261	0	}
262	0	}
263
264		/* (non-Javadoc)
265		* @see org.crosswire.jsword.index.Index#find(java.lang.String)
266		*/
267		public Key find(String search) throws BookException {
268	0	String v11nName = book.getBookMetaData().getProperty("Versification").toString();
269	0	Versification v11n = Versifications.instance().getVersification(v11nName);
270
271	0	SearchModifier modifier = getSearchModifier();
272	0	Key results = null;
273
274	0	if (search != null) {
275	0	Throwable theCause = null;
276		try {
277	0	Analyzer analyzer = new LuceneAnalyzer(book);
278
279	0	QueryParser parser = new QueryParser(Version.LUCENE_29, LuceneIndex.FIELD_BODY, analyzer);
280	0	parser.setAllowLeadingWildcard(true);
281	0	Query query = parser.parse(search);
282	0	log.info("ParsedQuery- {}", query.toString());
283
284		// For ranking we use a PassageTally
285	0	if (modifier != null && modifier.isRanked()) {
286	0	PassageTally tally = new PassageTally(v11n);
287	0	tally.raiseEventSuppresion();
288	0	tally.raiseNormalizeProtection();
289	0	results = tally;
290
291	0	TopScoreDocCollector collector = TopScoreDocCollector.create(modifier.getMaxResults(), false);
292	0	searcher.search(query, collector);
293	0	tally.setTotal(collector.getTotalHits());
294	0	ScoreDoc[] hits = collector.topDocs().scoreDocs;
295	0	for (int i = 0; i < hits.length; i++) {
296	0	int docId = hits[i].doc;
297	0	Document doc = searcher.doc(docId);
298	0	Key key = VerseFactory.fromString(v11n, doc.get(LuceneIndex.FIELD_KEY));
299		// PassageTally understands a score of 0 as the verse
300		// not participating
301	0	int score = (int) (hits[i].score * 100 + 1);
302	0	tally.add(key, score);
303		}
304	0	tally.lowerNormalizeProtection();
305	0	tally.lowerEventSuppressionAndTest();
306	0	} else {
307	0	results = book.createEmptyKeyList();
308		// If we have an abstract passage,
309		// make sure it does not try to fire change events.
310	0	AbstractPassage passage = null;
311	0	if (results instanceof AbstractPassage) {
312	0	passage = (AbstractPassage) results;
313	0	passage.raiseEventSuppresion();
314	0	passage.raiseNormalizeProtection();
315		}
316	0	searcher.search(query, new VerseCollector(v11n, searcher, results));
317	0	if (passage != null) {
318	0	passage.lowerNormalizeProtection();
319	0	passage.lowerEventSuppressionAndTest();
320		}
321		}
322	0	} catch (IOException e) {
323		// The VerseCollector may throw IOExceptions that merely wrap a NoSuchVerseException
324	0	Throwable cause = e.getCause();
325	0	theCause = cause instanceof NoSuchVerseException ? cause : e;
326	0	} catch (NoSuchVerseException e) {
327	0	theCause = e;
328	0	} catch (ParseException e) {
329	0	theCause = e;
330	0	}
331
332	0	if (theCause != null) {
333		// TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
334	0	throw new BookException(JSMsg.gettext("Search failed."), theCause);
335		}
336		}
337
338	0	if (results == null) {
339	0	if (modifier != null && modifier.isRanked()) {
340	0	results = new PassageTally(v11n);
341		} else {
342	0	results = book.createEmptyKeyList();
343		}
344		}
345	0	return results;
346		}
347
348		/* (non-Javadoc)
349		* @see org.crosswire.jsword.index.Index#getKey(java.lang.String)
350		*/
351		public Key getKey(String name) throws NoSuchKeyException {
352	0	return book.getKey(name);
353		}
354
355		/* (non-Javadoc)
356		* @see org.crosswire.jsword.index.Index#close()
357		*/
358		public final void close() {
359	0	IOUtil.close(searcher);
360	0	searcher = null;
361	0	IOUtil.close(directory);
362	0	directory = null;
363	0	}
364
365		/**
366		* Dig down into a Key indexing as we go.
367		* @param policy
368		*/
369		private void generateSearchIndexImpl(Progress job, List<Key> errors, IndexWriter writer, Key key, int count, IndexPolicy policy) throws BookException, IOException {
370	0	String v11nName = null;
371	0	if (book.getBookMetaData().getProperty("Versification") != null) {
372	0	v11nName = book.getBookMetaData().getProperty("Versification").toString();
373		}
374	0	Versification v11n = Versifications.instance().getVersification(v11nName);
375	0	boolean includeStrongs = book.getBookMetaData().hasFeature(FeatureType.STRONGS_NUMBERS) && policy.isStrongsIndexed();
376	0	boolean includeXrefs = book.getBookMetaData().hasFeature(FeatureType.SCRIPTURE_REFERENCES) && policy.isXrefIndexed();
377	0	boolean includeNotes = book.getBookMetaData().hasFeature(FeatureType.FOOTNOTES) && policy.isNoteIndexed();
378	0	boolean includeHeadings = book.getBookMetaData().hasFeature(FeatureType.HEADINGS) && policy.isTitleIndexed();
379	0	boolean includeMorphology = book.getBookMetaData().hasFeature(FeatureType.MORPHOLOGY) && policy.isMorphIndexed();
380
381	0	String oldRootName = "";
382	0	int percent = 0;
383	0	String rootName = "";
384	0	BookData data = null;
385	0	Element osis = null;
386
387		// Set up for reuse.
388	0	Document doc = new Document();
389	0	Field keyField = new Field(FIELD_KEY, "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO);
390	0	Field bodyField = new Field(FIELD_BODY, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
391	0	Field introField = new Field(FIELD_INTRO, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
392	0	Field strongField = new Field(FIELD_STRONG, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
393	0	Field xrefField = new Field(FIELD_XREF, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
394	0	Field noteField = new Field(FIELD_NOTE, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
395	0	Field headingField = new Field(FIELD_HEADING, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
396	0	Field morphologyField = new Field(FIELD_MORPHOLOGY , "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
397
398	0	int size = key.getCardinality();
399	0	int subCount = count;
400	0	log.debug("Number of keys = {}", Integer.toString(size));
401	0	for (Key subkey : key) {
402		// Bibles and verse based commentaries don't have keys with children.
403		// However, tree keyed Books do. So we only index the leaf keys.
404		// FIXME(DMS): Should not use recursion!!!!
405	0	if (subkey.canHaveChildren()) {
406	0	generateSearchIndexImpl(job, errors, writer, subkey, subCount, policy);
407	0	continue;
408		}
409
410	0	data = new BookData(book, subkey);
411	0	osis = null;
412
413		try {
414	0	osis = data.getOsisFragment(false);
415	0	} catch (BookException e) {
416	0	errors.add(subkey);
417	0	continue;
418	0	}
419
420		// Remove all fields from the document
421	0	doc.getFields().clear();
422
423		// Do the actual indexing
424		// Always add the key
425	0	keyField.setValue(subkey.getOsisRef());
426	0	doc.add(keyField);
427
428	0	if (subkey instanceof Verse && ((Verse) subkey).getVerse() == 0) {
429	0	addField(doc, introField, OSISUtil.getCanonicalText(osis));
430		} else {
431	0	addField(doc, bodyField, OSISUtil.getCanonicalText(osis));
432		}
433
434	0	if (includeStrongs) {
435	0	addField(doc, strongField, OSISUtil.getStrongsNumbers(osis));
436		}
437
438	0	if (includeXrefs) {
439		// We pass book and key because the xref may not be valid and it needs to be reported.
440	0	addField(doc, xrefField, OSISUtil.getReferences(this.book, subkey, v11n, osis));
441		}
442
443	0	if (includeNotes) {
444	0	addField(doc, noteField, OSISUtil.getNotes(osis));
445		}
446
447	0	if (includeHeadings) {
448	0	String heading = OSISUtil.getHeadings(osis);
449	0	addField(doc, headingField, heading);
450		}
451
452	0	if (includeMorphology) {
453	0	addField(doc, morphologyField, OSISUtil.getMorphologiesWithStrong(osis));
454		}
455
456		// Add the document if we added more than just the key.
457	0	if (doc.getFields().size() > 1) {
458	0	writer.addDocument(doc);
459		}
460
461		// report progress
462	0	rootName = subkey.getRootName();
463	0	if (!rootName.equals(oldRootName)) {
464	0	oldRootName = rootName;
465		// Note, this does not cause progress to be updated
466		// It will show up the next time progress is updated.
467	0	job.setSectionName(rootName);
468		}
469
470	0	subCount++;
471	0	int oldPercent = percent;
472	0	percent = WORK_ESTIMATE * subCount / size;
473
474		// Only send out a max of 95 progress updates
475	0	if (oldPercent != percent) {
476	0	job.setWork(percent);
477		}
478
479		// This could take a long time ...
480	0	Thread.yield();
481	0	if (Thread.currentThread().isInterrupted()) {
482	0	break;
483		}
484	0	}
485	0	}
486
487		/**
488		* Add the text to the Field and put the Field in the document,
489		* ignoring null and empty text.
490		*
491		* @param doc The Document to which the Field should be added
492		* @param field The Field to add
493		* @param text The text for the field
494		*/
495		private void addField(Document doc, Field field, String text) {
496	0	if (text != null && text.length() > 0) {
497	0	field.setValue(text);
498	0	doc.add(field);
499		}
500	0	}
501
502		/**
503		* Could be null if the index has been closed down. This is helpful to third party applications which wish to have greater control over
504		* the underlying Lucene functionality.
505		*
506		* Note: by using this method, you need to ensure you don't close the searcher while it is being used.
507		* See {@link org.crosswire.jsword.index.IndexManager#closeAllIndexes()} for more information
508		* @return the searcher
509		*/
510		public Searcher getSearcher() {
511	0	return searcher;
512		}
513
514		/**
515		* The Book that we are indexing
516		*/
517		private Book book;
518
519		/**
520		* The location of this index
521		*/
522		private String path;
523
524		/**
525		* The Lucene directory for the path.
526		*/
527		private Directory directory;
528
529		/**
530		* The Lucene search engine
531		*/
532		private Searcher searcher;
533
534		/**
535		* A synchronization lock point to prevent us from doing 2 index runs at a
536		* time.
537		*/
538	0	private static final Object CREATING = new Object();
539
540		/**
541		* The log stream
542		*/
543	0	private static final Logger log = LoggerFactory.getLogger(LuceneIndex.class);
544		}