Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
GreekLuceneAnalyzer |
|
| 2.6666666666666665;2.667 |
1 | /** | |
2 | * Distribution License: | |
3 | * JSword is free software; you can redistribute it and/or modify it under | |
4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
5 | * as published by the Free Software Foundation. This program is distributed | |
6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
8 | * See the GNU Lesser General Public License for more details. | |
9 | * | |
10 | * The License is available on the internet at: | |
11 | * http://www.gnu.org/copyleft/lgpl.html | |
12 | * or by writing to: | |
13 | * Free Software Foundation, Inc. | |
14 | * 59 Temple Place - Suite 330 | |
15 | * Boston, MA 02111-1307, USA | |
16 | * | |
17 | * © CrossWire Bible Society, 2007 - 2016 | |
18 | * | |
19 | */ | |
20 | package org.crosswire.jsword.index.lucene.analysis; | |
21 | ||
22 | import java.io.IOException; | |
23 | import java.io.Reader; | |
24 | ||
25 | import org.apache.lucene.analysis.StopFilter; | |
26 | import org.apache.lucene.analysis.TokenStream; | |
27 | import org.apache.lucene.analysis.el.GreekAnalyzer; | |
28 | import org.apache.lucene.analysis.el.GreekLowerCaseFilter; | |
29 | import org.apache.lucene.analysis.standard.StandardTokenizer; | |
30 | import org.apache.lucene.util.Version; | |
31 | ||
32 | /** | |
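33 | * Analyzer for Greek text: tokenizes with StandardTokenizer, lowercases with | |
34 | * GreekLowerCaseFilter and optionally removes the default stop words of | |
34 | * org.apache.lucene.analysis.el.GreekAnalyzer (stop words are off by default). | |
34 | * Stemming is not implemented yet. | |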
35 | * | |
36 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
37 | * @author Sijo Cherian | |
38 | */ | |
39 | public class GreekLuceneAnalyzer extends AbstractBookAnalyzer { | |
40 | 0 | public GreekLuceneAnalyzer() { |
41 | 0 | stopSet = GreekAnalyzer.getDefaultStopSet(); |
42 | 0 | } |
43 | ||
44 | /** | |
45 | * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. | |
46 | * | |
47 | * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with | |
48 | * {@link GreekLowerCaseFilter} and {@link StopFilter} | |
49 | */ | |
50 | @Override | |
51 | public TokenStream tokenStream(String fieldName, Reader reader) { | |
52 | 0 | TokenStream result = new StandardTokenizer(matchVersion, reader); |
53 | 0 | result = new GreekLowerCaseFilter(result); |
54 | 0 | if (doStopWords && stopSet != null) { |
55 | 0 | result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); |
56 | } | |
57 | 0 | return result; |
58 | } | |
59 | ||
60 | /** | |
61 | * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text | |
62 | * in the provided {@link Reader}. | |
63 | * | |
64 | * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with | |
65 | * {@link GreekLowerCaseFilter} and {@link StopFilter} | |
66 | */ | |
67 | @Override | |
68 | public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { | |
69 | 0 | SavedStreams streams = (SavedStreams) getPreviousTokenStream(); |
70 | 0 | if (streams == null) { |
71 | 0 | streams = new SavedStreams(new StandardTokenizer(matchVersion, reader)); |
72 | 0 | streams.setResult(new GreekLowerCaseFilter(streams.getResult())); |
73 | 0 | if (doStopWords && stopSet != null) { |
74 | 0 | streams.setResult(new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), streams.getResult(), stopSet)); |
75 | } | |
76 | 0 | setPreviousTokenStream(streams); |
77 | } else { | |
78 | 0 | streams.getSource().reset(reader); |
79 | } | |
80 | 0 | return streams.getResult(); |
81 | } | |
82 | ||
83 | 0 | private final Version matchVersion = Version.LUCENE_29; |
84 | } |