Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
GBFFilter |
|
| 8.0;8 |
1 | /** | |
2 | * Distribution License: | |
3 | * JSword is free software; you can redistribute it and/or modify it under | |
4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
5 | * as published by the Free Software Foundation. This program is distributed | |
6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
8 | * See the GNU Lesser General Public License for more details. | |
9 | * | |
10 | * The License is available on the internet at: | |
11 | * http://www.gnu.org/copyleft/lgpl.html | |
12 | * or by writing to: | |
13 | * Free Software Foundation, Inc. | |
14 | * 59 Temple Place - Suite 330 | |
15 | * Boston, MA 02111-1307, USA | |
16 | * | |
17 | * © CrossWire Bible Society, 2005 - 2016 | |
18 | * | |
19 | */ | |
20 | package org.crosswire.jsword.book.filter.gbf; | |
21 | ||
22 | import java.util.ArrayList; | |
23 | import java.util.LinkedList; | |
24 | import java.util.List; | |
25 | ||
26 | import org.crosswire.jsword.book.Book; | |
27 | import org.crosswire.jsword.book.DataPolice; | |
28 | import org.crosswire.jsword.book.OSISUtil; | |
29 | import org.crosswire.jsword.book.filter.SourceFilter; | |
30 | import org.crosswire.jsword.passage.Key; | |
31 | import org.jdom2.Content; | |
32 | import org.jdom2.Element; | |
33 | ||
34 | /** | |
35 | * Filter to convert GBF data to OSIS format. | |
36 | * | |
37 | * The best place to go for more information about the GBF spec is: | |
38 | * <a href="http://ebible.org/bible/gbf.htm">http://ebible.org/bible/gbf.htm</a> | |
39 | * | |
40 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
41 | * @author Joe Walker | |
42 | */ | |
43 | 0 | public class GBFFilter implements SourceFilter { |
44 | /* (non-Javadoc) | |
45 | * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String) | |
46 | */ | |
47 | public List<Content> toOSIS(Book book, Key key, String plain) { | |
48 | 0 | Element ele = OSISUtil.factory().createDiv(); |
49 | 0 | LinkedList<Content> stack = new LinkedList<Content>(); |
50 | 0 | stack.addFirst(ele); |
51 | ||
52 | 0 | List<Tag> taglist = parseTags(book, key, plain.trim()); |
53 | while (true) { | |
54 | 0 | if (taglist.isEmpty()) { |
55 | 0 | break; |
56 | } | |
57 | ||
58 | 0 | Tag tag = taglist.remove(0); |
59 | 0 | tag.updateOsisStack(book, key, stack); |
60 | 0 | } |
61 | ||
62 | 0 | stack.removeFirst(); |
63 | 0 | return ele.removeContent(); |
64 | } | |
65 | ||
66 | @Override | |
67 | public GBFFilter clone() { | |
68 | 0 | GBFFilter clone = null; |
69 | try { | |
70 | 0 | clone = (GBFFilter) super.clone(); |
71 | 0 | } catch (CloneNotSupportedException e) { |
72 | 0 | assert false : e; |
73 | 0 | } |
74 | 0 | return clone; |
75 | } | |
76 | ||
77 | /** | |
78 | * Turn the string into a list of tags in the order that they appear in the | |
79 | * original string. | |
80 | */ | |
81 | private List<Tag> parseTags(Book book, Key key, String aRemains) { | |
82 | 0 | String remains = aRemains; |
83 | 0 | List<Tag> taglist = new ArrayList<Tag>(); |
84 | ||
85 | // A GBF code is of the form <XY...> or <Xy...> | |
86 | // where the first letter is always capitalized and | |
87 | // the second letter indicates an open or close tag. | |
88 | // Upper letters are open, lower are close. | |
89 | // The ... is optional and represents an argument. | |
90 | // Sometimes the argument is preceded by a space. | |
91 | // In GBF it is legal to have < and > otherwise. | |
92 | // In at least one module, GerLut1545, << ... >> is used for quotes. | |
93 | while (true) { | |
94 | 0 | int ltpos = remains.indexOf('<'); |
95 | 0 | int gtpos = remains.indexOf('>', ltpos + 1); |
96 | ||
97 | // check whether we have unmatched < and >, or no tags at all | |
98 | // If so then we don't have a tag in the remaining. | |
99 | 0 | if (ltpos == -1 || gtpos == -1) { |
100 | // If the first letter after < is an upper case letter | |
101 | // then report it as a potential problem | |
102 | 0 | if (ltpos >= 0 |
103 | && ltpos < remains.length() + 1 | |
104 | && Character.isUpperCase(remains.charAt(ltpos + 1))) | |
105 | { | |
106 | 0 | DataPolice.report(book, key, "Possible bad GBF tag" + remains); |
107 | } | |
108 | 0 | if (gtpos != -1 && ltpos >= 0) { |
109 | 0 | DataPolice.report(book, key, "Possible bad GBF tag" + remains); |
110 | } | |
111 | 0 | int pos = Math.max(ltpos, gtpos) + 1; |
112 | // If there were not any <, > or either ended the string | |
113 | // then we only have text. | |
114 | 0 | if (pos == 0 || pos == remains.length()) { |
115 | 0 | taglist.add(GBFTagBuilders.getTextTag(remains)); |
116 | 0 | break; |
117 | } | |
118 | 0 | taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, pos))); |
119 | 0 | remains = remains.substring(pos); |
120 | 0 | continue; |
121 | } | |
122 | ||
123 | // If the character after the < is not an upper case letter | |
124 | // then we don't have GBF. | |
125 | // So, create a text tag that ends with the found >. | |
126 | // Note that in JST, there are spurious html tags and | |
127 | // this will treat them as valid GBF text. | |
128 | 0 | char firstChar = remains.charAt(ltpos + 1); |
129 | 0 | if (!Character.isUpperCase(firstChar)) { |
130 | 0 | taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, gtpos + 1))); |
131 | 0 | remains = remains.substring(gtpos + 1); |
132 | 0 | continue; |
133 | } | |
134 | ||
135 | // generate tags | |
136 | 0 | String start = remains.substring(0, ltpos); |
137 | 0 | int strLen = start.length(); |
138 | 0 | if (strLen > 0) { |
139 | 0 | int beginIndex = 0; |
140 | 0 | boolean inSepStr = SEPARATORS.indexOf(start.charAt(0)) >= 0; |
141 | // split words from separators... | |
142 | // e.g., "a b c? e g." -> "a b c", "? ", "e g." | |
143 | // "a b c<tag> e g." -> "a b c", tag, " ", "e g." | |
144 | 0 | for (int i = 1; inSepStr && i < strLen; i++) { |
145 | 0 | char currentChar = start.charAt(i); |
146 | 0 | if (!(SEPARATORS.indexOf(currentChar) >= 0)) { |
147 | 0 | taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex, i))); |
148 | 0 | beginIndex = i; |
149 | 0 | inSepStr = false; |
150 | } | |
151 | } | |
152 | ||
153 | 0 | if (beginIndex < strLen) { |
154 | 0 | taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex))); |
155 | } | |
156 | } | |
157 | ||
158 | 0 | String tag = remains.substring(ltpos + 1, gtpos); |
159 | 0 | int length = tag.length(); |
160 | 0 | if (length > 0) { |
161 | 0 | Tag reply = GBFTagBuilders.getTag(book, key, tag); |
162 | 0 | if (reply != null) { |
163 | 0 | taglist.add(reply); |
164 | } | |
165 | } | |
166 | ||
167 | 0 | remains = remains.substring(gtpos + 1); |
168 | 0 | } |
169 | ||
170 | 0 | return taglist; |
171 | } | |
172 | ||
173 | private static final String SEPARATORS = " ,:;.?!"; | |
174 | ||
175 | } |