| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| GBFFilter |
|
| 8.0;8 |
| 1 | /** | |
| 2 | * Distribution License: | |
| 3 | * JSword is free software; you can redistribute it and/or modify it under | |
| 4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
| 5 | * as published by the Free Software Foundation. This program is distributed | |
| 6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
| 7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
| 8 | * See the GNU Lesser General Public License for more details. | |
| 9 | * | |
| 10 | * The License is available on the internet at: | |
| 11 | * http://www.gnu.org/copyleft/lgpl.html | |
| 12 | * or by writing to: | |
| 13 | * Free Software Foundation, Inc. | |
| 14 | * 59 Temple Place - Suite 330 | |
| 15 | * Boston, MA 02111-1307, USA | |
| 16 | * | |
| 17 | * © CrossWire Bible Society, 2005 - 2016 | |
| 18 | * | |
| 19 | */ | |
| 20 | package org.crosswire.jsword.book.filter.gbf; | |
| 21 | ||
| 22 | import java.util.ArrayList; | |
| 23 | import java.util.LinkedList; | |
| 24 | import java.util.List; | |
| 25 | ||
| 26 | import org.crosswire.jsword.book.Book; | |
| 27 | import org.crosswire.jsword.book.DataPolice; | |
| 28 | import org.crosswire.jsword.book.OSISUtil; | |
| 29 | import org.crosswire.jsword.book.filter.SourceFilter; | |
| 30 | import org.crosswire.jsword.passage.Key; | |
| 31 | import org.jdom2.Content; | |
| 32 | import org.jdom2.Element; | |
| 33 | ||
| 34 | /** | |
| 35 | * Filter to convert GBF data to OSIS format. | |
| 36 | * | |
| 37 | * The best place to go for more information about the GBF spec is: | |
| 38 | * <a href="http://ebible.org/bible/gbf.htm">http://ebible.org/bible/gbf.htm</a> | |
| 39 | * | |
| 40 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
| 41 | * @author Joe Walker | |
| 42 | */ | |
| 43 | 0 | public class GBFFilter implements SourceFilter { |
| 44 | /* (non-Javadoc) | |
| 45 | * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String) | |
| 46 | */ | |
| 47 | public List<Content> toOSIS(Book book, Key key, String plain) { | |
| 48 | 0 | Element ele = OSISUtil.factory().createDiv(); |
| 49 | 0 | LinkedList<Content> stack = new LinkedList<Content>(); |
| 50 | 0 | stack.addFirst(ele); |
| 51 | ||
| 52 | 0 | List<Tag> taglist = parseTags(book, key, plain.trim()); |
| 53 | while (true) { | |
| 54 | 0 | if (taglist.isEmpty()) { |
| 55 | 0 | break; |
| 56 | } | |
| 57 | ||
| 58 | 0 | Tag tag = taglist.remove(0); |
| 59 | 0 | tag.updateOsisStack(book, key, stack); |
| 60 | 0 | } |
| 61 | ||
| 62 | 0 | stack.removeFirst(); |
| 63 | 0 | return ele.removeContent(); |
| 64 | } | |
| 65 | ||
| 66 | @Override | |
| 67 | public GBFFilter clone() { | |
| 68 | 0 | GBFFilter clone = null; |
| 69 | try { | |
| 70 | 0 | clone = (GBFFilter) super.clone(); |
| 71 | 0 | } catch (CloneNotSupportedException e) { |
| 72 | 0 | assert false : e; |
| 73 | 0 | } |
| 74 | 0 | return clone; |
| 75 | } | |
| 76 | ||
| 77 | /** | |
| 78 | * Turn the string into a list of tags in the order that they appear in the | |
| 79 | * original string. | |
| 80 | */ | |
| 81 | private List<Tag> parseTags(Book book, Key key, String aRemains) { | |
| 82 | 0 | String remains = aRemains; |
| 83 | 0 | List<Tag> taglist = new ArrayList<Tag>(); |
| 84 | ||
| 85 | // A GBF code is of the form <XY...> or <Xy...> | |
| 86 | // where the first letter is always capitalized and | |
| 87 | // the second letter indicates an open or close tag. | |
| 88 | // Upper letters are open, lower are close. | |
| 89 | // The ... is optional and represents an argument. | |
| 90 | // Sometimes the argument is preceded by a space. | |
| 91 | // In GBF it is legal to have < and > otherwise. | |
| 92 | // In at least one module, GerLut1545, << ... >> is used for quotes. | |
| 93 | while (true) { | |
| 94 | 0 | int ltpos = remains.indexOf('<'); |
| 95 | 0 | int gtpos = remains.indexOf('>', ltpos + 1); |
| 96 | ||
| 97 | // check whether we have unmatched < and >, or no tags at all | |
| 98 | // If so then we don't have a tag in the remaining. | |
| 99 | 0 | if (ltpos == -1 || gtpos == -1) { |
| 100 | // If the first letter after < is an upper case letter | |
| 101 | // then report it as a potential problem | |
| 102 | 0 | if (ltpos >= 0 |
| 103 | && ltpos < remains.length() + 1 | |
| 104 | && Character.isUpperCase(remains.charAt(ltpos + 1))) | |
| 105 | { | |
| 106 | 0 | DataPolice.report(book, key, "Possible bad GBF tag" + remains); |
| 107 | } | |
| 108 | 0 | if (gtpos != -1 && ltpos >= 0) { |
| 109 | 0 | DataPolice.report(book, key, "Possible bad GBF tag" + remains); |
| 110 | } | |
| 111 | 0 | int pos = Math.max(ltpos, gtpos) + 1; |
| 112 | // If there were not any <, > or either ended the string | |
| 113 | // then we only have text. | |
| 114 | 0 | if (pos == 0 || pos == remains.length()) { |
| 115 | 0 | taglist.add(GBFTagBuilders.getTextTag(remains)); |
| 116 | 0 | break; |
| 117 | } | |
| 118 | 0 | taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, pos))); |
| 119 | 0 | remains = remains.substring(pos); |
| 120 | 0 | continue; |
| 121 | } | |
| 122 | ||
| 123 | // If the character after the < is not an upper case letter | |
| 124 | // then we don't have GBF. | |
| 125 | // So, create a text tag that ends with the found >. | |
| 126 | // Note that in JST, there are spurious html tags and | |
| 127 | // this will treat them as valid GBF text. | |
| 128 | 0 | char firstChar = remains.charAt(ltpos + 1); |
| 129 | 0 | if (!Character.isUpperCase(firstChar)) { |
| 130 | 0 | taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, gtpos + 1))); |
| 131 | 0 | remains = remains.substring(gtpos + 1); |
| 132 | 0 | continue; |
| 133 | } | |
| 134 | ||
| 135 | // generate tags | |
| 136 | 0 | String start = remains.substring(0, ltpos); |
| 137 | 0 | int strLen = start.length(); |
| 138 | 0 | if (strLen > 0) { |
| 139 | 0 | int beginIndex = 0; |
| 140 | 0 | boolean inSepStr = SEPARATORS.indexOf(start.charAt(0)) >= 0; |
| 141 | // split words from separators... | |
| 142 | // e.g., "a b c? e g." -> "a b c", "? ", "e g." | |
| 143 | // "a b c<tag> e g." -> "a b c", tag, " ", "e g." | |
| 144 | 0 | for (int i = 1; inSepStr && i < strLen; i++) { |
| 145 | 0 | char currentChar = start.charAt(i); |
| 146 | 0 | if (!(SEPARATORS.indexOf(currentChar) >= 0)) { |
| 147 | 0 | taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex, i))); |
| 148 | 0 | beginIndex = i; |
| 149 | 0 | inSepStr = false; |
| 150 | } | |
| 151 | } | |
| 152 | ||
| 153 | 0 | if (beginIndex < strLen) { |
| 154 | 0 | taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex))); |
| 155 | } | |
| 156 | } | |
| 157 | ||
| 158 | 0 | String tag = remains.substring(ltpos + 1, gtpos); |
| 159 | 0 | int length = tag.length(); |
| 160 | 0 | if (length > 0) { |
| 161 | 0 | Tag reply = GBFTagBuilders.getTag(book, key, tag); |
| 162 | 0 | if (reply != null) { |
| 163 | 0 | taglist.add(reply); |
| 164 | } | |
| 165 | } | |
| 166 | ||
| 167 | 0 | remains = remains.substring(gtpos + 1); |
| 168 | 0 | } |
| 169 | ||
| 170 | 0 | return taglist; |
| 171 | } | |
| 172 | ||
| 173 | private static final String SEPARATORS = " ,:;.?!"; | |
| 174 | ||
| 175 | } |