Coverage Report - org.crosswire.jsword.book.filter.gbf.GBFFilter
 
Classes in this File Line Coverage Branch Coverage Complexity
GBFFilter
0%
0/61
0%
0/40
8
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2005 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.jsword.book.filter.gbf;
 21  
 
 22  
 import java.util.ArrayList;
 23  
 import java.util.LinkedList;
 24  
 import java.util.List;
 25  
 
 26  
 import org.crosswire.jsword.book.Book;
 27  
 import org.crosswire.jsword.book.DataPolice;
 28  
 import org.crosswire.jsword.book.OSISUtil;
 29  
 import org.crosswire.jsword.book.filter.SourceFilter;
 30  
 import org.crosswire.jsword.passage.Key;
 31  
 import org.jdom2.Content;
 32  
 import org.jdom2.Element;
 33  
 
 34  
 /**
 35  
  * Filter to convert GBF data to OSIS format.
 36  
  * 
 37  
  * The best place to go for more information about the GBF spec is:
 38  
  * <a href="http://ebible.org/bible/gbf.htm">http://ebible.org/bible/gbf.htm</a>
 39  
  * 
 40  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 41  
  * @author Joe Walker
 42  
  */
 43  0
 public class GBFFilter implements SourceFilter {
 44  
     /* (non-Javadoc)
 45  
      * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String)
 46  
      */
 47  
     public List<Content> toOSIS(Book book, Key key, String plain) {
 48  0
         Element ele = OSISUtil.factory().createDiv();
 49  0
         LinkedList<Content> stack = new LinkedList<Content>();
 50  0
         stack.addFirst(ele);
 51  
 
 52  0
         List<Tag> taglist = parseTags(book, key, plain.trim());
 53  
         while (true) {
 54  0
             if (taglist.isEmpty()) {
 55  0
                 break;
 56  
             }
 57  
 
 58  0
             Tag tag = taglist.remove(0);
 59  0
             tag.updateOsisStack(book, key, stack);
 60  0
         }
 61  
 
 62  0
         stack.removeFirst();
 63  0
         return ele.removeContent();
 64  
     }
 65  
 
 66  
     @Override
 67  
     public GBFFilter clone() {
 68  0
         GBFFilter clone = null;
 69  
         try {
 70  0
             clone = (GBFFilter) super.clone();
 71  0
         } catch (CloneNotSupportedException e) {
 72  0
             assert false : e;
 73  0
         }
 74  0
         return clone;
 75  
     }
 76  
 
 77  
     /**
 78  
      * Turn the string into a list of tags in the order that they appear in the
 79  
      * original string.
 80  
      */
 81  
     private List<Tag> parseTags(Book book, Key key, String aRemains) {
 82  0
         String remains = aRemains;
 83  0
         List<Tag> taglist = new ArrayList<Tag>();
 84  
 
 85  
         // A GBF code is of the form <XY...> or <Xy...>
 86  
         // where the first letter is always capitalized and
 87  
         // the second letter indicates an open or close tag.
 88  
         // Upper letters are open, lower are close.
 89  
         // The ... is optional and represents an argument.
 90  
         // Sometimes the argument is preceded by a space.
 91  
         // In GBF it is legal to have < and > otherwise.
 92  
         // In at least one module, GerLut1545, << ... >> is used for quotes.
 93  
         while (true) {
 94  0
             int ltpos = remains.indexOf('<');
 95  0
             int gtpos = remains.indexOf('>', ltpos + 1);
 96  
 
 97  
             // check whether we have unmatched < and >, or no tags at all
 98  
             // If so then we don't have a tag in the remaining.
 99  0
             if (ltpos == -1 || gtpos == -1) {
 100  
                 // If the first letter after < is an upper case letter
 101  
                 // then report it as a potential problem
 102  0
                 if (ltpos >= 0
 103  
                         && ltpos < remains.length() + 1
 104  
                         && Character.isUpperCase(remains.charAt(ltpos + 1)))
 105  
                 {
 106  0
                     DataPolice.report(book, key, "Possible bad GBF tag" + remains);
 107  
                 }
 108  0
                 if (gtpos != -1 && ltpos >= 0) {
 109  0
                     DataPolice.report(book, key, "Possible bad GBF tag" + remains);
 110  
                 }
 111  0
                 int pos = Math.max(ltpos, gtpos) + 1;
 112  
                 // If there were not any <, > or either ended the string
 113  
                 // then we only have text.
 114  0
                 if (pos == 0 || pos == remains.length()) {
 115  0
                     taglist.add(GBFTagBuilders.getTextTag(remains));
 116  0
                     break;
 117  
                 }
 118  0
                 taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, pos)));
 119  0
                 remains = remains.substring(pos);
 120  0
                 continue;
 121  
             }
 122  
 
 123  
             // If the character after the < is not an upper case letter
 124  
             // then we don't have GBF.
 125  
             // So, create a text tag that ends with the found >.
 126  
             // Note that in JST, there are spurious html tags and
 127  
             // this will treat them as valid GBF text.
 128  0
             char firstChar = remains.charAt(ltpos + 1);
 129  0
             if (!Character.isUpperCase(firstChar)) {
 130  0
                 taglist.add(GBFTagBuilders.getTextTag(remains.substring(0, gtpos + 1)));
 131  0
                 remains = remains.substring(gtpos + 1);
 132  0
                 continue;
 133  
             }
 134  
 
 135  
             // generate tags
 136  0
             String start = remains.substring(0, ltpos);
 137  0
             int strLen = start.length();
 138  0
             if (strLen > 0) {
 139  0
                 int beginIndex = 0;
 140  0
                 boolean inSepStr = SEPARATORS.indexOf(start.charAt(0)) >= 0;
 141  
                 // split words from separators...
 142  
                 // e.g., "a b c? e g." -> "a b c", "? ", "e g."
 143  
                 // "a b c<tag> e g." -> "a b c", tag, " ", "e g."
 144  0
                 for (int i = 1; inSepStr && i < strLen; i++) {
 145  0
                     char currentChar = start.charAt(i);
 146  0
                     if (!(SEPARATORS.indexOf(currentChar) >= 0)) {
 147  0
                         taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex, i)));
 148  0
                         beginIndex = i;
 149  0
                         inSepStr = false;
 150  
                     }
 151  
                 }
 152  
 
 153  0
                 if (beginIndex < strLen) {
 154  0
                     taglist.add(GBFTagBuilders.getTextTag(start.substring(beginIndex)));
 155  
                 }
 156  
             }
 157  
 
 158  0
             String tag = remains.substring(ltpos + 1, gtpos);
 159  0
             int length = tag.length();
 160  0
             if (length > 0) {
 161  0
                 Tag reply = GBFTagBuilders.getTag(book, key, tag);
 162  0
                 if (reply != null) {
 163  0
                     taglist.add(reply);
 164  
                 }
 165  
             }
 166  
 
 167  0
             remains = remains.substring(gtpos + 1);
 168  0
         }
 169  
 
 170  0
         return taglist;
 171  
     }
 172  
 
 173  
     private static final String SEPARATORS = " ,:;.?!";
 174  
 
 175  
 }