Coverage Report - org.crosswire.jsword.book.filter.thml.THMLFilter
 
Classes in this File Line Coverage Branch Coverage Complexity
THMLFilter
0%
0/60
0%
0/12
3
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2005 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.jsword.book.filter.thml;
 21  
 
 22  
 import java.io.IOException;
 23  
 import java.io.StringReader;
 24  
 import java.util.List;
 25  
 
 26  
 import javax.xml.parsers.ParserConfigurationException;
 27  
 import javax.xml.parsers.SAXParser;
 28  
 import javax.xml.parsers.SAXParserFactory;
 29  
 
 30  
 import org.crosswire.common.xml.XMLUtil;
 31  
 import org.crosswire.jsword.book.Book;
 32  
 import org.crosswire.jsword.book.OSISUtil;
 33  
 import org.crosswire.jsword.book.filter.SourceFilter;
 34  
 import org.crosswire.jsword.passage.Key;
 35  
 import org.jdom2.Content;
 36  
 import org.jdom2.Element;
 37  
 import org.slf4j.Logger;
 38  
 import org.slf4j.LoggerFactory;
 39  
 import org.xml.sax.InputSource;
 40  
 import org.xml.sax.SAXException;
 41  
 import org.xml.sax.SAXParseException;
 42  
 
 43  
 /**
 44  
  * Filter to convert THML to OSIS format.
 45  
  *
 46  
  * <p>
 47  
  * I used the THML ref page: <a
 48  
  * href="http://www.ccel.org/ThML/ThML1.04.htm">http
 49  
  * ://www.ccel.org/ThML/ThML1.04.htm</a> to work out what the tags meant.
 50  
  *
 51  
  * LATER(joe): check nesting on these THML elements
 52  
  *
 53  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 54  
  * @author Joe Walker
 55  
  */
 56  0
 public class THMLFilter implements SourceFilter {
 57  
     /* (non-Javadoc)
 58  
      * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String)
 59  
      */
 60  
     public List<Content> toOSIS(Book book, Key key, String plain) {
 61  0
         Element ele = cleanParse(book, key, plain);
 62  
 
 63  0
         if (ele == null) {
 64  0
             if (error instanceof SAXParseException) {
 65  0
                 SAXParseException spe = (SAXParseException) error;
 66  0
                 int colNumber = spe.getColumnNumber();
 67  0
                 int start = Math.max(0, colNumber - 40);
 68  0
                 int stop = Math.min(finalInput.length(), colNumber + 40);
 69  0
                 int here = stop - start;
 70  0
                 log.warn("Could not fix {}({}) by {}: Error here({},{},{}): {}",
 71  
                          book.getInitials(),
 72  
                          key.getName(),
 73  
                          errorMessage,
 74  
                          Integer.toString(colNumber),
 75  
                          Integer.toString(finalInput.length()),
 76  
                          Integer.toString(here),
 77  
                          finalInput.substring(start, stop));
 78  0
             } else {
 79  0
                 log.warn("Could not fix {}({}) by {}: {}",
 80  
                          book.getInitials(),
 81  
                          key.getName(),
 82  
                          errorMessage,
 83  
                          error.getMessage());
 84  
             }
 85  0
             ele = OSISUtil.factory().createP();
 86  
         }
 87  
 
 88  0
         return ele.removeContent();
 89  
     }
 90  
 
 91  
     @Override
 92  
     public THMLFilter clone() {
 93  0
         THMLFilter clone = null;
 94  
         try {
 95  0
             clone = (THMLFilter) super.clone();
 96  0
         } catch (CloneNotSupportedException e) {
 97  0
             assert false : e;
 98  0
         }
 99  0
         return clone;
 100  
     }
 101  
 
 102  
     private Element cleanParse(Book book, Key key, String plain) {
 103  
         // So just try to strip out all XML looking things
 104  0
         String clean = XMLUtil.cleanAllEntities(plain);
 105  0
         Element ele = parse(book, key, clean, "cleaning entities");
 106  
 
 107  0
         if (ele == null) {
 108  0
             ele = cleanText(book, key, clean);
 109  
         }
 110  
 
 111  0
         return ele;
 112  
     }
 113  
 
 114  
     private Element cleanText(Book book, Key key, String plain) {
 115  
         // So just try to strip out all XML looking things
 116  0
         String clean = XMLUtil.cleanAllCharacters(plain);
 117  0
         Element ele = parse(book, key, clean, "cleaning text");
 118  
 
 119  0
         if (ele == null) {
 120  0
             ele = parse(book, key, XMLUtil.closeEmptyTags(clean), "closing empty tags");
 121  
         }
 122  
 
 123  0
         if (ele == null) {
 124  0
             ele = cleanTags(book, key, clean);
 125  
         }
 126  
 
 127  0
         return ele;
 128  
     }
 129  
 
 130  
     private Element cleanTags(Book book, Key key, String plain) {
 131  
         // So just try to strip out all XML looking things
 132  0
         String clean = XMLUtil.cleanAllTags(plain);
 133  0
         return parse(book, key, clean, "cleaning tags");
 134  
     }
 135  
 
 136  
     private Element parse(Book book, Key key, String plain, String failMessage) {
 137  0
         Exception ex = null;
 138  
         // We need to create a root element to house our document fragment
 139  
         // 15 for the tags we add
 140  0
         StringBuilder buf = new StringBuilder(15 + plain.length());
 141  0
         buf.append('<').append(RootTag.TAG_ROOT).append('>').append(plain).append("</").append(RootTag.TAG_ROOT).append('>');
 142  0
         finalInput = buf.toString();
 143  
         try {
 144  0
             StringReader in = new StringReader(finalInput);
 145  0
             InputSource is = new InputSource(in);
 146  0
             SAXParserFactory spf = SAXParserFactory.newInstance();
 147  0
             SAXParser parser = spf.newSAXParser();
 148  0
             CustomHandler handler = new CustomHandler(book, key);
 149  
 
 150  0
             parser.parse(is, handler);
 151  0
             return handler.getRootElement();
 152  0
         } catch (SAXParseException e) {
 153  0
             ex = e;
 154  0
         } catch (SAXException e) {
 155  0
             ex = e;
 156  0
         } catch (IOException e) {
 157  0
             ex = e;
 158  0
         } catch (ParserConfigurationException e) {
 159  0
             ex = e;
 160  0
         } catch (IllegalArgumentException e) {
 161  
             // JDOM has a few exceptions which are all derived from this.
 162  0
             ex = e;
 163  0
         }
 164  
 
 165  0
         errorMessage = failMessage;
 166  0
         error = ex;
 167  0
         return null;
 168  
     }
 169  
 
 170  
     private String errorMessage;
 171  
     private Exception error;
 172  
     private String finalInput;
 173  
 
 174  
     /**
 175  
      * The log stream
 176  
      */
 177  0
     private static final Logger log = LoggerFactory.getLogger(THMLFilter.class);
 178  
 }