Coverage Report - org.crosswire.jsword.book.filter.thml.CustomHandler
 
Classes in this File Line Coverage Branch Coverage Complexity
CustomHandler
0%
0/67
0%
0/36
5.167
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2005 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.jsword.book.filter.thml;
 21  
 
 22  
 import java.util.HashMap;
 23  
 import java.util.LinkedList;
 24  
 import java.util.Locale;
 25  
 import java.util.Map;
 26  
 
 27  
 import org.crosswire.jsword.book.Book;
 28  
 import org.crosswire.jsword.book.DataPolice;
 29  
 import org.crosswire.jsword.passage.Key;
 30  
 import org.jdom2.Content;
 31  
 import org.jdom2.Element;
 32  
 import org.jdom2.Text;
 33  
 import org.xml.sax.Attributes;
 34  
 import org.xml.sax.SAXException;
 35  
 import org.xml.sax.helpers.DefaultHandler;
 36  
 
 37  
 /**
 38  
  * To convert SAX events into OSIS events.
 39  
  * 
 40  
  * <p>
 41  
  * This is based upon the THML reference page:
 42  
  * <a href="http://www.ccel.org/ThML/ThML1.04.htm">http://www.ccel.org/ThML/ThML1.04.htm</a>
 43  
  * to work out what the tags meant.
 44  
  * 
 45  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 46  
  * @author Joe Walker
 47  
  */
 48  
 public class CustomHandler extends DefaultHandler {
 49  
     /**
 50  
      * Simple ctor
 51  
      * 
 52  
      * @param book the book
 53  
      * @param key the key
 54  
      */
 55  0
     public CustomHandler(Book book, Key key) {
 56  0
         this.book = book;
 57  0
         this.key = key;
 58  0
         this.stack = new LinkedList<Content>();
 59  0
     }
 60  
 
 61  
     /* (non-Javadoc)
 62  
      * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
 63  
      */
 64  
     @Override
 65  
     public void startElement(String uri, String localname, String qname, Attributes attrs) throws SAXException {
 66  0
         Element ele = null;
 67  
 
 68  
         // If we are looking at the root element
 69  
         // then the stack is empty
 70  0
         if (!stack.isEmpty()) {
 71  0
             Object top = stack.getFirst();
 72  
 
 73  
             // If the element and its descendants are to be ignored
 74  
             // then there is a null element on the stack
 75  0
             if (top == null) {
 76  0
                 return;
 77  
             }
 78  
 
 79  
             // It might be a text element
 80  0
             if (top instanceof Element) {
 81  0
                 ele = (Element) top;
 82  
             }
 83  
         }
 84  
 
 85  0
         Tag t = getTag(localname, qname);
 86  
 
 87  0
         if (t != null) {
 88  0
             stack.addFirst(t.processTag(book, key, ele, attrs));
 89  
         }
 90  0
     }
 91  
 
 92  
     /* (non-Javadoc)
 93  
      * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
 94  
      */
 95  
     @Override
 96  
     public void characters(char[] data, int offset, int length) {
 97  
         // what we are adding
 98  0
         String text = new String(data, offset, length);
 99  
 
 100  0
         if (stack.isEmpty()) {
 101  0
             stack.addFirst(new Text(text));
 102  0
             return;
 103  
         }
 104  
 
 105  
         // What we are adding to
 106  0
         Content top = stack.getFirst();
 107  
 
 108  
         // If the element and its descendants are to be ignored
 109  
         // then there is a null element on the stack
 110  0
         if (top == null) {
 111  0
             return;
 112  
         }
 113  
 
 114  0
         if (top instanceof Text) {
 115  0
             ((Text) top).append(text);
 116  0
             return;
 117  
         }
 118  
 
 119  0
         if (top instanceof Element) {
 120  0
             Element current = (Element) top;
 121  
 
 122  0
             int size = current.getContentSize();
 123  
 
 124  
             // If the last element in the list is a string then we should add
 125  
             // this string on to the end of it rather than add a new list item
 126  
             // because (probably as an artifact of the HTML/XSL transform we get
 127  
             // a space inserted in the output even when 2 calls to this method
 128  
             // split a word.
 129  0
             if (size > 0) {
 130  0
                 Content last = current.getContent(size - 1);
 131  0
                 if (last instanceof Text) {
 132  0
                     ((Text) last).append(text);
 133  0
                     return;
 134  
                 }
 135  
             }
 136  0
             current.addContent(new Text(text));
 137  
         }
 138  0
     }
 139  
 
 140  
     /* (non-Javadoc)
 141  
      * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
 142  
      */
 143  
     @Override
 144  
     public void endElement(String uri, String localname, String qname) {
 145  0
         if (stack.isEmpty()) {
 146  0
             return;
 147  
         }
 148  
         // When we are done processing an element we need to remove
 149  
         // it from the stack so that nothing more is attached to it.
 150  0
         Content top = stack.removeFirst();
 151  0
         if (top instanceof Element) {
 152  0
             Element finished = (Element) top;
 153  0
             Tag t = getTag(localname, qname);
 154  
 
 155  0
             if (t != null) {
 156  0
                 t.processContent(book, key, finished);
 157  
             }
 158  
 
 159  
             // If it was the last element then it was the root element
 160  
             // so save it
 161  0
             if (stack.isEmpty()) {
 162  0
                 rootElement = finished;
 163  
             }
 164  
         }
 165  0
     }
 166  
 
 167  
     /**
 168  
      * @return the root element
 169  
      */
 170  
     public Element getRootElement() {
 171  0
         return rootElement;
 172  
     }
 173  
 
 174  
     private Tag getTag(String localname, String qname) {
 175  
         // sometimes qname is empty e.g. on Android 2.1
 176  
         String name;
 177  0
         if (qname != null && qname.length() > 0) {
 178  0
             name = qname;
 179  
         } else {
 180  0
             name = localname;
 181  
         }
 182  
 
 183  0
         Tag t = TAG_MAP.get(name);
 184  
 
 185  
         // Some of the THML books are broken in that they use uppercase
 186  
         // element names, which the spec disallows, but we might as well
 187  
         // look out for them
 188  0
         if (t == null) {
 189  0
             t = TAG_MAP.get(name.toLowerCase(Locale.ENGLISH));
 190  
 
 191  0
             if (t == null) {
 192  0
                 DataPolice.report(book, key, "Unknown thml element: " + localname + " qname=" + name);
 193  
 
 194  
                 // Report on it only once and make sure the content is output.
 195  0
                 t = new AnonymousTag(name);
 196  0
                 TAG_MAP.put(name, t);
 197  0
                 return t;
 198  
             }
 199  
 
 200  
             // DataPolice.report(book, key, "Wrong case used in thml element: " + name);
 201  
         }
 202  0
         return t;
 203  
     }
 204  
 
 205  
     /**
 206  
      * When the document is parsed, this is the last element popped off the
 207  
      * stack.
 208  
      */
 209  
     private Element rootElement;
 210  
 
 211  
     /**
 212  
      * The book being parsed.
 213  
      */
 214  
     private Book book;
 215  
 
 216  
     /**
 217  
      * The book being parsed.
 218  
      */
 219  
     private Key key;
 220  
 
 221  
     /**
 222  
      * The stack of elements that we have created
 223  
      */
 224  
     private LinkedList<Content> stack;
 225  
 
 226  
     /**
 227  
      * The known tag types
 228  
      */
 229  0
     private static final Map<String, Tag> TAG_MAP = new HashMap<String, Tag>();
 230  
 
 231  
     static {
 232  
         /*
 233  
          * ThML is based upon Voyager XHTML and all Voyager elements are
 234  
          * allowed. However not all elements make sense.
 235  
          */
 236  0
         Tag[] tags = new Tag[] {
 237  
                 // The following are defined in Voyager xhtml 4.0
 238  
                 new ATag(),
 239  
                 new AbbrTag(),
 240  
                 new AliasTag("acronym", new AbbrTag()),
 241  
                 new AnonymousTag("address"),
 242  
                 new SkipTag("applet"),
 243  
                 new SkipTag("area"),
 244  
                 new BTag(),
 245  
                 new SkipTag("base"),
 246  
                 new SkipTag("basefont"),
 247  
                 new IgnoreTag("bdo"),
 248  
                 new BigTag(),
 249  
                 new BlockquoteTag(),
 250  
                 new IgnoreTag("body"),
 251  
                 new BrTag(),
 252  
                 new SkipTag("button"),
 253  
                 new AnonymousTag("caption"),
 254  
                 new CenterTag(),
 255  
                 new AnonymousTag("cite"),
 256  
                 new AnonymousTag("code"),
 257  
                 new SkipTag("col"),
 258  
                 new SkipTag("colgroup"),
 259  
                 new AliasTag("dd", new LiTag()),
 260  
                 new AnonymousTag("del"),
 261  
                 new AnonymousTag("dfn"),
 262  
                 new DivTag(),
 263  
                 new AliasTag("dl", new UlTag()),
 264  
                 new AliasTag("dt", new LiTag()),
 265  
                 new AliasTag("em", new ITag()),
 266  
                 new IgnoreTag("fieldset"),
 267  
                 new FontTag(),
 268  
                 new SkipTag("form"),
 269  
                 new SkipTag("frame"),
 270  
                 new SkipTag("frameset"),
 271  
                 new AliasTag("h1", new HTag(1)),
 272  
                 new AliasTag("h2", new HTag(2)),
 273  
                 new AliasTag("h3", new HTag(3)),
 274  
                 new AliasTag("h4", new HTag(4)),
 275  
                 new AliasTag("h5", new HTag(5)),
 276  
                 new AliasTag("h6", new HTag(6)),
 277  
                 new SkipTag("head"),
 278  
                 new HrTag(),
 279  
                 new IgnoreTag("html"),
 280  
                 new IgnoreTag("frameset"),
 281  
                 new ITag(),
 282  
                 new SkipTag("iframe"),
 283  
                 new ImgTag(),
 284  
                 new SkipTag("input"),
 285  
                 new AnonymousTag("ins"),
 286  
                 new AnonymousTag("kbd"),
 287  
                 new AnonymousTag("label"),
 288  
                 new AnonymousTag("legend"),
 289  
                 new LiTag(),
 290  
                 new SkipTag("link"),
 291  
                 new SkipTag("map"),
 292  
                 new SkipTag("meta"),
 293  
                 new SkipTag("noscript"),
 294  
                 new SkipTag("object"),
 295  
                 new OlTag(),
 296  
                 new SkipTag("optgroup"),
 297  
                 new SkipTag("option"),
 298  
                 new PTag(),
 299  
                 new SkipTag("param"),
 300  
                 new IgnoreTag("pre"),
 301  
                 new QTag(),
 302  
                 new RootTag(),
 303  
                 new STag(),
 304  
                 new AnonymousTag("samp"),
 305  
                 new SkipTag("script"),
 306  
                 new SkipTag("select"),
 307  
                 new SmallTag(),
 308  
                 new IgnoreTag("span"),
 309  
                 new AliasTag("strong", new BTag()),
 310  
                 new SkipTag("style"),
 311  
                 new SubTag(),
 312  
                 new SupTag(),
 313  
                 new SyncTag(),
 314  
                 new TableTag(),
 315  
                 new IgnoreTag("tbody"),
 316  
                 new TdTag(),
 317  
                 new IgnoreTag("tfoot"),
 318  
                 new SkipTag("textarea"),
 319  
                 new SkipTag("title"),
 320  
                 new IgnoreTag("thead"),
 321  
                 new ThTag(),
 322  
                 new TrTag(),
 323  
                 new TtTag(),
 324  
                 new UTag(),
 325  
                 new UlTag(),
 326  
                 new AnonymousTag("var"),
 327  
 
 328  
                 // ThML adds the following to Voyager
 329  
                 // Note: hymn.mod is not here nor are additional head&DC
 330  
                 // elements
 331  
                 new AnonymousTag("added"),
 332  
                 new AnonymousTag("attr"),
 333  
                 new AnonymousTag("argument"),
 334  
                 new CitationTag(),
 335  
                 new AnonymousTag("date"),
 336  
                 new AnonymousTag("deleted"),
 337  
                 new AnonymousTag("def"),
 338  
                 new AliasTag("div1", new DivTag(1)),
 339  
                 new AliasTag("div2", new DivTag(2)),
 340  
                 new AliasTag("div3", new DivTag(3)),
 341  
                 new AliasTag("div4", new DivTag(4)),
 342  
                 new AliasTag("div5", new DivTag(5)),
 343  
                 new AliasTag("div6", new DivTag(6)),
 344  
                 new ForeignTag(),
 345  
                 new AnonymousTag("index"),
 346  
                 new AnonymousTag("insertIndex"),
 347  
                 new AnonymousTag("glossary"),
 348  
                 new NoteTag(),
 349  
                 new NameTag(),
 350  
                 new PbTag(),
 351  
                 new AnonymousTag("scripCom"),
 352  
                 new AnonymousTag("scripContext"),
 353  
                 new ScripRefTag(),
 354  
                 new ScriptureTag(),
 355  
                 new TermTag(),
 356  
                 new AnonymousTag("unclear"),
 357  
                 new VerseTag(),
 358  
         };
 359  0
         for (int i = 0; i < tags.length; i++) {
 360  0
             Tag t = tags[i];
 361  0
             String tagName = t.getTagName();
 362  0
             TAG_MAP.put(tagName, t);
 363  
         }
 364  0
     }
 365  
 
 366  
 }