| 1 | |
|
| 2 | |
|
| 3 | |
|
| 4 | |
|
| 5 | |
|
| 6 | |
|
| 7 | |
|
| 8 | |
|
| 9 | |
|
| 10 | |
|
| 11 | |
|
| 12 | |
|
| 13 | |
|
| 14 | |
|
| 15 | |
|
| 16 | |
|
| 17 | |
|
| 18 | |
|
| 19 | |
|
| 20 | |
package org.crosswire.jsword.book.filter.thml; |
| 21 | |
|
| 22 | |
import java.io.IOException; |
| 23 | |
import java.io.StringReader; |
| 24 | |
import java.util.List; |
| 25 | |
|
| 26 | |
import javax.xml.parsers.ParserConfigurationException; |
| 27 | |
import javax.xml.parsers.SAXParser; |
| 28 | |
import javax.xml.parsers.SAXParserFactory; |
| 29 | |
|
| 30 | |
import org.crosswire.common.xml.XMLUtil; |
| 31 | |
import org.crosswire.jsword.book.Book; |
| 32 | |
import org.crosswire.jsword.book.OSISUtil; |
| 33 | |
import org.crosswire.jsword.book.filter.SourceFilter; |
| 34 | |
import org.crosswire.jsword.passage.Key; |
| 35 | |
import org.jdom2.Content; |
| 36 | |
import org.jdom2.Element; |
| 37 | |
import org.slf4j.Logger; |
| 38 | |
import org.slf4j.LoggerFactory; |
| 39 | |
import org.xml.sax.InputSource; |
| 40 | |
import org.xml.sax.SAXException; |
| 41 | |
import org.xml.sax.SAXParseException; |
| 42 | |
|
| 43 | |
|
| 44 | |
|
| 45 | |
|
| 46 | |
|
| 47 | |
|
| 48 | |
|
| 49 | |
|
| 50 | |
|
| 51 | |
|
| 52 | |
|
| 53 | |
|
| 54 | |
|
| 55 | |
|
| 56 | 0 | public class THMLFilter implements SourceFilter { |
| 57 | |
|
| 58 | |
|
| 59 | |
|
| 60 | |
public List<Content> toOSIS(Book book, Key key, String plain) { |
| 61 | 0 | Element ele = cleanParse(book, key, plain); |
| 62 | |
|
| 63 | 0 | if (ele == null) { |
| 64 | 0 | if (error instanceof SAXParseException) { |
| 65 | 0 | SAXParseException spe = (SAXParseException) error; |
| 66 | 0 | int colNumber = spe.getColumnNumber(); |
| 67 | 0 | int start = Math.max(0, colNumber - 40); |
| 68 | 0 | int stop = Math.min(finalInput.length(), colNumber + 40); |
| 69 | 0 | int here = stop - start; |
| 70 | 0 | log.warn("Could not fix {}({}) by {}: Error here({},{},{}): {}", |
| 71 | |
book.getInitials(), |
| 72 | |
key.getName(), |
| 73 | |
errorMessage, |
| 74 | |
Integer.toString(colNumber), |
| 75 | |
Integer.toString(finalInput.length()), |
| 76 | |
Integer.toString(here), |
| 77 | |
finalInput.substring(start, stop)); |
| 78 | 0 | } else { |
| 79 | 0 | log.warn("Could not fix {}({}) by {}: {}", |
| 80 | |
book.getInitials(), |
| 81 | |
key.getName(), |
| 82 | |
errorMessage, |
| 83 | |
error.getMessage()); |
| 84 | |
} |
| 85 | 0 | ele = OSISUtil.factory().createP(); |
| 86 | |
} |
| 87 | |
|
| 88 | 0 | return ele.removeContent(); |
| 89 | |
} |
| 90 | |
|
| 91 | |
@Override |
| 92 | |
public THMLFilter clone() { |
| 93 | 0 | THMLFilter clone = null; |
| 94 | |
try { |
| 95 | 0 | clone = (THMLFilter) super.clone(); |
| 96 | 0 | } catch (CloneNotSupportedException e) { |
| 97 | 0 | assert false : e; |
| 98 | 0 | } |
| 99 | 0 | return clone; |
| 100 | |
} |
| 101 | |
|
| 102 | |
private Element cleanParse(Book book, Key key, String plain) { |
| 103 | |
|
| 104 | 0 | String clean = XMLUtil.cleanAllEntities(plain); |
| 105 | 0 | Element ele = parse(book, key, clean, "cleaning entities"); |
| 106 | |
|
| 107 | 0 | if (ele == null) { |
| 108 | 0 | ele = cleanText(book, key, clean); |
| 109 | |
} |
| 110 | |
|
| 111 | 0 | return ele; |
| 112 | |
} |
| 113 | |
|
| 114 | |
private Element cleanText(Book book, Key key, String plain) { |
| 115 | |
|
| 116 | 0 | String clean = XMLUtil.cleanAllCharacters(plain); |
| 117 | 0 | Element ele = parse(book, key, clean, "cleaning text"); |
| 118 | |
|
| 119 | 0 | if (ele == null) { |
| 120 | 0 | ele = parse(book, key, XMLUtil.closeEmptyTags(clean), "closing empty tags"); |
| 121 | |
} |
| 122 | |
|
| 123 | 0 | if (ele == null) { |
| 124 | 0 | ele = cleanTags(book, key, clean); |
| 125 | |
} |
| 126 | |
|
| 127 | 0 | return ele; |
| 128 | |
} |
| 129 | |
|
| 130 | |
private Element cleanTags(Book book, Key key, String plain) { |
| 131 | |
|
| 132 | 0 | String clean = XMLUtil.cleanAllTags(plain); |
| 133 | 0 | return parse(book, key, clean, "cleaning tags"); |
| 134 | |
} |
| 135 | |
|
| 136 | |
private Element parse(Book book, Key key, String plain, String failMessage) { |
| 137 | 0 | Exception ex = null; |
| 138 | |
|
| 139 | |
|
| 140 | 0 | StringBuilder buf = new StringBuilder(15 + plain.length()); |
| 141 | 0 | buf.append('<').append(RootTag.TAG_ROOT).append('>').append(plain).append("</").append(RootTag.TAG_ROOT).append('>'); |
| 142 | 0 | finalInput = buf.toString(); |
| 143 | |
try { |
| 144 | 0 | StringReader in = new StringReader(finalInput); |
| 145 | 0 | InputSource is = new InputSource(in); |
| 146 | 0 | SAXParserFactory spf = SAXParserFactory.newInstance(); |
| 147 | 0 | SAXParser parser = spf.newSAXParser(); |
| 148 | 0 | CustomHandler handler = new CustomHandler(book, key); |
| 149 | |
|
| 150 | 0 | parser.parse(is, handler); |
| 151 | 0 | return handler.getRootElement(); |
| 152 | 0 | } catch (SAXParseException e) { |
| 153 | 0 | ex = e; |
| 154 | 0 | } catch (SAXException e) { |
| 155 | 0 | ex = e; |
| 156 | 0 | } catch (IOException e) { |
| 157 | 0 | ex = e; |
| 158 | 0 | } catch (ParserConfigurationException e) { |
| 159 | 0 | ex = e; |
| 160 | 0 | } catch (IllegalArgumentException e) { |
| 161 | |
|
| 162 | 0 | ex = e; |
| 163 | 0 | } |
| 164 | |
|
| 165 | 0 | errorMessage = failMessage; |
| 166 | 0 | error = ex; |
| 167 | 0 | return null; |
| 168 | |
} |
| 169 | |
|
| 170 | |
private String errorMessage; |
| 171 | |
private Exception error; |
| 172 | |
private String finalInput; |
| 173 | |
|
| 174 | |
|
| 175 | |
|
| 176 | |
|
| 177 | 0 | private static final Logger log = LoggerFactory.getLogger(THMLFilter.class); |
| 178 | |
} |