1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
package org.crosswire.jsword.book.filter.thml; |
21 | |
|
22 | |
import java.io.IOException; |
23 | |
import java.io.StringReader; |
24 | |
import java.util.List; |
25 | |
|
26 | |
import javax.xml.parsers.ParserConfigurationException; |
27 | |
import javax.xml.parsers.SAXParser; |
28 | |
import javax.xml.parsers.SAXParserFactory; |
29 | |
|
30 | |
import org.crosswire.common.xml.XMLUtil; |
31 | |
import org.crosswire.jsword.book.Book; |
32 | |
import org.crosswire.jsword.book.OSISUtil; |
33 | |
import org.crosswire.jsword.book.filter.SourceFilter; |
34 | |
import org.crosswire.jsword.passage.Key; |
35 | |
import org.jdom2.Content; |
36 | |
import org.jdom2.Element; |
37 | |
import org.slf4j.Logger; |
38 | |
import org.slf4j.LoggerFactory; |
39 | |
import org.xml.sax.InputSource; |
40 | |
import org.xml.sax.SAXException; |
41 | |
import org.xml.sax.SAXParseException; |
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | 0 | public class THMLFilter implements SourceFilter { |
57 | |
|
58 | |
|
59 | |
|
60 | |
public List<Content> toOSIS(Book book, Key key, String plain) { |
61 | 0 | Element ele = cleanParse(book, key, plain); |
62 | |
|
63 | 0 | if (ele == null) { |
64 | 0 | if (error instanceof SAXParseException) { |
65 | 0 | SAXParseException spe = (SAXParseException) error; |
66 | 0 | int colNumber = spe.getColumnNumber(); |
67 | 0 | int start = Math.max(0, colNumber - 40); |
68 | 0 | int stop = Math.min(finalInput.length(), colNumber + 40); |
69 | 0 | int here = stop - start; |
70 | 0 | log.warn("Could not fix {}({}) by {}: Error here({},{},{}): {}", |
71 | |
book.getInitials(), |
72 | |
key.getName(), |
73 | |
errorMessage, |
74 | |
Integer.toString(colNumber), |
75 | |
Integer.toString(finalInput.length()), |
76 | |
Integer.toString(here), |
77 | |
finalInput.substring(start, stop)); |
78 | 0 | } else { |
79 | 0 | log.warn("Could not fix {}({}) by {}: {}", |
80 | |
book.getInitials(), |
81 | |
key.getName(), |
82 | |
errorMessage, |
83 | |
error.getMessage()); |
84 | |
} |
85 | 0 | ele = OSISUtil.factory().createP(); |
86 | |
} |
87 | |
|
88 | 0 | return ele.removeContent(); |
89 | |
} |
90 | |
|
91 | |
@Override |
92 | |
public THMLFilter clone() { |
93 | 0 | THMLFilter clone = null; |
94 | |
try { |
95 | 0 | clone = (THMLFilter) super.clone(); |
96 | 0 | } catch (CloneNotSupportedException e) { |
97 | 0 | assert false : e; |
98 | 0 | } |
99 | 0 | return clone; |
100 | |
} |
101 | |
|
102 | |
private Element cleanParse(Book book, Key key, String plain) { |
103 | |
|
104 | 0 | String clean = XMLUtil.cleanAllEntities(plain); |
105 | 0 | Element ele = parse(book, key, clean, "cleaning entities"); |
106 | |
|
107 | 0 | if (ele == null) { |
108 | 0 | ele = cleanText(book, key, clean); |
109 | |
} |
110 | |
|
111 | 0 | return ele; |
112 | |
} |
113 | |
|
114 | |
private Element cleanText(Book book, Key key, String plain) { |
115 | |
|
116 | 0 | String clean = XMLUtil.cleanAllCharacters(plain); |
117 | 0 | Element ele = parse(book, key, clean, "cleaning text"); |
118 | |
|
119 | 0 | if (ele == null) { |
120 | 0 | ele = parse(book, key, XMLUtil.closeEmptyTags(clean), "closing empty tags"); |
121 | |
} |
122 | |
|
123 | 0 | if (ele == null) { |
124 | 0 | ele = cleanTags(book, key, clean); |
125 | |
} |
126 | |
|
127 | 0 | return ele; |
128 | |
} |
129 | |
|
130 | |
private Element cleanTags(Book book, Key key, String plain) { |
131 | |
|
132 | 0 | String clean = XMLUtil.cleanAllTags(plain); |
133 | 0 | return parse(book, key, clean, "cleaning tags"); |
134 | |
} |
135 | |
|
136 | |
private Element parse(Book book, Key key, String plain, String failMessage) { |
137 | 0 | Exception ex = null; |
138 | |
|
139 | |
|
140 | 0 | StringBuilder buf = new StringBuilder(15 + plain.length()); |
141 | 0 | buf.append('<').append(RootTag.TAG_ROOT).append('>').append(plain).append("</").append(RootTag.TAG_ROOT).append('>'); |
142 | 0 | finalInput = buf.toString(); |
143 | |
try { |
144 | 0 | StringReader in = new StringReader(finalInput); |
145 | 0 | InputSource is = new InputSource(in); |
146 | 0 | SAXParserFactory spf = SAXParserFactory.newInstance(); |
147 | 0 | SAXParser parser = spf.newSAXParser(); |
148 | 0 | CustomHandler handler = new CustomHandler(book, key); |
149 | |
|
150 | 0 | parser.parse(is, handler); |
151 | 0 | return handler.getRootElement(); |
152 | 0 | } catch (SAXParseException e) { |
153 | 0 | ex = e; |
154 | 0 | } catch (SAXException e) { |
155 | 0 | ex = e; |
156 | 0 | } catch (IOException e) { |
157 | 0 | ex = e; |
158 | 0 | } catch (ParserConfigurationException e) { |
159 | 0 | ex = e; |
160 | 0 | } catch (IllegalArgumentException e) { |
161 | |
|
162 | 0 | ex = e; |
163 | 0 | } |
164 | |
|
165 | 0 | errorMessage = failMessage; |
166 | 0 | error = ex; |
167 | 0 | return null; |
168 | |
} |
169 | |
|
170 | |
private String errorMessage; |
171 | |
private Exception error; |
172 | |
private String finalInput; |
173 | |
|
174 | |
|
175 | |
|
176 | |
|
177 | 0 | private static final Logger log = LoggerFactory.getLogger(THMLFilter.class); |
178 | |
} |