Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
NumberShaper |
|
| 2.7777777777777777;2.778 |
1 | /** | |
2 | * Distribution License: | |
3 | * JSword is free software; you can redistribute it and/or modify it under | |
4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
5 | * as published by the Free Software Foundation. This program is distributed | |
6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
8 | * See the GNU Lesser General Public License for more details. | |
9 | * | |
10 | * The License is available on the internet at: | |
11 | * http://www.gnu.org/copyleft/lgpl.html | |
12 | * or by writing to: | |
13 | * Free Software Foundation, Inc. | |
14 | * 59 Temple Place - Suite 330 | |
15 | * Boston, MA 02111-1307, USA | |
16 | * | |
17 | * © CrossWire Bible Society, 2007 - 2016 | |
18 | * | |
19 | */ | |
20 | package org.crosswire.common.icu; | |
21 | ||
22 | import java.io.Serializable; | |
23 | import java.util.Locale; | |
24 | ||
25 | import org.crosswire.jsword.internationalisation.LocaleProviderManager; | |
26 | ||
27 | /** | |
28 | * NumberShaper changes numbers from one number system to another. That is, the | |
29 | * numbers 0-9 have different representations in some locales. This means that | |
30 | * they have different code points. For example, Eastern Arabic numbers are from | |
31 | * \u06f0 - \u06f9. | |
32 | * <p> | |
33 | * Internally, numbers will be represented with 0-9, but externally they should | |
34 | * show as a user wishes. Further user input may, optionally, use the external | |
35 | * form. | |
36 | * </p> | |
37 | * <p> | |
38 | * This shaper has special behavior for Arabic numbers that are in the form | |
39 | * "12:34" as this is taken as chapter:verse. Normally, a ':' is treated as a | |
40 | * numeric separator, this results in "12:34", but for verses it should be | |
41 | * "34:12". That is, Arabic, numbers are left-to-right (even though the rest of | |
42 | * the script is right-to-left) and the ':' as a numeric separator does not | |
43 | * change that. So to get around this we mark the ':' as a right-to-left | |
44 | * character. | |
45 | * </p> | |
46 | * <p> | |
47 | * See also: com.ibm.icu.text.ArabicShaping | |
48 | * </p> | |
49 | * | |
50 | * @see java.awt.font.NumericShaper | |
51 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
52 | * @author DM Smith | |
53 | */ | |
54 | public class NumberShaper implements Serializable { | |
55 | /** | |
56 | * Create a shaper that is appropriate for the user's locale. | |
57 | */ | |
58 | 0 | public NumberShaper() { |
59 | 0 | this.nineShape = '\u0000'; |
60 | 0 | } |
61 | ||
62 | /** | |
63 | * Determine whether shaping is possible. | |
64 | * | |
65 | * @return whether shaping back to 0-9 is possible. | |
66 | */ | |
67 | public boolean canShape() { | |
68 | // return arabicShaper != null || numericShaper != null || getNine() != | |
69 | // '9'; | |
70 | 0 | return getNine() != '9'; |
71 | } | |
72 | ||
73 | /** | |
74 | * Replace 0-9 in the input with representations appropriate for the script. | |
75 | * | |
76 | * @param input | |
77 | * the text to be transformed | |
78 | * @return the transformed text | |
79 | */ | |
80 | public String shape(String input) { | |
81 | 0 | if (input == null) { |
82 | 0 | return input; |
83 | } | |
84 | ||
85 | 0 | char[] src = input.toCharArray(); |
86 | 0 | boolean[] transformed = new boolean[1]; |
87 | 0 | transformed[0] = false; |
88 | 0 | char[] dest = shaped(src, transformed); |
89 | 0 | if (transformed[0]) { |
90 | 0 | return new String(dest); |
91 | } | |
92 | ||
93 | 0 | return input; |
94 | } | |
95 | ||
96 | /** | |
97 | * Determine whether shaping back to 0-9 is possible. | |
98 | * | |
99 | * @return whether shaping back to 0-9 is possible. | |
100 | */ | |
101 | public boolean canUnshape() { | |
102 | 0 | return getNine() != '9'; |
103 | } | |
104 | ||
105 | /** | |
106 | * Replace script representations of numbers with 0-9. | |
107 | * | |
108 | * @param input | |
109 | * the text to be transformed | |
110 | * @return the transformed text | |
111 | */ | |
112 | public String unshape(String input) { | |
113 | 0 | char[] src = input.toCharArray(); |
114 | 0 | boolean[] transformed = new boolean[1]; |
115 | 0 | transformed[0] = false; |
116 | 0 | char[] dest = unshaped(src, transformed); |
117 | 0 | if (transformed[0]) { |
118 | 0 | return new String(dest); |
119 | } | |
120 | ||
121 | 0 | return input; |
122 | } | |
123 | ||
124 | /** | |
125 | * Perform shaping back to 0-9. | |
126 | * @param src | |
127 | * the text to transform | |
128 | * @param transformed | |
129 | * an input parameter of one boolean that can hold whether there | |
130 | * was a transformation | |
131 | * @return the unshaped text | |
132 | */ | |
133 | private char[] unshaped(char[] src, boolean[] transformed) { | |
134 | 0 | int nine = getNine(); |
135 | 0 | if (nine == '9') { |
136 | 0 | return src; |
137 | } | |
138 | ||
139 | 0 | int zero = nine - 9; |
140 | 0 | return transform(src, zero, nine, '9' - nine, transformed); |
141 | } | |
142 | ||
143 | /** | |
144 | * @param src | |
145 | * the text to transform | |
146 | * @param transformed | |
147 | * an input parameter of one boolean that can hold whether there | |
148 | * was a transformation | |
149 | * @return the shaped string | |
150 | */ | |
151 | private char[] shaped(char[] src, boolean[] transformed) { | |
152 | 0 | char nine = getNine(); |
153 | 0 | if (nine == '9') { |
154 | 0 | return src; |
155 | } | |
156 | ||
157 | 0 | return transform(src, '0', '9', nine - '9', transformed); |
158 | } | |
159 | ||
160 | /** | |
161 | * Transform either to or from 0-9 and the script representation, returning | |
162 | * the result and true when at least one character is transformed. | |
163 | * | |
164 | * @param src | |
165 | * the text to transform | |
166 | * @param zero | |
167 | * zero in the source representation | |
168 | * @param nine | |
169 | * nine in the source representation | |
170 | * @param offset | |
171 | * the distance between zeros in the source and target | |
172 | * representation | |
173 | * @param transformed | |
174 | * an input parameter of one boolean that can hold whether there | |
175 | * was a transformation | |
176 | * @return the shaped string | |
177 | */ | |
178 | private char[] transform(char[] src, int zero, int nine, int offset, boolean[] transformed) { | |
179 | 0 | char[] text = src; |
180 | ||
181 | // offset > 0 when we are going from 0-9 | |
182 | // FIXME(DMS): C:V should be shown as V:C in Farsi. | |
183 | /* | |
184 | int srcLen = text.length; | |
185 | int destLen = srcLen; | |
186 | if (offset > 0 && srcLen > 3) { | |
187 | // count the number of ':' flanked by '0' to '9' | |
188 | // each one of these is going | |
189 | // to be bracketed with RLO and PDF. | |
190 | for (int i = 1; i < srcLen - 1; i++) { | |
191 | char prevChar = text[i - 1]; | |
192 | char curChar = text[i]; | |
193 | char nextChar = text[i + 1]; | |
194 | if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') { | |
195 | destLen += 2; | |
196 | } | |
197 | } | |
198 | ||
199 | // Did we actually see a ':' | |
200 | if (destLen != srcLen) { | |
201 | transformed[0] = true; | |
202 | int sPos = 0; | |
203 | int dPos = 0; | |
204 | int stop = srcLen - 1; // ensure look-ahead | |
205 | char[] dest = new char[destLen]; | |
206 | dest[dPos++] = text[sPos++]; | |
207 | while (sPos < stop) { | |
208 | char prevChar = text[sPos - 1]; | |
209 | char nextChar = text[sPos + 1]; | |
210 | char curChar = text[sPos++]; | |
211 | if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') { | |
212 | dest[dPos++] = '\u202E'; // RLO | |
213 | dest[dPos++] = curChar; | |
214 | dest[dPos++] = '\u202C'; // PDF | |
215 | } else if (curChar >= zero && curChar <= nine) { | |
216 | dest[dPos++] = (char) (curChar + offset); | |
217 | } else { | |
218 | dest[dPos++] = curChar; | |
219 | } | |
220 | } | |
221 | // copy the rest | |
222 | while (sPos < srcLen) { | |
223 | dest[dPos++] = text[sPos++]; | |
224 | } | |
225 | return dest; | |
226 | } | |
227 | } | |
228 | // Are we going to '0' - '9' with embedded, specially marked ':' | |
229 | else if (offset < 0 && srcLen > 3) { | |
230 | for (int sPos = 0; sPos < srcLen - 2; sPos++) { | |
231 | if (text[sPos] == '\u202E' && text[sPos + 1] == ':' && text[sPos + 2] == '\u202C') { | |
232 | destLen -= 2; | |
233 | sPos += 2; | |
234 | } | |
235 | } | |
236 | ||
237 | // Did we actually see a '\u202E:\u202C' | |
238 | if (destLen != srcLen) { | |
239 | transformed[0] = true; | |
240 | char[] dest = new char[destLen]; | |
241 | int sPos = 0; | |
242 | int dPos = 0; | |
243 | int stop = srcLen - 2; // ensure look-ahead | |
244 | while (sPos < stop) { | |
245 | char curChar = text[sPos++]; | |
246 | if (curChar == '\u202E' && text[sPos] == ':' && text[sPos + 1] == '\u202C') { | |
247 | dest[dPos++] = ':'; | |
248 | sPos += 2; // skip the whole pattern | |
249 | } else if (curChar >= zero && curChar <= nine) { | |
250 | dest[dPos++] = (char) (curChar + offset); | |
251 | } else { | |
252 | dest[dPos++] = curChar; | |
253 | } | |
254 | } | |
255 | ||
256 | // copy the rest | |
257 | while (sPos < srcLen) { | |
258 | dest[dPos++] = text[sPos++]; | |
259 | } | |
260 | ||
261 | return dest; | |
262 | } | |
263 | } | |
264 | */ | |
265 | 0 | int len = src.length; |
266 | 0 | for (int i = 0; i < len; i++) { |
267 | 0 | char c = text[i]; |
268 | 0 | if (c >= zero && c <= nine) { |
269 | 0 | text[i] = (char) (c + offset); |
270 | 0 | transformed[0] = true; |
271 | } | |
272 | } | |
273 | ||
274 | 0 | return text; |
275 | } | |
276 | ||
277 | /** | |
278 | * Establish nine for the script. There are scripts that don't have zeroes. | |
279 | * | |
280 | * @return the representation for 9 in the script | |
281 | */ | |
282 | private char getNine() { | |
283 | 0 | if (nineShape == '\u0000') { |
284 | 0 | nineShape = '9'; |
285 | 0 | Locale locale = LocaleProviderManager.getLocale(); |
286 | 0 | if ("fa".equals(locale.getLanguage())) { |
287 | 0 | nineShape = '\u06f9'; |
288 | 0 | } else if ("ar".equals(locale.getLanguage())) { |
289 | 0 | nineShape = '\u0669'; |
290 | } | |
291 | } | |
292 | 0 | return nineShape; |
293 | } | |
294 | ||
295 | ||
296 | /** | |
297 | * Nine for this shaper. | |
298 | */ | |
299 | private char nineShape; | |
300 | ||
301 | /** | |
302 | * Serialization ID | |
303 | */ | |
304 | private static final long serialVersionUID = -8408052851113601251L; | |
305 | } |