| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| NumberShaper |
|
| 2.7777777777777777;2.778 |
| 1 | /** | |
| 2 | * Distribution License: | |
| 3 | * JSword is free software; you can redistribute it and/or modify it under | |
| 4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
| 5 | * as published by the Free Software Foundation. This program is distributed | |
| 6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
| 7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
| 8 | * See the GNU Lesser General Public License for more details. | |
| 9 | * | |
| 10 | * The License is available on the internet at: | |
| 11 | * http://www.gnu.org/copyleft/lgpl.html | |
| 12 | * or by writing to: | |
| 13 | * Free Software Foundation, Inc. | |
| 14 | * 59 Temple Place - Suite 330 | |
| 15 | * Boston, MA 02111-1307, USA | |
| 16 | * | |
| 17 | * © CrossWire Bible Society, 2007 - 2016 | |
| 18 | * | |
| 19 | */ | |
| 20 | package org.crosswire.common.icu; | |
| 21 | ||
| 22 | import java.io.Serializable; | |
| 23 | import java.util.Locale; | |
| 24 | ||
| 25 | import org.crosswire.jsword.internationalisation.LocaleProviderManager; | |
| 26 | ||
| 27 | /** | |
| 28 | * NumberShaper changes numbers from one number system to another. That is, the | |
| 29 | * numbers 0-9 have different representations in some locales. This means that | |
| 30 | * they have different code points. For example, Eastern Arabic numbers are from | |
| 31 | * \u06f0 - \u06f9. | |
| 32 | * <p> | |
| 33 | * Internally, numbers will be represented with 0-9, but externally they should | |
| 34 | * show as a user wishes. Further user input may, optionally, use the external | |
| 35 | * form. | |
| 36 | * </p> | |
| 37 | * <p> | |
| 38 | * This shaper is meant to have special behavior for Arabic numbers in the form | |
| 39 | * "12:34", as this is taken as chapter:verse. Normally a ':' is treated as a | |
| 40 | * numeric separator, which results in "12:34", but for verses it should be | |
| 41 | * "34:12". That is, Arabic numbers are left-to-right (even though the rest of | |
| 42 | * the script is right-to-left) and the ':' as a numeric separator does not | |
| 43 | * change that. The workaround, marking the ':' as a right-to-left character, | |
| 44 | * is currently commented out in transform(). | |
| 45 | * </p> | |
| 46 | * <p> | |
| 47 | * See also: com.ibm.icu.text.ArabicShaping | |
| 48 | * </p> | |
| 49 | * | |
| 50 | * @see java.awt.font.NumericShaper | |
| 51 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
| 52 | * @author DM Smith | |
| 53 | */ | |
| 54 | public class NumberShaper implements Serializable { | |
| 55 | /** | |
| 56 | * Create a shaper that is appropriate for the user's locale. | |
| 57 | */ | |
| 58 | 0 | public NumberShaper() { |
| 59 | 0 | this.nineShape = '\u0000'; |
| 60 | 0 | } |
| 61 | ||
| 62 | /** | |
| 63 | * Determine whether shaping is possible. | |
| 64 | * | |
| 65 | * @return whether shaping from 0-9 to the script's digits is possible. | |
| 66 | */ | |
| 67 | public boolean canShape() { | |
| 68 | // return arabicShaper != null || numericShaper != null || getNine() != | |
| 69 | // '9'; | |
| 70 | 0 | return getNine() != '9'; |
| 71 | } | |
| 72 | ||
| 73 | /** | |
| 74 | * Replace 0-9 in the input with representations appropriate for the script. | |
| 75 | * | |
| 76 | * @param input | |
| 77 | * the text to be transformed | |
| 78 | * @return the transformed text | |
| 79 | */ | |
| 80 | public String shape(String input) { | |
| 81 | 0 | if (input == null) { |
| 82 | 0 | return input; |
| 83 | } | |
| 84 | ||
| 85 | 0 | char[] src = input.toCharArray(); |
| 86 | 0 | boolean[] transformed = new boolean[1]; |
| 87 | 0 | transformed[0] = false; |
| 88 | 0 | char[] dest = shaped(src, transformed); |
| 89 | 0 | if (transformed[0]) { |
| 90 | 0 | return new String(dest); |
| 91 | } | |
| 92 | ||
| 93 | 0 | return input; |
| 94 | } | |
| 95 | ||
| 96 | /** | |
| 97 | * Determine whether shaping back to 0-9 is possible. | |
| 98 | * | |
| 99 | * @return whether shaping back to 0-9 is possible. | |
| 100 | */ | |
| 101 | public boolean canUnshape() { | |
| 102 | 0 | return getNine() != '9'; |
| 103 | } | |
| 104 | ||
| 105 | /** | |
| 106 | * Replace script representations of numbers with 0-9. | |
| 107 | * | |
| 108 | * @param input | |
| 109 | * the text to be transformed | |
| 110 | * @return the transformed text | |
| 111 | */ | |
| 112 | public String unshape(String input) { | |
| 113 | 0 | char[] src = input.toCharArray(); |
| 114 | 0 | boolean[] transformed = new boolean[1]; |
| 115 | 0 | transformed[0] = false; |
| 116 | 0 | char[] dest = unshaped(src, transformed); |
| 117 | 0 | if (transformed[0]) { |
| 118 | 0 | return new String(dest); |
| 119 | } | |
| 120 | ||
| 121 | 0 | return input; |
| 122 | } | |
| 123 | ||
| 124 | /** | |
| 125 | * Perform shaping back to 0-9. | |
| 126 | * @param src | |
| 127 | * the text to transform | |
| 128 | * @param transformed | |
| 129 | * a one-element array used as an output flag; element 0 is set to | |
| 130 | * true when at least one character is changed | |
| 131 | * @return the unshaped text | |
| 132 | */ | |
| 133 | private char[] unshaped(char[] src, boolean[] transformed) { | |
| 134 | 0 | int nine = getNine(); |
| 135 | 0 | if (nine == '9') { |
| 136 | 0 | return src; |
| 137 | } | |
| 138 | ||
| 139 | 0 | int zero = nine - 9; |
| 140 | 0 | return transform(src, zero, nine, '9' - nine, transformed); |
| 141 | } | |
| 142 | ||
| 143 | /** | |
| 144 | * @param src | |
| 145 | * the text to transform | |
| 146 | * @param transformed | |
| 147 | * a one-element array used as an output flag; element 0 is set to | |
| 148 | * true when at least one character is changed | |
| 149 | * @return the shaped string | |
| 150 | */ | |
| 151 | private char[] shaped(char[] src, boolean[] transformed) { | |
| 152 | 0 | char nine = getNine(); |
| 153 | 0 | if (nine == '9') { |
| 154 | 0 | return src; |
| 155 | } | |
| 156 | ||
| 157 | 0 | return transform(src, '0', '9', nine - '9', transformed); |
| 158 | } | |
| 159 | ||
| 160 | /** | |
| 161 | * Transform either to or from 0-9 and the script representation, returning | |
| 162 | * the result and true when at least one character is transformed. | |
| 163 | * | |
| 164 | * @param src | |
| 165 | * the text to transform | |
| 166 | * @param zero | |
| 167 | * zero in the source representation | |
| 168 | * @param nine | |
| 169 | * nine in the source representation | |
| 170 | * @param offset | |
| 171 | * the distance between zeros in the source and target | |
| 172 | * representation | |
| 173 | * @param transformed | |
| 174 | * a one-element array used as an output flag; element 0 is set to | |
| 175 | * true when at least one character is changed | |
| 176 | * @return the transformed text; this is the src array itself, modified in place | |
| 177 | */ | |
| 178 | private char[] transform(char[] src, int zero, int nine, int offset, boolean[] transformed) { | |
| 179 | 0 | char[] text = src; |
| 180 | ||
| 181 | // offset > 0 when we are going from 0-9 | |
| 182 | // FIXME(DMS): C:V should be shown as V:C in Farsi. | |
| 183 | /* | |
| 184 | int srcLen = text.length; | |
| 185 | int destLen = srcLen; | |
| 186 | if (offset > 0 && srcLen > 3) { | |
| 187 | // count the number of ':' flanked by '0' to '9' | |
| 188 | // each one of these is going | |
| 189 | // to be bracketed with RLO and PDF. | |
| 190 | for (int i = 1; i < srcLen - 1; i++) { | |
| 191 | char prevChar = text[i - 1]; | |
| 192 | char curChar = text[i]; | |
| 193 | char nextChar = text[i + 1]; | |
| 194 | if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') { | |
| 195 | destLen += 2; | |
| 196 | } | |
| 197 | } | |
| 198 | ||
| 199 | // Did we actually see a ':' | |
| 200 | if (destLen != srcLen) { | |
| 201 | transformed[0] = true; | |
| 202 | int sPos = 0; | |
| 203 | int dPos = 0; | |
| 204 | int stop = srcLen - 1; // ensure look-ahead | |
| 205 | char[] dest = new char[destLen]; | |
| 206 | dest[dPos++] = text[sPos++]; | |
| 207 | while (sPos < stop) { | |
| 208 | char prevChar = text[sPos - 1]; | |
| 209 | char nextChar = text[sPos + 1]; | |
| 210 | char curChar = text[sPos++]; | |
| 211 | if (curChar == ':' && prevChar >= '0' && prevChar <= '9' && nextChar >= '0' && nextChar <= '9') { | |
| 212 | dest[dPos++] = '\u202E'; // RLO | |
| 213 | dest[dPos++] = curChar; | |
| 214 | dest[dPos++] = '\u202C'; // PDF | |
| 215 | } else if (curChar >= zero && curChar <= nine) { | |
| 216 | dest[dPos++] = (char) (curChar + offset); | |
| 217 | } else { | |
| 218 | dest[dPos++] = curChar; | |
| 219 | } | |
| 220 | } | |
| 221 | // copy the rest | |
| 222 | while (sPos < srcLen) { | |
| 223 | dest[dPos++] = text[sPos++]; | |
| 224 | } | |
| 225 | return dest; | |
| 226 | } | |
| 227 | } | |
| 228 | // Are we going to '0' - '9' with embedded, specially marked ':' | |
| 229 | else if (offset < 0 && srcLen > 3) { | |
| 230 | for (int sPos = 0; sPos < srcLen - 2; sPos++) { | |
| 231 | if (text[sPos] == '\u202E' && text[sPos + 1] == ':' && text[sPos + 2] == '\u202C') { | |
| 232 | destLen -= 2; | |
| 233 | sPos += 2; | |
| 234 | } | |
| 235 | } | |
| 236 | ||
| 237 | // Did we actually see a '\u202E:\u202C' | |
| 238 | if (destLen != srcLen) { | |
| 239 | transformed[0] = true; | |
| 240 | char[] dest = new char[destLen]; | |
| 241 | int sPos = 0; | |
| 242 | int dPos = 0; | |
| 243 | int stop = srcLen - 2; // ensure look-ahead | |
| 244 | while (sPos < stop) { | |
| 245 | char curChar = text[sPos++]; | |
| 246 | if (curChar == '\u202E' && text[sPos] == ':' && text[sPos + 1] == '\u202C') { | |
| 247 | dest[dPos++] = ':'; | |
| 248 | sPos += 2; // skip the whole pattern | |
| 249 | } else if (curChar >= zero && curChar <= nine) { | |
| 250 | dest[dPos++] = (char) (curChar + offset); | |
| 251 | } else { | |
| 252 | dest[dPos++] = curChar; | |
| 253 | } | |
| 254 | } | |
| 255 | ||
| 256 | // copy the rest | |
| 257 | while (sPos < srcLen) { | |
| 258 | dest[dPos++] = text[sPos++]; | |
| 259 | } | |
| 260 | ||
| 261 | return dest; | |
| 262 | } | |
| 263 | } | |
| 264 | */ | |
| 265 | 0 | int len = src.length; |
| 266 | 0 | for (int i = 0; i < len; i++) { |
| 267 | 0 | char c = text[i]; |
| 268 | 0 | if (c >= zero && c <= nine) { |
| 269 | 0 | text[i] = (char) (c + offset); |
| 270 | 0 | transformed[0] = true; |
| 271 | } | |
| 272 | } | |
| 273 | ||
| 274 | 0 | return text; |
| 275 | } | |
| 276 | ||
| 277 | /** | |
| 278 | * Establish nine for the script. There are scripts that don't have zeroes. | |
| 279 | * | |
| 280 | * @return the representation for 9 in the script | |
| 281 | */ | |
| 282 | private char getNine() { | |
| 283 | 0 | if (nineShape == '\u0000') { |
| 284 | 0 | nineShape = '9'; |
| 285 | 0 | Locale locale = LocaleProviderManager.getLocale(); |
| 286 | 0 | if ("fa".equals(locale.getLanguage())) { |
| 287 | 0 | nineShape = '\u06f9'; |
| 288 | 0 | } else if ("ar".equals(locale.getLanguage())) { |
| 289 | 0 | nineShape = '\u0669'; |
| 290 | } | |
| 291 | } | |
| 292 | 0 | return nineShape; |
| 293 | } | |
| 294 | ||
| 295 | ||
| 296 | /** | |
| 297 | * Nine for this shaper. | |
| 298 | */ | |
| 299 | private char nineShape; | |
| 300 | ||
| 301 | /** | |
| 302 | * Serialization ID | |
| 303 | */ | |
| 304 | private static final long serialVersionUID = -8408052851113601251L; | |
| 305 | } |