| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| SwordUtil |
|
| 2.7222222222222223;2.722 |
| 1 | /** | |
| 2 | * Distribution License: | |
| 3 | * JSword is free software; you can redistribute it and/or modify it under | |
| 4 | * the terms of the GNU Lesser General Public License, version 2.1 or later | |
| 5 | * as published by the Free Software Foundation. This program is distributed | |
| 6 | * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even | |
| 7 | * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
| 8 | * See the GNU Lesser General Public License for more details. | |
| 9 | * | |
| 10 | * The License is available on the internet at: | |
| 11 | * http://www.gnu.org/copyleft/lgpl.html | |
| 12 | * or by writing to: | |
| 13 | * Free Software Foundation, Inc. | |
| 14 | * 59 Temple Place - Suite 330 | |
| 15 | * Boston, MA 02111-1307, USA | |
| 16 | * | |
| 17 | * © CrossWire Bible Society, 2005 - 2016 | |
| 18 | * | |
| 19 | */ | |
| 20 | package org.crosswire.jsword.book.sword; | |
| 21 | ||
| 22 | import java.io.IOException; | |
| 23 | import java.io.RandomAccessFile; | |
| 24 | import java.io.UnsupportedEncodingException; | |
| 25 | import java.net.URI; | |
| 26 | ||
| 27 | import org.crosswire.common.util.NetUtil; | |
| 28 | import org.crosswire.jsword.JSOtherMsg; | |
| 29 | import org.crosswire.jsword.book.BookException; | |
| 30 | import org.crosswire.jsword.book.BookMetaData; | |
| 31 | import org.slf4j.Logger; | |
| 32 | import org.slf4j.LoggerFactory; | |
| 33 | ||
| 34 | /** | |
| 35 | * Various utilities used by different Sword classes. | |
| 36 | * | |
| 37 | * @see gnu.lgpl.License The GNU Lesser General Public License for details. | |
| 38 | * @author Joe Walker | |
| 39 | */ | |
| 40 | public final class SwordUtil { | |
| 41 | /** | |
| 42 | * Prevent instantiation | |
| 43 | */ | |
| 44 | 0 | private SwordUtil() { |
| 45 | 0 | } |
| 46 | ||
| 47 | /** | |
| 48 | * Read a RandomAccessFile | |
| 49 | * | |
| 50 | * @param raf | |
| 51 | * The file to read | |
| 52 | * @param offset | |
| 53 | * The start of the record to read | |
| 54 | * @param theSize | |
| 55 | * The number of bytes to read | |
| 56 | * @return the read data | |
| 57 | * @throws IOException | |
| 58 | * on error | |
| 59 | */ | |
| 60 | protected static byte[] readRAF(RandomAccessFile raf, long offset, int theSize) throws IOException { | |
| 61 | 0 | raf.seek(offset); |
| 62 | 0 | return readNextRAF(raf, theSize); |
| 63 | } | |
| 64 | ||
| 65 | /** | |
| 66 | * Read a RandomAccessFile from the current location in the file. | |
| 67 | * | |
| 68 | * @param raf | |
| 69 | * The file to read | |
| 70 | * @param theSize | |
| 71 | * The number of bytes to read | |
| 72 | * @return the read data | |
| 73 | * @throws IOException | |
| 74 | * on error | |
| 75 | */ | |
| 76 | protected static byte[] readNextRAF(RandomAccessFile raf, int theSize) throws IOException { | |
| 77 | 0 | long offset = raf.getFilePointer(); |
| 78 | 0 | int size = theSize; |
| 79 | 0 | long rafSize = raf.length(); |
| 80 | ||
| 81 | // It is common to have an entry that points to nothing. | |
| 82 | // That is the equivalent of an empty string. | |
| 83 | 0 | if (size == 0) { |
| 84 | 0 | return new byte[0]; |
| 85 | } | |
| 86 | ||
| 87 | 0 | if (size < 0) { |
| 88 | 0 | log.error("Nothing to read at offset = {} returning empty because negative size={}", Long.toString(offset), Integer.toString(size)); |
| 89 | 0 | return new byte[0]; |
| 90 | } | |
| 91 | ||
| 92 | 0 | if (offset >= rafSize) { |
| 93 | 0 | log.error("Attempt to read beyond end. offset={} size={} but raf.length={}", Long.toString(offset), Integer.toString(size), Long.toString(rafSize)); |
| 94 | 0 | return new byte[0]; |
| 95 | } | |
| 96 | ||
| 97 | 0 | if (offset + size > raf.length()) { |
| 98 | 0 | log.error("Need to reduce size to avoid EOFException. offset={} size={} but raf.length={}", Long.toString(offset), Integer.toString(size), Long.toString(rafSize)); |
| 99 | 0 | size = (int) (raf.length() - offset); |
| 100 | } | |
| 101 | ||
| 102 | 0 | byte[] read = new byte[size]; |
| 103 | 0 | raf.readFully(read); |
| 104 | ||
| 105 | 0 | return read; |
| 106 | } | |
| 107 | ||
| 108 | /** | |
| 109 | * Writes "data" to a RandomAccessFile at the "offset" position | |
| 110 | * | |
| 111 | * @param raf | |
| 112 | * RandomAccessFile | |
| 113 | * @param offset | |
| 114 | * offset to write at | |
| 115 | * @param data | |
| 116 | * data to write | |
| 117 | * @throws IOException | |
| 118 | * on error | |
| 119 | */ | |
| 120 | protected static void writeRAF(RandomAccessFile raf, long offset, byte[] data) throws IOException { | |
| 121 | 0 | raf.seek(offset); |
| 122 | 0 | writeNextRAF(raf, data); |
| 123 | 0 | } |
| 124 | ||
| 125 | protected static void writeNextRAF(RandomAccessFile raf, byte[] data) throws IOException { | |
| 126 | 0 | if (data == null) { |
| 127 | 0 | return; |
| 128 | } | |
| 129 | 0 | raf.write(data); |
| 130 | 0 | } |
| 131 | ||
| 132 | /** | |
| 133 | * Read a RandomAccessFile until a particular byte is seen | |
| 134 | * | |
| 135 | * @param raf | |
| 136 | * The file to read | |
| 137 | * @param offset | |
| 138 | * The start of the record to read | |
| 139 | * @param stopByte | |
| 140 | * The point at which to stop reading | |
| 141 | * @return the read data | |
| 142 | * @throws IOException | |
| 143 | * on error | |
| 144 | */ | |
| 145 | protected static byte[] readUntilRAF(RandomAccessFile raf, int offset, byte stopByte) throws IOException { | |
| 146 | 0 | raf.seek(offset); |
| 147 | 0 | return readUntilRAF(raf, stopByte); |
| 148 | } | |
| 149 | ||
| 150 | /** | |
| 151 | * Read a RandomAccessFile until a particular byte is seen | |
| 152 | * | |
| 153 | * @param raf | |
| 154 | * The file to read | |
| 155 | * @param stopByte | |
| 156 | * The point at which to stop reading | |
| 157 | * @return the read data | |
| 158 | * @throws IOException | |
| 159 | * on error | |
| 160 | */ | |
| 161 | protected static byte[] readUntilRAF(RandomAccessFile raf, byte stopByte) throws IOException { | |
| 162 | // The strategy used here is to read the file twice. | |
| 163 | // Once to determine how much to read and then getting the actual data. | |
| 164 | // It may be more efficient to incrementally build up a byte buffer. | |
| 165 | // Note: that growing a static array by 1 byte at a time is O(n**2) | |
| 166 | // This is negligible when the n is small, but prohibitive otherwise. | |
| 167 | 0 | long offset = raf.getFilePointer(); |
| 168 | 0 | int size = 0; |
| 169 | ||
| 170 | 0 | int nextByte = -1; |
| 171 | do { | |
| 172 | 0 | nextByte = raf.read(); |
| 173 | ||
| 174 | 0 | size++; |
| 175 | 0 | } while (nextByte != -1 && nextByte != stopByte); |
| 176 | ||
| 177 | // Note: we allow for nextByte == -1 to be included in size | |
| 178 | // so that readRAF will report EOF errors | |
| 179 | 0 | return readRAF(raf, offset, size); |
| 180 | } | |
| 181 | ||
| 182 | /** | |
| 183 | * Decode little endian data from a byte array. This assumes that the high | |
| 184 | * order bit is not set as this is used solely for an offset in a file in | |
| 185 | * bytes. For a practical limit, 2**31 is way bigger than any document that | |
| 186 | * we can have. | |
| 187 | * | |
| 188 | * @param data | |
| 189 | * the byte[] from which to read 4 bytes | |
| 190 | * @param offset | |
| 191 | * the offset into the array | |
| 192 | * @return The decoded data | |
| 193 | */ | |
| 194 | public static int decodeLittleEndian32(byte[] data, int offset) { | |
| 195 | // Convert from a byte to an int, but prevent sign extension. | |
| 196 | // So -16 becomes 240 | |
| 197 | 0 | int byte1 = data[0 + offset] & 0xFF; |
| 198 | 0 | int byte2 = (data[1 + offset] & 0xFF) << 8; |
| 199 | 0 | int byte3 = (data[2 + offset] & 0xFF) << 16; |
| 200 | 0 | int byte4 = (data[3 + offset] & 0xFF) << 24; |
| 201 | ||
| 202 | 0 | return byte4 | byte3 | byte2 | byte1; |
| 203 | } | |
| 204 | ||
| 205 | /** | |
| 206 | * Encode little endian data from a byte array. This assumes that the number | |
| 207 | * fits in a Java integer. That is, the range of an unsigned C integer is | |
| 208 | * greater than a signed Java integer. For a practical limit, 2**31 is way | |
| 209 | * bigger than any document that we can have. If this ever doesn't work, use | |
| 210 | * a long for the number. | |
| 211 | * | |
| 212 | * @param val | |
| 213 | * the number to encode into little endian | |
| 214 | * @param data | |
| 215 | * the byte[] from which to write 4 bytes | |
| 216 | * @param offset | |
| 217 | * the offset into the array | |
| 218 | */ | |
| 219 | protected static void encodeLittleEndian32(int val, byte[] data, int offset) { | |
| 220 | 0 | data[0 + offset] = (byte) (val & 0xFF); |
| 221 | 0 | data[1 + offset] = (byte) ((val >> 8) & 0xFF); |
| 222 | 0 | data[2 + offset] = (byte) ((val >> 16) & 0xFF); |
| 223 | 0 | data[3 + offset] = (byte) ((val >> 24) & 0xFF); |
| 224 | 0 | } |
| 225 | ||
| 226 | /** | |
| 227 | * Decode little endian data from a byte array | |
| 228 | * | |
| 229 | * @param data | |
| 230 | * the byte[] from which to read 2 bytes | |
| 231 | * @param offset | |
| 232 | * the offset into the array | |
| 233 | * @return The decoded data | |
| 234 | */ | |
| 235 | protected static int decodeLittleEndian16(byte[] data, int offset) { | |
| 236 | // Convert from a byte to an int, but prevent sign extension. | |
| 237 | // So -16 becomes 240 | |
| 238 | 0 | int byte1 = data[0 + offset] & 0xFF; |
| 239 | 0 | int byte2 = (data[1 + offset] & 0xFF) << 8; |
| 240 | ||
| 241 | 0 | return byte2 | byte1; |
| 242 | } | |
| 243 | ||
| 244 | /** | |
| 245 | * Encode a 16-bit little endian from an integer. It is assumed that the | |
| 246 | * integer's lower 16 bits are the only that are set. | |
| 247 | * | |
| 248 | * @param data | |
| 249 | * the byte[] from which to write 2 bytes | |
| 250 | * @param offset | |
| 251 | * the offset into the array | |
| 252 | */ | |
| 253 | protected static void encodeLittleEndian16(int val, byte[] data, int offset) { | |
| 254 | 0 | data[0 + offset] = (byte) (val & 0xFF); |
| 255 | 0 | data[1 + offset] = (byte) ((val >> 8) & 0xFF); |
| 256 | 0 | } |
| 257 | ||
| 258 | /** | |
| 259 | * Find a byte of data in an array | |
| 260 | * | |
| 261 | * @param data | |
| 262 | * The array to search | |
| 263 | * @param sought | |
| 264 | * The data to search for | |
| 265 | * @return The index of the found position or -1 if not found | |
| 266 | */ | |
| 267 | protected static int findByte(byte[] data, byte sought) { | |
| 268 | 0 | return findByte(data, 0, sought); |
| 269 | } | |
| 270 | ||
| 271 | /** | |
| 272 | * Find a byte of data in an array | |
| 273 | * | |
| 274 | * @param data | |
| 275 | * The array to search | |
| 276 | * @param offset | |
| 277 | * The position in the array to begin looking | |
| 278 | * @param sought | |
| 279 | * The data to search for | |
| 280 | * @return The index of the found position or -1 if not found | |
| 281 | */ | |
| 282 | protected static int findByte(byte[] data, int offset, byte sought) { | |
| 283 | 0 | for (int i = offset; i < data.length; i++) { |
| 284 | 0 | if (data[i] == sought) { |
| 285 | 0 | return i; |
| 286 | } | |
| 287 | } | |
| 288 | ||
| 289 | 0 | return -1; |
| 290 | } | |
| 291 | ||
| 292 | /** | |
| 293 | * Transform a byte array into a string given the encoding. If the encoding | |
| 294 | * is bad then it just does it as a string. | |
| 295 | * Note: this may modify data. Don't use it to examine data. | |
| 296 | * | |
| 297 | * @param key the key | |
| 298 | * @param data | |
| 299 | * The byte array to be converted | |
| 300 | * @param charset | |
| 301 | * The encoding of the byte array | |
| 302 | * @return a string that is UTF-8 internally | |
| 303 | */ | |
| 304 | public static String decode(String key, byte[] data, String charset) { | |
| 305 | 0 | return decode(key, data, 0, data.length, charset); |
| 306 | } | |
| 307 | ||
| 308 | /** | |
| 309 | * Transform a portion of a byte array into a string given the encoding. If | |
| 310 | * the encoding is bad then it just does it as a string. | |
| 311 | * Note: this may modify data. Don't use it to examine data. | |
| 312 | * | |
| 313 | * @param key the key | |
| 314 | * @param data | |
| 315 | * The byte array to be converted | |
| 316 | * @param length | |
| 317 | * The number of bytes to use. | |
| 318 | * @param charset | |
| 319 | * The encoding of the byte array | |
| 320 | * @return a string that is UTF-8 internally | |
| 321 | */ | |
| 322 | public static String decode(String key, byte[] data, int length, String charset) { | |
| 323 | 0 | return decode(key, data, 0, length, charset); |
| 324 | } | |
| 325 | ||
| 326 | /** | |
| 327 | * Transform a portion of a byte array starting at an offset into a string | |
| 328 | * given the encoding. If the encoding is bad then it just does it as a | |
| 329 | * string. Note: this may modify data. Don't use it to examine data. | |
| 330 | * | |
| 331 | * @param key the key | |
| 332 | * @param data | |
| 333 | * The byte array to be converted | |
| 334 | * @param offset | |
| 335 | * The starting position in the byte array | |
| 336 | * @param length | |
| 337 | * The number of bytes to use. | |
| 338 | * @param charset | |
| 339 | * The encoding of the byte array | |
| 340 | * @return a string that is UTF-8 internally | |
| 341 | */ | |
| 342 | public static String decode(String key, byte[] data, int offset, int length, String charset) { | |
| 343 | 0 | if ("WINDOWS-1252".equals(charset)) { |
| 344 | 0 | clean1252(key, data, offset, length); |
| 345 | } | |
| 346 | 0 | String txt = ""; |
| 347 | try { | |
| 348 | 0 | if (offset + length <= data.length) { |
| 349 | 0 | txt = new String(data, offset, length, charset); |
| 350 | } | |
| 351 | 0 | } catch (UnsupportedEncodingException ex) { |
| 352 | // It is impossible! In case, use system default... | |
| 353 | 0 | log.error("{}: Encoding {} not supported.", key, charset, ex); |
| 354 | 0 | txt = new String(data, offset, length); |
| 355 | 0 | } |
| 356 | ||
| 357 | 0 | return txt; |
| 358 | } | |
| 359 | ||
| 360 | /** | |
| 361 | * Remove rogue characters in the source. These are characters that are not | |
| 362 | * valid in cp1252 aka WINDOWS-1252 and in UTF-8 or are non-printing control | |
| 363 | * characters in the range of 0-32. | |
| 364 | */ | |
| 365 | private static void clean1252(String key, byte[] data, int offset, int length) { | |
| 366 | 0 | int end = offset + length; |
| 367 | // make sure it doesn't go off the end | |
| 368 | 0 | if (end > data.length) { |
| 369 | 0 | end = data.length; |
| 370 | } | |
| 371 | 0 | for (int i = offset; i < end; i++) { |
| 372 | // between 0-32 only allow whitespace: \t, \n, \r, ' ' | |
| 373 | // characters 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in | |
| 374 | // cp1252 | |
| 375 | 0 | int c = data[i] & 0xFF; |
| 376 | 0 | if ((c >= 0x00 && c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) || (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D)) { |
| 377 | 0 | data[i] = 0x20; |
| 378 | 0 | log.error("{} has bad character 0x{} at position {} in input.", key, Integer.toString(c, 16), Integer.toString(i)); |
| 379 | } | |
| 380 | } | |
| 381 | 0 | } |
| 382 | ||
| 383 | /** | |
| 384 | * Returns where the book should be located | |
| 385 | * @param bookMetaData meta information about the book | |
| 386 | * @return the URI locating the resource | |
| 387 | * @throws BookException thrown if an issue is encountered, e.g. missing data files. | |
| 388 | */ | |
| 389 | public static URI getExpandedDataPath(BookMetaData bookMetaData) throws BookException { | |
| 390 | 0 | URI loc = NetUtil.lengthenURI(bookMetaData.getLibrary(), bookMetaData.getProperty(SwordBookMetaData.KEY_DATA_PATH)); |
| 391 | ||
| 392 | 0 | if (loc == null) { |
| 393 | // FIXME(DMS): missing parameter | |
| 394 | 0 | throw new BookException(JSOtherMsg.lookupText("Missing data files for old and new testaments in {0}.")); |
| 395 | } | |
| 396 | ||
| 397 | 0 | return loc; |
| 398 | } | |
| 399 | ||
| 400 | /** | |
| 401 | * The log stream | |
| 402 | */ | |
| 403 | 0 | private static final Logger log = LoggerFactory.getLogger(SwordUtil.class); |
| 404 | ||
| 405 | } |