Coverage Report - org.crosswire.jsword.book.sword.SwordUtil
 
Classes in this File Line Coverage Branch Coverage Complexity
SwordUtil
0%
0/84
0%
0/48
2.722
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2005 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.jsword.book.sword;
 21  
 
 22  
 import java.io.IOException;
 23  
 import java.io.RandomAccessFile;
 24  
 import java.io.UnsupportedEncodingException;
 25  
 import java.net.URI;
 26  
 
 27  
 import org.crosswire.common.util.NetUtil;
 28  
 import org.crosswire.jsword.JSOtherMsg;
 29  
 import org.crosswire.jsword.book.BookException;
 30  
 import org.crosswire.jsword.book.BookMetaData;
 31  
 import org.slf4j.Logger;
 32  
 import org.slf4j.LoggerFactory;
 33  
 
 34  
 /**
 35  
  * Various utilities used by different Sword classes.
 36  
  * 
 37  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 38  
  * @author Joe Walker
 39  
  */
 40  
 public final class SwordUtil {
 41  
     /**
      * Prevent instantiation. The class is final and this constructor is
      * private, so the helpers can only be reached through the class name.
      */
 44  0
     private SwordUtil() {
 45  0
     }
 46  
 
 47  
     /**
 48  
      * Read a RandomAccessFile
 49  
      * 
 50  
      * @param raf
 51  
      *            The file to read
 52  
      * @param offset
 53  
      *            The start of the record to read
 54  
      * @param theSize
 55  
      *            The number of bytes to read
 56  
      * @return the read data
 57  
      * @throws IOException
 58  
      *             on error
 59  
      */
 60  
     protected static byte[] readRAF(RandomAccessFile raf, long offset, int theSize) throws IOException {
 61  0
         raf.seek(offset);
 62  0
         return readNextRAF(raf, theSize);
 63  
     }
 64  
 
 65  
     /**
 66  
      * Read a RandomAccessFile from the current location in the file.
 67  
      * 
 68  
      * @param raf
 69  
      *            The file to read
 70  
      * @param theSize
 71  
      *            The number of bytes to read
 72  
      * @return the read data
 73  
      * @throws IOException
 74  
      *             on error
 75  
      */
 76  
     protected static byte[] readNextRAF(RandomAccessFile raf, int theSize) throws IOException {
 77  0
         long offset = raf.getFilePointer();
 78  0
         int size = theSize;
 79  0
         long rafSize = raf.length();
 80  
 
 81  
         // It is common to have an entry that points to nothing.
 82  
         // That is the equivalent of an empty string.
 83  0
         if (size == 0) {
 84  0
             return new byte[0];
 85  
         }
 86  
 
 87  0
         if (size < 0) {
 88  0
             log.error("Nothing to read at offset = {} returning empty because negative size={}", Long.toString(offset), Integer.toString(size));
 89  0
             return new byte[0];
 90  
         }
 91  
 
 92  0
         if (offset >= rafSize) {
 93  0
             log.error("Attempt to read beyond end. offset={} size={} but raf.length={}", Long.toString(offset), Integer.toString(size), Long.toString(rafSize));
 94  0
             return new byte[0];
 95  
         }
 96  
 
 97  0
         if (offset + size > raf.length()) {
 98  0
             log.error("Need to reduce size to avoid EOFException. offset={} size={} but raf.length={}", Long.toString(offset), Integer.toString(size), Long.toString(rafSize));
 99  0
             size = (int) (raf.length() - offset);
 100  
         }
 101  
 
 102  0
         byte[] read = new byte[size];
 103  0
         raf.readFully(read);
 104  
 
 105  0
         return read;
 106  
     }
 107  
 
 108  
     /**
 109  
      * Writes "data" to a RandomAccessFile at the "offset" position
 110  
      * 
 111  
      * @param raf
 112  
      *            RandomAccessFile
 113  
      * @param offset
 114  
      *            offset to write at
 115  
      * @param data
 116  
      *            data to write
 117  
      * @throws IOException
 118  
      *             on error
 119  
      */
 120  
     protected static void writeRAF(RandomAccessFile raf, long offset, byte[] data) throws IOException {
 121  0
         raf.seek(offset);
 122  0
         writeNextRAF(raf, data);
 123  0
     }
 124  
 
 125  
     protected static void writeNextRAF(RandomAccessFile raf, byte[] data) throws IOException {
 126  0
         if (data == null) {
 127  0
             return;
 128  
         }
 129  0
         raf.write(data);
 130  0
     }
 131  
 
 132  
     /**
 133  
      * Read a RandomAccessFile until a particular byte is seen
 134  
      * 
 135  
      * @param raf
 136  
      *            The file to read
 137  
      * @param offset
 138  
      *            The start of the record to read
 139  
      * @param stopByte
 140  
      *            The point at which to stop reading
 141  
      * @return the read data
 142  
      * @throws IOException
 143  
      *             on error
 144  
      */
 145  
     protected static byte[] readUntilRAF(RandomAccessFile raf, int offset, byte stopByte) throws IOException {
 146  0
         raf.seek(offset);
 147  0
         return readUntilRAF(raf, stopByte);
 148  
     }
 149  
 
 150  
     /**
 151  
      * Read a RandomAccessFile until a particular byte is seen
 152  
      * 
 153  
      * @param raf
 154  
      *            The file to read
 155  
      * @param stopByte
 156  
      *            The point at which to stop reading
 157  
      * @return the read data
 158  
      * @throws IOException
 159  
      *             on error
 160  
      */
 161  
     protected static byte[] readUntilRAF(RandomAccessFile raf, byte stopByte) throws IOException {
 162  
         // The strategy used here is to read the file twice.
 163  
         // Once to determine how much to read and then getting the actual data.
 164  
         // It may be more efficient to incrementally build up a byte buffer.
 165  
         // Note: that growing a static array by 1 byte at a time is O(n**2)
 166  
         // This is negligible when the n is small, but prohibitive otherwise.
 167  0
         long offset = raf.getFilePointer();
 168  0
         int size = 0;
 169  
 
 170  0
         int nextByte = -1;
 171  
         do {
 172  0
             nextByte = raf.read();
 173  
 
 174  0
             size++;
 175  0
         } while (nextByte != -1 && nextByte != stopByte);
 176  
 
 177  
         // Note: we allow for nextByte == -1 to be included in size
 178  
         // so that readRAF will report EOF errors
 179  0
         return readRAF(raf, offset, size);
 180  
     }
 181  
 
 182  
     /**
 183  
      * Decode little endian data from a byte array. This assumes that the high
 184  
      * order bit is not set as this is used solely for an offset in a file in
 185  
      * bytes. For a practical limit, 2**31 is way bigger than any document that
 186  
      * we can have.
 187  
      * 
 188  
      * @param data
 189  
      *            the byte[] from which to read 4 bytes
 190  
      * @param offset
 191  
      *            the offset into the array
 192  
      * @return The decoded data
 193  
      */
 194  
     public static int decodeLittleEndian32(byte[] data, int offset) {
 195  
         // Convert from a byte to an int, but prevent sign extension.
 196  
         // So -16 becomes 240
 197  0
         int byte1 = data[0 + offset] & 0xFF;
 198  0
         int byte2 = (data[1 + offset] & 0xFF) << 8;
 199  0
         int byte3 = (data[2 + offset] & 0xFF) << 16;
 200  0
         int byte4 = (data[3 + offset] & 0xFF) << 24;
 201  
 
 202  0
         return byte4 | byte3 | byte2 | byte1;
 203  
     }
 204  
 
 205  
     /**
 206  
      * Encode little endian data from a byte array. This assumes that the number
 207  
      * fits in a Java integer. That is, the range of an unsigned C integer is
 208  
      * greater than a signed Java integer. For a practical limit, 2**31 is way
 209  
      * bigger than any document that we can have. If this ever doesn't work, use
 210  
      * a long for the number.
 211  
      * 
 212  
      * @param val
 213  
      *            the number to encode into little endian
 214  
      * @param data
 215  
      *            the byte[] from which to write 4 bytes
 216  
      * @param offset
 217  
      *            the offset into the array
 218  
      */
 219  
     protected static void encodeLittleEndian32(int val, byte[] data, int offset) {
 220  0
         data[0 + offset] = (byte) (val & 0xFF);
 221  0
         data[1 + offset] = (byte) ((val >> 8) & 0xFF);
 222  0
         data[2 + offset] = (byte) ((val >> 16) & 0xFF);
 223  0
         data[3 + offset] = (byte) ((val >> 24) & 0xFF);
 224  0
     }
 225  
 
 226  
     /**
 227  
      * Decode little endian data from a byte array
 228  
      * 
 229  
      * @param data
 230  
      *            the byte[] from which to read 2 bytes
 231  
      * @param offset
 232  
      *            the offset into the array
 233  
      * @return The decoded data
 234  
      */
 235  
     protected static int decodeLittleEndian16(byte[] data, int offset) {
 236  
         // Convert from a byte to an int, but prevent sign extension.
 237  
         // So -16 becomes 240
 238  0
         int byte1 = data[0 + offset] & 0xFF;
 239  0
         int byte2 = (data[1 + offset] & 0xFF) << 8;
 240  
 
 241  0
         return byte2 | byte1;
 242  
     }
 243  
 
 244  
     /**
 245  
      * Encode a 16-bit little endian from an integer. It is assumed that the
 246  
      * integer's lower 16 bits are the only that are set.
 247  
      * 
 248  
      * @param data
 249  
      *            the byte[] from which to write 2 bytes
 250  
      * @param offset
 251  
      *            the offset into the array
 252  
      */
 253  
     protected static void encodeLittleEndian16(int val, byte[] data, int offset) {
 254  0
         data[0 + offset] = (byte) (val & 0xFF);
 255  0
         data[1 + offset] = (byte) ((val >> 8) & 0xFF);
 256  0
     }
 257  
 
 258  
     /**
 259  
      * Find a byte of data in an array
 260  
      * 
 261  
      * @param data
 262  
      *            The array to search
 263  
      * @param sought
 264  
      *            The data to search for
 265  
      * @return The index of the found position or -1 if not found
 266  
      */
 267  
     protected static int findByte(byte[] data, byte sought) {
 268  0
         return findByte(data, 0, sought);
 269  
     }
 270  
 
 271  
     /**
 272  
      * Find a byte of data in an array
 273  
      * 
 274  
      * @param data
 275  
      *            The array to search
 276  
      * @param offset
 277  
      *            The position in the array to begin looking
 278  
      * @param sought
 279  
      *            The data to search for
 280  
      * @return The index of the found position or -1 if not found
 281  
      */
 282  
     protected static int findByte(byte[] data, int offset, byte sought) {
 283  0
         for (int i = offset; i < data.length; i++) {
 284  0
             if (data[i] == sought) {
 285  0
                 return i;
 286  
             }
 287  
         }
 288  
 
 289  0
         return -1;
 290  
     }
 291  
 
 292  
     /**
 293  
      * Transform a byte array into a string given the encoding. If the encoding
 294  
      * is bad then it just does it as a string.
 295  
      * Note: this may modify data. Don't use it to examine data.
 296  
      * 
 297  
      * @param key the key
 298  
      * @param data
 299  
      *            The byte array to be converted
 300  
      * @param charset
 301  
      *            The encoding of the byte array
 302  
      * @return a string that is UTF-8 internally
 303  
      */
 304  
     public static String decode(String key, byte[] data, String charset) {
 305  0
         return decode(key, data, 0, data.length, charset);
 306  
     }
 307  
 
 308  
     /**
 309  
      * Transform a portion of a byte array into a string given the encoding. If
 310  
      * the encoding is bad then it just does it as a string.
 311  
      * Note: this may modify data. Don't use it to examine data.
 312  
      * 
 313  
      * @param key the key
 314  
      * @param data
 315  
      *            The byte array to be converted
 316  
      * @param length
 317  
      *            The number of bytes to use.
 318  
      * @param charset
 319  
      *            The encoding of the byte array
 320  
      * @return a string that is UTF-8 internally
 321  
      */
 322  
     public static String decode(String key, byte[] data, int length, String charset) {
 323  0
         return decode(key, data, 0, length, charset);
 324  
     }
 325  
 
 326  
     /**
 327  
      * Transform a portion of a byte array starting at an offset into a string
 328  
      * given the encoding. If the encoding is bad then it just does it as a
 329  
      * string. Note: this may modify data. Don't use it to examine data.
 330  
      * 
 331  
      * @param key the key
 332  
      * @param data
 333  
      *            The byte array to be converted
 334  
      * @param offset
 335  
      *            The starting position in the byte array
 336  
      * @param length
 337  
      *            The number of bytes to use.
 338  
      * @param charset
 339  
      *            The encoding of the byte array
 340  
      * @return a string that is UTF-8 internally
 341  
      */
 342  
     public static String decode(String key, byte[] data, int offset, int length, String charset) {
 343  0
          if ("WINDOWS-1252".equals(charset)) {
 344  0
             clean1252(key, data, offset, length);
 345  
          }
 346  0
         String txt = "";
 347  
         try {
 348  0
             if (offset + length <= data.length) {
 349  0
                 txt = new String(data, offset, length, charset);
 350  
             }
 351  0
         } catch (UnsupportedEncodingException ex) {
 352  
             // It is impossible! In case, use system default...
 353  0
             log.error("{}: Encoding {} not supported.", key, charset, ex);
 354  0
             txt = new String(data, offset, length);
 355  0
         }
 356  
 
 357  0
         return txt;
 358  
     }
 359  
 
 360  
     /**
 361  
      * Remove rogue characters in the source. These are characters that are not
 362  
      * valid in cp1252 aka WINDOWS-1252 and in UTF-8 or are non-printing control
 363  
      * characters in the range of 0-32.
 364  
      */
 365  
     private static void clean1252(String key, byte[] data, int offset, int length) {
 366  0
         int end = offset + length;
 367  
         // make sure it doesn't go off the end
 368  0
         if (end > data.length) {
 369  0
             end = data.length;
 370  
         }
 371  0
         for (int i = offset; i < end; i++) {
 372  
             // between 0-32 only allow whitespace: \t, \n, \r, ' '
 373  
             // characters 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in
 374  
             // cp1252
 375  0
             int c = data[i] & 0xFF;
 376  0
             if ((c >= 0x00 && c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) || (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D)) {
 377  0
                 data[i] = 0x20;
 378  0
                 log.error("{} has bad character 0x{} at position {} in input.", key, Integer.toString(c, 16), Integer.toString(i));
 379  
             }
 380  
         }
 381  0
     }
 382  
 
 383  
     /**
 384  
      * Returns where the book should be located
 385  
      * @param bookMetaData meta information about the book
 386  
      * @return the URI locating the resource
 387  
      * @throws BookException thrown if an issue is encountered, e.g. missing data files.
 388  
      */
 389  
     public static URI getExpandedDataPath(BookMetaData bookMetaData) throws BookException {
 390  0
         URI loc = NetUtil.lengthenURI(bookMetaData.getLibrary(), bookMetaData.getProperty(SwordBookMetaData.KEY_DATA_PATH));
 391  
 
 392  0
         if (loc == null) {
 393  
             // FIXME(DMS): missing parameter
 394  0
             throw new BookException(JSOtherMsg.lookupText("Missing data files for old and new testaments in {0}."));
 395  
         }
 396  
 
 397  0
         return loc;
 398  
     }
 399  
 
 400  
     /**
      * The log stream: the SLF4J logger for this class, used by the static
      * helpers to report bad input and read problems.
      */
 403  0
     private static final Logger log = LoggerFactory.getLogger(SwordUtil.class);
 404  
 
 405  
 }