[jsword-svn] r1210 - trunk/jsword/src/main/java/org/crosswire/jsword/book/sword

dmsmith at www.crosswire.org dmsmith at www.crosswire.org
Fri Dec 15 14:24:07 MST 2006


Author: dmsmith
Date: 2006-12-15 14:24:06 -0700 (Fri, 15 Dec 2006)
New Revision: 1210

Added:
   trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java
   trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java
Modified:
   trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java
   trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java
   trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java
Log:
Initial Raw GenBook implementation.

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java	2006-12-12 22:31:43 UTC (rev 1209)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java	2006-12-15 21:24:06 UTC (rev 1210)
@@ -265,7 +265,7 @@
 
         protected boolean isBackendSupported(SwordBookMetaData sbmd)
         {
-            return false;
+            return true;
         }
 
         /**

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java	2006-12-12 22:31:43 UTC (rev 1209)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/GenBookBackend.java	2006-12-15 21:24:06 UTC (rev 1210)
@@ -21,8 +21,14 @@
  */
 package org.crosswire.jsword.book.sword;
 
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+
 import org.crosswire.common.activate.Activator;
 import org.crosswire.common.activate.Lock;
+import org.crosswire.common.util.FileUtil;
+import org.crosswire.common.util.Logger;
 import org.crosswire.jsword.book.BookException;
 import org.crosswire.jsword.passage.Key;
 
@@ -38,9 +44,17 @@
     /**
      * Simple ctor
      */
-    public GenBookBackend(SwordBookMetaData sbmd)
+    public GenBookBackend(SwordBookMetaData sbmd) throws BookException
     {
         super(sbmd);
+        String path = getExpandedDataPath();
+        bdtFile = new File(path + EXTENSION_BDT);
+
+        if (!bdtFile.canRead())
+        {
+            throw new BookException(Msg.READ_FAIL, new Object[] { bdtFile.getAbsolutePath() });
+        }
+
     }
 
     /* (non-Javadoc)
@@ -48,6 +62,15 @@
      */
     public final void activate(Lock lock)
     {
+        try
+        {
+            bdtRaf = new RandomAccessFile(bdtFile, FileUtil.MODE_READ);
+        }
+        catch (IOException ex)
+        {
+            log.error("failed to open files", ex); //$NON-NLS-1$
+            bdtRaf = null;
+        }
         active = true;
     }
 
@@ -56,6 +79,21 @@
      */
     public final void deactivate(Lock lock)
     {
+        try
+        {
+            if (bdtRaf != null)
+            {
+                bdtRaf.close();
+            }
+        }
+        catch (IOException ex)
+        {
+            log.error("failed to close gen book files", ex); //$NON-NLS-1$
+        }
+        finally
+        {
+            bdtRaf = null;
+        }
         active = false;
     }
 
@@ -84,7 +122,7 @@
     /* @Override */
     public boolean isSupported()
     {
-        return false;
+        return true;
     }
 
     /**
@@ -99,8 +137,27 @@
     }
 
     /**
+     * Raw GenBook file extensions
+     */
+    private static final String EXTENSION_BDT = ".bdt"; //$NON-NLS-1$
+
+    /**
+     * The raw data file
+     */
+    private File bdtFile;
+
+    /**
+     * The random access file for the raw data
+     */
+    private RandomAccessFile bdtRaf;
+
+    /**
      * Are we active
      */
     private boolean active;
 
+    /**
+     * The log stream
+     */
+    private static final Logger log = Logger.getLogger(GenBookBackend.class);
 }

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java	2006-12-12 22:31:43 UTC (rev 1209)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordUtil.java	2006-12-15 21:24:06 UTC (rev 1210)
@@ -55,13 +55,28 @@
     /**
      * Read a RandomAccessFile
      * @param raf The file to read
-     * @param offset The record to read
+     * @param offset The start of the record to read
      * @param theSize The number of bytes to read
      * @return the read data
      */
-    protected static byte[] readRAF(RandomAccessFile raf, int offset, int theSize) throws IOException
+    protected static byte[] readRAF(RandomAccessFile raf, long offset, int theSize) throws IOException
     {
+        raf.seek(offset);
+        return readNextRAF(raf, theSize);
+    }
+
+    /**
+     * Read a RandomAccessFile from the current location in the file.
+     * 
+     * @param raf The file to read
+     * @param theSize The number of bytes to read
+     * @return the read data
+     */
+    protected static byte[] readNextRAF(RandomAccessFile raf, int theSize) throws IOException
+    {
+        long offset = raf.getFilePointer();
         int size = theSize;
+
         if (offset + size > raf.length())
         {
             DataPolice.report("Need to reduce size to avoid EOFException. offset=" + offset + " size=" + size + " but raf.length=" + raf.length()); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
@@ -74,7 +89,6 @@
             return new byte[0];
         }
 
-        raf.seek(offset);
         byte[] read = new byte[size];
         raf.readFully(read);
 
@@ -82,6 +96,50 @@
     }
 
     /**
+     * Read a RandomAccessFile until a particular byte is seen
+     * @param raf The file to read
+     * @param offset The start of the record to read
+     * @param stopByte The point at which to stop reading
+     * @return the read data
+     */
+    protected static byte[] readUntilRAF(RandomAccessFile raf, int offset, byte stopByte) throws IOException
+    {
+        raf.seek(offset);
+        return readUntilRAF(raf, stopByte);
+    }
+
+    /**
+     * Read a RandomAccessFile, from the current location in the file,
+     * until a particular byte is seen
+     * @param raf The file to read
+     * @param stopByte The point at which to stop reading
+     * @return the read data
+     */
+    protected static byte[] readUntilRAF(RandomAccessFile raf, byte stopByte) throws IOException
+    {
+        // The strategy used here is to read the file twice.
+        // Once to determine how much to read and then getting the actual data.
+        // It may be more efficient to incrementally build up a byte buffer.
+        // Note that growing a fixed-size array by 1 byte at a time is O(n**2).
+        // This is negligible when the n is small, but prohibitive otherwise.
+        long offset = raf.getFilePointer();
+        int size = 0;
+
+        int nextByte = -1;
+        do
+        {
+            nextByte = raf.read();
+
+            size++;
+        }
+        while (nextByte != -1 && nextByte != stopByte);
+
+        // Note: we allow for nextByte == -1 to be included in size
+        // so that readRAF will report EOF errors
+        return readRAF(raf, offset, size);
+    }
+
+    /**
      * Decode little endian data from a byte array.
      * This assumes that the high order bit is not set as this is used solely
      * for an offset in a file in bytes. For a practical limit, 2**31 is way
@@ -196,20 +254,32 @@
      */
     public static String decode(Key key, byte[] data, String charset)
     {
+        return decode(key, data, data.length, charset);
+    }
+
+    /**
+     * Transform a byte array into a string given the encoding; if the encoding is unsupported, the platform default is used.
+     * @param data The byte array to be converted
+     * @param length The number of bytes, from the start of data, to convert
+     * @param charset The encoding of the byte array
+     * @return a string that is UTF-8 internally
+     */
+    public static String decode(Key key, byte[] data, int length, String charset)
+    {
         if ("WINDOWS-1252".equals(charset)) //$NON-NLS-1$
         {
-            clean1252(key, data);
+            clean1252(key, data, length);
         }
         String txt = ""; //$NON-NLS-1$
         try
         {
-            txt = new String(data, charset);
+            txt = new String(data, 0, length, charset);
         }
         catch (UnsupportedEncodingException ex)
         {
             // It is impossible! In case, use system default...
             log.error(key + ": Encoding: " + charset + " not supported", ex); //$NON-NLS-1$ //$NON-NLS-2$
-            txt = new String(data);
+            txt = new String(data, 0, length);
         }
 
         return txt;
@@ -223,7 +293,18 @@
      */
     public static void clean1252(Key key, byte[] data)
     {
-        for (int i = 0; i < data.length; i++)
+        clean1252(key, data, data.length);
+    }
+
+    /**
+     * Remove rogue characters in the source.
+     * These are characters that are not valid in cp1252 aka WINDOWS-1252
+     * and in UTF-8 or are non-printing control characters in the range
+     * of 0-32.
+     */
+    public static void clean1252(Key key, byte[] data, int length)
+    {
+        for (int i = 0; i < length; i++)
         {
             // between 0-32 only allow whitespace
             // characters 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in cp1252

Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeKeyIndex.java	2006-12-15 21:24:06 UTC (rev 1210)
@@ -0,0 +1,259 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ *     The copyright to this program is held by its authors.
+ *
+ * ID: $Id: SwordUtil.java 1169 2006-10-19 17:48:21 -0400 (Thu, 19 Oct 2006) dmsmith $
+ */
+package org.crosswire.jsword.book.sword;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.net.URL;
+
+import org.crosswire.common.activate.Activatable;
+import org.crosswire.common.activate.Activator;
+import org.crosswire.common.activate.Lock;
+import org.crosswire.common.util.FileUtil;
+import org.crosswire.common.util.Logger;
+import org.crosswire.common.util.NetUtil;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.passage.DefaultKeyList;
+import org.crosswire.jsword.passage.Key;
+
+/**
+ * TreeKeyIndex reads Sword index files that are path based.
+ * Paths are of the form /a/b/c, and can be of any depth.
+ * The ultimate output of a TreeKeyIndex is the offset and
+ * length of a chunk of data in another file that can be read.
+ * 
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class TreeKeyIndex implements Activatable
+{
+    /**
+     * Simple ctor
+     * @throws BookException 
+     */
+    public TreeKeyIndex(SwordBookMetaData sbmd) throws BookException
+    {
+        bmd = sbmd;
+
+        String path = getExpandedDataPath();
+
+        idxFile = new File(path + EXTENSION_INDEX);
+        datFile = new File(path + EXTENSION_DATA);
+
+        if (!idxFile.canRead())
+        {
+            throw new BookException(Msg.READ_FAIL, new Object[] { idxFile.getAbsolutePath() });
+        }
+
+        if (!datFile.canRead())
+        {
+            throw new BookException(Msg.READ_FAIL, new Object[] { datFile.getAbsolutePath() });
+        }
+
+    }
+
+    /**
+     * @return the root TreeNode for the module.
+     * @throws IOException
+     */
+    public TreeNode getRoot() throws IOException
+    {
+        return getTreeNode(getOffset(0));
+    }
+
+    /**
+     * Get the parent of the TreeNode.
+     * @param node the node being worked upon
+     * @return the parent node
+     * @throws IOException
+     */
+    public TreeNode getParent(TreeNode node) throws IOException
+    {
+        return getTreeNode(getOffset(node.getParent()));
+    }
+
+    /**
+     * Get the first child of the TreeNode.
+     * @param node the node being worked upon
+     * @return the first child node
+     * @throws IOException
+     */
+    public TreeNode getFirstChild(TreeNode node) throws IOException
+    {
+        return getTreeNode(getOffset(node.getFirstChild()));
+    }
+
+    /**
+     * Get the next sibling of the TreeNode.
+     * @param node the node being worked upon
+     * @return the next sibling node
+     * @throws IOException
+     */
+    public TreeNode getNextSibling(TreeNode node) throws IOException
+    {
+        return getTreeNode(getOffset(node.getNextSibling()));
+    }
+
+    /**
+     * The idx file contains offsets into the dat file.
+     * @param index the record id
+     * @return an offset into the dat file
+     * @throws IOException
+     */
+    private int getOffset(int index) throws IOException
+    {
+        if (index == -1)
+        {
+            return -1;
+        }
+
+        byte[] buffer = SwordUtil.readRAF(idxRaf, index, 4);
+        return SwordUtil.decodeLittleEndian32(buffer, 0);
+    }
+
+    /**
+     * Given an offset get the TreeNode from the dat file.
+     * @param offset start of a TreeNode record in the dat file.
+     * @return the TreeNode
+     * @throws IOException
+     */
+    private TreeNode getTreeNode(int offset) throws IOException
+    {
+        TreeNode node = new TreeNode(offset);
+
+        if (offset == -1)
+        {
+            return node;
+        }
+
+        byte[] buffer = SwordUtil.readRAF(datRaf, offset, 12);
+        node.setParent(SwordUtil.decodeLittleEndian32(buffer, 0));
+        node.setNextSibling(SwordUtil.decodeLittleEndian32(buffer, 4));
+        node.setFirstChild(SwordUtil.decodeLittleEndian32(buffer, 8));
+
+        buffer = SwordUtil.readUntilRAF(datRaf, (byte) 0);
+        int size = buffer.length;
+        if (buffer[size-1] == 0)
+        {
+            size--;
+        }
+
+        Key key = new DefaultKeyList(null, bmd.getName());
+        node.setName(SwordUtil.decode(key, buffer, size, bmd.getBookCharset()));
+
+        buffer = SwordUtil.readNextRAF(datRaf, 2);
+        int userDataSize = SwordUtil.decodeLittleEndian16(buffer, 0);
+        if (userDataSize > 0)
+        {
+            node.setUserData(SwordUtil.readNextRAF(datRaf, userDataSize));
+        }
+
+        return node;        
+    }
+
+    /* (non-Javadoc)
+     * @see org.crosswire.common.activate.Activatable#activate(org.crosswire.common.activate.Lock)
+     */
+    public final void activate(Lock lock)
+    {
+        try
+        {
+            idxRaf = new RandomAccessFile(idxFile, FileUtil.MODE_READ);
+            datRaf = new RandomAccessFile(datFile, FileUtil.MODE_READ);
+        }
+        catch (IOException ex)
+        {
+            log.error("failed to open files", ex); //$NON-NLS-1$
+            idxRaf = null;
+            datRaf = null;
+        }
+        active = true;
+    }
+
+    /* (non-Javadoc)
+     * @see org.crosswire.common.activate.Activatable#deactivate(org.crosswire.common.activate.Lock)
+     */
+    public final void deactivate(Lock lock)
+    {
+        try
+        {
+            if (idxRaf != null)
+            {
+                idxRaf.close();
+            }
+            if (datRaf != null)
+            {
+                datRaf.close();
+            }
+        }
+        catch (IOException ex)
+        {
+            log.error("failed to close nt files", ex); //$NON-NLS-1$
+        }
+        finally
+        {
+            idxRaf = null;
+            datRaf = null;
+        }
+        active = false;
+    }
+
+    /**
+     * Helper method so we can quickly activate ourselves on access
+     */
+    protected final void checkActive()
+    {
+        if (!active)
+        {
+            Activator.activate(this);
+        }
+    }
+
+    private String getExpandedDataPath() throws BookException
+    {
+        URL loc = NetUtil.lengthenURL(bmd.getLibrary(), bmd.getProperty(ConfigEntryType.DATA_PATH));
+
+        if (loc == null)
+        {
+            throw new BookException(Msg.MISSING_FILE);
+        }
+
+        return new File(loc.getFile()).getAbsolutePath();
+    }
+
+    private static final String EXTENSION_INDEX = ".idx"; //$NON-NLS-1$
+    private static final String EXTENSION_DATA = ".dat"; //$NON-NLS-1$
+
+    private SwordBookMetaData bmd;
+    private File idxFile;
+    private File datFile;
+    private RandomAccessFile idxRaf;
+    private RandomAccessFile datRaf;
+    private boolean active;
+
+    /**
+     * The log stream
+     */
+    private static final Logger log = Logger.getLogger(TreeKeyIndex.class);
+}

Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/TreeNode.java	2006-12-15 21:24:06 UTC (rev 1210)
@@ -0,0 +1,214 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ *     The copyright to this program is held by its authors.
+ *
+ * ID: $Id: LZSSBackend.java 1143 2006-10-04 22:07:23 -0400 (Wed, 04 Oct 2006) dmsmith $
+ */
+package org.crosswire.jsword.book.sword;
+
+import java.io.Serializable;
+
+/**
+ * A node that knows where the data is in the real file and where it is in
+ * relationship to other nodes.
+ * 
+ * @see gnu.lgpl.License for license details. The copyright to this program is
+ *      held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+class TreeNode implements Cloneable, Serializable
+{
+    /**
+     * TreeNode default ctor.
+     */
+    TreeNode()
+    {
+        this(-1);
+    }
+
+    /**
+     * Setup with the positions of data in the file
+     * 
+     * @param theOffset
+     */
+    TreeNode(int theOffset)
+    {
+        offset = theOffset;
+        name = ""; //$NON-NLS-1$
+        parent = -1;
+        nextSibling = -1;
+        firstChild = -1;
+        userData = new byte[0];
+    }
+
+    /**
+     * @return the offset
+     */
+    public int getOffset()
+    {
+        return offset;
+    }
+
+    /**
+     * @param newOffset the offset to set
+     */
+    public void setOffset(int newOffset)
+    {
+        offset = newOffset;
+    }
+
+    /**
+     * @return the name
+     */
+    public String getName()
+    {
+        return name;
+    }
+
+    /**
+     * @param newName the name to set
+     */
+    public void setName(String newName)
+    {
+        name = newName;
+    }
+
+    /**
+     * @return the userData
+     */
+    public byte[] getUserData()
+    {
+        return userData;
+    }
+
+    /**
+     * @param theUserData the userData to set
+     */
+    public void setUserData(byte[] theUserData)
+    {
+        userData = theUserData;
+    }
+
+    /**
+     * @return the firstChild
+     */
+    public int getFirstChild()
+    {
+        return firstChild;
+    }
+
+    /**
+     * @param firstChild the firstChild to set
+     */
+    public void setFirstChild(int firstChild)
+    {
+        this.firstChild = firstChild;
+    }
+
+    /**
+     * @return the nextSibling
+     */
+    public int getNextSibling()
+    {
+        return nextSibling;
+    }
+
+    /**
+     * @param nextSibling the nextSibling to set
+     */
+    public void setNextSibling(int nextSibling)
+    {
+        this.nextSibling = nextSibling;
+    }
+
+    /**
+     * @return the parent
+     */
+    public int getParent()
+    {
+        return parent;
+    }
+
+    /**
+     * @param parent the parent to set
+     */
+    public void setParent(int parent)
+    {
+        this.parent = parent;
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see java.lang.Object#clone()
+     */
+    public Object clone()
+    {
+        try
+        {
+            return super.clone();
+        }
+        catch (CloneNotSupportedException e)
+        {
+            assert false;
+        }
+
+        return null;
+    }
+
+    /**
+     * The offset of this TreeNode's record in the dat file.
+     */
+    private int               offset;
+
+    /**
+     * The name of this TreeNode. Note, this is not the path. To get the path,
+     * one needs to traverse to the parent to construct the path.
+     */
+    private String            name;
+
+    /**
+     * Optional, extra data associated with this TreeNode.
+     * For example, this is used to store offset and length for a raw genbook.
+     */
+    private byte[]            userData;
+
+    /**
+     * The offset of the parent's entry in the idx file. -1 means that there is no
+     * parent and this TreeNode is a root.
+     */
+    private int               parent;
+
+    /**
+     * The offset of the next sibling's entry in the idx file. -1 means that there is
+     * no next sibling.
+     */
+    private int               nextSibling;
+
+    /**
+     * The offset of the first child's entry in the idx file. -1 means that there are
+     * no children and this TreeNode is a leaf.
+     */
+    private int               firstChild;
+
+    /**
+     * Serialization ID
+     */
+    private static final long serialVersionUID = -2472601787934480762L;
+
+}




More information about the jsword-svn mailing list