[jsword-svn] r1038 - in trunk: common/src/main/java/org/crosswire/common/xml jsword/src/main/java/org/crosswire/jsword/examples

dmsmith at crosswire.org dmsmith at crosswire.org
Mon Mar 6 06:17:33 MST 2006


Author: dmsmith
Date: 2006-03-06 06:17:22 -0700 (Mon, 06 Mar 2006)
New Revision: 1038

Modified:
   trunk/common/src/main/java/org/crosswire/common/xml/XMLProcess.java
   trunk/jsword/src/main/java/org/crosswire/jsword/examples/ModToOsis.java
Log:
KJV 2003 fixup example.

Modified: trunk/common/src/main/java/org/crosswire/common/xml/XMLProcess.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/xml/XMLProcess.java	2006-03-05 21:45:11 UTC (rev 1037)
+++ trunk/common/src/main/java/org/crosswire/common/xml/XMLProcess.java	2006-03-06 13:17:22 UTC (rev 1038)
@@ -43,6 +43,14 @@
     }
 
     /**
+     * @return the {@link XMLFeatureSet} held by this process, so callers can set feature states on it
+     */
+    public XMLFeatureSet getFeatures()
+    {
+        return features;
+    }
+
+    /**
      * Process an xml file according to the arguments.
      * @param argv
      */
@@ -76,7 +84,6 @@
         }
 
         checker.initialize(argv);
-
         checker.parse(arg);
 
     }
@@ -119,7 +126,10 @@
         }
 
         features.setFeatureStates(argv);
+    }
 
+    private void bind()
+    {
         createParser(DEFAULT_PARSER_NAME);
         createAdapter(DEFAULT_HANDLER_NAME);
 
@@ -191,8 +201,9 @@
         }
     }
 
-    private void parse(String xmlFile)
+    public void parse(String xmlFile)
     {
+        bind();
         // parse file
         try
         {

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/examples/ModToOsis.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/examples/ModToOsis.java	2006-03-05 21:45:11 UTC (rev 1037)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/examples/ModToOsis.java	2006-03-06 13:17:22 UTC (rev 1038)
@@ -9,7 +9,10 @@
 import java.text.FieldPosition;
 import java.text.MessageFormat;
 import java.util.Iterator;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
+import org.crosswire.common.xml.XMLProcess;
 import org.crosswire.jsword.book.Book;
 import org.crosswire.jsword.book.BookException;
 import org.crosswire.jsword.book.BookMetaData;
@@ -80,6 +83,9 @@
                         buildBookClose(buf);
                         buildDocumentClose(buf);
                         writeDocument(buf, lastBookName);
+                        XMLProcess parser = new XMLProcess();
+                        parser.getFeatures().setFeatureStates("-s", "-f", "-va", "-dv"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
+                        parser.parse(lastBookName + ".xml"); //$NON-NLS-1$
                     }
 
                     buf = new StringBuffer();
@@ -91,6 +97,11 @@
                 {
                     if (currentChapter != 1)
                     {
+                        if (inPreVerse)
+                        {
+                            buildPreVerseClose(buf);
+                            inPreVerse = false;
+                        }
                         buildChapterClose(buf);
                     }
                     buildChapterOpen(buf, currentBookName, currentChapter);
@@ -98,25 +109,42 @@
 
                 /* Output the verse */
                 
-                /* TODO(DMS):
+                /*
                  * If the "raw" verse contains a "preverse" pull it out.
                  * If there were a former preverse then close the "section" div
                  * before outputting it before the verse.
                  */
                 boolean foundPreVerse = false;
-                String preVerseText = "title"; //$NON-NLS-1$
+                String preVerseText = ""; //$NON-NLS-1$
+                if (raw.contains(preVerseStart))
+                {
+                    Matcher preVerseStartMatcher = preVerseStartPattern.matcher(raw);
+                    if (preVerseStartMatcher.find())
+                    {
+                        int start = preVerseStartMatcher.start();
+                        Matcher preVerseEndMatcher = preVerseEndPattern.matcher(raw);
+                        if (preVerseEndMatcher.find(1 + preVerseStartMatcher.end()))
+                        {
+                            int end = preVerseEndMatcher.end();
+                            foundPreVerse = true;
+                            preVerseText = raw.substring(start, end);
+                            raw = raw.replace(preVerseText, ""); //$NON-NLS-1$
+                            preVerseText = preVerseText.substring(preVerseStart.length(), preVerseText.length()-preVerseEnd.length());
+                        }
+                    }
+                }
                 if (foundPreVerse)
                 {
                     if (inPreVerse)
                     {
                         buildPreVerseClose(buf);
                     }
-                    buildPreVerseOpen(buf, preVerseText); //$NON-NLS-1$
+                    buildPreVerseOpen(buf, cleanup(osisID, preVerseText)); //$NON-NLS-1$
                     inPreVerse = true;
                 }
 
                 buildVerseOpen(buf, osisID);
-                buf.append(raw);
+                buf.append(cleanup(osisID, raw));
                 buildVerseClose(buf, osisID);
 
                 lastChapter = currentChapter;
@@ -134,6 +162,7 @@
             buildBookClose(buf);
             buildDocumentClose(buf);
             writeDocument(buf, lastBookName);
+            new XMLProcess().parse(lastBookName + ".xml"); //$NON-NLS-1$
         }
         catch (BookException e)
         {
@@ -169,13 +198,13 @@
         docBuffer.append("\n  xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\""); //$NON-NLS-1$
         docBuffer.append("\n  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"");  //$NON-NLS-1$
         docBuffer.append("\n  xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.2.1.xsd\">"); //$NON-NLS-1$
-        docBuffer.append("\n    <osisText osisIDWork=\"{0}\" osisRefWork=\"defaultReferenceScheme\">"); //$NON-NLS-1$
+        docBuffer.append("\n    <osisText osisIDWork=\"{0}\" osisRefWork=\"defaultReferenceScheme\" xml:lang=\"en\">"); //$NON-NLS-1$
         docBuffer.append("\n    <header>"); //$NON-NLS-1$
         docBuffer.append("\n      <work osisWork=\"{0}\">"); //$NON-NLS-1$
         docBuffer.append("\n      <title>{1}</title>"); //$NON-NLS-1$
         docBuffer.append("\n      <identifier type=\"OSIS\">Bible.{0}</identifier>"); //$NON-NLS-1$
         docBuffer.append("\n      <refSystem>Bible.KJV</refSystem>"); //$NON-NLS-1$
-        docBuffer.append("\n      <scope>{2}</scope>"); //$NON-NLS-1$
+//        docBuffer.append("\n      <scope>{2}</scope>"); //$NON-NLS-1$
         docBuffer.append("\n    </work>"); //$NON-NLS-1$
         docBuffer.append("\n    <work osisWork=\"defaultReferenceScheme\">"); //$NON-NLS-1$
         docBuffer.append("\n      <refSystem>Bible.KJV</refSystem>"); //$NON-NLS-1$
@@ -242,6 +271,70 @@
         writer.write(buf.toString());
         writer.close();
     }
-    
+    /**
+     * Applies a fixed series of textual fix-ups to a fragment of module markup.
+     * @param osisID the id of the verse being output; only used to select the Matt.24.38 special case
+     * @param input the markup to repair
+     * @return the repaired markup
+     */
+    private String cleanup(String osisID, String input)
+    {
+        // Fix up bad notes: turn the trailing "/>" into ">", rename the type to x-strongsMarkup, drop name and date.
+        while (input.contains("note type=\"strongsMarkup\"")) //$NON-NLS-1$
+        {
+            Matcher badNoteMatcher = badNotePattern.matcher(input);
+            if (!badNoteMatcher.find())
+            {
+                break;
+            }
+            String note = badNoteMatcher.group();
+            String fixed = note.substring(0, note.length()-2) + '>';
+            fixed = fixed.replaceAll("strongsMarkup", "x-strongsMarkup"); //$NON-NLS-1$ //$NON-NLS-2$
+            fixed = fixed.replaceAll(" name=\"[^\"]*\"", ""); //$NON-NLS-1$ //$NON-NLS-2$
+            fixed = fixed.replaceAll(" date=\"[^\"]*\"", ""); //$NON-NLS-1$ //$NON-NLS-2$
+            input = input.replace(note, fixed);
+        }
+
+        // Fix up bad w tags
+        Matcher wMatcher = wPattern.matcher(input);
+        while (wMatcher.find())
+        {
+            String whole = wMatcher.group(0);
+            String fixed = whole.replaceAll(" (src |w |morph )", " "); //$NON-NLS-1$ //$NON-NLS-2$
+            fixed = fixed.replaceAll("\\|", " "); //$NON-NLS-1$ //$NON-NLS-2$
+            // x-StrongsMorph has to be rewritten before x-Strongs, which is a prefix of it and would consume it.
+            fixed = fixed.replaceAll("x-StrongsMorph", "morph"); //$NON-NLS-1$ //$NON-NLS-2$
+            fixed = fixed.replaceAll("x-Strongs", "strong"); //$NON-NLS-1$ //$NON-NLS-2$
+            fixed = fixed.replaceAll("x-Robinson", "robinson"); //$NON-NLS-1$ //$NON-NLS-2$
+            fixed = fixed.replaceAll("splitID=\"", "type=\"x-split\" subType=\"x-"); //$NON-NLS-1$ //$NON-NLS-2$
+            if (!whole.equals(fixed))
+            {
+                input = input.replace(whole, fixed);
+            }
+        }
+
+        input = input.replaceAll("\"transChange\"", "\"x-transChange\""); //$NON-NLS-1$ //$NON-NLS-2$
+        input = input.replaceAll("\"type:", "\"x-"); //$NON-NLS-1$ //$NON-NLS-2$
+        input = input.replaceAll("<resp\\s[^>]*/>", ""); //$NON-NLS-1$ //$NON-NLS-2$
+        input = input.replaceAll("changeType=\"", "type=\""); //$NON-NLS-1$ //$NON-NLS-2$
+        input = input.replaceAll("<p/>", "<lb/>"); //$NON-NLS-1$ //$NON-NLS-2$
+        if ("Matt.24.38".equals(osisID)) //$NON-NLS-1$
+        {
+            input = input.replace("<w src=\"18\" lemma=\"strong:G3739\" morph=\"robinson:R-GSF\"><w src=\"7\" lemma=\"strong:G3588\" morph=\"robinson:T-DPF\">that</w></w>", //$NON-NLS-1$
+                                  "<w src=\"18\" lemma=\"strong:G3739\" morph=\"robinson:R-GSF\"></w><w src=\"7\" lemma=\"strong:G3588\" morph=\"robinson:T-DPF\">that</w>"); //$NON-NLS-1$
+        }
+        return input;
+    }
     private static FieldPosition pos = new FieldPosition(0);
+    // Literal start and end tags of a pre-verse section title in the raw text, and patterns that locate them.
+    private static final String preVerseStart = "<title subtype=\"x-preverse\" type=\"section\">"; //$NON-NLS-1$
+    private static final String preVerseEnd = "</title>"; //$NON-NLS-1$
+    private static final Pattern preVerseStartPattern = Pattern.compile(preVerseStart);
+    private static final Pattern preVerseEndPattern = Pattern.compile(preVerseEnd);
+    // A self-closed note element carrying type, name and date attributes in that order; used by cleanup().
+    private static final String badNote = "<note type=\"[^\"]*\" name=\"[^\"]*\" date=\"[^\"]*\"/>"; //$NON-NLS-1$
+    private static final Pattern badNotePattern = Pattern.compile(badNote);
+    // A w tag whose element name is followed by whitespace, up to its closing '>'; used by cleanup().
+    private static final String wElement = "<w\\s[^>]*>"; //$NON-NLS-1$
+    private static final Pattern wPattern = Pattern.compile(wElement);
 }



More information about the jsword-svn mailing list