Coverage Report - org.crosswire.common.diff.DifferenceEngine
 
Classes in this File Line Coverage Branch Coverage Complexity
DifferenceEngine
0%
0/163
0%
0/100
9.333
 
 1  
 /**
 2  
  * Distribution License:
 3  
  * JSword is free software; you can redistribute it and/or modify it under
 4  
  * the terms of the GNU Lesser General Public License, version 2.1 or later
 5  
  * as published by the Free Software Foundation. This program is distributed
 6  
  * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 7  
  * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 8  
  * See the GNU Lesser General Public License for more details.
 9  
  *
 10  
  * The License is available on the internet at:
 11  
  *      http://www.gnu.org/copyleft/lgpl.html
 12  
  * or by writing to:
 13  
  *      Free Software Foundation, Inc.
 14  
  *      59 Temple Place - Suite 330
 15  
  *      Boston, MA 02111-1307, USA
 16  
  *
 17  
  * © CrossWire Bible Society, 2007 - 2016
 18  
  *
 19  
  */
 20  
 package org.crosswire.common.diff;
 21  
 
 22  
 import java.util.ArrayList;
 23  
 import java.util.HashMap;
 24  
 import java.util.HashSet;
 25  
 import java.util.List;
 26  
 import java.util.Map;
 27  
 import java.util.Set;
 28  
 
 29  
 /**
 30  
  * Builds a map of a baseline/source text and a changed/target text, navigating
 31  
  * it to determine differences.
 32  
  * 
 33  
  * Based on the LGPL Diff_Match_Patch v1.5 javascript of Neil Fraser, Copyright (C) 2006<br>
 34  
  * <a href="http://neil.fraser.name/software/diff_match_patch/">http://neil.fraser.name/software/diff_match_patch/</a>
 35  
  * 
 36  
  * @see gnu.lgpl.License The GNU Lesser General Public License for details.
 37  
  * @author DM Smith
 38  
  */
 39  0
 public class DifferenceEngine {
 40  
 
 41  
     /**
 42  
      * Empty Difference Engine, which won't find anything.
 43  
      */
 44  
     public DifferenceEngine() {
 45  0
         this("", "");
 46  0
     }
 47  
 
 48  
     /**
 49  
      * Find the differences between two texts. Simplifies the problem by
 50  
      * stripping any common prefix or suffix off the texts before diffing.
 51  
      * 
 52  
      * @param source
 53  
      *            Old string to be diffed
 54  
      * @param target
 55  
      *            New string to be diffed
 56  
      */
 57  0
     public DifferenceEngine(final String source, final String target) {
 58  0
         this.source = source;
 59  0
         this.target = target;
 60  0
         this.sourceLength = source.length();
 61  0
         this.targetLength = target.length();
 62  0
     }
 63  
 
 64  
     /**
 65  
      * Explore the intersection points between the two texts.
 66  
      * 
 67  
      * @return List of Difference objects or null if no diff available
 68  
      */
 69  
     public List<Difference> generate() {
 70  0
         long msEnd = System.currentTimeMillis() + (long) (timeout * 1000);
 71  0
         int maxD = (this.sourceLength + this.targetLength) / 2;
 72  0
         List<Set<String>> vMap1 = new ArrayList<Set<String>>();
 73  0
         List<Set<String>> vMap2 = new ArrayList<Set<String>>();
 74  0
         Map<Integer, Integer> v1 = new HashMap<Integer, Integer>();
 75  0
         Map<Integer, Integer> v2 = new HashMap<Integer, Integer>();
 76  0
         v1.put(Integer.valueOf(1), Integer.valueOf(0));
 77  0
         v2.put(Integer.valueOf(1), Integer.valueOf(0));
 78  
         int x;
 79  
         int y;
 80  
         String footstep; // Used to track overlapping paths.
 81  0
         Map<String, Integer> footsteps = new HashMap<String, Integer>();
 82  0
         boolean done = false;
 83  
         // If the total number of characters is odd, then the front path will
 84  
         // collide with the reverse path.
 85  0
         boolean front = (this.sourceLength + this.targetLength) % 2 != 0;
 86  0
         for (int d = 0; d < maxD; d++) {
 87  
             // Bail out if timeout reached.
 88  0
             if (timeout > 0 && System.currentTimeMillis() > msEnd) {
 89  0
                 return null;
 90  
             }
 91  
 
 92  
             // Walk the front path one step.
 93  0
             vMap1.add(new HashSet<String>()); // Adds at index 'd'.
 94  0
             for (int k = -d; k <= d; k += 2) {
 95  0
                 Integer kPlus1Key = Integer.valueOf(k + 1);
 96  0
                 Integer kPlus1Value = v1.get(kPlus1Key);
 97  0
                 Integer kMinus1Key = Integer.valueOf(k - 1);
 98  0
                 Integer kMinus1Value = v1.get(kMinus1Key);
 99  0
                 if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue()) {
 100  0
                     x = kPlus1Value.intValue();
 101  
                 } else {
 102  0
                     x = kMinus1Value.intValue() + 1;
 103  
                 }
 104  0
                 y = x - k;
 105  0
                 footstep = x + "," + y;
 106  0
                 if (front && (footsteps.containsKey(footstep))) {
 107  0
                     done = true;
 108  
                 }
 109  0
                 if (!front) {
 110  0
                     footsteps.put(footstep, Integer.valueOf(d));
 111  
                 }
 112  0
                 while (!done && x < this.sourceLength && y < this.targetLength && source.charAt(x) == target.charAt(y)) {
 113  0
                     x++;
 114  0
                     y++;
 115  0
                     footstep = x + "," + y;
 116  0
                     if (front && footsteps.containsKey(footstep)) {
 117  0
                         done = true;
 118  
                     }
 119  0
                     if (!front) {
 120  0
                         footsteps.put(footstep, Integer.valueOf(d));
 121  
                     }
 122  
                 }
 123  0
                 v1.put(Integer.valueOf(k), Integer.valueOf(x));
 124  0
                 Set<String> s = vMap1.get(d);
 125  0
                 s.add(x + "," + y);
 126  0
                 if (done) {
 127  
                     // Front path ran over reverse path.
 128  0
                     Integer footstepValue = footsteps.get(footstep);
 129  0
                     vMap2 = vMap2.subList(0, footstepValue.intValue() + 1);
 130  0
                     List<Difference> a = path1(vMap1, source.substring(0, x), target.substring(0, y));
 131  0
                     a.addAll(path2(vMap2, source.substring(x), target.substring(y)));
 132  0
                     return a;
 133  
                 }
 134  
             }
 135  
 
 136  
             // Walk the reverse path one step.
 137  0
             vMap2.add(new HashSet<String>()); // Adds at index 'd'.
 138  0
             for (int k = -d; k <= d; k += 2) {
 139  0
                 Integer kPlus1Key = Integer.valueOf(k + 1);
 140  0
                 Integer kPlus1Value = v2.get(kPlus1Key);
 141  0
                 Integer kMinus1Key = Integer.valueOf(k - 1);
 142  0
                 Integer kMinus1Value = v2.get(kMinus1Key);
 143  0
                 if (k == -d || k != d && kMinus1Value.intValue() < kPlus1Value.intValue()) {
 144  0
                     x = kPlus1Value.intValue();
 145  
                 } else {
 146  0
                     x = kMinus1Value.intValue() + 1;
 147  
                 }
 148  0
                 y = x - k;
 149  0
                 footstep = (this.sourceLength - x) + "," + (this.targetLength - y);
 150  0
                 if (!front && (footsteps.containsKey(footstep))) {
 151  0
                     done = true;
 152  
                 }
 153  0
                 if (front) {
 154  0
                     footsteps.put(footstep, Integer.valueOf(d));
 155  
                 }
 156  0
                 while (!done && x < this.sourceLength && y < this.targetLength && source.charAt(this.sourceLength - x - 1) == target.charAt(this.targetLength - y - 1)) {
 157  0
                     x++;
 158  0
                     y++;
 159  0
                     footstep = (this.sourceLength - x) + "," + (this.targetLength - y);
 160  0
                     if (!front && (footsteps.containsKey(footstep))) {
 161  0
                         done = true;
 162  
                     }
 163  0
                     if (front) {
 164  0
                         footsteps.put(footstep, Integer.valueOf(d));
 165  
                     }
 166  
                 }
 167  
 
 168  0
                 v2.put(Integer.valueOf(k), Integer.valueOf(x));
 169  0
                 Set<String> s = vMap2.get(d);
 170  0
                 s.add(x + "," + y);
 171  0
                 if (done) {
 172  
                     // Reverse path ran over front path.
 173  0
                     Integer footstepValue = footsteps.get(footstep);
 174  0
                     vMap1 = vMap1.subList(0, footstepValue.intValue() + 1);
 175  0
                     List<Difference> a = path1(vMap1, source.substring(0, this.sourceLength - x), target.substring(0, this.targetLength - y));
 176  0
                     a.addAll(path2(vMap2, source.substring(this.sourceLength - x), target.substring(this.targetLength - y)));
 177  0
                     return a;
 178  
                 }
 179  
             }
 180  
         }
 181  
 
 182  
         // Number of diffs equals number of characters, no commonality at all.
 183  0
         return null;
 184  
     }
 185  
 
 186  
     /**
 187  
      * Work from the middle back to the start to determine the path.
 188  
      * 
 189  
      * @param vMap
 190  
      *            List of path sets.
 191  
      * @param newSource
 192  
      *            Old string fragment to be diffed
 193  
      * @param newTarget
 194  
      *            New string fragment to be diffed
 195  
      * @return List of Difference objects
 196  
      */
 197  
     protected List<Difference> path1(final List<Set<String>> vMap, final String newSource, final String newTarget) {
 198  0
         List<Difference> path = new ArrayList<Difference>();
 199  0
         int x = newSource.length();
 200  0
         int y = newTarget.length();
 201  0
         EditType lastEditType = null;
 202  0
         for (int d = vMap.size() - 2; d >= 0; d--) {
 203  
             while (true) {
 204  0
                 Set<String> set = vMap.get(d);
 205  0
                 if (set.contains((x - 1) + "," + y)) {
 206  0
                     x--;
 207  0
                     if (EditType.DELETE.equals(lastEditType)) {
 208  0
                         Difference firstDiff = path.get(0);
 209  0
                         firstDiff.prependText(newSource.charAt(x));
 210  0
                     } else {
 211  0
                         path.add(0, new Difference(EditType.DELETE, newSource.substring(x, x + 1)));
 212  
                     }
 213  0
                     lastEditType = EditType.DELETE;
 214  0
                     break;
 215  0
                 } else if (set.contains(x + "," + (y - 1))) {
 216  0
                     y--;
 217  0
                     if (EditType.INSERT.equals(lastEditType)) {
 218  0
                         Difference firstDiff = path.get(0);
 219  0
                         firstDiff.prependText(newTarget.charAt(y));
 220  0
                     } else {
 221  0
                         path.add(0, new Difference(EditType.INSERT, newTarget.substring(y, y + 1)));
 222  
                     }
 223  0
                     lastEditType = EditType.INSERT;
 224  0
                     break;
 225  
                 } else {
 226  0
                     x--;
 227  0
                     y--;
 228  0
                     assert newSource.charAt(x) == newTarget.charAt(y) : "No diagonal.  Can't happen. (path1)";
 229  0
                     if (EditType.EQUAL.equals(lastEditType)) {
 230  0
                         Difference firstDiff = path.get(0);
 231  0
                         firstDiff.prependText(newSource.charAt(x));
 232  0
                     } else {
 233  0
                         path.add(0, new Difference(EditType.EQUAL, newSource.substring(x, x + 1)));
 234  
                     }
 235  0
                     lastEditType = EditType.EQUAL;
 236  
                 }
 237  0
             }
 238  
         }
 239  0
         return path;
 240  
     }
 241  
 
 242  
     /**
 243  
      * Work from the middle back to the end to determine the path.
 244  
      * 
 245  
      * @param vMap
 246  
      *            List of path sets.
 247  
      * @param newSource
 248  
      *            Old string fragment to be diffed
 249  
      * @param newTarget
 250  
      *            New string fragment to be diffed
 251  
      * @return List of Difference objects
 252  
      */
 253  
     protected List<Difference> path2(final List<Set<String>> vMap, final String newSource, final String newTarget) {
 254  0
         List<Difference> path = new ArrayList<Difference>();
 255  
 
 256  
         //cached versions of length from immutable strings
 257  0
         final int cachedNewSourceLength = newSource.length();
 258  0
         final int cachedNewTargetLength = newTarget.length();
 259  
 
 260  0
         int x = cachedNewSourceLength;
 261  0
         int y = cachedNewTargetLength;
 262  0
         EditType lastEditType = null;
 263  0
         for (int d = vMap.size() - 2; d >= 0; d--) {
 264  
             while (true) {
 265  0
                 Set<String> set = vMap.get(d);
 266  0
                 if (set.contains((x - 1) + "," + y)) {
 267  0
                     x--;
 268  0
                     if (EditType.DELETE.equals(lastEditType)) {
 269  0
                         Difference lastDiff = path.get(path.size() - 1);
 270  0
                         lastDiff.appendText(newSource.charAt(cachedNewSourceLength - x - 1));
 271  0
                     } else {
 272  0
                         path.add(new Difference(EditType.DELETE, newSource.substring(cachedNewSourceLength - x - 1, cachedNewSourceLength - x)));
 273  
                     }
 274  0
                     lastEditType = EditType.DELETE;
 275  0
                     break;
 276  0
                 } else if (set.contains(x + "," + (y - 1))) {
 277  0
                     y--;
 278  0
                     if (EditType.INSERT.equals(lastEditType)) {
 279  0
                         Difference lastDiff = path.get(path.size() - 1);
 280  0
                         lastDiff.appendText(newTarget.charAt(cachedNewTargetLength - y - 1));
 281  0
                     } else {
 282  0
                         path.add(new Difference(EditType.INSERT, newTarget.substring(cachedNewTargetLength - y - 1, cachedNewTargetLength - y)));
 283  
                     }
 284  0
                     lastEditType = EditType.INSERT;
 285  0
                     break;
 286  
                 } else {
 287  0
                     x--;
 288  0
                     y--;
 289  0
                     assert newSource.charAt(cachedNewSourceLength - x - 1) == newTarget.charAt(cachedNewTargetLength - y - 1) : "No diagonal.  Can't happen. (path2)";
 290  
 
 291  0
                     if (EditType.EQUAL.equals(lastEditType)) {
 292  0
                         Difference lastDiff = path.get(path.size() - 1);
 293  0
                         lastDiff.appendText(newSource.charAt(cachedNewSourceLength - x - 1));
 294  0
                     } else {
 295  0
                         path.add(new Difference(EditType.EQUAL, newSource.substring(cachedNewSourceLength - x - 1, cachedNewSourceLength - x)));
 296  
                     }
 297  0
                     lastEditType = EditType.EQUAL;
 298  
                 }
 299  0
             }
 300  
         }
 301  0
         return path;
 302  
     }
 303  
 
 304  
     /**
 305  
      * Set the timeout for the diff operation. The default is 1 second. Use 0
 306  
      * for infinity.
 307  
      * 
 308  
      * @param newTimeout the new timeout
 309  
      */
 310  
     public static void setTimeout(float newTimeout) {
 311  0
         timeout = newTimeout;
 312  0
     }
 313  
 
 314  
     /**
 315  
      * Number of seconds to map a diff before giving up. Use 0 for infinity.
 316  
      */
 317  
     private static final float TIMEOUT = 1.0f;
 318  0
     private static float timeout = TIMEOUT;
 319  
     /**
 320  
      * Made final because we now rely on caching the string lengths (they could be cached
 321  
      * at method level if require non-final in future)
 322  
      * The baseline text.
 323  
      */
 324  
     private final String source;
 325  
 
 326  
     /**
 327  
      * The changed text.
 328  
      * 
 329  
      */
 330  
     private final String target;
 331  
 
 332  
     // cached versions of source and target length, 
 333  
     // as for roughly 1 call to generate(), was causing 117'000 calls to length()
 334  
     private final int targetLength;
 335  
     private final int sourceLength;
 336  
 
 337  
 }