Myers Diff

Posted by Dustin Boston in .


Computes the differences between two sequences efficiently, commonly used in version control systems to show changes between file revisions.

Source Code Listing

code.ts

/**
 * @file Myers Diff
 * @author Dustin Boston
 * @see E. Myers, "An O(ND) difference algorithm and its variations,"
 *   _Algorithmica_, vol. 1, no. 1–4, pp. 251–266, 1986.
 * @see J. Coglan, "The Myers diff algorithm: part 1," The if works,
 *   https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/
 * @see R. Elder, "Myers Diff Algorithm - Code & Interactive Visualization,"
 *   Robert Elder Software Inc, https://blog.robertelder.org/diff-algorithm/
 */

import {NegativeArray} from "../negative-array/code.ts";

/**
 * Computes the diff between two strings using the Myers algorithm and returns
 * an edit script in the form `xD` or `xIb` where `x` is the `x` coordinate, `D`
 * is delete, `I` is insert, and `b` is the char inserted at`x`, for example
 * `1D,2D,3IB,6D,7IC`.
 *
 * @param original - The original string.
 * @param target - The target string to compare.
 * @returns An edit script.
 */
export function diff(original: string, target: string): string[] {
  const operations: string[] = [];
  const editTrace = shortestEdit(original, target);
  if (!editTrace) throw new Error("Invalid return.");

  // Backtrack yields from the end to the start.
  for (const [previousX, previousY, currentX, currentY] of backtrack(
    original.length,
    target.length,
    editTrace,
  )) {
    const targetChar = target[previousY];
    if (currentX === previousX) operations.unshift(`${currentX}I${targetChar}`);
    else if (currentY === previousY) operations.unshift(`${currentX}D`);
  }

  return operations;
}

/**
 * The Greedy LCS/SES Algorithm (figure 2 in the paper) Finds the shortest edit
 * path for two strings using the Myers diff algorithm.
 *
 * @param original - The original string.
 * @param target - The target string.
 * @returns The trace of V operations with each step taken.
 */
export function shortestEdit(
  original: string,
  target: string,
): number[][] | undefined {
  const originalLength = original.length;
  const targetLength = target.length;
  const maxLength = originalLength + targetLength;

  /** An array of the furthest reaching edit distances across the edit graph */
  const furthestReachingPoints: number[] = new NegativeArray<number>(
    2 * maxLength + 2,
  );
  furthestReachingPoints[1] = 0;

  /** Record of each operation */
  const trace: number[][] = new NegativeArray<number[]>();

  // Calculate distances (d)
  for (let distance = 0; distance <= maxLength + 1; distance++) {
    trace.push(new NegativeArray<number>(...furthestReachingPoints));

    // Calculate the operations (k)
    for (let diagonal = -distance; diagonal <= distance + 1; diagonal += 2) {
      let currentX =
        diagonal === -distance ||
        (diagonal !== distance &&
          furthestReachingPoints[diagonal - 1] <
            furthestReachingPoints[diagonal + 1])
          ? furthestReachingPoints[diagonal + 1] // Move down (deletion)
          : furthestReachingPoints[diagonal - 1] + 1; // Move right (insertion)

      let currentY = currentX - diagonal;
      while (
        currentX < originalLength &&
        currentY < targetLength &&
        original[currentX] === target[currentY]
      ) {
        currentX++;
        currentY++;
      }

      furthestReachingPoints[diagonal] = currentX;
      if (currentX >= originalLength && currentY >= targetLength) {
        return trace;
      }
    }
  }
}

/** Records a movement from the previous position to the current position. */
export type Move = [
  previousX: number,
  previousY: number,
  currentX: number,
  currentY: number,
];

/**
 * A generator function that backtracks from the computed trace to find the
 * actual diff path.
 *
 * @param originalLength - Length of the original string.
 * @param targetLength - Length of the target string.
 * @param trace - The edit trace
 * @yields Individual moves to transform the original string into the target
 *   string.
 */
export function* backtrack(
  originalLength: number,
  targetLength: number,
  trace: number[][],
): Generator<Move, void> {
  let currentX = originalLength;
  let currentY = targetLength;

  for (let distance = trace.length - 1; distance > -1; distance--) {
    /** Array of furthest reaching x distances across the edit graph */
    const furthestReachingPoints = trace[distance];
    /** The operation */
    const diagonal = currentX - currentY;

    /** Previous edit operation */
    const previousDiagonal =
      diagonal === -distance ||
      (diagonal !== distance &&
        furthestReachingPoints[diagonal - 1] <
          furthestReachingPoints[diagonal + 1])
        ? diagonal + 1
        : diagonal - 1;

    const previousX = furthestReachingPoints[previousDiagonal];
    const previousY = previousX - previousDiagonal;

    while (currentX > previousX && currentY > previousY) {
      yield [currentX - 1, currentY - 1, currentX, currentY];
      currentX -= 1;
      currentY -= 1;
    }

    if (distance > 0) yield [previousX, previousY, currentX, currentY];
    currentX = previousX;
    currentY = previousY;
  }
}

// Usage
(() => {
  const array = new NegativeArray(1, 2, 3, 4, 5);
  if (array[-1] !== 5) {
    throw new Error(`Expected 5 but got ${array[-1]}`);
  }

  // Diff between 'ABCABBA' and 'CBABAC'
  const abcActual = diff("ABCABBA", "CBABAC");
  const abcExpected = "1D,2D,3IB,6D,7IC";

  if (abcActual.toString() !== abcExpected) {
    throw new Error(`Expected ${abcExpected} but got ${abcActual.join(",")}`);
  }

  // Diff between 'CARE' and 'BEAR'
  const careActual = diff("CARE", "BEAR");
  const careExpected = "1D,1IB,1IE,4D";

  if (careActual.toString() !== careExpected) {
    throw new Error(`Expected ${careExpected} but got ${careActual.join(",")}`);
  }

  console.log("Diff passed");
})();

myers_diff_test.ts

import {diff} from "../../../../data/algorithms/src/myers_diff.ts";
import {assertEquals} from "$assert";

test("Diff between 'ABCABBA' and 'CBABAC'", () => {
  const result = diff("ABCABBA", "CBABAC");
  const expected = "1D,2D,3IB,6D,7IC";
  assertEquals(result.toString(), expected);
});

test("Diff between 'CARE' and 'BEAR'", () => {
  const result = diff("CARE", "BEAR");
  const expected = "1D,1IB,1IE,4D";
  assertEquals(result.toString(), expected);
});