Myers Diff
Posted by Dustin Boston in Algorithm.
Computes the differences between two sequences efficiently, commonly used in version control systems to show changes between file revisions.
Source Code Listing
code.ts
/**
* @file Myers Diff
* @author Dustin Boston
* @see E. Myers, "An O(ND) difference algorithm and its variations,"
* _Algorithmica_, vol. 1, no. 1–4, pp. 251–266, 1986.
* @see J. Coglan, "The Myers diff algorithm: part 1," The if works,
* https://blog.jcoglan.com/2017/02/12/the-myers-diff-algorithm-part-1/
* @see R. Elder, "Myers Diff Algorithm - Code & Interactive Visualization,"
* Robert Elder Software Inc, https://blog.robertelder.org/diff-algorithm/
*/
import {NegativeArray} from "../negative-array/code.ts";
/**
* Computes the diff between two strings using the Myers algorithm and returns
* an edit script in the form `xD` or `xIb` where `x` is the `x` coordinate, `D`
* is delete, `I` is insert, and `b` is the char inserted at`x`, for example
* `1D,2D,3IB,6D,7IC`.
*
* @param original - The original string.
* @param target - The target string to compare.
* @returns An edit script.
*/
export function diff(original: string, target: string): string[] {
const operations: string[] = [];
const editTrace = shortestEdit(original, target);
if (!editTrace) throw new Error("Invalid return.");
// Backtrack yields from the end to the start.
for (const [previousX, previousY, currentX, currentY] of backtrack(
original.length,
target.length,
editTrace,
)) {
const targetChar = target[previousY];
if (currentX === previousX) operations.unshift(`${currentX}I${targetChar}`);
else if (currentY === previousY) operations.unshift(`${currentX}D`);
}
return operations;
}
/**
* The Greedy LCS/SES Algorithm (figure 2 in the paper) Finds the shortest edit
* path for two strings using the Myers diff algorithm.
*
* @param original - The original string.
* @param target - The target string.
* @returns The trace of V operations with each step taken.
*/
export function shortestEdit(
original: string,
target: string,
): number[][] | undefined {
const originalLength = original.length;
const targetLength = target.length;
const maxLength = originalLength + targetLength;
/** An array of the furthest reaching edit distances across the edit graph */
const furthestReachingPoints: number[] = new NegativeArray<number>(
2 * maxLength + 2,
);
furthestReachingPoints[1] = 0;
/** Record of each operation */
const trace: number[][] = new NegativeArray<number[]>();
// Calculate distances (d)
for (let distance = 0; distance <= maxLength + 1; distance++) {
trace.push(new NegativeArray<number>(...furthestReachingPoints));
// Calculate the operations (k)
for (let diagonal = -distance; diagonal <= distance + 1; diagonal += 2) {
let currentX =
diagonal === -distance ||
(diagonal !== distance &&
furthestReachingPoints[diagonal - 1] <
furthestReachingPoints[diagonal + 1])
? furthestReachingPoints[diagonal + 1] // Move down (deletion)
: furthestReachingPoints[diagonal - 1] + 1; // Move right (insertion)
let currentY = currentX - diagonal;
while (
currentX < originalLength &&
currentY < targetLength &&
original[currentX] === target[currentY]
) {
currentX++;
currentY++;
}
furthestReachingPoints[diagonal] = currentX;
if (currentX >= originalLength && currentY >= targetLength) {
return trace;
}
}
}
}
/** Records a movement from the previous position to the current position. */
export type Move = [
previousX: number,
previousY: number,
currentX: number,
currentY: number,
];
/**
* A generator function that backtracks from the computed trace to find the
* actual diff path.
*
* @param originalLength - Length of the original string.
* @param targetLength - Length of the target string.
* @param trace - The edit trace
* @yields Individual moves to transform the original string into the target
* string.
*/
export function* backtrack(
originalLength: number,
targetLength: number,
trace: number[][],
): Generator<Move, void> {
let currentX = originalLength;
let currentY = targetLength;
for (let distance = trace.length - 1; distance > -1; distance--) {
/** Array of furthest reaching x distances across the edit graph */
const furthestReachingPoints = trace[distance];
/** The operation */
const diagonal = currentX - currentY;
/** Previous edit operation */
const previousDiagonal =
diagonal === -distance ||
(diagonal !== distance &&
furthestReachingPoints[diagonal - 1] <
furthestReachingPoints[diagonal + 1])
? diagonal + 1
: diagonal - 1;
const previousX = furthestReachingPoints[previousDiagonal];
const previousY = previousX - previousDiagonal;
while (currentX > previousX && currentY > previousY) {
yield [currentX - 1, currentY - 1, currentX, currentY];
currentX -= 1;
currentY -= 1;
}
if (distance > 0) yield [previousX, previousY, currentX, currentY];
currentX = previousX;
currentY = previousY;
}
}
// Usage
(() => {
const array = new NegativeArray(1, 2, 3, 4, 5);
if (array[-1] !== 5) {
throw new Error(`Expected 5 but got ${array[-1]}`);
}
// Diff between 'ABCABBA' and 'CBABAC'
const abcActual = diff("ABCABBA", "CBABAC");
const abcExpected = "1D,2D,3IB,6D,7IC";
if (abcActual.toString() !== abcExpected) {
throw new Error(`Expected ${abcExpected} but got ${abcActual.join(",")}`);
}
// Diff between 'CARE' and 'BEAR'
const careActual = diff("CARE", "BEAR");
const careExpected = "1D,1IB,1IE,4D";
if (careActual.toString() !== careExpected) {
throw new Error(`Expected ${careExpected} but got ${careActual.join(",")}`);
}
console.log("Diff passed");
})();
myers_diff_test.ts
import {diff} from "../../../../data/algorithms/src/myers_diff.ts";
import {assertEquals} from "$assert";
test("Diff between 'ABCABBA' and 'CBABAC'", () => {
const result = diff("ABCABBA", "CBABAC");
const expected = "1D,2D,3IB,6D,7IC";
assertEquals(result.toString(), expected);
});
test("Diff between 'CARE' and 'BEAR'", () => {
const result = diff("CARE", "BEAR");
const expected = "1D,1IB,1IE,4D";
assertEquals(result.toString(), expected);
});