Wagner-Fischer Edit Distance

Posted by Dustin Boston in .


The Wagner-Fischer algorithm is a dynamic programming method for calculating the Levenshtein distance (edit distance) between two strings. It determines the minimum number of single-character edits (insertions, deletions, or substitutions) needed to transform one string into another by filling a matrix where each cell represents the edit distance between prefixes of the two strings. The algorithm builds the solution incrementally, ensuring optimal substructure and overlapping subproblems for efficient computation.

Source Code Listing

code.ts

// @file Wagner-Fischer Edit Distance
// @author Dustin Boston
// @see R. Wagner, and M. Fischer, "The String-to-String Correction Problem,"
//  ACM, vol. 21, no. 1, pp. 168–173, 1974.

// Calculates the edit distance between two strings, indicating how many
// operations (insert, delete, substitute) are required to transform the
// start word into the target word. This function implements the Wagner-Fischer
// algorithm.
export function editDistance(
  startTerm: string,
  targetTerm: string,
): number[][] {
  const distance: number[][] = [[0]];
  for (let i = 1; i <= startTerm.length; i++) {
    distance[i] = [distance[i - 1][0] + cost(startTerm[i - 1], undefined)];
  }

  for (let index = 1; index <= targetTerm.length; index++) {
    distance[0][index] =
      distance[0][index - 1] + cost(undefined, targetTerm[index - 1]);
  }

  for (let i = 1; i <= startTerm.length; i++) {
    for (let index = 1; index <= targetTerm.length; index++) {
      const m1 =
        distance[i - 1][index - 1] +
        cost(startTerm[i - 1], targetTerm[index - 1]);
      const m2 = distance[i - 1][index] + cost(startTerm[i - 1], undefined);
      const m3 =
        distance[i][index - 1] + cost(undefined, targetTerm[index - 1]);
      distance[i][index] = Math.min(m1, m2, m3);
    }
  }

  return distance;
}

/**
 * Determines the cost of an edit operation between two characters or undefined
 * values. The cost is used in the calculation of the edit distance between
 * two strings.
 *
 * @param a - Char from the first string or undefined if it's an insert/delete.
 * @param b - Char from the second string or undefined if it's an insert/delete.
 * @returns Cost of the edit operation. 0 for no change, 1 for all others.
 * @throws Error if both parameters are undefined.
 */
export function cost(
  a: string | undefined = undefined,
  b: string | undefined = undefined,
): number {
  if (a === b) return 0;
  if (a !== undefined && b !== undefined) return 1; // Change
  if (a === undefined && b !== undefined) return 1; // Insert
  if (a !== undefined && b === undefined) return 1; // Delete
  throw new Error("Invalid cost arguments");
}

test.ts

import {describe, test, expect} from "bun:test";
import {editDistance} from "./code.ts";

describe("Edit Distance", () => {
  test("Sitting Kitten", () => {
    const a = "sitting";
    const b = "kitten";
    const table = editDistance(a, b);
    const actual = table[a.length - 1]?.[b.length - 1];
    expect(actual).toBe(3);
  });

  test("Sunday Saturday", () => {
    const a = "Sunday";
    const b = "Saturday";
    const table = editDistance(a, b);
    const actual = table[a.length - 1]?.[b.length - 1];
    expect(actual).toBe(3);
  });

  test("Flaw Lawn", () => {
    const a = "flaw";
    const b = "lawn";
    const table = editDistance(a, b);
    const actual = table[a.length - 1]?.[b.length - 1];
    expect(actual).toBe(2);
  });
});