Rabin-Karp Matcher
The Rabin-Karp algorithm finds a pattern in text. It uses hashing to convert the pattern into a number. It converts a piece of the text too. The algorithm does not start over for each piece. It uses a rolling hash. The next number comes from the one before it. This is fast.
Once it finds a matching number, it checks the letters to prove the match is real. Sometimes the numbers match but the letters do not. This is a false match. It happens because of a hash collision.
Source Code Listing
code.ts
/**
 * @file Rabin-Karp Matcher
 * @author Dustin Boston
 * @see Cormen, T, Leiserson, C, Rivest, R, and Stein, C. (2001).
 *   Introduction to Algorithms. (2nd ed). The MIT Press.
 */
export function rabinKarpMatcher(
  T: string, // Text
  P: string, // Pattern
  d: number, // Radix
  q: number, // Prime
) {
  const n = T.length;
  const m = P.length;
  const h = d ** (m - 1) % q;
  let p = 0; // Code point of a given letter
  let t = 0; // Code point of a given letter
  // Preprocessing
  for (let i = 0; i < m; i++) {
    p = (d * p + (P.codePointAt(i) ?? 0)) % q;
    t = (d * t + (T.codePointAt(i) ?? 0)) % q;
  }
  // Matching
  for (let s = 0; s <= n - m; s++) {
    if (p === t && P.slice(0, m) === T.slice(s, s + m)) {
      console.log(`Pattern occurs with shift ${s}`);
      return [P.slice(0, m), s];
    }
    if (s < n - m) {
      let newt = (d * (t - (T.codePointAt(s) ?? 0) * h) + (T.codePointAt(s + m) ?? 0)) % q;
      if (newt < 0) {
        newt += q;
      }
      t = newt;
    }
  }
}
code_test.ts
import {test, expect} from "bun:test";
import {rabinKarpMatcher} from "./code";
test(`find pattern "aab" in "acaabc"`, () => {
  const actual = rabinKarpMatcher("acaabc", "aab", 3, 13) ?? [];
  const pattern = actual[0];
  const shift = actual[1];
  expect(pattern).toEqual("aab");
  expect(shift).toEqual(2);
});
Tags
[End]