Trie Symbol Table

Posted by Dustin Boston in .

The Trie Symbol Table is a data structure designed to efficiently store and retrieve key-value pairs where the keys are strings. It organizes data into a tree-like structure, where each node represents a character, and paths from the root to nodes represent strings.

Source Code Listing

code.ts

/*!
 * @file Implements Trie Symbol Table data structure in TypeScript.
 * @author Dustin Boston <mail@dustin.boston>
 * @see Sedgewick, R., & Wayne, K. (2011). _Algorithms_ (4th ed.). Addison-Wesley.
 * @see https://algs4.cs.princeton.edu/52trie/TrieST.java.html
 * @see https://learnersbucket.com/tutorials/data-structures/trie-data-structure-in-javascript/
 */

export type Maybe<T> = T | undefined;

/** R-way trie node */
export class Node<T> {
  next: Array<Node<T>> = [];
  value?: T = undefined;
}

/**
 * Trie Symbol Table Implementation
 *
 * The TrieST class represents a symbol table of key-value pairs, with string
 * keys and generic values.
 *
 * A symbol table implements the _associative array_ abstraction:
 * when associating a value with a key that is already in the symbol table,
 * the convention is to replace the old value with the new value.
 *
 * Values cannot be `undefined`—setting the value associated with a key to
 * `undefined` is equivalent to deleting the key from the symbol table.
 *
 * This implementation uses an 256-way trie. The `put`, `contains`, `delete`,
 * and `longestPrefixOf` operations take time proportional to the length of the
 * key (in the worst case). Everything else is constant time.
 */
export class TrieST<T> {
  /** Extended ASCII */
  readonly #radix = 256;

  /** The root node in the trie */
  #root?: Node<T>;

  /** The number of key-value pairs in this symbol table. */
  #size = 0;

  /**
   * Retrieves the value associated with a given key.
   * @param key - The key for which to retrieve the value.
   * @returns The value associated with the key, or undefined if not found.
   */
  get(key: string): Maybe<T> {
    if (!key) throw new TypeError("Undefined argument to get.");
    const x = this.#get(this.#root, key, 0);
    if (!x) return undefined;
    return x.value;
  }

  /**
   * Recursive method to retrieve a node corresponding to the key.
   * @param node - The current node in the trie.
   * @param key - The key to search for.
   * @param d - The index of the character in the key.
   * @returns A node corresponding to the key or undefined if not found.
   */
  #get(node: Maybe<Node<T>>, key: string, d: number): Maybe<Node<T>> {
    if (!node) return undefined;
    if (d === key.length) return node;
    const charCode = key.charCodeAt(d);
    return this.#get(node.next[charCode], key, d + 1);
  }

  /**
   * Inserts the key-value pair into the symbol table. If the key exists
   * the value will be overwritten. An undefined value will delete the key.
   * @param key - The key to insert or update.
   * @param value - Value to associate with the key, undefined to delete it.
   * @throws if the key is undefined.
   */
  put(key: string, value: Maybe<T>) {
    if (key === undefined) {
      throw new TypeError("First argument to put is undefined.");
    }

    if (value === undefined) this.delete(key);
    else this.#root = this.#put(this.#root, key, value, 0);
  }

  /**
   * Recursive method to insert or update a node in the symbol table.
   * @param node - The current node.
   * @param key - The key to insert or update.
   * @param value - The value to associate with the key.
   * @param index - The index of the character in the key.
   * @returns The updated or newly inserted node.
   * @throws if the code point for the given index is undefined.
   */
  #put(node: Maybe<Node<T>>, key: string, value: T, index: number): Node<T> {
    if (node === undefined) node = new Node<T>();
    if (index === key.length) {
      if (node.value === undefined) this.#size++;
      node.value = value;
      return node;
    }

    const charCode = key.charCodeAt(index);
    node.next[charCode] = this.#put(node.next[charCode], key, value, index + 1);

    return node;
  }

  /**
   * Gets number of key-value pairs in this symbol table.
   * @returns The number of key-value pairs in this symbol table.
   */
  get size() {
    return this.#size;
  }

  /**
   * Sets the size of the symbol table.
   * This is used internally to update the size when keys are added or removed.
   */
  set size(value: number) {
    this.#size = value;
  }

  /**
   * Whether the symbol table is empty.
   * @returns true if size is 0, otherwise false.
   */
  get isEmpty() {
    return this.#size === 0;
  }

  /**
   * Does this symbol table contain the given key?
   * @param key - the key to search for in the symbol table.
   * @returns true if this symbol table contains the key, otherwise false.
   * @throws if key is undefined.
   */
  contains(key: string) {
    if (!key) throw new TypeError("Argument to contains is undefined.");
    return this.get(key) !== undefined;
  }

  /**
   * Gets all keys in the symbol table.
   * @returns an array of keys.
   */
  keys(): string[] {
    return this.keysWithPrefix("");
  }

  /**
   * Returns all of the keys in the set that start with `prefix`.
   * This is iterative to prevent stack overflows.
   * @param prefix - The prefix to search for.
   * @returns All keys in the set that start with `prefix`
   */
  keysWithPrefix(prefix: string) {
    const results: string[] = [];
    const node = this.#get(this.#root, prefix, 0);
    this.#collectPrefixes(node, prefix, results);
    return results;
  }

  #collectPrefixes(node: Maybe<Node<T>>, prefix: string, results: string[]) {
    if (node === undefined) return;
    if (node.value !== undefined) results.push(prefix);
    for (let charCode = 0; charCode <= this.#radix; charCode++) {
      // const newPrefix = prefix + String.fromCharCode(charCode);
      prefix += String.fromCharCode(charCode);
      this.#collectPrefixes(node.next[charCode], prefix, results);
      prefix = prefix.slice(0, -1);
    }
  }

  keysThatMatch(pattern: string) {
    const results: string[] = [];
    this.#collectKeys(this.#root, "", pattern, results);
    return results;
  }

  /**
   * Find keys that match a pattern, where "." is a wild card.
   * Will only find keys of the same length as the pattern.
   * @param node - The node to search for matching keys.
   * @param prefix - The prefix to search for.
   * @param pattern - the pattern to search for.
   * @returns all keys that match the pattern.
   */
  #collectKeys(
    node: Maybe<Node<T>>,
    prefix: string,
    pattern: string,
    results: string[],
  ) {
    if (node === undefined) return;

    const index = prefix.length;
    if (index === pattern.length && node.value !== undefined) {
      results.push(prefix);
    }

    if (index === pattern.length) return;

    const char = pattern.charAt(index);
    if (char === ".") {
      for (let n = 0; n < this.#radix; n++) {
        prefix += String.fromCharCode(n);
        this.#collectKeys(node.next[n], prefix, pattern, results);
        prefix = prefix.slice(0, -1);
      }
    } else {
      prefix += char;
      const charCode = char.charCodeAt(0);
      this.#collectKeys(node.next[charCode], prefix, pattern, results);
      prefix = prefix.slice(0, -1);
    }
  }

  /**
   * Finds the string in the symbol table that is the longest prefix of
   * `query`. This is iterative to prevent a stack overflow.
   * @param query - the string to search for.
   * @returns the longest prefix of query in the symbol table.
   */
  longestPrefixOf(query: string): Maybe<string> {
    if (query === undefined) {
      throw new TypeError("Argument to longestPrefixOf is undefined.");
    }
    const length = this.#longestPrefixOf(this.#root, query, 0, -1);
    if (length === -1) return;
    else return query.slice(0, length);
  }

  /**
   * Returns the length of the longest string key in the subtrie rooted at `node`
   * that is a prefix of the query string, assuming the first character match
   * and we have already found a prefix match of given length (-1 if no match).
   *
   * @param node
   * @param query
   * @param index
   * @param length
   */
  #longestPrefixOf(
    node: Maybe<Node<T>>,
    query: string,
    index: number,
    length: number,
  ): number {
    if (node === undefined) return length;
    if (node.value !== undefined) length = index;
    if (index === query.length) return length;

    const char = query.charCodeAt(index);
    return this.#longestPrefixOf(node.next[char], query, index + 1, length);
  }

  /**
   * Removes the key from the set if the key is present.
   *
   * @param key - the key to remove.
   */
  delete(key: string) {
    if (key === undefined) {
      throw new TypeError("Argument to delete is undefined.");
    }

    this.#root = this.deleteNode(this.#root, key, 0);
  }

  /**
   * Removes the key from the set if it is present.
   * @param node - the node to process.
   * @param key - the key to search for.
   * @param index - the index of the character in the key.
   */
  private deleteNode(
    node: Node<T> | undefined,
    key: string,
    index: number,
  ): Node<T> | undefined {
    if (node === undefined) return;

    if (index === key.length) {
      if (node.value !== undefined) this.#size--;
      node.value = undefined;
    } else {
      const charCode = key.charCodeAt(index);
      node.next[charCode] = this.deleteNode(
        node.next[charCode],
        key,
        index + 1,
      ) as Node<T>;
    }

    // Remove subtrie rooted at `node` if it is completely empty.
    if (node.value !== undefined) return node;

    for (let subtrieIndex = 0; subtrieIndex < this.#radix; subtrieIndex++) {
      if (node.next[subtrieIndex] !== undefined) return node;
    }

    return undefined;
  }
}

code_test.ts

import {describe, test, expect} from "bun:test";
import {TrieST} from "./code";

describe("TrieSymbolTable", () => {
  test("testInsert", () => {
    const st = buildSymbolTable();
    expect(st.size).toBe(7);
    expect(st.longestPrefixOf("shellsort")).toBe("shells");
    expect(st.longestPrefixOf("quicksort")).toBeUndefined();
  });

  test("testKeysWithPrefix", () => {
    const st = buildSymbolTable();
    const keys = st.keysWithPrefix("shor");
    expect(keys.length).toBe(1);
    expect(keys.toString()).toBe("shore");
  });

  test("testKeysThatMatch", () => {
    const t = buildSymbolTable();
    const keys = t.keysThatMatch(".he.l.");
    expect(keys.join(",")).toBe("shells");
  });
});

function buildSymbolTable(): TrieST<number> {
  const input = "she sells sea shells by the sea shore";
  const st = new TrieST<number>();
  for (const [index, key] of input.split(" ").entries()) {
    st.put(key, index);
  }
  return st;
}