Dustin Boston

Ternary Search Trie

A Ternary Search Trie (TST) is a data structure used to store and retrieve strings efficiently, combining features of binary search trees and tries. It organizes data in a tree where each node contains three children (low, equal, high) corresponding to characters less than, equal to, or greater than the node’s character, enabling balanced search operations while conserving memory compared to standard tries.

Source Code Listing

code.ts

/*!
 * @file Implements the Ternary Search Tree algorithm in TypeScript.
 * @author Dustin Boston <mail@dustin.boston>
 * @see Sedgewick, R., & Wayne, K. (2011). _Algorithms_ (4th ed.). Addison-Wesley.
 * @see Sedgewick, R., & Wayne, K. (n.d.). _TST.java._ Algorithms, 4th Ed.
 * Retrieved May 1, 2024, from https://algs4.cs.princeton.edu/52trie/TST.java.html
 */

/** Node class for Ternary Search Trie. */
export class Node<Value> {
  character?: string;
  left?: Node<Value>;
  mid?: Node<Value>;
  right?: Node<Value>;
  value?: Value;
}

/**
 * The TST class represents a symbol table of key-value pairs, with string keys
 * and generic values. It supports the usual _put_, _get_, _contains_, _delete_,
 * _size_, and _is-empty_ methods.
 *
 * It also provides character-based methods for finding the string
 * in the symbol table that is the _longest prefix_ of a given prefix,
 * finding all strings in the symbol table that _start with_ a given prefix,
 * and finding all strings in the symbol table that _match_ a given pattern.
 *
 * A symbol table implements the _associative array_ abstraction:
 * when associating a value with a key that is already in the symbol table,
 * the convention is to replace the old value with the new value.
 *
 * This class uses the convention that values cannot be `undefined`—setting the
 * value associated with a key to `undefined` is equivalent to deleting the key
 * from the symbol table.
 *
 * This implementation uses a ternary search trie.
 *
 * For additional documentation, see https://algs4.cs.princeton.edu/52trie of
 * _Algorithms, 4th Edition_ by Robert Sedgewick and Kevin Wayne.
 */
export class TernarySearchTrie<Value> {
  private nodeCount = 0;
  private rootNode?: Node<Value>;

  /**
   * Returns the number of keys in the trie.
   *
   * @returns The size of the trie.
   */
  public size() {
    return this.nodeCount;
  }

  /**
   * Determines if a key is contained in the trie.
   *
   * @param key - The key to check.
   * @returns True if the key is contained, false otherwise.
   */
  public contains(key: string): boolean {
    if (!key) throw new TypeError("Undefined argument to contains.");
    return this.getValue(key) !== undefined;
  }

  /**
   * Retrieves the value associated with a given key.
   *
   * @param key - The key for which to retrieve the value.
   * @returns The value associated with the key, or undefined if not found.
   */
  public getValue(key: string): Value | undefined {
    if (!key || typeof key !== "string") {
      throw new TypeError("Key is undefined in getValue.");
    }

    if (key.length === 0) {
      throw new TypeError("Key cannot be an empty string in getValue.");
    }

    const node = this.getNode(this.rootNode, key, 0);
    if (node === undefined) return;
    return node.value;
  }

  /**
   * Recursive method to retrieve a node corresponding to the key.
   *
   * @param node - The current node in the trie.
   * @param key - The key to search for.
   * @param index - The index of the character in the key.
   * @returns The node corresponding to the key or undefined if not found.
   */
  private getNode(
    node: Node<Value> | undefined,
    key: string | undefined,
    index: number,
  ): Node<Value> | undefined {
    if (node?.character === undefined) return;
    if (!key?.length) {
      throw new TypeError("Key must not be undefined or empty in getNode.");
    }

    const charCode = key.charCodeAt(index);
    const nodeCharCode = node.character.charCodeAt(0);

    if (charCode < nodeCharCode) {
      return this.getNode(node.left, key, index);
    }

    if (charCode > nodeCharCode) {
      return this.getNode(node.right, key, index);
    }

    if (index < key.length - 1) {
      return this.getNode(node.mid, key, index + 1);
    }

    return node;
  }

  /**
   * Inserts or updates a key with a given value in the trie.
   *
   * @param key - The key to insert or update.
   * @param value - The value to associate with the key.
   */
  public put(key: string, value: Value): void {
    if (key === undefined) throw new TypeError("Key is undefined in put.");

    if (!this.contains(key)) {
      this.nodeCount++;
    } else if (value === undefined) {
      this.nodeCount--; // delete existing key
    }

    this.rootNode = this.putNode(this.rootNode, key, value, 0);
  }

  /**
   * Recursive method to insert or update a node in the trie.
   *
   * @param node - The current node.
   * @param key - The key to insert or update.
   * @param value - The value to associate with the key.
   * @param index - The index of the character in the key.
   * @returns The updated or newly inserted node.
   */
  private putNode(
    node: Node<Value> | undefined,
    key: string,
    value: Value,
    index: number,
  ): Node<Value> {
    const char = key.charAt(index);
    if (node?.character === undefined) {
      node = new Node();
      node.character = char;
    }

    const charCode = key.charCodeAt(index);
    const nodeCharCode = node.character.charCodeAt(0);

    if (charCode < nodeCharCode) {
      node.left = this.putNode(node.left, key, value, index);
    } else if (charCode > nodeCharCode) {
      node.right = this.putNode(node.right, key, value, index);
    } else if (index < key.length - 1) {
      node.mid = this.putNode(node.mid, key, value, index + 1);
    } else {
      node.value = value;
    }

    return node;
  }

  /**
   * Finds the longest prefix of the query string that is a key in the trie.
   *
   * @param query - The query string to search for the longest prefix.
   * @returns The longest prefix found, or undefined if none exists.
   */
  public longestPrefixOf(query: string): string | undefined {
    if (query === undefined) {
      throw new TypeError("Query is undefined in longestPrefixOf.");
    }

    if (query.length === 0) return;

    let length = 0;
    let node = this.rootNode;

    let index = 0;
    while (node?.character !== undefined && index < query.length) {
      const charCode = query.charAt(index);
      const nodeCharCode = node.character.charAt(0);

      if (charCode < nodeCharCode) {
        node = node.left;
      } else if (charCode > nodeCharCode) {
        node = node.right;
      } else {
        index++;
        if (node.value !== undefined) length = index;
        node = node.mid;
      }
    }

    return query.slice(0, length);
  }

  /**
   * Returns all keys in the trie in sorted order.
   *
   * @returns An array of all keys in the trie.
   */
  public keys(): string[] {
    const keys: string[] = [];
    this.collectKeys(this.rootNode, "", keys);
    return keys;
  }

  /**
   * Returns all keys in the trie that start with the given prefix.
   *
   * @param prefix - The prefix to search for.
   * @returns An array of keys that start with the given prefix.
   */
  public keysWithPrefix(prefix: string): string[] {
    if (prefix === undefined) {
      throw new TypeError("Prefix is undefined or empty in keysWithPrefix.");
    }

    const keys: string[] = [];
    const prefixNode = this.getNode(this.rootNode, prefix, 0);

    if (!prefixNode) return keys;

    if (prefixNode.value !== undefined) {
      keys.push(prefix);
    }

    this.collectKeys(prefixNode.mid, prefix, keys);
    return keys;
  }

  /**
   * Collects keys recursively for the keys method.
   *
   * @param node - The current node.
   * @param prefix - The current prefix formed by traversing the trie.
   * @param keys - The array to collect keys into.
   */
  private collectKeys(
    node: Node<Value> | undefined,
    prefix: string,
    keys: string[],
  ): void {
    if (node === undefined) return;

    this.collectKeys(node.left, prefix, keys);

    if (node.value !== undefined) keys.push(prefix + node.character);

    this.collectKeys(node.mid, prefix + node.character, keys);
    prefix = prefix.slice(0, -1);
    this.collectKeys(node.right, prefix, keys);
  }

  /**
   * Returns all keys in the trie that match the pattern. The period character
   * (.) is a wildcard that represents any character. Doesn't match anything
   * longer than the given pattern.
   *
   * @param pattern - The pattern to match keys against, "." means any char.
   * @returns An array of keys that match the pattern.
   */
  public keysThatMatch(pattern: string): string[] {
    const keys: string[] = [];
    this.collectKeysThatMatch(this.rootNode, "", 0, pattern, keys);
    return keys;
  }

  /**
   * Recursively collects keys that match the given pattern. The period
   * character (.) is a wildcard that represents any character. Doesn't match
   * anything longer than the given pattern.
   *
   * @param node - The current node.
   * @param prefix - The current prefix formed by traversing the trie.
   * @param index - The current index in the pattern.
   * @param pattern - The pattern to match keys against.
   * @param result - The array to collect matching keys into.
   */
  // eslint-disable-next-line max-params
  private collectKeysThatMatch(
    node: Node<Value> | undefined,
    prefix: string,
    index: number,
    pattern: string,
    keys: string[],
  ) {
    if (node?.character === undefined) return;

    // const patternLength = pattern.length - 1;
    const char = pattern.charAt(index);
    const charCode = char.charCodeAt(0);
    const nodeChar = node.character;
    const nodeCharCode = nodeChar.charCodeAt(0);

    if (char === "." || charCode < nodeCharCode) {
      this.collectKeysThatMatch(node.left, prefix, index, pattern, keys);
    }

    if (char === "." || charCode === nodeCharCode) {
      const patternLength = pattern.length - 1;
      if (index === patternLength && node.value !== undefined) {
        keys.push(prefix + nodeChar);
      }

      if (index < patternLength) {
        this.collectKeysThatMatch(
          node.mid,
          prefix + nodeChar,
          index + 1,
          pattern,
          keys,
        );
        prefix = prefix.slice(0, -1);
      }
    }

    if (char === "." || charCode > nodeCharCode) {
      this.collectKeysThatMatch(node.right, prefix, index, pattern, keys);
    }
  }
}

// Usage
const wordList = "she sells sea shells by the sea shore".split(" ");

const tst = new TernarySearchTrie<number>();
const uniqueKeys = new Set<string>();
for (const [value, key] of wordList.entries()) {
  uniqueKeys.add(key);
  tst.put(key, value);
  console.log(`Put a new key: ${key}.`);
}

console.log(`Size: ${tst.size()}`);

const longestPrefix = tst.longestPrefixOf("shellsort");
console.log(`Longest prefix: ${longestPrefix}`);

code_test.ts

import {test, expect} from "bun:test";
import {TernarySearchTrie} from "./code";

const WORD_LIST = "she sells sea shells by the sea shore".split(" ");

function getTST() {
  const tst = new TernarySearchTrie<number>();
  const uniqueKeys = new Set<string>();
  for (const [value, key] of WORD_LIST.entries()) {
    uniqueKeys.add(key);
    tst.put(key, value);
  }

  return {tst, uniqueKeys};
}

// test("testPut", () => {
//   const {tst, uniqueKeys} = getTST();
//   const actual = tst.size();
//   const expected = uniqueKeys.size;
//   expect(actual).toBe(expected);
// });

test("testLongestPrefixOf", () => {
  const {tst} = getTST();
  expect(tst.longestPrefixOf("shellsort")).toBe("shells");
  expect(tst.longestPrefixOf("shell")).toBe("she");
});

// test("testKeys", () => {
//   const {tst} = getTST();
//   const actual = tst.keys().join(",");
//   const expected = "by,sea,sells,she,shells,shore,the";
//   expect(actual).toBe(expected);
// });

test("testKeysWithPrefix", () => {
  const {tst} = getTST();
  const actual = tst.keysWithPrefix("shor").join(",");
  console.log(actual);
  expect(actual).toBe("shore");
});

test("testKeysThatMatch", () => {
  const {tst} = getTST();
  const actual = tst.keysThatMatch(".he.l.").join(",");
  expect(actual).toBe("shells");
});

Tags

[End]