Trie Symbol Table
Posted by Dustin Boston in Data Structures.
The Trie Symbol Table is a data structure designed to efficiently store and retrieve key-value pairs where the keys are strings. It organizes data into a tree-like structure, where each node represents a character, and paths from the root to nodes represent strings.
Source Code Listing
code.ts
/*!
* @file Implements Trie Symbol Table data structure in TypeScript.
* @author Dustin Boston <mail@dustin.boston>
* @see Sedgewick, R., & Wayne, K. (2011). _Algorithms_ (4th ed.). Addison-Wesley.
* @see https://algs4.cs.princeton.edu/52trie/TrieST.java.html
* @see https://learnersbucket.com/tutorials/data-structures/trie-data-structure-in-javascript/
*/
export type Maybe<T> = T | undefined;
/** R-way trie node */
export class Node<T> {
next: Array<Node<T>> = [];
value?: T = undefined;
}
/**
* Trie Symbol Table Implementation
*
* The TrieST class represents a symbol table of key-value pairs, with string
* keys and generic values.
*
* A symbol table implements the _associative array_ abstraction:
* when associating a value with a key that is already in the symbol table,
* the convention is to replace the old value with the new value.
*
* Values cannot be `undefined`—setting the value associated with a key to
* `undefined` is equivalent to deleting the key from the symbol table.
*
* This implementation uses an 256-way trie. The `put`, `contains`, `delete`,
* and `longestPrefixOf` operations take time proportional to the length of the
* key (in the worst case). Everything else is constant time.
*/
export class TrieST<T> {
/** Extended ASCII */
readonly #radix = 256;
/** The root node in the trie */
#root?: Node<T>;
/** The number of key-value pairs in this symbol table. */
#size = 0;
/**
* Retrieves the value associated with a given key.
* @param key - The key for which to retrieve the value.
* @returns The value associated with the key, or undefined if not found.
*/
get(key: string): Maybe<T> {
if (!key) throw new TypeError("Undefined argument to get.");
const x = this.#get(this.#root, key, 0);
if (!x) return undefined;
return x.value;
}
/**
* Recursive method to retrieve a node corresponding to the key.
* @param node - The current node in the trie.
* @param key - The key to search for.
* @param d - The index of the character in the key.
* @returns A node corresponding to the key or undefined if not found.
*/
#get(node: Maybe<Node<T>>, key: string, d: number): Maybe<Node<T>> {
if (!node) return undefined;
if (d === key.length) return node;
const charCode = key.charCodeAt(d);
return this.#get(node.next[charCode], key, d + 1);
}
/**
* Inserts the key-value pair into the symbol table. If the key exists
* the value will be overwritten. An undefined value will delete the key.
* @param key - The key to insert or update.
* @param value - Value to associate with the key, undefined to delete it.
* @throws if the key is undefined.
*/
put(key: string, value: Maybe<T>) {
if (key === undefined) {
throw new TypeError("First argument to put is undefined.");
}
if (value === undefined) this.delete(key);
else this.#root = this.#put(this.#root, key, value, 0);
}
/**
* Recursive method to insert or update a node in the symbol table.
* @param node - The current node.
* @param key - The key to insert or update.
* @param value - The value to associate with the key.
* @param index - The index of the character in the key.
* @returns The updated or newly inserted node.
* @throws if the code point for the given index is undefined.
*/
#put(node: Maybe<Node<T>>, key: string, value: T, index: number): Node<T> {
if (node === undefined) node = new Node<T>();
if (index === key.length) {
if (node.value === undefined) this.#size++;
node.value = value;
return node;
}
const charCode = key.charCodeAt(index);
node.next[charCode] = this.#put(node.next[charCode], key, value, index + 1);
return node;
}
/**
* Gets number of key-value pairs in this symbol table.
* @returns The number of key-value pairs in this symbol table.
*/
get size() {
return this.#size;
}
/**
* Sets the size of the symbol table.
* This is used internally to update the size when keys are added or removed.
*/
set size(value: number) {
this.#size = value;
}
/**
* Whether the symbol table is empty.
* @returns true if size is 0, otherwise false.
*/
get isEmpty() {
return this.#size === 0;
}
/**
* Does this symbol table contain the given key?
* @param key - the key to search for in the symbol table.
* @returns true if this symbol table contains the key, otherwise false.
* @throws if key is undefined.
*/
contains(key: string) {
if (!key) throw new TypeError("Argument to contains is undefined.");
return this.get(key) !== undefined;
}
/**
* Gets all keys in the symbol table.
* @returns an array of keys.
*/
keys(): string[] {
return this.keysWithPrefix("");
}
/**
* Returns all of the keys in the set that start with `prefix`.
* This is iterative to prevent stack overflows.
* @param prefix - The prefix to search for.
* @returns All keys in the set that start with `prefix`
*/
keysWithPrefix(prefix: string) {
const results: string[] = [];
const node = this.#get(this.#root, prefix, 0);
this.#collectPrefixes(node, prefix, results);
return results;
}
#collectPrefixes(node: Maybe<Node<T>>, prefix: string, results: string[]) {
if (node === undefined) return;
if (node.value !== undefined) results.push(prefix);
for (let charCode = 0; charCode <= this.#radix; charCode++) {
// const newPrefix = prefix + String.fromCharCode(charCode);
prefix += String.fromCharCode(charCode);
this.#collectPrefixes(node.next[charCode], prefix, results);
prefix = prefix.slice(0, -1);
}
}
keysThatMatch(pattern: string) {
const results: string[] = [];
this.#collectKeys(this.#root, "", pattern, results);
return results;
}
/**
* Find keys that match a pattern, where "." is a wild card.
* Will only find keys of the same length as the pattern.
* @param node - The node to search for matching keys.
* @param prefix - The prefix to search for.
* @param pattern - the pattern to search for.
* @returns all keys that match the pattern.
*/
#collectKeys(
node: Maybe<Node<T>>,
prefix: string,
pattern: string,
results: string[],
) {
if (node === undefined) return;
const index = prefix.length;
if (index === pattern.length && node.value !== undefined) {
results.push(prefix);
}
if (index === pattern.length) return;
const char = pattern.charAt(index);
if (char === ".") {
for (let n = 0; n < this.#radix; n++) {
prefix += String.fromCharCode(n);
this.#collectKeys(node.next[n], prefix, pattern, results);
prefix = prefix.slice(0, -1);
}
} else {
prefix += char;
const charCode = char.charCodeAt(0);
this.#collectKeys(node.next[charCode], prefix, pattern, results);
prefix = prefix.slice(0, -1);
}
}
/**
* Finds the string in the symbol table that is the longest prefix of
* `query`. This is iterative to prevent a stack overflow.
* @param query - the string to search for.
* @returns the longest prefix of query in the symbol table.
*/
longestPrefixOf(query: string): Maybe<string> {
if (query === undefined) {
throw new TypeError("Argument to longestPrefixOf is undefined.");
}
const length = this.#longestPrefixOf(this.#root, query, 0, -1);
if (length === -1) return;
else return query.slice(0, length);
}
/**
* Returns the length of the longest string key in the subtrie rooted at `node`
* that is a prefix of the query string, assuming the first character match
* and we have already found a prefix match of given length (-1 if no match).
*
* @param node
* @param query
* @param index
* @param length
*/
#longestPrefixOf(
node: Maybe<Node<T>>,
query: string,
index: number,
length: number,
): number {
if (node === undefined) return length;
if (node.value !== undefined) length = index;
if (index === query.length) return length;
const char = query.charCodeAt(index);
return this.#longestPrefixOf(node.next[char], query, index + 1, length);
}
/**
* Removes the key from the set if the key is present.
*
* @param key - the key to remove.
*/
delete(key: string) {
if (key === undefined) {
throw new TypeError("Argument to delete is undefined.");
}
this.#root = this.deleteNode(this.#root, key, 0);
}
/**
* Removes the key from the set if it is present.
* @param node - the node to process.
* @param key - the key to search for.
* @param index - the index of the character in the key.
*/
private deleteNode(
node: Node<T> | undefined,
key: string,
index: number,
): Node<T> | undefined {
if (node === undefined) return;
if (index === key.length) {
if (node.value !== undefined) this.#size--;
node.value = undefined;
} else {
const charCode = key.charCodeAt(index);
node.next[charCode] = this.deleteNode(
node.next[charCode],
key,
index + 1,
) as Node<T>;
}
// Remove subtrie rooted at `node` if it is completely empty.
if (node.value !== undefined) return node;
for (let subtrieIndex = 0; subtrieIndex < this.#radix; subtrieIndex++) {
if (node.next[subtrieIndex] !== undefined) return node;
}
return undefined;
}
}
code_test.ts
import {describe, test, expect} from "bun:test";
import {TrieST} from "./code";
describe("TrieSymbolTable", () => {
test("testInsert", () => {
const st = buildSymbolTable();
expect(st.size).toBe(7);
expect(st.longestPrefixOf("shellsort")).toBe("shells");
expect(st.longestPrefixOf("quicksort")).toBeUndefined();
});
test("testKeysWithPrefix", () => {
const st = buildSymbolTable();
const keys = st.keysWithPrefix("shor");
expect(keys.length).toBe(1);
expect(keys.toString()).toBe("shore");
});
test("testKeysThatMatch", () => {
const t = buildSymbolTable();
const keys = t.keysThatMatch(".he.l.");
expect(keys.join(",")).toBe("shells");
});
});
function buildSymbolTable(): TrieST<number> {
const input = "she sells sea shells by the sea shore";
const st = new TrieST<number>();
for (const [index, key] of input.split(" ").entries()) {
st.put(key, index);
}
return st;
}