Huffman Encoding
Posted by Dustin Boston .
The Huffman Encoding data structure uses a Min Heap to sort characters by frequency. It then assigns codes based on frequency; the most frequent characters will have the shortest codes.
Source Code Listing
code.ts
/**
* @file Huffman Encoding
* @author Dustin Boston
* @see Cormen, T, Leiserson, C, Rivest, R, and Stein, C. (2001).
* Introduction to Algorithms. (2nd ed). The MIT Press.
*/
import {MinHeap, Node} from "../min-heap/code.ts";
/**
* Represents a node in a Huffman tree.
*/
export class HuffNode extends Node<string | undefined> {
/**
* Creates an instance of a Huffman node.
* @param key - The priority of the node.
* @param value - The character associated with the node.
* @param left - The left child of the node.
* @param right - The right child of the node.
*/
constructor(
public key: number,
public value: string | undefined,
public left: Node<string | undefined> | undefined = undefined,
public right: Node<string | undefined> | undefined = undefined,
) {
super(key, value);
}
}
/**
* Builds a Huffman tree from a set of characters and their frequencies.
* @param characterSet - Array of HuffNode representing characters and their frequencies.
* @returns The root of the Huffman tree.
*/
export function huffman(characterSet: HuffNode[]): HuffNode {
const n = characterSet.length;
const queue = MinHeap.buildMinHeap<string, HuffNode>(characterSet.slice(0));
for (let i = 1; i < n; i++) {
const x = queue.heapExtractMin();
const y = queue.heapExtractMin();
const z = new HuffNode(x.key + y.key, undefined, x, y);
queue.minHeapInsert(z);
}
return queue.heapExtractMin();
}
/**
* Computes the frequency of each character in a given text.
* @param text - The input text to compute frequencies from.
* @returns An array of HuffNode representing characters and their frequencies.
*/
export function computeFrequency(text: string): HuffNode[] {
const frequency = new Map<string, number>();
for (const char of text) {
frequency.set(char, (frequency.get(char) ?? 0) + 1);
}
return Array.from(frequency.entries()).map(
([char, freq]) => new HuffNode(freq, char),
);
}
/**
* Computes the Huffman codes for each character in the Huffman tree.
* @param node - The root of the Huffman tree.
* @param prefix - The prefix for the current node (default is an empty string).
* @returns A map of characters to their corresponding Huffman codes.
*/
export function computeCodes(node: HuffNode, prefix = ""): Map<string, string> {
const codes = new Map<string, string>();
walk(node, prefix);
return codes;
function walk(node: HuffNode, prefix: string): void {
if (node.value === undefined) {
if (node.left) {
walk(node.left as HuffNode, prefix + "0");
}
if (node.right) {
walk(node.right as HuffNode, prefix + "1");
}
} else {
codes.set(node.value, prefix);
}
}
}
/**
* Encodes a given text using the provided Huffman codes.
* @param text - The input text to encode.
* @param codes - A map of characters to their corresponding Huffman codes.
* @returns The encoded string.
*/
export function encodeHuffman(
text: string,
codes: Map<string, string>,
): string {
let encoded = "";
for (const char of text) {
encoded += codes.get(char);
}
return encoded;
}
/**
* Decodes a given encoded string using the Huffman tree.
* @param encoded - The encoded string to decode.
* @param root - The root of the Huffman tree.
* @returns The decoded string.
*/
export function decodeHuffman(encoded: string, root: HuffNode): string {
let decoded = "";
let node = root;
for (const bit of encoded) {
node = bit === "0" ? (node.left as HuffNode) : (node.right as HuffNode);
if (node.value !== undefined) {
decoded += node.value;
node = root;
}
}
return decoded;
}
test.ts
import {test, expect, describe} from "bun:test";
import {
huffman,
HuffNode,
computeCodes,
encodeHuffman,
decodeHuffman,
computeFrequency,
} from "./code.ts";
describe("huffman", () => {
test("minHeapInsert and heapMinimum", () => {
const characterSet = [
new HuffNode(5, "f"),
new HuffNode(9, "e"),
new HuffNode(16, "d"),
new HuffNode(12, "c"),
new HuffNode(13, "b"),
new HuffNode(45, "a"),
];
const rootNode = huffman(characterSet);
expect(rootNode.key).toBe(100);
expect(rootNode.value).toBeUndefined();
expect(rootNode.left).toBeDefined();
expect(rootNode.left?.value).toBe("a");
expect(rootNode.right).toBeDefined();
expect(rootNode.right?.value).toBeUndefined();
});
test("huffman with empty input", () => {
const characterSet: HuffNode[] = [];
expect(() => {
huffman(characterSet);
}).toThrow();
});
test("huffman with single element", () => {
const characterSet = [new HuffNode(5, "f")];
const rootNode = huffman(characterSet);
expect(rootNode.key).toBe(5);
expect(rootNode.value).toBe("f");
expect(rootNode.left).toBeUndefined();
expect(rootNode.right).toBeUndefined();
});
test("huffman with two elements", () => {
const characterSet = [new HuffNode(5, "f"), new HuffNode(9, "e")];
const rootNode = huffman(characterSet);
expect(rootNode.key).toBe(14);
expect(rootNode.value).toBeUndefined();
expect(rootNode.left).toBeDefined();
expect(rootNode.right).toBeDefined();
expect(rootNode.left?.value).toBe("f");
expect(rootNode.right?.value).toBe("e");
});
});
describe("extractCodes", () => {
test("extractCodes with empty input", () => {
const rootNode = new HuffNode(0, undefined);
const codes = computeCodes(rootNode);
expect(codes.size).toBe(0);
});
test("extractCodes with many elements", () => {
const characterSet = [
new HuffNode(5, "f"),
new HuffNode(9, "e"),
new HuffNode(16, "d"),
new HuffNode(12, "c"),
new HuffNode(13, "b"),
new HuffNode(45, "a"),
];
const rootNode = huffman(characterSet);
const codes = computeCodes(rootNode);
expect(codes.size).toBe(6);
expect(codes.get("a")).toBe("0");
expect(codes.get("b")).toBe("101");
expect(codes.get("c")).toBe("100");
expect(codes.get("d")).toBe("111");
expect(codes.get("e")).toBe("1101");
expect(codes.get("f")).toBe("1100");
});
});
describe("encode/decode", () => {
test("with many elements", () => {
const phrase = "feed a bad babe dead beef"; // 32 chars * 8 bits each = 256 bits
const characterSet = computeFrequency(phrase);
const rootNode = huffman(characterSet);
const codes = computeCodes(rootNode);
const encoded = encodeHuffman(phrase, codes); // Binary
const decoded = decodeHuffman(encoded, rootNode);
expect(encoded.length).toBe(64);
expect(encoded).toBe(
"1000101111001100010111011100101110101010011101110111001010101100",
);
expect(phrase.length * 8).toBe(200);
expect(decoded).toBe(phrase);
});
});