Huffman Encoding

Posted by Dustin Boston .


The Huffman Encoding data structure uses a Min Heap to sort characters by frequency. It then assigns codes based on frequency; the most frequent characters will have the shortest codes.

Source Code Listing

code.ts

/**
 * @file Huffman Encoding
 * @author Dustin Boston
 * @see Cormen, T, Leiserson, C, Rivest, R, and Stein, C. (2001).
 *   Introduction to Algorithms. (2nd ed). The MIT Press.
 */

import {MinHeap, Node} from "../min-heap/code.ts";

/**
 * Represents a node in a Huffman tree.
 */
export class HuffNode extends Node<string | undefined> {
  /**
   * Creates an instance of a Huffman node.
   * @param key - The priority of the node.
   * @param value - The character associated with the node.
   * @param left - The left child of the node.
   * @param right - The right child of the node.
   */
  constructor(
    public key: number,
    public value: string | undefined,
    public left: Node<string | undefined> | undefined = undefined,
    public right: Node<string | undefined> | undefined = undefined,
  ) {
    super(key, value);
  }
}

/**
 * Builds a Huffman tree from a set of characters and their frequencies.
 * @param characterSet - Array of HuffNode representing characters and their frequencies.
 * @returns The root of the Huffman tree.
 */
export function huffman(characterSet: HuffNode[]): HuffNode {
  const n = characterSet.length;
  const queue = MinHeap.buildMinHeap<string, HuffNode>(characterSet.slice(0));

  for (let i = 1; i < n; i++) {
    const x = queue.heapExtractMin();
    const y = queue.heapExtractMin();
    const z = new HuffNode(x.key + y.key, undefined, x, y);

    queue.minHeapInsert(z);
  }

  return queue.heapExtractMin();
}

/**
 * Computes the frequency of each character in a given text.
 * @param text - The input text to compute frequencies from.
 * @returns An array of HuffNode representing characters and their frequencies.
 */
export function computeFrequency(text: string): HuffNode[] {
  const frequency = new Map<string, number>();

  for (const char of text) {
    frequency.set(char, (frequency.get(char) ?? 0) + 1);
  }

  return Array.from(frequency.entries()).map(
    ([char, freq]) => new HuffNode(freq, char),
  );
}

/**
 * Computes the Huffman codes for each character in the Huffman tree.
 * @param node - The root of the Huffman tree.
 * @param prefix - The prefix for the current node (default is an empty string).
 * @returns A map of characters to their corresponding Huffman codes.
 */
export function computeCodes(node: HuffNode, prefix = ""): Map<string, string> {
  const codes = new Map<string, string>();

  walk(node, prefix);

  return codes;

  function walk(node: HuffNode, prefix: string): void {
    if (node.value === undefined) {
      if (node.left) {
        walk(node.left as HuffNode, prefix + "0");
      }

      if (node.right) {
        walk(node.right as HuffNode, prefix + "1");
      }
    } else {
      codes.set(node.value, prefix);
    }
  }
}

/**
 * Encodes a given text using the provided Huffman codes.
 * @param text - The input text to encode.
 * @param codes - A map of characters to their corresponding Huffman codes.
 * @returns The encoded string.
 */
export function encodeHuffman(
  text: string,
  codes: Map<string, string>,
): string {
  let encoded = "";

  for (const char of text) {
    encoded += codes.get(char);
  }

  return encoded;
}

/**
 * Decodes a given encoded string using the Huffman tree.
 * @param encoded - The encoded string to decode.
 * @param root - The root of the Huffman tree.
 * @returns The decoded string.
 */
export function decodeHuffman(encoded: string, root: HuffNode): string {
  let decoded = "";
  let node = root;

  for (const bit of encoded) {
    node = bit === "0" ? (node.left as HuffNode) : (node.right as HuffNode);

    if (node.value !== undefined) {
      decoded += node.value;
      node = root;
    }
  }

  return decoded;
}

test.ts

import {test, expect, describe} from "bun:test";
import {
  huffman,
  HuffNode,
  computeCodes,
  encodeHuffman,
  decodeHuffman,
  computeFrequency,
} from "./code.ts";

describe("huffman", () => {
  test("minHeapInsert and heapMinimum", () => {
    const characterSet = [
      new HuffNode(5, "f"),
      new HuffNode(9, "e"),
      new HuffNode(16, "d"),
      new HuffNode(12, "c"),
      new HuffNode(13, "b"),
      new HuffNode(45, "a"),
    ];
    const rootNode = huffman(characterSet);

    expect(rootNode.key).toBe(100);
    expect(rootNode.value).toBeUndefined();
    expect(rootNode.left).toBeDefined();
    expect(rootNode.left?.value).toBe("a");
    expect(rootNode.right).toBeDefined();
    expect(rootNode.right?.value).toBeUndefined();
  });

  test("huffman with empty input", () => {
    const characterSet: HuffNode[] = [];

    expect(() => {
      huffman(characterSet);
    }).toThrow();
  });

  test("huffman with single element", () => {
    const characterSet = [new HuffNode(5, "f")];
    const rootNode = huffman(characterSet);
    expect(rootNode.key).toBe(5);
    expect(rootNode.value).toBe("f");
    expect(rootNode.left).toBeUndefined();
    expect(rootNode.right).toBeUndefined();
  });

  test("huffman with two elements", () => {
    const characterSet = [new HuffNode(5, "f"), new HuffNode(9, "e")];
    const rootNode = huffman(characterSet);
    expect(rootNode.key).toBe(14);
    expect(rootNode.value).toBeUndefined();
    expect(rootNode.left).toBeDefined();
    expect(rootNode.right).toBeDefined();
    expect(rootNode.left?.value).toBe("f");
    expect(rootNode.right?.value).toBe("e");
  });
});

describe("extractCodes", () => {
  test("extractCodes with empty input", () => {
    const rootNode = new HuffNode(0, undefined);
    const codes = computeCodes(rootNode);
    expect(codes.size).toBe(0);
  });

  test("extractCodes with many elements", () => {
    const characterSet = [
      new HuffNode(5, "f"),
      new HuffNode(9, "e"),
      new HuffNode(16, "d"),
      new HuffNode(12, "c"),
      new HuffNode(13, "b"),
      new HuffNode(45, "a"),
    ];

    const rootNode = huffman(characterSet);
    const codes = computeCodes(rootNode);

    expect(codes.size).toBe(6);
    expect(codes.get("a")).toBe("0");
    expect(codes.get("b")).toBe("101");
    expect(codes.get("c")).toBe("100");
    expect(codes.get("d")).toBe("111");
    expect(codes.get("e")).toBe("1101");
    expect(codes.get("f")).toBe("1100");
  });
});

describe("encode/decode", () => {
  test("with many elements", () => {
    const phrase = "feed a bad babe dead beef"; // 32 chars * 8 bits each = 256 bits
    const characterSet = computeFrequency(phrase);
    const rootNode = huffman(characterSet);
    const codes = computeCodes(rootNode);
    const encoded = encodeHuffman(phrase, codes); // Binary
    const decoded = decodeHuffman(encoded, rootNode);

    expect(encoded.length).toBe(64);
    expect(encoded).toBe(
      "1000101111001100010111011100101110101010011101110111001010101100",
    );

    expect(phrase.length * 8).toBe(200);
    expect(decoded).toBe(phrase);
  });
});