/**
 * @file Functions for calculating the cosine similarity between two vectors,
 * i.e. the cosine of the angle between them, and for building a pairwise
 * cosine similarity matrix from lists of vectors. It also provides helper
 * functions to compute the dot product and magnitude of vectors, which are
 * the building blocks of the cosine similarity calculation.
 */
/**
 * Builds a matrix of pairwise cosine similarities.
 *
 * Entry `[i][j]` of the result is the cosine similarity between row `i` of
 * `tfIdfMatrixA` and row `j` of `tfIdfMatrixB`. When `tfIdfMatrixB` is
 * omitted, `tfIdfMatrixA` is compared against itself: the diagonal is set to 1
 * without being computed (this also applies to all-zero rows, for which
 * `calculateCosineSimilarity` would otherwise report 0), and each
 * off-diagonal pair is computed once and mirrored, since cosine similarity is
 * symmetric.
 *
 * @param tfIdfMatrixA - Row vectors that index the rows of the result.
 * @param tfIdfMatrixB - Optional row vectors that index the columns of the
 * result. Defaults to `tfIdfMatrixA`.
 * @throws {Error} If a compared pair of vectors differs in length (raised by
 * `calculateCosineSimilarity`).
 * @returns One row per vector in `tfIdfMatrixA`, each holding one similarity
 * per vector in the second matrix.
 */
export function calculateCosineSimilarityMatrix(
  tfIdfMatrixA: number[][],
  tfIdfMatrixB?: number[][],
): number[][] {
  const matrixB = tfIdfMatrixB ?? tfIdfMatrixA;
  const isSelfComparison = tfIdfMatrixB === undefined;
  const rowCount = tfIdfMatrixA.length;
  // Allocate every row up front so mirrored writes can target later rows.
  const similarityMatrix: number[][] = Array.from(
    {length: rowCount},
    (): number[] => [],
  );

  for (const [rowIndex, vectorA] of tfIdfMatrixA.entries()) {
    const row = similarityMatrix[rowIndex];

    if (!isSelfComparison) {
      for (const [columnIndex, vectorB] of matrixB.entries()) {
        row[columnIndex] = calculateCosineSimilarity(vectorA, vectorB);
      }
      continue;
    }

    // Self-comparison: the diagonal is 1 by definition here.
    row[rowIndex] = 1;
    // Only the upper triangle is computed; the lower triangle is its mirror.
    // Columns before `rowIndex` were already filled by earlier iterations.
    for (
      let columnIndex = rowIndex + 1;
      columnIndex < rowCount;
      columnIndex++
    ) {
      const similarity = calculateCosineSimilarity(
        vectorA,
        tfIdfMatrixA[columnIndex],
      );
      row[columnIndex] = similarity;
      similarityMatrix[columnIndex][rowIndex] = similarity;
    }
  }

  return similarityMatrix;
}
/**
 * Calculates the cosine similarity between two vectors.
 * Cosine similarity is a measure of similarity between two non-zero vectors
 * of an inner product space that measures the cosine of the angle between them.
 *
 * @param vectorA - The first vector for comparison.
 * @param vectorB - The second vector for comparison.
 * @throws {Error} If vectors are not of the same length.
 * @returns The cosine similarity in the closed range [-1, 1]: 1 when the
 * vectors point in the same direction, 0 when they are orthogonal and -1 when
 * they point in opposite directions. Returns 0 when the product of the two
 * magnitudes is 0 (for example when either vector contains only zeros).
 */
export function calculateCosineSimilarity(
  vectorA: number[],
  vectorB: number[],
): number {
  if (vectorA.length !== vectorB.length) {
    throw new Error("Vectors must be the same length");
  }
  const dotProduct = calculateDotProduct(vectorA, vectorB);
  const magnitude = calculateMagnitude(vectorA) * calculateMagnitude(vectorB);
  // The angle is undefined without a direction; report "no similarity"
  // instead of dividing by zero.
  if (magnitude === 0) return 0;
  // Floating-point rounding can push the quotient marginally outside [-1, 1]
  // (e.g. comparing [1, 1, 1] with itself), which would turn a later
  // Math.acos into NaN. Clamp to keep the documented range.
  return Math.min(1, Math.max(-1, dotProduct / magnitude));
}
/**
 * Computes the dot product (scalar / inner product) of two vectors: each
 * component of the first vector is multiplied by the component at the same
 * position in the second vector, and the products are added together.
 *
 * @param vectorA - The first vector for the dot product calculation.
 * @param vectorB - The second vector for the dot product calculation.
 * @throws {Error} If vectors are not of the same length.
 * @returns The dot product of the two vectors (0 for two empty vectors).
 */
export function calculateDotProduct(vectorA: number[], vectorB: number[]) {
  const lengthsMatch = vectorA.length === vectorB.length;
  if (!lengthsMatch) {
    throw new Error("Vectors must be the same length");
  }

  let total = 0;
  vectorA.forEach((component, position) => {
    total += component * vectorB[position];
  });
  return total;
}
/**
 * Computes the magnitude (Euclidean length) of a vector in n-dimensional
 * space: the square root of the sum of its squared components.
 *
 * @param vector - The vector to calculate the magnitude of.
 * @returns The magnitude of the vector (0 for an empty vector).
 */
export function calculateMagnitude(vector: number[]) {
  let sumOfSquares = 0;
  vector.forEach((component) => {
    sumOfSquares += component ** 2;
  });
  return Math.sqrt(sumOfSquares);
}
import {test, expect, describe} from "bun:test";
import {
calculateCosineSimilarity,
calculateDotProduct,
calculateMagnitude,
} from "./code.ts";
// Unit tests for calculateCosineSimilarity, calculateDotProduct and
// calculateMagnitude. Results that are exact by construction are checked with
// toBe; results that go through a square root and a division are checked with
// toBeCloseTo so that a last-bit rounding difference does not fail the test.
describe("Cosine Similarity", () => {
  test("Zero Magnitute Vectors", () => {
    const vectorA = [0, 0, 0];
    const vectorB = [0, 0, 0];
    // calculateCosineSimilarity returns 0 when the magnitude product is 0; it
    // does not throw, so the result is asserted directly.
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBe(0);
    // A single zero vector is enough to make the magnitude product 0.
    expect(calculateCosineSimilarity(vectorA, [1, 2, 3])).toBe(0);
  });
  test("Calculate Dot Product", () => {
    const vectorA = [1, 2, 3];
    const vectorB = [4, 5, 6];
    const expectedDotProduct = 32; // 1*4 + 2*5 + 3*6
    const actualDotProduct = calculateDotProduct(vectorA, vectorB);
    expect(actualDotProduct).toBe(expectedDotProduct);
  });
  test("Calculate Magnitude", () => {
    const vector = [1, 2, 3];
    const expectedMagnitude = Math.sqrt(1 + 4 + 9); // -> sqrt(1^2 + 2^2 + 3^2)
    expect(calculateMagnitude(vector)).toBe(expectedMagnitude);
  });
  test("Calculate Cosine Similarity", () => {
    const vectorA = [1, 0, 0];
    const vectorB = [1, 0, 0];
    const expectedSimilarity = 1;
    // Dot product and both magnitudes are exactly 1 here, so toBe is safe.
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBe(
      expectedSimilarity,
    );
  });
  test("Vector Lengths", () => {
    const vectorA = [1, 2];
    const vectorB = [1, 2, 3];
    expect(() => calculateCosineSimilarity(vectorA, vectorB)).toThrow(Error);
  });
  test("Orthognal Vectors", () => {
    const vectorA = [1, 0];
    const vectorB = [0, 1];
    const expectedSimilarity = 0;
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBe(
      expectedSimilarity,
    );
  });
  test("Negative Values", () => {
    const vectorA = [-1, -2, -3];
    const vectorB = [-1, -2, -3];
    const expectedSimilarity = 1;
    // sqrt(14) * sqrt(14) need not round back to exactly 14.
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBeCloseTo(
      expectedSimilarity,
      10,
    );
  });
  test("Large Values", () => {
    const largeNumber = Number.MAX_SAFE_INTEGER;
    const vectorA = [largeNumber, largeNumber];
    const vectorB = [largeNumber, largeNumber];
    const expectedSimilarity = 1;
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBeCloseTo(
      expectedSimilarity,
      10,
    );
  });
  test("Precision Issues", () => {
    const vectorA = [0.1, 0.2];
    const vectorB = [0.1, 0.2];
    const expectedSimilarity = 1;
    const similarity = calculateCosineSimilarity(vectorA, vectorB);
    // A strict "< Number.EPSILON" bound rejects a result that is one
    // representable step above 1, whose difference equals EPSILON exactly.
    expect(similarity).toBeCloseTo(expectedSimilarity, 10);
  });
  // Binary vectors
  // ----------------------------------------------------------------------------
  test("Identical Binary Vectors (1)", () => {
    const vectorA1 = [1, 0, 1, 1];
    const vectorB1 = [1, 0, 1, 1];
    expect(calculateCosineSimilarity(vectorA1, vectorB1)).toBeCloseTo(1);
  });
  test("Orthogonal Binary Vectors (0)", () => {
    const vectorA2 = [1, 0, 1, 0];
    const vectorB2 = [0, 1, 0, 1];
    // The dot product is exactly 0, so the quotient is exactly 0.
    expect(calculateCosineSimilarity(vectorA2, vectorB2)).toBe(0);
  });
  test("Base Case (0-1)", () => {
    const vectorA3 = [1, 1, 0, 0];
    const vectorB3 = [0, 1, 1, 0];
    const similarity = calculateCosineSimilarity(vectorA3, vectorB3);
    // The vectors share one of their two non-zero components.
    expect(similarity).toBeGreaterThan(0);
    expect(similarity).toBeLessThan(1);
  });
});