Cosine Similarity
Posted by Dustin Boston in Algorithms.
Cosine similarity is a measure of similarity between two non-zero vectors in a multi-dimensional space, defined as the cosine of the angle between them. It is calculated as the dot product of the vectors divided by the product of their magnitudes. This metric is often used in text analysis, clustering, and information retrieval to determine the similarity of documents or data points, independent of their magnitude.
Source Code Listing
code.ts
/**
* @file Functions for calculating the cosine similarity between two
* vectors (the cosine of the angle between them) and for building a
* matrix of pairwise similarities between the row vectors of one or two
* matrices. It also provides helper functions to compute the dot product
* and magnitude of vectors, which are the building blocks of the cosine
* similarity calculation.
*/
/**
 * Builds a matrix of pairwise cosine similarities between row vectors.
 *
 * Entry `[i][j]` of the result is the cosine similarity between row `i` of
 * `tfIdfMatrixA` and row `j` of `tfIdfMatrixB`. When `tfIdfMatrixB` is
 * omitted, the rows of `tfIdfMatrixA` are compared with each other and every
 * diagonal entry is set to 1 directly, without calling
 * `calculateCosineSimilarity` for that pair.
 *
 * @param tfIdfMatrixA - Row vectors that index the rows of the result.
 * @param tfIdfMatrixB - Optional row vectors that index the columns of the
 * result. Defaults to `tfIdfMatrixA`.
 * @throws {Error} If a compared pair of vectors differs in length
 * (propagated from `calculateCosineSimilarity`).
 * @returns A matrix with one row per vector in `tfIdfMatrixA` and one column
 * per vector in the matrix it is compared against.
 */
export function calculateCosineSimilarityMatrix(
  tfIdfMatrixA: number[][],
  tfIdfMatrixB?: number[][],
): number[][] {
  const isSelfComparison = tfIdfMatrixB === undefined;
  const columnVectors = tfIdfMatrixB ?? tfIdfMatrixA;

  return Array.from(tfIdfMatrixA, (rowVector, rowIndex) =>
    Array.from(columnVectors, (columnVector, columnIndex) =>
      // A vector paired with itself: skip the computation and use 1.
      isSelfComparison && rowIndex === columnIndex
        ? 1
        : calculateCosineSimilarity(rowVector, columnVector),
    ),
  );
}
/**
 * Calculates the cosine similarity between two vectors.
 * Cosine similarity is a measure of similarity between two non-zero vectors
 * of an inner product space that measures the cosine of the angle between them.
 *
 * @param vectorA - The first vector for comparison.
 * @param vectorB - The second vector for comparison.
 * @throws {Error} If vectors are not of the same length.
 * @returns The cosine of the angle between the vectors, in the range -1 to 1:
 * 1 means they point in the same direction (regardless of magnitude), 0 means
 * they are orthogonal, and -1 means they point in opposite directions.
 * Returns 0 when the product of the magnitudes is 0 (for example, when either
 * vector is all zeros), because the angle is undefined in that case.
 */
export function calculateCosineSimilarity(
  vectorA: number[],
  vectorB: number[],
): number {
  if (vectorA.length !== vectorB.length) {
    throw new Error("Vectors must be the same length");
  }

  const dotProduct = calculateDotProduct(vectorA, vectorB);
  const magnitude = calculateMagnitude(vectorA) * calculateMagnitude(vectorB);

  // Avoid dividing by zero; a zero vector is treated as having no similarity.
  if (magnitude === 0) return 0;

  const cosineSimilarity = dotProduct / magnitude;

  // Floating-point rounding can push the quotient marginally outside [-1, 1]
  // (e.g. 1.0000000000000002 for parallel vectors), which would break callers
  // such as Math.acos. Clamp to the documented range; NaN fails both
  // comparisons and is returned unchanged.
  if (cosineSimilarity > 1) return 1;
  if (cosineSimilarity < -1) return -1;
  return cosineSimilarity;
}
/**
 * Calculates the dot product (also known as scalar product or inner product)
 * of two vectors. The dot product is the sum of the products of the
 * corresponding entries of the two sequences of numbers.
 *
 * Neither input is modified, so read-only arrays are accepted.
 *
 * @param vectorA - The first vector for the dot product calculation.
 * @param vectorB - The second vector for the dot product calculation.
 * @throws {Error} If vectors are not of the same length.
 * @returns The dot product of the two vectors; 0 for two empty vectors.
 */
export function calculateDotProduct(
  vectorA: readonly number[],
  vectorB: readonly number[],
): number {
  if (vectorA.length !== vectorB.length) {
    throw new Error("Vectors must be the same length");
  }

  // Pair components by index and accumulate their products, starting from 0.
  return vectorA.reduce<number>(
    (sum, weight, i) => sum + weight * vectorB[i],
    0,
  );
}
/**
 * Calculates the magnitude (or length) of a vector in n-dimensional space.
 * The magnitude is the square root of the sum of the squares of the vector's
 * components.
 *
 * The input is not modified, so read-only arrays are accepted.
 *
 * @param vector - The vector to calculate the magnitude of.
 * @returns The magnitude of the vector; 0 for an empty vector.
 */
export function calculateMagnitude(vector: readonly number[]): number {
  const sumOfSquares = vector.reduce<number>(
    (sum, weight) => sum + weight ** 2,
    0,
  );
  return Math.sqrt(sumOfSquares);
}
test.ts
import {test, expect, describe} from "bun:test";
import {
calculateCosineSimilarity,
calculateDotProduct,
calculateMagnitude,
} from "./code.ts";
// Unit tests for the cosine similarity function and its dot product and
// magnitude helpers.
describe("Cosine Similarity", () => {
  test("Zero Magnitute Vectors", () => {
    const vectorA = [0, 0, 0];
    const vectorB = [0, 0, 0];
    // calculateCosineSimilarity returns 0 when the magnitude product is 0; it
    // does not throw, so assert on the returned value directly.
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBe(0);
  });

  test("Calculate Dot Product", () => {
    const vectorA = [1, 2, 3];
    const vectorB = [4, 5, 6];
    const expectedDotProduct = 32; // 1*4 + 2*5 + 3*6
    const actualDotProduct = calculateDotProduct(vectorA, vectorB);
    expect(actualDotProduct).toBe(expectedDotProduct);
  });

  test("Calculate Magnitude", () => {
    const vector = [1, 2, 3];
    const expectedMagnitude = Math.sqrt(1 + 4 + 9); // -> sqrt(1^2 + 2^2 + 3^2)
    expect(calculateMagnitude(vector)).toBe(expectedMagnitude);
  });

  test("Calculate Cosine Similarity", () => {
    const vectorA = [1, 0, 0];
    const vectorB = [1, 0, 0];
    const expectedSimilarity = 1;
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBe(
      expectedSimilarity,
    );
  });

  test("Vector Lengths", () => {
    const vectorA = [1, 2];
    const vectorB = [1, 2, 3];
    expect(() => calculateCosineSimilarity(vectorA, vectorB)).toThrow(Error);
  });

  test("Orthognal Vectors", () => {
    const vectorA = [1, 0];
    const vectorB = [0, 1];
    const expectedSimilarity = 0;
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBe(
      expectedSimilarity,
    );
  });

  test("Negative Values", () => {
    const vectorA = [-1, -2, -3];
    const vectorB = [-1, -2, -3];
    const expectedSimilarity = 1;
    // The result goes through square roots and a division, so compare with a
    // tolerance instead of requiring a bit-exact 1.
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBeCloseTo(
      expectedSimilarity,
      10,
    );
  });

  test("Large Values", () => {
    const largeNumber = Number.MAX_SAFE_INTEGER;
    const vectorA = [largeNumber, largeNumber];
    const vectorB = [largeNumber, largeNumber];
    const expectedSimilarity = 1;
    // Squares of this size are not exactly representable; allow for rounding.
    expect(calculateCosineSimilarity(vectorA, vectorB)).toBeCloseTo(
      expectedSimilarity,
      10,
    );
  });

  test("Precision Issues", () => {
    const vectorA = [0.1, 0.2];
    const vectorB = [0.1, 0.2];
    const expectedSimilarity = 1;
    const similarity = calculateCosineSimilarity(vectorA, vectorB);
    expect(Math.abs(similarity - expectedSimilarity)).toBeLessThan(
      Number.EPSILON,
    );
  });

  // Binary vectors
  // ----------------------------------------------------------------------------
  test("Identical Binary Vectors (1)", () => {
    const vectorA1 = [1, 0, 1, 1];
    const vectorB1 = [1, 0, 1, 1];
    expect(calculateCosineSimilarity(vectorA1, vectorB1)).toBeCloseTo(1);
  });

  test("Orthogonal Binary Vectors (0)", () => {
    const vectorA2 = [1, 0, 1, 0];
    const vectorB2 = [0, 1, 0, 1];
    expect(calculateCosineSimilarity(vectorA2, vectorB2)).toBe(0);
  });

  test("Base Case (0-1)", () => {
    // The vectors share exactly one non-zero component, so the similarity
    // lies strictly between 0 and 1.
    const vectorA3 = [1, 1, 0, 0];
    const vectorB3 = [0, 1, 1, 0];
    const similarity = calculateCosineSimilarity(vectorA3, vectorB3);
    expect(similarity).toBeGreaterThan(0);
    expect(similarity).toBeLessThan(1);
  });
});