/ src / features / semantic-search / tests / embedding-service.test.ts
embedding-service.test.ts
  1  import { describe, it, expect } from 'vitest';
  2  import { TextProcessor, VectorUtils } from '../services/embedding-service';
  3  
  /**
   * Specs for the static text helpers exposed by TextProcessor:
   * whitespace cleanup, markdown content extraction, and chunking.
   */
  describe('TextProcessor', () => {
    describe('cleanText', () => {
      it('should remove extra whitespace', () => {
        // Input has padding on both ends and a run of spaces in the middle.
        expect(TextProcessor.cleanText('  hello   world  ')).toBe('hello world');
      });

      it('should handle empty strings', () => {
        expect(TextProcessor.cleanText('')).toBe('');
      });

      it('should preserve single spaces', () => {
        // Already-clean input must come back unchanged.
        expect(TextProcessor.cleanText('hello world')).toBe('hello world');
      });
    });

    describe('extractContent', () => {
      it('should remove markdown formatting', () => {
        const extracted = TextProcessor.extractContent(
          '# Title\n\n**Bold text** and *italic*\n\n- List item\n- Another item'
        );

        // The wording survives...
        expect(extracted).toContain('Title');
        expect(extracted).toContain('Bold text');
        // ...while the heading and bold markers are gone.
        expect(extracted).not.toContain('#');
        expect(extracted).not.toContain('**');
      });

      it('should handle empty content', () => {
        expect(TextProcessor.extractContent('')).toBe('');
      });
    });

    describe('chunkText', () => {
      it('should split text into chunks', () => {
        const pieces = TextProcessor.chunkText('a'.repeat(1000), 300, 50);

        // 1000 characters cannot fit in one piece; the first piece is exactly 300 long.
        expect(pieces.length).toBeGreaterThan(1);
        expect(pieces[0]).toHaveLength(300);
      });

      it('should handle overlap correctly', () => {
        const alphabetRun = 'abcdefghijklmnopqrstuvwxyz'.repeat(20);
        const pieces = TextProcessor.chunkText(alphabetRun, 100, 20);

        expect(pieces.length).toBeGreaterThan(1);

        // Should have overlap between chunks: the first 20 characters of the
        // second piece repeat characters 80..100 of the first piece.
        const headOfSecond = pieces[1].substring(0, 20);
        const tailOfFirst = pieces[0].substring(80, 100);
        expect(headOfSecond).toBe(tailOfFirst);
      });

      it('should return single chunk for short text', () => {
        const shortInput = 'short text';
        const pieces = TextProcessor.chunkText(shortInput, 100, 20);

        // Input shorter than the requested size comes back as one untouched piece.
        expect(pieces).toHaveLength(1);
        expect(pieces[0]).toBe(shortInput);
      });
    });
  });
 65  
 /**
  * Specs for the static vector helpers exposed by VectorUtils:
  * cosine similarity, element-wise averaging, and normalization.
  */
 describe('VectorUtils', () => {
   describe('cosineSimilarity', () => {
     it('should calculate similarity between identical vectors', () => {
       const vector = [1, 2, 3, 4, 5];
       const similarity = VectorUtils.cosineSimilarity(vector, vector);
       expect(similarity).toBeCloseTo(1.0, 5);
     });

     it('should calculate similarity between orthogonal vectors', () => {
       const vector1 = [1, 0, 0];
       const vector2 = [0, 1, 0];
       const similarity = VectorUtils.cosineSimilarity(vector1, vector2);
       expect(similarity).toBeCloseTo(0.0, 5);
     });

     it('should handle opposite vectors', () => {
       const vector1 = [1, 2, 3];
       const vector2 = [-1, -2, -3];
       const similarity = VectorUtils.cosineSimilarity(vector1, vector2);
       expect(similarity).toBeCloseTo(-1.0, 5);
     });

     it('should throw error for mismatched dimensions', () => {
       const vector1 = [1, 2, 3];
       const vector2 = [1, 2];
       expect(() => VectorUtils.cosineSimilarity(vector1, vector2)).toThrow();
     });
   });

   describe('averageVectors', () => {
     it('should average multiple vectors', () => {
       const vectors = [
         [1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]
       ];
       // Column-wise means: (1+4+7)/3, (2+5+8)/3, (3+6+9)/3.
       const average = VectorUtils.averageVectors(vectors);
       expect(average).toEqual([4, 5, 6]);
     });

     it('should handle single vector', () => {
       const vectors = [[1, 2, 3]];
       const average = VectorUtils.averageVectors(vectors);
       expect(average).toEqual([1, 2, 3]);
     });

     it('should throw error for empty array', () => {
       expect(() => VectorUtils.averageVectors([])).toThrow();
     });

     it('should throw error for mismatched dimensions', () => {
       const vectors = [
         [1, 2, 3],
         [4, 5]
       ];
       expect(() => VectorUtils.averageVectors(vectors)).toThrow();
     });
   });

   describe('normalizeVector', () => {
     it('should normalize vector to unit length', () => {
       const vector = [3, 4, 0];
       const normalized = VectorUtils.normalizeVector(vector);

       // Euclidean length of the result must be 1.
       const magnitude = Math.hypot(...normalized);
       expect(magnitude).toBeCloseTo(1.0, 5);

       // Unit length alone is satisfied by ANY unit vector, so also pin the
       // direction: [3, 4, 0] has length 5, hence [3/5, 4/5, 0].
       expect(normalized).toHaveLength(3);
       expect(normalized[0]).toBeCloseTo(0.6, 5);
       expect(normalized[1]).toBeCloseTo(0.8, 5);
       expect(normalized[2]).toBeCloseTo(0, 5);
     });

     it('should handle zero vector', () => {
       // A zero vector has no direction; the expected result is the input unchanged.
       const vector = [0, 0, 0];
       const normalized = VectorUtils.normalizeVector(vector);
       expect(normalized).toEqual([0, 0, 0]);
     });
   });
 });