/ tests / pipeline / rescoring-validation.test.js
rescoring-validation.test.js
/**
 * Tests for rescoring image validation, plus checks on the assets and
 * outreach stage queries and on circuit-breaker error classification.
 */
  4  
  5  import { test } from 'node:test';
  6  import { strict as assert } from 'node:assert';
  7  import { readFileSync } from 'fs';
  8  
  9  // Mock dependencies
 10  const mockLogger = {
 11    info: () => {},
 12    error: () => {},
 13    success: () => {},
 14    warn: () => {},
 15  };
 16  
// NOTE: the validation function in rescoring.js is not exported, so it cannot be
// imported here. The base64 test below exercises a local re-implementation of the
// validation rules instead of the real function.
 19  
 20  test('Base64 validation should reject empty data', async t => {
 21    // Test validation logic
 22    const validateBase64Image = base64Data => {
 23      if (!base64Data || typeof base64Data !== 'string') {
 24        return { valid: false, reason: 'Missing or invalid base64 data' };
 25      }
 26  
 27      if (base64Data.length < 100) {
 28        return { valid: false, reason: 'Base64 data too short (likely empty image)' };
 29      }
 30  
 31      const base64Regex = /^[A-Za-z0-9+/=]+$/;
 32      if (!base64Regex.test(base64Data)) {
 33        return { valid: false, reason: 'Invalid base64 characters' };
 34      }
 35  
 36      const estimatedBytes = (base64Data.length * 3) / 4;
 37      const estimatedMB = estimatedBytes / (1024 * 1024);
 38  
 39      if (estimatedMB > 10) {
 40        return { valid: false, reason: `Image too large (~${estimatedMB.toFixed(1)}MB)` };
 41      }
 42  
 43      return { valid: true, sizeBytes: estimatedBytes, sizeMB: estimatedMB };
 44    };
 45  
 46    // Test empty data
 47    const result1 = validateBase64Image('');
 48    assert.strictEqual(result1.valid, false);
 49    assert.ok(result1.reason.includes('too short') || result1.reason.includes('Missing'));
 50  
 51    // Test null
 52    const result2 = validateBase64Image(null);
 53    assert.strictEqual(result2.valid, false);
 54    assert.ok(result2.reason.includes('Missing'));
 55  
 56    // Test invalid characters
 57    const result3 = validateBase64Image(`invalid!@#$%^&*()${'A'.repeat(100)}`);
 58    assert.strictEqual(result3.valid, false);
 59    assert.ok(result3.reason.includes('Invalid base64 characters'));
 60  
 61    // Test valid small image
 62    const validBase64 = 'A'.repeat(1000);
 63    const result4 = validateBase64Image(validBase64);
 64    assert.strictEqual(result4.valid, true);
 65    assert.ok(result4.sizeMB < 1);
 66  
 67    // Test too large image (> 10MB)
 68    const largeBase64 = 'A'.repeat(15 * 1024 * 1024); // ~15MB when decoded
 69    const result5 = validateBase64Image(largeBase64);
 70    assert.strictEqual(result5.valid, false);
 71    assert.ok(result5.reason.includes('too large'));
 72  });
 73  
 74  test('Assets stage query should include status=found', async t => {
 75    // Verify the SQL query includes 'found' status
 76    const queryPattern = /WHERE \(status = 'found' OR status = 'assets_captured'\)/;
 77  
 78    // Read assets.js
 79    const assetsCode = readFileSync('src/stages/assets.js', 'utf-8');
 80  
 81    // Check if query includes 'found' status
 82    assert.ok(
 83      assetsCode.includes("WHERE (status = 'found' OR status = 'assets_captured')"),
 84      'Assets query should include status=found to process stuck sites'
 85    );
 86  });
 87  
 88  test('Outreach stage query should include status=pending', async t => {
 89    // Read outreach.js
 90    const outreachCode = readFileSync('src/stages/outreach.js', 'utf-8');
 91  
 92    // Check if query includes approval_status for outbound messages
 93    assert.ok(
 94      outreachCode.includes("approval_status = 'approved'") ||
 95        outreachCode.includes("approval_status IN ('pending', 'approved')"),
 96      'Outreach query should filter by approval_status'
 97    );
 98  });
 99  
/**
 * Decides whether an error should count against the circuit breaker.
 *
 * A message matching any business-logic pattern (client-side HTTP status
 * codes, validation wording) does not trip the breaker. Everything else
 * does, including unrecognised and empty messages. Business-logic patterns
 * are checked first, so they win when a message matches both kinds of wording.
 *
 * The earlier version also looped over service-error patterns (5xx, timeout,
 * ECONNREFUSED, ETIMEDOUT, ENOTFOUND, network, rate limit), but both the
 * match and the fall-through returned true, so that loop had no effect.
 *
 * @param {{ message?: string }} error - Error-like object; only `message` is read.
 * @returns {boolean} true when the error should trip the breaker.
 */
function shouldTriggerBreaker(error) {
  const message = error.message || '';

  const businessLogicPatterns = [
    /400/i,
    /401/i,
    /403/i,
    /404/i,
    /422/i,
    /validation/i,
    /invalid/i,
    /missing/i,
    /not found/i,
    /unauthorized/i,
  ];

  return !businessLogicPatterns.some(pattern => pattern.test(message));
}

test('Circuit breaker should classify 400 errors as business logic', () => {
  // Business logic errors (should NOT trigger breaker)
  const businessLogicMessages = [
    '400 Bad Request',
    '401 Unauthorized',
    '403 Forbidden',
    '404 Not Found',
    '422 Unprocessable Entity',
    'Validation failed',
    'Invalid input',
    'Missing required field',
    'Resource not found',
    'Unauthorized',
  ];
  for (const message of businessLogicMessages) {
    assert.strictEqual(shouldTriggerBreaker(new Error(message)), false, message);
  }

  // Service errors (SHOULD trigger breaker)
  const serviceMessages = [
    '500 Internal Server Error',
    '503 Service Unavailable',
    'Request timeout',
    'ECONNREFUSED',
    'connect ETIMEDOUT',
    'getaddrinfo ENOTFOUND api.example.com',
    'Network error',
    'Rate limit exceeded',
  ];
  for (const message of serviceMessages) {
    assert.strictEqual(shouldTriggerBreaker(new Error(message)), true, message);
  }

  // Unrecognised or empty messages also trip the breaker.
  assert.strictEqual(shouldTriggerBreaker(new Error('Something unexpected happened')), true);
  assert.strictEqual(shouldTriggerBreaker(new Error('')), true);
});
153    assert.strictEqual(shouldTriggerBreaker(new Error('Network error')), true);
154  });