score-storage.js
/**
 * Filesystem-backed score_json storage for sites.
 *
 * Stores the full scoring JSON blob in:
 *   data/scores/{site_id}.json
 *
 * This reduces DB size by ~232 MB and dramatically improves query performance
 * on the sites table (scans no longer have to skip over score_json blobs).
 *
 * Pattern: read from filesystem first, fall back to DB column for
 * sites that haven't been extracted yet.
 */

import { readFileSync, writeFileSync, mkdirSync, unlinkSync, existsSync } from 'fs';
import { join, dirname } from 'path';

/** Default storage directory, resolved against the working directory at import time. */
const DATA_DIR = join(process.cwd(), 'data', 'scores');

/**
 * Get file path for a site's score JSON.
 *
 * The directory is `$SCORE_STORAGE_BASE/scores` when that environment variable
 * is set (read on every call), otherwise DATA_DIR.
 *
 * @param {number|string} siteId - Positive integer id; coerced with Number()
 * @returns {string} Path of the form `<dir>/<id>.json`
 * @throws {Error} If siteId is not a positive safe integer
 */
function sitePath(siteId) {
  const id = Number(siteId);
  // Safe-integer check: above Number.MAX_SAFE_INTEGER distinct ids coerce to
  // the same number and would silently share one file.
  if (!Number.isSafeInteger(id) || id <= 0) {
    throw new Error(`Invalid siteId: ${siteId}`);
  }
  const dir = process.env.SCORE_STORAGE_BASE
    ? join(process.env.SCORE_STORAGE_BASE, 'scores')
    : DATA_DIR;
  return join(dir, `${id}.json`);
}

/**
 * Parse a JSON string, returning null instead of throwing.
 * @param {string|null} raw
 * @returns {Object|null} Parsed value, or null if raw is empty or not valid JSON
 */
function parseJsonOrNull(raw) {
  if (!raw) return null;
  try {
    return JSON.parse(raw);
  } catch {
    return null;
  }
}

/**
 * Read score_json from filesystem.
 * @param {number} siteId
 * @returns {string|null} Raw JSON string, or null if not on filesystem
 *   (also null for an empty file, an invalid siteId, or any read error)
 */
function getScoreJson(siteId) {
  try {
    return readFileSync(sitePath(siteId), 'utf8') || null;
  } catch {
    // Best-effort read: any failure is reported as "not on filesystem" so
    // callers can fall back to the DB column.
    return null;
  }
}

/**
 * Read and parse score_json from filesystem.
 * @param {number} siteId
 * @returns {Object|null} Parsed score data, or null if not on filesystem
 *   or if the stored content is not valid JSON
 */
function getScoreData(siteId) {
  return parseJsonOrNull(getScoreJson(siteId));
}

/**
 * Write score_json to filesystem.
 *
 * Falsy scoreData (null, undefined, '') is ignored and nothing is written.
 * The target directory is created if it does not exist.
 *
 * @param {number} siteId
 * @param {string|Object} scoreData - JSON string (stored verbatim) or object (stringified)
 * @throws {Error} If siteId is invalid, or if the write fails; in the latter
 *   case the underlying filesystem error is attached as `cause`
 */
function setScoreJson(siteId, scoreData) {
  if (!scoreData) return;
  const filePath = sitePath(siteId);
  try {
    mkdirSync(dirname(filePath), { recursive: true });
    const json = typeof scoreData === 'string' ? scoreData : JSON.stringify(scoreData);
    writeFileSync(filePath, json, 'utf8');
  } catch (err) {
    // Keep the original error reachable (code, errno, stack) via `cause`.
    throw new Error(`Failed to write score storage for site ${siteId}: ${err.message}`, {
      cause: err,
    });
  }
}

/**
 * Delete score_json from filesystem.
 * @param {number} siteId
 * @returns {boolean} true if file existed and was removed; false if it was
 *   missing, the siteId was invalid, or the removal failed
 */
function deleteScoreJson(siteId) {
  try {
    unlinkSync(sitePath(siteId));
    return true;
  } catch {
    // Best-effort delete: every failure is reported as "nothing removed".
    return false;
  }
}

/**
 * Check if score_json exists on filesystem.
 * @param {number} siteId
 * @returns {boolean}
 * @throws {Error} If siteId is invalid (unlike the read/delete helpers, this
 *   one does not swallow the validation error)
 */
function hasScoreJson(siteId) {
  return existsSync(sitePath(siteId));
}

/**
 * Read score_json with DB fallback.
 * Checks filesystem first; if missing, reads from the DB row's score_json column.
 * @param {number} siteId
 * @param {Object} [dbRow] - Optional DB row with score_json column (avoids extra query)
 * @returns {string|null} Raw JSON string
 */
function getScoreJsonWithFallback(siteId, dbRow) {
  // Filesystem first
  const fsData = getScoreJson(siteId);
  if (fsData) return fsData;

  // Fall back to DB column (skip sentinel value from extraction).
  // The sentinel is detected by a substring match on '"_fs"'.
  if (dbRow && dbRow.score_json && !dbRow.score_json.includes('"_fs"')) return dbRow.score_json;

  return null;
}

/**
 * Read and parse score_json with DB fallback.
 * @param {number} siteId
 * @param {Object} [dbRow] - Optional DB row with score_json column
 * @returns {Object|null} Parsed score data, or null if nothing was found
 *   or the content is not valid JSON
 */
function getScoreDataWithFallback(siteId, dbRow) {
  return parseJsonOrNull(getScoreJsonWithFallback(siteId, dbRow));
}

export {
  getScoreJson,
  getScoreData,
  setScoreJson,
  deleteScoreJson,
  hasScoreJson,
  getScoreJsonWithFallback,
  getScoreDataWithFallback,
  DATA_DIR,
};