/ src / utils / score-storage.js
score-storage.js
  1  /**
  2   * Filesystem-backed score_json storage for sites.
  3   *
  4   * Stores the full scoring JSON blob in:
  5   *   data/scores/{site_id}.json
  6   *
 * This reduces DB size by ~232 MB and dramatically improves query performance
 * on the sites table (scans no longer have to skip over score_json blobs).
  9   *
 10   * Pattern: read from filesystem first, fall back to DB column for
 11   * sites that haven't been extracted yet.
 12   */
 13  
 14  import { readFileSync, writeFileSync, mkdirSync, unlinkSync, existsSync } from 'fs';
 15  import { join, dirname } from 'path';
 16  
 17  const DATA_DIR = join(process.cwd(), 'data', 'scores');
 18  
 19  /** Get file path for a site's score JSON */
 20  function sitePath(siteId) {
 21    const id = Number(siteId);
 22    if (!Number.isInteger(id) || id <= 0) {
 23      throw new Error(`Invalid siteId: ${siteId}`);
 24    }
 25    const dir = process.env.SCORE_STORAGE_BASE
 26      ? join(process.env.SCORE_STORAGE_BASE, 'scores')
 27      : DATA_DIR;
 28    return join(dir, `${id}.json`);
 29  }
 30  
 31  /**
 32   * Read score_json from filesystem.
 33   * @param {number} siteId
 34   * @returns {string|null} Raw JSON string, or null if not on filesystem
 35   */
 36  function getScoreJson(siteId) {
 37    try {
 38      return readFileSync(sitePath(siteId), 'utf8') || null;
 39    } catch {
 40      return null;
 41    }
 42  }
 43  
 44  /**
 45   * Read and parse score_json from filesystem.
 46   * @param {number} siteId
 47   * @returns {Object|null} Parsed score data, or null if not on filesystem
 48   */
 49  function getScoreData(siteId) {
 50    const raw = getScoreJson(siteId);
 51    if (!raw) return null;
 52    try {
 53      return JSON.parse(raw);
 54    } catch {
 55      return null;
 56    }
 57  }
 58  
 59  /**
 60   * Write score_json to filesystem.
 61   * @param {number} siteId
 62   * @param {string|Object} scoreData - JSON string or object to store
 63   */
 64  function setScoreJson(siteId, scoreData) {
 65    if (!scoreData) return;
 66    const filePath = sitePath(siteId);
 67    try {
 68      mkdirSync(dirname(filePath), { recursive: true });
 69      const json = typeof scoreData === 'string' ? scoreData : JSON.stringify(scoreData);
 70      writeFileSync(filePath, json, 'utf8');
 71    } catch (err) {
 72      throw new Error(`Failed to write score storage for site ${siteId}: ${err.message}`);
 73    }
 74  }
 75  
 76  /**
 77   * Delete score_json from filesystem.
 78   * @param {number} siteId
 79   * @returns {boolean} true if file existed and was removed
 80   */
 81  function deleteScoreJson(siteId) {
 82    try {
 83      unlinkSync(sitePath(siteId));
 84      return true;
 85    } catch {
 86      return false;
 87    }
 88  }
 89  
 90  /**
 91   * Check if score_json exists on filesystem.
 92   * @param {number} siteId
 93   * @returns {boolean}
 94   */
 95  function hasScoreJson(siteId) {
 96    return existsSync(sitePath(siteId));
 97  }
 98  
 99  /**
100   * Read score_json with DB fallback.
101   * Checks filesystem first; if missing, reads from the DB row's score_json column.
102   * @param {number} siteId
103   * @param {Object} [dbRow] - Optional DB row with score_json column (avoids extra query)
104   * @returns {string|null} Raw JSON string
105   */
106  function getScoreJsonWithFallback(siteId, dbRow) {
107    // Filesystem first
108    const fsData = getScoreJson(siteId);
109    if (fsData) return fsData;
110  
111    // Fall back to DB column (skip sentinel value from extraction)
112    if (dbRow && dbRow.score_json && !dbRow.score_json.includes('"_fs"')) return dbRow.score_json;
113  
114    return null;
115  }
116  
/**
 * Fetch and parse score data, preferring the filesystem over the database.
 *
 * @param {number} siteId
 * @param {Object} [dbRow] - Optional DB row with score_json column
 * @returns {Object|null} Parsed score data; null when neither source has data
 *   or the selected text is not valid JSON
 */
function getScoreDataWithFallback(siteId, dbRow) {
  const text = getScoreJsonWithFallback(siteId, dbRow);
  if (text) {
    try {
      return JSON.parse(text);
    } catch {
      // Selected text is not valid JSON; treat it the same as absent.
    }
  }
  return null;
}
132  
133  export {
134    getScoreJson,
135    getScoreData,
136    setScoreJson,
137    deleteScoreJson,
138    hasScoreJson,
139    getScoreJsonWithFallback,
140    getScoreDataWithFallback,
141    DATA_DIR,
142  };