/ src / browser / verify-fixture.ts
verify-fixture.ts
  1  /**
  2   * Verify fixture: structural expectations for `opencli browser verify` output.
  3   *
  4   * The adapter-author skill runbook says every published adapter must write a
  5   * fixture under `~/.opencli/sites/<site>/verify/<command>.json` so later verify
  6   * runs can catch shape regressions (missing columns, wrong types, bleeding
  7   * values) without relying on exact content match — BBS / news / market data is
  8   * too volatile for value equality.
  9   *
 10   * Schema:
 11   *   {
 12   *     // args can be either:
 13   *     //   - an object of named flags: { "limit": 3 }  → expands to `--limit 3`
 14   *     //   - a raw argv array:         ["123", "--limit", "3"]  → passed verbatim
 15   *     // Use the array form for adapters that take positional subjects (e.g. <tid>, <url>, <query>).
 16   *     "args": { "limit": 3 },
 17   *     "expect": {
 18   *       "rowCount": { "min": 1, "max": 10 },  // inclusive bounds
 19   *       "columns":  ["a", "b"],                // every row must have these keys
 20   *       "types":    { "a": "string", "b": "number|string" },
 21   *       "patterns": { "url": "^https?://" },
 22   *       "notEmpty": ["title", "url"],          // trimmed string must be non-empty
 23   *       "mustNotContain": {                     // catch content-contamination bleed
 24   *         "description": ["address:", "category:"]
 25   *       },
 26   *       "mustBeTruthy": ["count"]               // catch silent `|| 0` fallbacks
 27   *     }
 28   *   }
 29   */
 30  import * as fs from 'node:fs';
 31  import * as os from 'node:os';
 32  import * as path from 'node:path';
 33  
/**
 * Structural expectations checked against every verify run. All fields are
 * optional; an omitted field disables the corresponding check.
 */
export type FixtureExpect = {
  /** Inclusive bounds on the number of returned rows; either bound may be omitted. */
  rowCount?: { min?: number; max?: number };
  /** Keys that every row must carry. */
  columns?: string[];
  /**
   * Allowed runtime type(s) per column, `|`-separated (e.g. "number|string").
   * Type names are `typeof` results plus "null" and "array"; "any" accepts
   * every value. Rows that lack the column are skipped by this check.
   */
  types?: Record<string, string>;
  /**
   * Regular-expression source per column. Each source is compiled without
   * flags and tested against the stringified value. Rows where the column is
   * missing, `null` or `undefined` are skipped. A source that does not compile
   * is reported once as a `pattern` failure.
   */
  patterns?: Record<string, string>;
  /**
   * Columns that must hold a non-blank value: fails on `null`, `undefined`
   * (including a missing column) and on values whose string form is empty
   * after trimming.
   */
  notEmpty?: string[];
  /**
   * Substrings that MUST NOT appear in the column value.
   *
   * Catches silent content contamination that `notEmpty` alone misses —
   * e.g. a `description` field that accidentally carries "address: ..." /
   * "category: ..." fragments from sibling DOM nodes, or a `title` that
   * bled in a navigation-breadcrumb prefix. Each entry is matched as a
   * plain, case-sensitive substring (not a regex) against the stringified
   * column value. Rows where the column is missing, `null` or `undefined`
   * are skipped.
   */
  mustNotContain?: Record<string, string[]>;
  /**
   * Columns whose values must be truthy. Complements `notEmpty` (which
   * only rejects blank-string/null/undefined) by also catching silent
   * `|| 0` / `|| false` fallbacks in numeric/boolean fields. Fires when
   * the value coerces to `false` in JS. Rows that lack the column are
   * skipped; pair with `columns` to require presence.
   */
  mustBeTruthy?: string[];
};
 58  
/**
 * Arguments replayed when verifying a command: either an object of named
 * flags (`{ "limit": 3 }`) or a raw argv array (`["123", "--limit", "3"]`).
 */
export type FixtureArgs = Record<string, unknown> | unknown[];
 60  
/** On-disk fixture document: the arguments to run with and the expected output shape. */
export type Fixture = {
  /** Arguments appended after the command name; omitted when the command takes none. */
  args?: FixtureArgs;
  /** Structural expectations; when omitted, validation reports no failures. */
  expect?: FixtureExpect;
};
 65  
/** One violated expectation reported by `validateRows`. */
export type ValidationFailure = {
  /** Which kind of expectation was violated. */
  rule: 'rowCount' | 'column' | 'type' | 'pattern' | 'notEmpty' | 'mustNotContain' | 'mustBeTruthy';
  /** Human-readable description of the violation. */
  detail: string;
  /**
   * Zero-based index of the offending row. Absent for failures that are not
   * tied to a single row (row-count bounds, a pattern that does not compile).
   */
  rowIndex?: number;
};
 71  
/** A single output record: column name → value of any type. */
export type Row = Record<string, unknown>;
 73  
 74  export function fixturePath(site: string, command: string): string {
 75    return path.join(os.homedir(), '.opencli', 'sites', site, 'verify', `${command}.json`);
 76  }
 77  
 78  export function loadFixture(site: string, command: string): Fixture | null {
 79    const p = fixturePath(site, command);
 80    if (!fs.existsSync(p)) return null;
 81    try {
 82      const raw = fs.readFileSync(p, 'utf-8');
 83      const parsed = JSON.parse(raw) as Fixture;
 84      return parsed;
 85    } catch (err) {
 86      throw new Error(`Failed to parse fixture ${p}: ${err instanceof Error ? err.message : String(err)}`);
 87    }
 88  }
 89  
 90  export function writeFixture(site: string, command: string, fixture: Fixture): string {
 91    const p = fixturePath(site, command);
 92    fs.mkdirSync(path.dirname(p), { recursive: true });
 93    fs.writeFileSync(p, `${JSON.stringify(fixture, null, 2)}\n`, 'utf-8');
 94    return p;
 95  }
 96  
 97  /**
 98   * Derive a reasonable fixture from sample output. Used by `--write-fixture`
 99   * to seed a first draft the author can hand-tune.
100   *
101   * Heuristics:
102   * - rowCount.min = 1 if rows non-empty, else 0
103   * - columns = keys from the first row
104   * - types = typeof of the first row's values, with "number|string" for mixed
105   * - no auto patterns / notEmpty — author should add those deliberately
106   */
107  export function deriveFixture(rows: Row[], args?: FixtureArgs): Fixture {
108    const expect: FixtureExpect = {};
109    if (rows.length === 0) {
110      expect.rowCount = { min: 0 };
111      return { ...(args ? { args } : {}), expect };
112    }
113    expect.rowCount = { min: 1 };
114  
115    const first = rows[0];
116    const columns = Object.keys(first);
117    expect.columns = columns;
118  
119    const types: Record<string, string> = {};
120    for (const col of columns) {
121      const observed = new Set<string>();
122      for (const row of rows) {
123        const v = row[col];
124        observed.add(jsType(v));
125      }
126      types[col] = [...observed].sort().join('|');
127    }
128    expect.types = types;
129  
130    return { ...(args ? { args } : {}), expect };
131  }
132  
/**
 * Check `rows` against the structural expectations in `fixture.expect`.
 *
 * Row-count bounds and pattern compilation are checked once; every other rule
 * is checked per row, in this order: columns, types, patterns, notEmpty,
 * mustNotContain, mustBeTruthy. All violations are collected — validation
 * never stops at the first failure.
 *
 * A column counts as present only when it is an OWN property of the row, so
 * expectations on names such as "constructor" or "toString" are not satisfied
 * by properties inherited from `Object.prototype`.
 *
 * @param rows output rows to validate
 * @param fixture fixture whose `expect` section drives the checks
 * @returns every violation found; empty when the rows conform or when the
 *   fixture has no `expect` section
 */
export function validateRows(rows: Row[], fixture: Fixture): ValidationFailure[] {
  const failures: ValidationFailure[] = [];
  const expect = fixture.expect;
  if (!expect) return failures;

  if (expect.rowCount) {
    const { min, max } = expect.rowCount;
    if (typeof min === 'number' && rows.length < min) {
      failures.push({ rule: 'rowCount', detail: `got ${rows.length} rows, expected at least ${min}` });
    }
    if (typeof max === 'number' && rows.length > max) {
      failures.push({ rule: 'rowCount', detail: `got ${rows.length} rows, expected at most ${max}` });
    }
  }

  // Resolve every rule set once, outside the per-row loop.
  const columns = expect.columns ?? [];
  const types = Object.entries(expect.types ?? {});
  const notEmpty = expect.notEmpty ?? [];
  const mustNotContain = Object.entries(expect.mustNotContain ?? {});
  const mustBeTruthy = expect.mustBeTruthy ?? [];

  // Compile each pattern once; a bad source is reported a single time rather
  // than once per row.
  const compiledPatterns: Array<[string, RegExp]> = [];
  for (const [col, src] of Object.entries(expect.patterns ?? {})) {
    try {
      compiledPatterns.push([col, new RegExp(src)]);
    } catch (err) {
      failures.push({ rule: 'pattern', detail: `pattern for "${col}" invalid: ${err instanceof Error ? err.message : String(err)}` });
    }
  }

  // With no per-row rules there is nothing to inspect inside the rows.
  const rowRuleCount =
    columns.length +
    types.length +
    compiledPatterns.length +
    notEmpty.length +
    mustNotContain.length +
    mustBeTruthy.length;
  if (rowRuleCount === 0) return failures;

  // Own-property test: `in` would also accept keys inherited from the prototype.
  const hasColumn = (row: Row, col: string): boolean => Object.prototype.hasOwnProperty.call(row, col);

  rows.forEach((row, i) => {
    // Rows come from adapter output at runtime; a null/primitive row is a shape
    // regression to report, not a reason to crash with a TypeError.
    const candidate: unknown = row;
    if (candidate === null || typeof candidate !== 'object') {
      const got = candidate === null ? 'null' : typeof candidate;
      failures.push({ rule: 'type', detail: `row is ${got}, expected object`, rowIndex: i });
      return;
    }

    for (const col of columns) {
      if (!hasColumn(row, col)) {
        failures.push({ rule: 'column', detail: `missing column "${col}"`, rowIndex: i });
      }
    }
    for (const [col, declared] of types) {
      if (!hasColumn(row, col)) continue;
      const actual = jsType(row[col]);
      if (!typeMatches(actual, declared)) {
        failures.push({
          rule: 'type',
          detail: `"${col}" is ${actual}, expected ${declared}`,
          rowIndex: i,
        });
      }
    }
    for (const [col, re] of compiledPatterns) {
      if (!hasColumn(row, col)) continue;
      const v = row[col];
      if (v === null || v === undefined) continue;
      if (!re.test(String(v))) {
        failures.push({
          rule: 'pattern',
          // Truncate the echoed value so one huge cell cannot flood the report.
          detail: `"${col}"=${JSON.stringify(String(v).slice(0, 60))} does not match /${re.source}/`,
          rowIndex: i,
        });
      }
    }
    for (const col of notEmpty) {
      // A missing column counts as empty.
      const v = hasColumn(row, col) ? row[col] : undefined;
      if (v === null || v === undefined || String(v).trim() === '') {
        failures.push({ rule: 'notEmpty', detail: `"${col}" is empty`, rowIndex: i });
      }
    }
    for (const [col, needles] of mustNotContain) {
      if (!hasColumn(row, col)) continue;
      const v = row[col];
      if (v === null || v === undefined) continue;
      const haystack = String(v);
      for (const needle of needles) {
        if (haystack.includes(needle)) {
          failures.push({
            rule: 'mustNotContain',
            detail: `"${col}" contains forbidden substring ${JSON.stringify(needle)}`,
            rowIndex: i,
          });
        }
      }
    }
    for (const col of mustBeTruthy) {
      if (!hasColumn(row, col)) continue;
      if (!row[col]) {
        failures.push({
          rule: 'mustBeTruthy',
          detail: `"${col}" is falsy (${JSON.stringify(row[col])}) — likely silent fallback`,
          rowIndex: i,
        });
      }
    }
  });

  return failures;
}
226  
/**
 * Turn fixture args into the argv tokens that follow the command name.
 *
 * - Missing args produce no tokens.
 * - An array is taken as raw argv: each element is stringified and order is
 *   kept, which is what positional subjects need.
 * - An object becomes one `--key value` pair per entry, in entry order, with
 *   the value stringified.
 */
export function expandFixtureArgs(args: FixtureArgs | undefined): string[] {
  if (!args) return [];
  if (Array.isArray(args)) return args.map((token) => String(token));
  return Object.entries(args).flatMap(([flag, value]) => [`--${flag}`, String(value)]);
}
241  
/**
 * Name the runtime type of a value the way fixtures spell it: the `typeof`
 * result, except that `null` is "null" and arrays are "array" instead of
 * both being "object".
 */
function jsType(v: unknown): string {
  if (Array.isArray(v)) return 'array';
  return v === null ? 'null' : typeof v;
}
247  
/**
 * Decide whether an observed type name satisfies a declared type spec.
 *
 * The spec is a `|`-separated list of type names; surrounding whitespace and
 * empty alternatives are ignored. A spec with no alternatives, or one that
 * lists "any", accepts everything.
 *
 * @param actual observed type name
 * @param declared type spec from the fixture, e.g. "number|string"
 */
function typeMatches(actual: string, declared: string): boolean {
  const allowed = new Set<string>();
  for (const part of declared.split('|')) {
    const name = part.trim();
    if (name !== '') allowed.add(name);
  }
  return allowed.size === 0 || allowed.has('any') || allowed.has(actual);
}