/ utils / pdfUtils.ts
pdfUtils.ts
 1  import { getMainLoopModel } from './model/model.js'
 2  
 3  // Document extensions that are handled specially
 4  export const DOCUMENT_EXTENSIONS = new Set(['pdf'])
 5  
 6  /**
 7   * Parse a page range string into firstPage/lastPage numbers.
 8   * Supported formats:
 9   * - "5" → { firstPage: 5, lastPage: 5 }
10   * - "1-10" → { firstPage: 1, lastPage: 10 }
11   * - "3-" → { firstPage: 3, lastPage: Infinity }
12   *
13   * Returns null on invalid input (non-numeric, zero, inverted range).
14   * Pages are 1-indexed.
15   */
16  export function parsePDFPageRange(
17    pages: string,
18  ): { firstPage: number; lastPage: number } | null {
19    const trimmed = pages.trim()
20    if (!trimmed) {
21      return null
22    }
23  
24    // "N-" open-ended range
25    if (trimmed.endsWith('-')) {
26      const first = parseInt(trimmed.slice(0, -1), 10)
27      if (isNaN(first) || first < 1) {
28        return null
29      }
30      return { firstPage: first, lastPage: Infinity }
31    }
32  
33    const dashIndex = trimmed.indexOf('-')
34    if (dashIndex === -1) {
35      // Single page: "5"
36      const page = parseInt(trimmed, 10)
37      if (isNaN(page) || page < 1) {
38        return null
39      }
40      return { firstPage: page, lastPage: page }
41    }
42  
43    // Range: "1-10"
44    const first = parseInt(trimmed.slice(0, dashIndex), 10)
45    const last = parseInt(trimmed.slice(dashIndex + 1), 10)
46    if (isNaN(first) || isNaN(last) || first < 1 || last < 1 || last < first) {
47      return null
48    }
49    return { firstPage: first, lastPage: last }
50  }
51  
/**
 * Report whether the current main-loop model can read PDFs directly.
 *
 * PDF document blocks work on every provider (1P, Vertex, Bedrock, Foundry).
 * The only remaining model that predates PDF support is Haiku 3; users on it
 * fall back to the page-extraction path (poppler-utils). The model ID is
 * lowercased and matched by substring so the check covers all provider ID
 * formats (Bedrock prefixes, Vertex @-dates).
 *
 * @returns false when the model ID contains "claude-3-haiku", true otherwise.
 */
export function isPDFSupported(): boolean {
  const modelId = getMainLoopModel().toLowerCase()
  const isHaiku3 = modelId.includes('claude-3-haiku')
  return !isHaiku3
}
62  
/**
 * Tell whether a file extension denotes a PDF document.
 *
 * The comparison is case-insensitive, and a single leading dot is ignored,
 * so "pdf", ".pdf" and ".PDF" are all recognized.
 *
 * @param ext File extension (with or without leading dot)
 * @returns true when the normalized extension is in DOCUMENT_EXTENSIONS.
 */
export function isPDFExtension(ext: string): boolean {
  // Strip at most one leading dot, then compare in lowercase.
  const bare = ext.replace(/^\./, '')
  return DOCUMENT_EXTENSIONS.has(bare.toLowerCase())
}