// clis/grok/image.ts
  1  import * as fs from 'node:fs';
  2  import * as path from 'node:path';
  3  import * as crypto from 'node:crypto';
  4  import { cli, Strategy } from '@jackwener/opencli/registry';
  5  import type { IPage } from '@jackwener/opencli/types';
  6  
/** Page the command navigates to when the tab is not on grok.com or a new chat is requested. */
const GROK_URL = 'https://grok.com/';
/** Prefix of the `url` column in the row returned when the wait loop ends without any image. */
const NO_IMAGE_PREFIX = '[NO IMAGE]';
/** Prefix of the `url` column in the row returned when the prompt could not be submitted. */
const BLOCKED_PREFIX = '[BLOCKED]';
/** Troubleshooting hint appended to the blocked-send message. */
const SESSION_HINT = 'Likely login/auth/challenge/session issue in the existing grok.com browser session.';
 11  
/** Outcome reported by the in-page script that types and submits the prompt. */
type SendResult = {
  /** True when a submit click or an Enter key event was dispatched. */
  ok?: boolean;
  /** Submission path tag ('pm-submit', 'clicked', 'clicked-submit', 'enter') or an error description. */
  msg?: string;
  // NOTE(review): `reason` and `detail` are not populated by the script in this
  // file; presumably kept for compatibility with other producers — confirm.
  reason?: string;
  detail?: string;
};
 18  
/** One `<img>` found inside a message bubble. */
type BubbleImage = {
  /** Image URL as read from the element (`currentSrc`, falling back to `src`). */
  src: string;
  /** Width in pixels (`naturalWidth`, falling back to `width`); 0 when unknown. */
  w: number;
  /** Height in pixels (`naturalHeight`, falling back to `height`); 0 when unknown. */
  h: number;
};

/** All images collected from a single message bubble. */
type BubbleImageSet = BubbleImage[];
 26  
/** Result of downloading one image through the browser's `fetch`. */
type FetchResult = {
  /** True when the response was received with an OK status and encoded. */
  ok: boolean;
  /** Base64-encoded response body; set on success. */
  base64?: string;
  /** MIME type reported by the response blob; set on success. */
  contentType?: string;
  /** Failure description (`HTTP <status>` or the thrown error's message); set on failure. */
  error?: string;
};
 33  
 34  function normalizeBooleanFlag(value: unknown): boolean {
 35    if (typeof value === 'boolean') return value;
 36    const normalized = String(value ?? '').trim().toLowerCase();
 37    return normalized === 'true' || normalized === '1' || normalized === 'yes' || normalized === 'on';
 38  }
 39  
 40  function dedupeBySrc(images: BubbleImage[]): BubbleImage[] {
 41    const seen = new Set<string>();
 42    const out: BubbleImage[] = [];
 43    for (const img of images) {
 44      if (!img.src || seen.has(img.src)) continue;
 45      seen.add(img.src);
 46      out.push(img);
 47    }
 48    return out;
 49  }
 50  
 51  function imagesSignature(images: BubbleImage[]): string {
 52    return images.map(i => i.src).sort().join('|');
 53  }
 54  
 55  function extFromContentType(ct?: string): string {
 56    if (!ct) return 'jpg';
 57    if (ct.includes('png')) return 'png';
 58    if (ct.includes('webp')) return 'webp';
 59    if (ct.includes('gif')) return 'gif';
 60    return 'jpg';
 61  }
 62  
 63  function buildFilename(src: string, ct?: string): string {
 64    const ext = extFromContentType(ct);
 65    const hash = crypto.createHash('sha1').update(src).digest('hex').slice(0, 12);
 66    return `grok-${Date.now()}-${hash}.${ext}`;
 67  }
 68  
 69  /** Check whether the tab is already on grok.com (any path). */
 70  async function isOnGrok(page: IPage): Promise<boolean> {
 71    const url = await page.evaluate('window.location.href').catch(() => '');
 72    if (typeof url !== 'string' || !url) return false;
 73    try {
 74      const hostname = new URL(url).hostname;
 75      return hostname === 'grok.com' || hostname.endsWith('.grok.com');
 76    } catch {
 77      return false;
 78    }
 79  }
 80  
/**
 * Best-effort attempt to open a fresh conversation.
 *
 * Runs a script in the page that looks at every visible `<a>` and `<button>`
 * and clicks the first one whose text or `aria-label` contains "new chat" or
 * "new conversation" (case-insensitive), or whose `href` is exactly `/`.
 * Nothing happens when no element matches; no result is reported back.
 */
async function tryStartFreshChat(page: IPage): Promise<void> {
  await page.evaluate(`(() => {
    const isVisible = (node) => {
      if (!(node instanceof HTMLElement)) return false;
      const rect = node.getBoundingClientRect();
      const style = window.getComputedStyle(node);
      return rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none';
    };
    const candidates = Array.from(document.querySelectorAll('a, button')).filter(node => {
      if (!isVisible(node)) return false;
      const text = (node.textContent || '').trim().toLowerCase();
      const aria = (node.getAttribute('aria-label') || '').trim().toLowerCase();
      const href = node.getAttribute('href') || '';
      return text.includes('new chat')
        || text.includes('new conversation')
        || aria.includes('new chat')
        || aria.includes('new conversation')
        || href === '/';
    });
    const target = candidates[0];
    if (target instanceof HTMLElement) target.click();
  })()`);
}
104  
/**
 * Type `prompt` into the grok.com composer and submit it from inside the page.
 *
 * The prompt is embedded in the injected script as a JSON string literal.
 * The script:
 *  1. Polls up to 12 times, one second apart, for a
 *     `.ProseMirror[contenteditable="true"]` element or, failing that, a
 *     `<textarea>`.
 *  2. If the ProseMirror element exposes `editor.commands`, clears it, inserts
 *     the prompt and clicks a visible, enabled button labelled "Submit" (or
 *     its Chinese equivalent), retrying up to 6 times, 500 ms apart.
 *  3. Otherwise uses the textarea: inserts the prompt with
 *     `document.execCommand`, clicks a labelled submit button or any
 *     `button[type="submit"]`, and as a last resort dispatches a synthetic
 *     Enter `keydown` on the textarea.
 *
 * NOTE(review): the lookup loop breaks as soon as a ProseMirror element is
 * found, so `box` stays null in that case. If the ProseMirror path then fails
 * (no `editor.commands`, an exception, or no submit button), the textarea
 * fallback returns the "no composer" error without looking for a textarea —
 * confirm this is intended.
 *
 * @param page - Browser page in which the script is evaluated.
 * @param prompt - Text to send.
 * @returns `{ ok: true, msg }` with `msg` naming the submission path, or
 *   `{ ok: false, msg }` describing why nothing was sent. Errors thrown inside
 *   the script are caught and reported through `msg`.
 */
async function sendPrompt(page: IPage, prompt: string): Promise<SendResult> {
  // JSON.stringify yields a valid JS string literal for interpolation below.
  const promptJson = JSON.stringify(prompt);
  return page.evaluate(`(async () => {
    try {
      const waitFor = (ms) => new Promise(resolve => setTimeout(resolve, ms));
      const composerSelector = '.ProseMirror[contenteditable="true"]';
      const isVisibleEnabledSubmit = (node) => {
        if (!(node instanceof HTMLButtonElement)) return false;
        const rect = node.getBoundingClientRect();
        const style = window.getComputedStyle(node);
        return !node.disabled
          && rect.width > 0
          && rect.height > 0
          && style.visibility !== 'hidden'
          && style.display !== 'none';
      };

      let pm = null;
      let box = null;
      for (let attempt = 0; attempt < 12; attempt += 1) {
        const composer = document.querySelector(composerSelector);
        if (composer instanceof HTMLElement) {
          pm = composer;
          break;
        }

        const textarea = document.querySelector('textarea');
        if (textarea instanceof HTMLTextAreaElement) {
          box = textarea;
          break;
        }

        await waitFor(1000);
      }

      // Prefer the ProseMirror composer when present (current grok.com UI).
      if (pm && pm.editor && pm.editor.commands) {
        try {
          if (pm.editor.commands.clearContent) pm.editor.commands.clearContent();
          pm.editor.commands.focus();
          pm.editor.commands.insertContent(${promptJson});
          for (let attempt = 0; attempt < 6; attempt += 1) {
            const sbtn = Array.from(document.querySelectorAll('button[aria-label="Submit"], button[aria-label="\\u63d0\\u4ea4"]'))
              .find(isVisibleEnabledSubmit);
            if (sbtn) {
              sbtn.click();
              return { ok: true, msg: 'pm-submit' };
            }
            await waitFor(500);
          }
        } catch (e) { /* fall through to textarea */ }
      }

      // Fallback: legacy textarea composer.
      if (!box) return { ok: false, msg: 'no composer (neither ProseMirror nor textarea)' };
      box.focus(); box.value = '';
      document.execCommand('selectAll');
      document.execCommand('insertText', false, ${promptJson});
      for (let attempt = 0; attempt < 6; attempt += 1) {
        const btn = Array.from(document.querySelectorAll('button[aria-label="\\u63d0\\u4ea4"], button[aria-label="Submit"]'))
          .find(isVisibleEnabledSubmit);
        if (btn) {
          btn.click();
          return { ok: true, msg: 'clicked' };
        }

        const sub = Array.from(document.querySelectorAll('button[type="submit"]'))
          .find(isVisibleEnabledSubmit);
        if (sub) {
          sub.click();
          return { ok: true, msg: 'clicked-submit' };
        }

        await waitFor(500);
      }
      box.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', keyCode: 13, bubbles: true }));
      return { ok: true, msg: 'enter' };
    } catch (e) { return { ok: false, msg: e && e.toString ? e.toString() : String(e) }; }
  })()`) as Promise<SendResult>;
}
185  
/**
 * Read the `<img>` elements of every message bubble currently in the page.
 *
 * Returns one image set per element matching `div.message-bubble` or
 * `[data-testid="message-bubble"]`, so callers can compare the number of
 * bubbles against a baseline taken before sending. Within each bubble an
 * image is kept only when its source is an http(s) URL and each dimension is
 * either unknown (0) or at least 128 px; each set is then de-duplicated by
 * `src`. A non-array evaluation result is treated as "no bubbles".
 */
async function getBubbleImageSets(page: IPage): Promise<BubbleImageSet[]> {
  const result = await page.evaluate(`(() => {
    const bubbles = document.querySelectorAll('div.message-bubble, [data-testid="message-bubble"]');
    return Array.from(bubbles).map(bubble => Array.from(bubble.querySelectorAll('img'))
      .map(img => ({
        src: img.currentSrc || img.src || '',
        w: img.naturalWidth || img.width || 0,
        h: img.naturalHeight || img.height || 0,
      }))
      .filter(i => i.src && /^https?:/.test(i.src))
      // Ignore tiny UI/avatar images that may live in the bubble chrome.
      .filter(i => (i.w === 0 || i.w >= 128) && (i.h === 0 || i.h >= 128)));
  })()`) as BubbleImageSet[] | undefined;

  // Guard against the evaluation returning nothing usable.
  const raw = Array.isArray(result) ? result : [];
  return raw.map(dedupeBySrc);
}
204  
/**
 * Choose the image set of the newest bubble that appeared after the baseline.
 *
 * Bubbles at indexes below `baselineCount` (clamped to 0) are ignored. Of the
 * remaining ones, the last non-empty set is returned; an empty array is
 * returned when none of them contains images.
 */
function pickLatestImageCandidate(
  bubbleImageSets: BubbleImageSet[],
  baselineCount: number,
): BubbleImage[] {
  const firstFreshIndex = Math.max(0, baselineCount);
  const newestFirst = bubbleImageSets.slice(firstFreshIndex).reverse();
  const match = newestFirst.find((set) => set.length > 0);
  return match ?? [];
}
215  
/**
 * Download `url` from inside the page and return its body as base64.
 *
 * The download goes through the browser's `fetch` so grok.com cookies and
 * referer are attached automatically — assets.grok.com is gated by Cloudflare
 * and will refuse direct curl/node downloads.
 *
 * @param page - Browser page in which the fetch is performed.
 * @param url - Image URL; embedded in the injected script as a JSON string literal.
 * @returns `{ ok: true, base64, contentType }` on success (`contentType`
 *   defaults to `image/jpeg` when the blob reports none), or
 *   `{ ok: false, error }` for a non-OK HTTP status or a thrown error.
 */
async function fetchImageAsBase64(page: IPage, url: string): Promise<FetchResult> {
  const urlJson = JSON.stringify(url);
  return page.evaluate(`(async () => {
    try {
      const res = await fetch(${urlJson}, { credentials: 'include', referrer: 'https://grok.com/' });
      if (!res.ok) return { ok: false, error: 'HTTP ' + res.status };
      const blob = await res.blob();
      const buf = await blob.arrayBuffer();
      const bytes = new Uint8Array(buf);
      let binary = '';
      for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
      return { ok: true, base64: btoa(binary), contentType: blob.type || 'image/jpeg' };
    } catch (e) { return { ok: false, error: e && e.message || String(e) }; }
  })()`) as Promise<FetchResult>;
}
234  
/**
 * Download each image through the browser session and write it into `outDir`.
 *
 * The directory is created (recursively) first; a failure to create it still
 * throws, because no image could be saved in that case. From then on failures
 * are reported per image and never abort the batch: the row's `path` becomes
 * `[DOWNLOAD FAILED] <reason>` when the fetch fails, the response body is
 * empty, or fetching/writing throws.
 *
 * @param page - Browser page used to perform the downloads.
 * @param images - Images to download, processed sequentially in order.
 * @param outDir - Target directory for the saved files.
 * @returns One entry per input image, in input order, carrying either the
 *   saved file path or a `[DOWNLOAD FAILED]` marker in `path`.
 */
async function saveImages(
  page: IPage,
  images: BubbleImage[],
  outDir: string,
): Promise<Array<BubbleImage & { path: string }>> {
  fs.mkdirSync(outDir, { recursive: true });
  const results: Array<BubbleImage & { path: string }> = [];
  const markFailed = (img: BubbleImage, reason: string): void => {
    results.push({ ...img, path: `[DOWNLOAD FAILED] ${reason}` });
  };

  for (const img of images) {
    try {
      const fetched = await fetchImageAsBase64(page, img.src);
      if (!fetched || !fetched.ok) {
        markFailed(img, fetched?.error || 'unknown');
        continue;
      }
      // An "ok" result without payload would otherwise be written as a
      // zero-byte file and reported as a successful download.
      if (!fetched.base64) {
        markFailed(img, 'empty response body');
        continue;
      }
      const filepath = path.join(outDir, buildFilename(img.src, fetched.contentType));
      fs.writeFileSync(filepath, Buffer.from(fetched.base64, 'base64'));
      results.push({ ...img, path: filepath });
    } catch (err: unknown) {
      // Keep going: one unwritable file or failed evaluation must not discard
      // the rows (and URLs) of the other images.
      markFailed(img, err instanceof Error ? err.message : String(err));
    }
  }
  return results;
}
254  
/**
 * Convert an image into an output row with the command's columns
 * (`url`, `width`, `height`, `path`). `savedPath` defaults to an empty string.
 */
function toRow(img: BubbleImage, savedPath = '') {
  const { src, w, h } = img;
  return {
    url: src,
    width: w,
    height: h,
    path: savedPath,
  };
}
258  
/**
 * `grok image` command: send an image-generation prompt to grok.com in the
 * existing browser session and return the resulting image URLs.
 *
 * Flow:
 *  1. Navigate to grok.com when needed (always when `new` is set, followed by
 *     a best-effort "new chat" click).
 *  2. Record how many message bubbles exist, then submit the prompt. A failed
 *     submission yields a single `[BLOCKED]` row.
 *  3. Poll every 3 seconds until the newest post-baseline bubble holds at
 *     least `count` images and its set of URLs stays unchanged for two further
 *     reads, or until `timeout` seconds elapse.
 *  4. On timeout, return the last qualifying image set if there was one,
 *     otherwise a single `[NO IMAGE]` row.
 *
 * When `out` is given, images are downloaded into that directory and each
 * row's `path` holds the saved file (or a download-failure marker).
 */
export const imageCommand = cli({
  site: 'grok',
  name: 'image',
  description: 'Generate images on grok.com and return image URLs',
  domain: 'grok.com',
  strategy: Strategy.COOKIE,
  browser: true,
  args: [
    { name: 'prompt', positional: true, type: 'string', required: true, help: 'Image generation prompt' },
    { name: 'timeout', type: 'int', default: 240, help: 'Max seconds to wait for the image (default: 240)' },
    { name: 'new', type: 'boolean', default: false, help: 'Start a new chat before sending (default: false)' },
    { name: 'count', type: 'int', default: 1, help: 'Minimum images to wait for before returning (default: 1)' },
    { name: 'out', type: 'string', default: '', help: 'Directory to save downloaded images (uses browser session to bypass auth)' },
  ],
  columns: ['url', 'width', 'height', 'path'],
  func: async (page: IPage, kwargs: Record<string, any>) => {
    const prompt = kwargs.prompt as string;
    // Number(...) turns a non-numeric value into NaN, which is falsy and so
    // falls back to the default instead of producing a NaN deadline.
    const timeoutMs = (Number(kwargs.timeout) || 240) * 1000;
    const newChat = normalizeBooleanFlag(kwargs.new);
    // Math.max(1, NaN) is NaN and `length >= NaN` is never true, which would
    // make every run wait for the full timeout; fall back to 1 instead.
    const requestedCount = Number(kwargs.count || 1);
    const minCount = Number.isFinite(requestedCount) ? Math.max(1, requestedCount) : 1;
    const outDir = (kwargs.out || '').toString().trim();

    // Turn a final image set into output rows, downloading first when asked to.
    const toRows = async (images: BubbleImage[]) => {
      if (!outDir) return images.map(i => toRow(i));
      const saved = await saveImages(page, images, outDir);
      return saved.map(s => toRow(s, s.path));
    };

    if (newChat) {
      await page.goto(GROK_URL);
      await page.wait(2);
      await tryStartFreshChat(page);
      await page.wait(2);
    } else if (!(await isOnGrok(page))) {
      await page.goto(GROK_URL);
      await page.wait(3);
    }

    // Bubbles present before sending are excluded from the result search.
    const baselineBubbleCount = (await getBubbleImageSets(page)).length;
    const sendResult = await sendPrompt(page, prompt);
    if (!sendResult || !sendResult.ok) {
      return [{
        url: `${BLOCKED_PREFIX} send failed: ${JSON.stringify(sendResult)}. ${SESSION_HINT}`,
        width: 0,
        height: 0,
        path: '',
      }];
    }

    const startTime = Date.now();
    let lastSignature = '';
    let stableCount = 0;
    let lastImages: BubbleImage[] = [];

    while (Date.now() - startTime < timeoutMs) {
      await page.wait(3);
      const bubbleImageSets = await getBubbleImageSets(page);
      const images = pickLatestImageCandidate(bubbleImageSets, baselineBubbleCount);
      if (images.length < minCount) continue;

      const signature = imagesSignature(images);
      if (signature !== lastSignature) {
        // The set changed (or was seen for the first time): restart the
        // stability counter and remember it as the timeout fallback.
        stableCount = 0;
        lastSignature = signature;
        lastImages = images;
        continue;
      }

      stableCount += 1;
      // Require two consecutive stable reads (~6s) before declaring done.
      if (stableCount >= 2) return toRows(images);
    }

    // Timed out: fall back to the last set that met the minimum count.
    if (lastImages.length) return toRows(lastImages);
    return [{
      url: `${NO_IMAGE_PREFIX} No image appeared within ${Math.round(timeoutMs / 1000)}s.`,
      width: 0,
      height: 0,
      path: '',
    }];
  },
});
347  
/**
 * Module-private helpers re-exported under a single `__test__` object,
 * presumably so unit tests can reach them without making each one a public
 * export — confirm against the test suite.
 */
export const __test__ = {
  normalizeBooleanFlag,
  isOnGrok,
  dedupeBySrc,
  imagesSignature,
  extFromContentType,
  buildFilename,
  pickLatestImageCandidate,
};