image.ts
import * as fs from 'node:fs';
import * as path from 'node:path';
import * as crypto from 'node:crypto';
import { cli, Strategy } from '@jackwener/opencli/registry';
import type { IPage } from '@jackwener/opencli/types';

const GROK_URL = 'https://grok.com/';
const NO_IMAGE_PREFIX = '[NO IMAGE]';
const BLOCKED_PREFIX = '[BLOCKED]';
const SESSION_HINT = 'Likely login/auth/challenge/session issue in the existing grok.com browser session.';
/** Fallback for the `timeout` argument when it is missing, zero, negative or not a number. */
const DEFAULT_TIMEOUT_SECONDS = 240;

/** Result object returned by the in-page prompt submission script. */
type SendResult = {
  ok?: boolean;
  msg?: string;
  reason?: string;
  detail?: string;
};

/** One `<img>` found inside a message bubble, with its reported pixel size. */
type BubbleImage = {
  src: string;
  w: number;
  h: number;
};

/** All qualifying images of a single message bubble. */
type BubbleImageSet = BubbleImage[];

/** Result object returned by the in-page image download script. */
type FetchResult = {
  ok: boolean;
  base64?: string;
  contentType?: string;
  error?: string;
};

/** One output row of the command; matches the declared `columns`. */
type Row = {
  url: string;
  width: number;
  height: number;
  path: string;
};

/**
 * Interpret a CLI flag value as a boolean.
 *
 * Booleans pass through; anything else is stringified, trimmed and lowercased,
 * and counts as true only for `true`, `1`, `yes` or `on`.
 */
function normalizeBooleanFlag(value: unknown): boolean {
  if (typeof value === 'boolean') return value;
  const normalized = String(value ?? '').trim().toLowerCase();
  return normalized === 'true' || normalized === '1' || normalized === 'yes' || normalized === 'on';
}

/**
 * Coerce an argument to a finite number greater than zero.
 *
 * @param value - Raw argument value (number, numeric string, or anything else).
 * @param fallback - Returned when `value` is not a finite number above zero.
 */
function toPositiveNumber(value: unknown, fallback: number): number {
  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/** Drop entries with an empty `src` and keep only the first entry per `src`, preserving order. */
function dedupeBySrc(images: BubbleImage[]): BubbleImage[] {
  const seen = new Set<string>();
  const out: BubbleImage[] = [];
  for (const img of images) {
    if (!img.src || seen.has(img.src)) continue;
    seen.add(img.src);
    out.push(img);
  }
  return out;
}

/** Order-independent fingerprint of an image set: the sorted `src` values joined with `|`. */
function imagesSignature(images: BubbleImage[]): string {
  // map() returns a fresh array, so the in-place sort never touches the caller's list.
  return images.map(i => i.src).sort().join('|');
}

/**
 * Choose a file extension from a MIME type.
 *
 * Recognises png, webp and gif; everything else (including a missing type)
 * maps to `jpg`. The comparison is case-insensitive.
 */
function extFromContentType(ct?: string): string {
  if (!ct) return 'jpg';
  const lowered = ct.toLowerCase();
  if (lowered.includes('png')) return 'png';
  if (lowered.includes('webp')) return 'webp';
  if (lowered.includes('gif')) return 'gif';
  return 'jpg';
}

/**
 * Build a file name of the form `grok-<epoch ms>-<12 hex chars>.<ext>`.
 *
 * The hex part is the start of the SHA-1 of `src`; the extension is derived
 * from `ct` by {@link extFromContentType}.
 */
function buildFilename(src: string, ct?: string): string {
  const ext = extFromContentType(ct);
  const hash = crypto.createHash('sha1').update(src).digest('hex').slice(0, 12);
  return `grok-${Date.now()}-${hash}.${ext}`;
}

/** Check whether the tab is already on grok.com (any path). */
async function isOnGrok(page: IPage): Promise<boolean> {
  // A failing evaluate is treated the same as "not on grok.com".
  const url = await page.evaluate('window.location.href').catch(() => '');
  if (typeof url !== 'string' || !url) return false;
  try {
    const hostname = new URL(url).hostname;
    return hostname === 'grok.com' || hostname.endsWith('.grok.com');
  } catch {
    // Not a parseable URL.
    return false;
  }
}

/**
 * Best effort: click the first visible link or button that looks like a
 * "new chat" control (matching text, aria-label, or an href of exactly "/").
 * Does nothing when no such element is found.
 */
async function tryStartFreshChat(page: IPage): Promise<void> {
  await page.evaluate(`(() => {
    const isVisible = (node) => {
      if (!(node instanceof HTMLElement)) return false;
      const rect = node.getBoundingClientRect();
      const style = window.getComputedStyle(node);
      return rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none';
    };
    const candidates = Array.from(document.querySelectorAll('a, button')).filter(node => {
      if (!isVisible(node)) return false;
      const text = (node.textContent || '').trim().toLowerCase();
      const aria = (node.getAttribute('aria-label') || '').trim().toLowerCase();
      const href = node.getAttribute('href') || '';
      return text.includes('new chat')
        || text.includes('new conversation')
        || aria.includes('new chat')
        || aria.includes('new conversation')
        || href === '/';
    });
    const target = candidates[0];
    if (target instanceof HTMLElement) target.click();
  })()`);
}

/**
 * Type `prompt` into the page's composer and submit it.
 *
 * The in-page script waits up to ~12s for either a ProseMirror composer or a
 * textarea, taking whichever it sees first (ProseMirror is checked first).
 * The two paths are mutually exclusive: the textarea is only used when no
 * ProseMirror node was found.
 *
 * @returns `ok: true` with `msg` naming the submission path, or `ok: false`
 *   with `msg` (and, for a ProseMirror failure, `detail`) describing why the
 *   prompt could not be sent.
 */
async function sendPrompt(page: IPage, prompt: string): Promise<SendResult> {
  // JSON-encode so the prompt can be embedded verbatim as a JS string literal.
  const promptJson = JSON.stringify(prompt);
  return page.evaluate(`(async () => {
    try {
      const waitFor = (ms) => new Promise(resolve => setTimeout(resolve, ms));
      const composerSelector = '.ProseMirror[contenteditable="true"]';
      const isVisibleEnabledSubmit = (node) => {
        if (!(node instanceof HTMLButtonElement)) return false;
        const rect = node.getBoundingClientRect();
        const style = window.getComputedStyle(node);
        return !node.disabled
          && rect.width > 0
          && rect.height > 0
          && style.visibility !== 'hidden'
          && style.display !== 'none';
      };

      let pm = null;
      let box = null;
      for (let attempt = 0; attempt < 12; attempt += 1) {
        const composer = document.querySelector(composerSelector);
        if (composer instanceof HTMLElement) {
          pm = composer;
          break;
        }

        const textarea = document.querySelector('textarea');
        if (textarea instanceof HTMLTextAreaElement) {
          box = textarea;
          break;
        }

        await waitFor(1000);
      }

      // Why the ProseMirror path did not submit; reported instead of being swallowed.
      let pmFailure = '';

      // Prefer the ProseMirror composer when present (current grok.com UI).
      if (pm && pm.editor && pm.editor.commands) {
        try {
          if (pm.editor.commands.clearContent) pm.editor.commands.clearContent();
          pm.editor.commands.focus();
          pm.editor.commands.insertContent(${promptJson});
          for (let attempt = 0; attempt < 6; attempt += 1) {
            const sbtn = Array.from(document.querySelectorAll('button[aria-label="Submit"], button[aria-label="\\u63d0\\u4ea4"]'))
              .find(isVisibleEnabledSubmit);
            if (sbtn) {
              sbtn.click();
              return { ok: true, msg: 'pm-submit' };
            }
            await waitFor(500);
          }
          pmFailure = 'submit button not found or not enabled';
        } catch (e) {
          pmFailure = e && e.toString ? e.toString() : String(e);
        }
      } else if (pm) {
        pmFailure = 'composer element has no editor.commands API';
      }

      // Legacy textarea composer. The lookup loop above stops at the first
      // composer it finds, so box is only set when no ProseMirror node exists.
      if (!box) {
        return pm
          ? { ok: false, msg: 'ProseMirror composer found but prompt could not be submitted', detail: pmFailure }
          : { ok: false, msg: 'no composer (neither ProseMirror nor textarea)' };
      }
      box.focus(); box.value = '';
      document.execCommand('selectAll');
      document.execCommand('insertText', false, ${promptJson});
      for (let attempt = 0; attempt < 6; attempt += 1) {
        const btn = Array.from(document.querySelectorAll('button[aria-label="\\u63d0\\u4ea4"], button[aria-label="Submit"]'))
          .find(isVisibleEnabledSubmit);
        if (btn) {
          btn.click();
          return { ok: true, msg: 'clicked' };
        }

        const sub = Array.from(document.querySelectorAll('button[type="submit"]'))
          .find(isVisibleEnabledSubmit);
        if (sub) {
          sub.click();
          return { ok: true, msg: 'clicked-submit' };
        }

        await waitFor(500);
      }
      // Last resort: no clickable submit button, so simulate pressing Enter.
      box.dispatchEvent(new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', keyCode: 13, bubbles: true }));
      return { ok: true, msg: 'enter' };
    } catch (e) { return { ok: false, msg: e && e.toString ? e.toString() : String(e) }; }
  })()`) as Promise<SendResult>;
}

/**
 * Read <img> elements from all message bubbles so callers can filter by baseline.
 *
 * @returns One de-duplicated image list per bubble, in document order. Only
 *   http(s) sources are kept, and images whose known width or height is below
 *   128px are dropped. A non-array evaluate result yields an empty list.
 */
async function getBubbleImageSets(page: IPage): Promise<BubbleImageSet[]> {
  const result = await page.evaluate(`(() => {
    const bubbles = document.querySelectorAll('div.message-bubble, [data-testid="message-bubble"]');
    return Array.from(bubbles).map(bubble => Array.from(bubble.querySelectorAll('img'))
      .map(img => ({
        src: img.currentSrc || img.src || '',
        w: img.naturalWidth || img.width || 0,
        h: img.naturalHeight || img.height || 0,
      }))
      .filter(i => i.src && /^https?:/.test(i.src))
      // Ignore tiny UI/avatar images that may live in the bubble chrome.
      .filter(i => (i.w === 0 || i.w >= 128) && (i.h === 0 || i.h >= 128)));
  })()`) as BubbleImageSet[] | undefined;

  const raw = Array.isArray(result) ? result : [];
  return raw.map(dedupeBySrc);
}

/**
 * Return the images of the newest bubble that has any, ignoring the first
 * `baselineCount` bubbles (those that existed before the prompt was sent).
 *
 * @returns The image list of the last non-empty fresh bubble, or `[]`.
 */
function pickLatestImageCandidate(
  bubbleImageSets: BubbleImageSet[],
  baselineCount: number,
): BubbleImage[] {
  const freshSets = bubbleImageSets.slice(Math.max(0, baselineCount));
  for (let i = freshSets.length - 1; i >= 0; i -= 1) {
    if (freshSets[i].length) return freshSets[i];
  }
  return [];
}

/**
 * Download `url` from inside the page and return its bytes as base64.
 *
 * Download through the browser's fetch so grok.com cookies and referer are
 * attached automatically — assets.grok.com is gated by Cloudflare and will
 * refuse direct curl/node downloads.
 *
 * @returns `ok: true` with `base64` and `contentType` (defaulting to
 *   `image/jpeg` when the blob has no type), or `ok: false` with `error`.
 */
async function fetchImageAsBase64(page: IPage, url: string): Promise<FetchResult> {
  const urlJson = JSON.stringify(url);
  return page.evaluate(`(async () => {
    try {
      const res = await fetch(${urlJson}, { credentials: 'include', referrer: 'https://grok.com/' });
      if (!res.ok) return { ok: false, error: 'HTTP ' + res.status };
      const blob = await res.blob();
      const buf = await blob.arrayBuffer();
      const bytes = new Uint8Array(buf);
      let binary = '';
      for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
      return { ok: true, base64: btoa(binary), contentType: blob.type || 'image/jpeg' };
    } catch (e) { return { ok: false, error: e && e.message || String(e) }; }
  })()`) as Promise<FetchResult>;
}

/** Turn an unknown thrown value into a short message. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Download every image into `outDir` (created if missing) and report where it went.
 *
 * Never throws: any failure (directory creation, in-page fetch, empty payload,
 * file write) is recorded on the affected image as a `path` beginning with
 * `[DOWNLOAD FAILED]`, so the image URLs are still returned to the caller.
 *
 * @returns The input images, in order, each extended with `path`.
 */
async function saveImages(
  page: IPage,
  images: BubbleImage[],
  outDir: string,
): Promise<Array<BubbleImage & { path: string }>> {
  try {
    fs.mkdirSync(outDir, { recursive: true });
  } catch (err) {
    const reason = describeError(err);
    return images.map(img => ({ ...img, path: `[DOWNLOAD FAILED] ${reason}` }));
  }

  const results: Array<BubbleImage & { path: string }> = [];
  // Downloads stay sequential: each one is an evaluate call on the same page.
  for (const img of images) {
    try {
      const fetched = await fetchImageAsBase64(page, img.src);
      if (!fetched || !fetched.ok || !fetched.base64) {
        // ok without a payload would otherwise produce a zero-byte file.
        const reason = fetched?.error || (fetched?.ok ? 'empty response body' : 'unknown');
        results.push({ ...img, path: `[DOWNLOAD FAILED] ${reason}` });
        continue;
      }
      const filepath = path.join(outDir, buildFilename(img.src, fetched.contentType));
      fs.writeFileSync(filepath, Buffer.from(fetched.base64, 'base64'));
      results.push({ ...img, path: filepath });
    } catch (err) {
      results.push({ ...img, path: `[DOWNLOAD FAILED] ${describeError(err)}` });
    }
  }
  return results;
}

/** Convert an image (and optional saved path) into an output row. */
function toRow(img: BubbleImage, savedPath = ''): Row {
  return { url: img.src, width: img.w, height: img.h, path: savedPath };
}

/** Build the single placeholder row used to report a failure in the `url` column. */
function messageRow(message: string): Row {
  return { url: message, width: 0, height: 0, path: '' };
}

/** Produce the final rows for `images`, downloading them first when `outDir` is set. */
async function collectRows(page: IPage, images: BubbleImage[], outDir: string): Promise<Row[]> {
  if (!outDir) return images.map(img => toRow(img));
  const saved = await saveImages(page, images, outDir);
  return saved.map(entry => toRow(entry, entry.path));
}

/**
 * `grok image` command: send a prompt in the browser's grok.com session, poll
 * the newest message bubble for images, and return one row per image.
 *
 * Failures are reported as a single row whose `url` starts with `[BLOCKED]`
 * (the prompt could not be sent) or `[NO IMAGE]` (nothing appeared in time).
 */
export const imageCommand = cli({
  site: 'grok',
  name: 'image',
  description: 'Generate images on grok.com and return image URLs',
  domain: 'grok.com',
  strategy: Strategy.COOKIE,
  browser: true,
  args: [
    { name: 'prompt', positional: true, type: 'string', required: true, help: 'Image generation prompt' },
    { name: 'timeout', type: 'int', default: 240, help: 'Max seconds to wait for the image (default: 240)' },
    { name: 'new', type: 'boolean', default: false, help: 'Start a new chat before sending (default: false)' },
    { name: 'count', type: 'int', default: 1, help: 'Minimum images to wait for before returning (default: 1)' },
    { name: 'out', type: 'string', default: '', help: 'Directory to save downloaded images (uses browser session to bypass auth)' },
  ],
  columns: ['url', 'width', 'height', 'path'],
  func: async (page: IPage, kwargs: Record<string, any>): Promise<Row[]> => {
    const prompt = kwargs.prompt as string;
    // Non-numeric or non-positive values fall back to the defaults; a NaN here
    // would otherwise make every comparison in the polling loop false.
    const timeoutMs = toPositiveNumber(kwargs.timeout, DEFAULT_TIMEOUT_SECONDS) * 1000;
    const newChat = normalizeBooleanFlag(kwargs.new);
    const minCount = Math.max(1, toPositiveNumber(kwargs.count, 1));
    const outDir = (kwargs.out || '').toString().trim();

    if (newChat) {
      await page.goto(GROK_URL);
      await page.wait(2);
      await tryStartFreshChat(page);
      await page.wait(2);
    } else if (!(await isOnGrok(page))) {
      await page.goto(GROK_URL);
      await page.wait(3);
    }

    // Bubbles already on the page are excluded when looking for the new result.
    const baselineBubbleCount = (await getBubbleImageSets(page)).length;
    const sendResult = await sendPrompt(page, prompt);
    if (!sendResult || !sendResult.ok) {
      return [messageRow(`${BLOCKED_PREFIX} send failed: ${JSON.stringify(sendResult)}. ${SESSION_HINT}`)];
    }

    const startTime = Date.now();
    let lastSignature = '';
    let stableCount = 0;
    // Latest set that met minCount, and latest non-empty set that did not.
    let lastImages: BubbleImage[] = [];
    let partialImages: BubbleImage[] = [];

    while (Date.now() - startTime < timeoutMs) {
      await page.wait(3);
      const bubbleImageSets = await getBubbleImageSets(page);
      const images = pickLatestImageCandidate(bubbleImageSets, baselineBubbleCount);

      if (images.length < minCount) {
        if (images.length) partialImages = images;
        continue;
      }

      const signature = imagesSignature(images);
      if (signature !== lastSignature) {
        stableCount = 0;
        lastSignature = signature;
        lastImages = images;
        continue;
      }

      stableCount += 1;
      // Require two consecutive stable reads (~6s) before declaring done.
      if (stableCount >= 2) return collectRows(page, images, outDir);
    }

    // Timed out: return the best we saw rather than claiming nothing appeared.
    const fallbackImages = lastImages.length ? lastImages : partialImages;
    if (fallbackImages.length) return collectRows(page, fallbackImages, outDir);

    return [messageRow(`${NO_IMAGE_PREFIX} No image appeared within ${Math.round(timeoutMs / 1000)}s.`)];
  },
});

/** Internal helpers exposed for unit tests only. */
export const __test__ = {
  normalizeBooleanFlag,
  toPositiveNumber,
  isOnGrok,
  dedupeBySrc,
  imagesSignature,
  extFromContentType,
  buildFilename,
  pickLatestImageCandidate,
};