// utils.js
/**
 * ChatGPT web browser automation helpers for image generation.
 * Cross-platform: works on Linux/macOS/Windows via OpenCLI's CDP browser automation.
 *
 * Every helper takes a `page` handle and drives it through `page.evaluate(script)`
 * (runs a JavaScript source string inside the page), `page.wait(seconds)` and,
 * when available, `page.nativeType(text)`.
 */

export const CHATGPT_DOMAIN = 'chatgpt.com';
export const CHATGPT_URL = 'https://chatgpt.com';

// Selectors
const COMPOSER_SELECTOR = '[aria-label="Chat with ChatGPT"]';
const SEND_BTN_SELECTOR = 'button[aria-label="Send prompt"]';

/**
 * Build the in-page source that locates the chat composer.
 *
 * The returned text is a *function body*: it declares its helpers and ends with
 * `return { findComposer, markerAttr }`. It must therefore be wrapped in its own
 * function before use (see `buildComposerScript`); pasting it directly into a
 * caller's function would make that caller return early.
 *
 * `findComposer()` prefers an element previously tagged with the marker
 * attribute (if still visible); otherwise it tags and returns the first visible
 * element matching a composer selector, or `null` when none is visible.
 *
 * @returns {string} JavaScript source for a function body.
 */
function buildComposerLocatorScript() {
  const selectorsJson = JSON.stringify([COMPOSER_SELECTOR]);
  const markerAttrJson = JSON.stringify('data-opencli-chatgpt-composer');
  return `
    const isVisible = (el) => {
      if (!(el instanceof HTMLElement)) return false;
      const style = window.getComputedStyle(el);
      if (style.display === 'none' || style.visibility === 'hidden') return false;
      const rect = el.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    };

    const markerAttr = ${markerAttrJson};
    const clearMarkers = (active) => {
      document.querySelectorAll('[' + markerAttr + ']').forEach((node) => {
        if (node !== active) node.removeAttribute(markerAttr);
      });
    };

    const findComposer = () => {
      const marked = document.querySelector('[' + markerAttr + '="1"]');
      if (marked instanceof HTMLElement && isVisible(marked)) return marked;

      for (const selector of ${selectorsJson}) {
        const node = Array.from(document.querySelectorAll(selector)).find((c) => isVisible(c));
        if (node) {
          // Drop markers left on elements that are no longer the live composer.
          clearMarkers(node);
          node.setAttribute(markerAttr, '1');
          return node;
        }
      }
      return null;
    };

    return { findComposer, markerAttr };
  `;
}

/**
 * Wrap `body` in an IIFE that has `findComposer` in scope.
 *
 * The locator fragment runs inside its own inner function so that its trailing
 * `return` only ends that inner function, not the code in `body`.
 *
 * @param {string} body - In-page statements; may `return` a value.
 * @returns {string} A self-invoking script expression for `page.evaluate`.
 */
function buildComposerScript(body) {
  return `
    (() => {
      const { findComposer } = (() => {
        ${buildComposerLocatorScript()}
      })();
      ${body}
    })()
  `;
}

/**
 * Send a message to the ChatGPT composer and submit it.
 *
 * @param {object} page - Page handle exposing `evaluate(script)`, `wait(seconds)`
 *   and optionally `nativeType(text)`.
 * @param {string} text - The prompt to type.
 * @returns {Promise<boolean>} `true` if the send button was found and clicked;
 *   `false` if no visible composer or no send button was found.
 */
export async function sendChatGPTMessage(page, text) {
  // Close sidebar if open (it can cover the chat composer)
  await page.evaluate(`
    (() => {
      const closeBtn = document.querySelector('button[aria-label="Close sidebar"]');
      if (closeBtn) closeBtn.click();
    })()
  `);
  await page.wait(0.5);

  // Give the composer time to become ready
  await page.wait(1.5);

  // Focus the composer and clear any leftover draft
  const composerReady = await page.evaluate(buildComposerScript(`
      const composer = findComposer();
      if (!composer) return false;
      composer.focus();
      composer.textContent = '';
      return true;
  `));

  if (!composerReady) return false;

  // Prefer the page's native typing; it goes to the element focused above
  let typed = false;
  if (typeof page.nativeType === 'function') {
    try {
      await page.nativeType(text);
      typed = true;
    } catch (err) {
      // Best effort: native typing failed, so use the in-page fallback below.
    }
  }

  if (!typed) {
    // Fallback: insert the text with execCommand into the same located composer
    await page.evaluate(buildComposerScript(`
      const composer = findComposer();
      if (!composer) return;
      composer.focus();
      document.execCommand('insertText', false, ${JSON.stringify(text)});
    `));
  }

  // Wait for send button to appear (it only shows when there's text)
  await page.wait(1.5);

  // Find and click in one round trip so the result reflects an actual click
  const clicked = await page.evaluate(`
    (() => {
      const sendBtn = document.querySelector(${JSON.stringify(SEND_BTN_SELECTOR)});
      if (!sendBtn) return false;
      sendBtn.click();
      return true;
    })()
  `);

  return Boolean(clicked);
}

/**
 * Check if ChatGPT is still generating a response.
 *
 * Looks for a button whose aria-label is exactly "Stop generating" or contains
 * "Thinking".
 *
 * @param {object} page - Page handle exposing `evaluate(script)`.
 * @returns {Promise<boolean>} Whatever the in-page check yields (a boolean).
 */
export async function isGenerating(page) {
  return await page.evaluate(`
    (() => {
      return Array.from(document.querySelectorAll('button')).some((b) => {
        const label = b.getAttribute('aria-label') || '';
        return label === 'Stop generating' || label.includes('Thinking');
      });
    })()
  `);
}

/**
 * Get visible image URLs from the ChatGPT page (excluding profile/avatar images).
 *
 * An image is kept when it is rendered larger than 32x32, has a source, its alt
 * text and class name do not mention avatar/profile/logo/icon, and at least one
 * of its dimensions is 128px or more. URLs are de-duplicated in document order.
 *
 * @param {object} page - Page handle exposing `evaluate(script)`.
 * @returns {Promise<string[]>} Unique image URLs.
 */
export async function getChatGPTVisibleImageUrls(page) {
  return await page.evaluate(`
    (() => {
      const isVisible = (el) => {
        if (!(el instanceof HTMLElement)) return false;
        const style = window.getComputedStyle(el);
        if (style.display === 'none' || style.visibility === 'hidden') return false;
        const rect = el.getBoundingClientRect();
        return rect.width > 32 && rect.height > 32;
      };

      const imgs = Array.from(document.querySelectorAll('img')).filter((img) =>
        img instanceof HTMLImageElement && isVisible(img)
      );

      const urls = [];
      const seen = new Set();

      for (const img of imgs) {
        const src = img.currentSrc || img.src || '';
        const alt = (img.getAttribute('alt') || '').toLowerCase();
        const cls = (img.className || '').toLowerCase();
        const width = img.naturalWidth || img.width || 0;
        const height = img.naturalHeight || img.height || 0;

        if (!src) continue;
        if (alt.includes('avatar') || alt.includes('profile') || alt.includes('logo') || alt.includes('icon')) continue;
        if (cls.includes('avatar') || cls.includes('profile') || cls.includes('icon')) continue;
        if (width < 128 && height < 128) continue;
        if (seen.has(src)) continue;

        seen.add(src);
        urls.push(src);
      }
      return urls;
    })()
  `);
}

/**
 * Wait for new images to appear after sending a prompt.
 *
 * Polls every 3 seconds. A poll is skipped while generation is in progress or
 * while no URL outside `beforeUrls` is visible. The new-URL list is returned as
 * soon as two counted polls report the same list, or when the polls run out.
 *
 * @param {object} page - Page handle exposing `evaluate(script)` and `wait(seconds)`.
 * @param {Iterable<string>} beforeUrls - URLs already visible before the prompt.
 * @param {number} timeoutSeconds - Approximate time budget; at least one poll is
 *   always made, including when this value is missing or not a number.
 * @returns {Promise<string[]>} The last observed list of new URLs (possibly empty).
 */
export async function waitForChatGPTImages(page, beforeUrls, timeoutSeconds) {
  const beforeSet = new Set(beforeUrls);
  const pollIntervalSeconds = 3;
  const requestedPolls = Math.ceil(timeoutSeconds / pollIntervalSeconds);
  // Math.max(1, NaN) is NaN, which would skip the loop entirely; poll once instead.
  const maxPolls = Number.isNaN(requestedPolls) ? 1 : Math.max(1, requestedPolls);
  let lastUrls = [];
  let stableCount = 0;

  for (let i = 0; i < maxPolls; i++) {
    await page.wait(pollIntervalSeconds);

    // Check if still generating
    const generating = await isGenerating(page);
    if (generating) continue;

    const urls = (await getChatGPTVisibleImageUrls(page)).filter((url) => !beforeSet.has(url));
    if (urls.length === 0) continue;

    if (urls.join('\n') === lastUrls.join('\n')) {
      stableCount += 1;
    } else {
      lastUrls = urls;
      stableCount = 1;
    }

    if (stableCount >= 2) {
      return lastUrls;
    }
  }
  return lastUrls;
}

/**
 * Export images by URL: fetch from ChatGPT backend API and convert to base64 data URLs.
 *
 * For each URL the page tries, in order: use it as-is when it already is a data
 * URL; fetch it with credentials; draw the matching <img> onto a canvas. URLs
 * for which all of these fail are omitted from the result.
 *
 * @param {object} page - Page handle exposing `evaluate(script, ...)`.
 * @param {Iterable<string>} [urls] - Image URLs to export; `null`/`undefined` is
 *   treated as an empty list.
 * @returns {Promise<Array<{url: string, dataUrl: string, mimeType: string, width: number, height: number}>>}
 */
export async function getChatGPTImageAssets(page, urls) {
  const targetUrls = urls == null ? [] : Array.from(urls);
  if (targetUrls.length === 0) return [];

  const urlsJson = JSON.stringify(targetUrls);
  // The URL list is inlined into the script; it is also passed as a trailing
  // argument as before. NOTE(review): confirm whether page.evaluate uses it.
  return await page.evaluate(`
    (async (targetUrls) => {
      const blobToDataUrl = (blob) => new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onloadend = () => resolve(String(reader.result || ''));
        reader.onerror = () => reject(new Error('Failed to read blob'));
        reader.readAsDataURL(blob);
      });

      const inferMime = (value, fallbackUrl) => {
        if (value) return value;
        const lower = String(fallbackUrl || '').toLowerCase();
        if (lower.includes('.png')) return 'image/png';
        if (lower.includes('.webp')) return 'image/webp';
        if (lower.includes('.gif')) return 'image/gif';
        return 'image/jpeg';
      };

      // Index <img> elements once; the first element per source wins.
      const imgBySrc = new Map();
      for (const el of document.querySelectorAll('img')) {
        const src = el.currentSrc || el.src || '';
        if (!imgBySrc.has(src)) imgBySrc.set(src, el);
      }

      const results = [];

      for (const targetUrl of targetUrls) {
        let dataUrl = '';
        let mimeType = 'image/jpeg';
        let width = 0;
        let height = 0;

        // Try to find the img element for size info
        const img = imgBySrc.get(targetUrl);
        if (img) {
          width = img.naturalWidth || img.width || 0;
          height = img.naturalHeight || img.height || 0;
        }

        try {
          if (String(targetUrl).startsWith('data:')) {
            dataUrl = String(targetUrl);
            // The media type ends at ';' (parameters) or ',' (payload).
            mimeType = (String(targetUrl).match(/^data:([^;,]+)[;,]/i) || [])[1] || 'image/png';
          } else {
            // Try to fetch via CORS from the page's origin
            const res = await fetch(targetUrl, { credentials: 'include' });
            if (res.ok) {
              const blob = await res.blob();
              mimeType = inferMime(blob.type, targetUrl);
              dataUrl = await blobToDataUrl(blob);
            }
          }
        } catch (e) {
          // Deliberately ignored: if fetch fails (CORS), the canvas fallback runs.
        }

        // Fallback: draw img to canvas
        if (!dataUrl && img && img instanceof HTMLImageElement) {
          try {
            const canvas = document.createElement('canvas');
            canvas.width = img.naturalWidth || img.width || 512;
            canvas.height = img.naturalHeight || img.height || 512;
            const ctx = canvas.getContext('2d');
            if (ctx) {
              ctx.drawImage(img, 0, 0);
              dataUrl = canvas.toDataURL('image/png');
              mimeType = 'image/png';
            }
          } catch (e) {
            // Deliberately ignored: export can throw; the URL is then omitted.
          }
        }

        if (dataUrl) {
          results.push({ url: String(targetUrl), dataUrl, mimeType, width, height });
        }
      }

      return results;
    })(${urlsJson})
  `, targetUrls);
}