/**
 * clis/chatgpt/utils.js
 *
 * ChatGPT web browser automation helpers for image generation.
 * Cross-platform: works on Linux/macOS/Windows via OpenCLI's CDP browser automation.
 */
  5  
  6  export const CHATGPT_DOMAIN = 'chatgpt.com';
  7  export const CHATGPT_URL = 'https://chatgpt.com';
  8  
  9  // Selectors
 10  const COMPOSER_SELECTOR = '[aria-label="Chat with ChatGPT"]';
 11  const SEND_BTN_SELECTOR = 'button[aria-label="Send prompt"]';
 12  
 13  function buildComposerLocatorScript() {
 14      const selectorsJson = JSON.stringify([COMPOSER_SELECTOR]);
 15      const markerAttr = 'data-opencli-chatgpt-composer';
 16      return `
 17        const isVisible = (el) => {
 18          if (!(el instanceof HTMLElement)) return false;
 19          const style = window.getComputedStyle(el);
 20          if (style.display === 'none' || style.visibility === 'hidden') return false;
 21          const rect = el.getBoundingClientRect();
 22          return rect.width > 0 && rect.height > 0;
 23        };
 24  
 25        const markerAttr = ${JSON.stringify(markerAttr)};
 26        const clearMarkers = (active) => {
 27          document.querySelectorAll('[' + markerAttr + ']').forEach(node => {
 28            if (node !== active) node.removeAttribute(markerAttr);
 29          });
 30        };
 31  
 32        const findComposer = () => {
 33          const marked = document.querySelector('[' + markerAttr + '="1"]');
 34          if (marked instanceof HTMLElement && isVisible(marked)) return marked;
 35  
 36          for (const selector of ${JSON.stringify([COMPOSER_SELECTOR])}) {
 37            const node = Array.from(document.querySelectorAll(selector)).find(c => c instanceof HTMLElement && isVisible(c));
 38            if (node instanceof HTMLElement) {
 39              node.setAttribute(markerAttr, '1');
 40              return node;
 41            }
 42          }
 43          return null;
 44        };
 45  
 46        findComposer.toString = () => 'findComposer';
 47        return { findComposer, markerAttr };
 48      `;
 49  }
 50  
 51  /**
 52   * Send a message to the ChatGPT composer and submit it.
 53   * Returns true if the message was sent successfully.
 54   */
 55  export async function sendChatGPTMessage(page, text) {
 56      // Close sidebar if open (it can cover the chat composer)
 57      await page.evaluate(`
 58          (() => {
 59              const closeBtn = Array.from(document.querySelectorAll('button')).find(b => b.getAttribute('aria-label') === 'Close sidebar');
 60              if (closeBtn) closeBtn.click();
 61          })()
 62      `);
 63      await page.wait(0.5);
 64  
 65      // Wait for composer to be ready and use Playwright's type()
 66      await page.wait(1.5);
 67      
 68      const typeResult = await page.evaluate(`
 69          (() => {
 70              ${buildComposerLocatorScript()}
 71              const composer = findComposer();
 72              if (!composer) return false;
 73              composer.focus();
 74              composer.textContent = '';
 75              return true;
 76          })()
 77      `);
 78      
 79      if (!typeResult) return false;
 80      
 81      // Use page.type() which is Playwright's native method
 82      try {
 83          if (page.nativeType) {
 84              await page.nativeType(text);
 85          } else {
 86              throw new Error('nativeType unavailable');
 87          }
 88      } catch (e) {
 89          // Fallback: use execCommand
 90          await page.evaluate(`
 91              (() => {
 92                  const composer = document.querySelector('[aria-label="Chat with ChatGPT"]');
 93                  if (!composer) return;
 94                  composer.focus();
 95                  document.execCommand('insertText', false, ${JSON.stringify(text)});
 96              })()
 97          `);
 98      }
 99      
100      // Wait for send button to appear (it only shows when there's text)
101      await page.wait(1.5);
102  
103      // Click send button
104      const sent = await page.evaluate(`
105          (() => {
106              const btns = Array.from(document.querySelectorAll('button'));
107              const sendBtn = btns.find(b => b.getAttribute('aria-label') === 'Send prompt');
108              return { sendBtnFound: !!sendBtn };
109          })()
110      `);
111      
112      if (!sent || !sent.sendBtnFound) {
113          return false;
114      }
115      
116      await page.evaluate(`
117          (() => {
118              const sendBtn = Array.from(document.querySelectorAll('button')).find(b => b.getAttribute('aria-label') === 'Send prompt');
119              if (sendBtn) sendBtn.click();
120          })()
121      `);
122      return true;
123  }
124  
/**
 * Report whether ChatGPT appears to be producing a response right now.
 *
 * A response counts as in progress when any button on the page has an
 * aria-label equal to "Stop generating" or containing "Thinking".
 *
 * @param {object} page - Automation page handle providing `evaluate(script)`.
 * @returns {Promise<boolean>} Result of the in-page check.
 */
export async function isGenerating(page) {
    const busyProbe = `
        (() => {
            const labelOf = (button) => button.getAttribute('aria-label') || '';
            const isBusyLabel = (label) => label === 'Stop generating' || label.includes('Thinking');
            return [...document.querySelectorAll('button')].some(button => isBusyLabel(labelOf(button)));
        })()
    `;
    return page.evaluate(busyProbe);
}
138  
/**
 * Collect the source URLs of content images currently visible on the page.
 *
 * An image is kept when it is rendered larger than 32x32, has a source, does
 * not look like an avatar/profile/logo/icon (judged by its alt text and class
 * name), and is at least 128 pixels in one dimension. URLs are de-duplicated
 * and returned in document order.
 *
 * @param {object} page - Automation page handle providing `evaluate(script)`.
 * @returns {Promise<string[]>} Unique image URLs.
 */
export async function getChatGPTVisibleImageUrls(page) {
    const collector = `
        (() => {
            const altBlocklist = ['avatar', 'profile', 'logo', 'icon'];
            const classBlocklist = ['avatar', 'profile', 'icon'];
            const mentionsAny = (text, words) => words.some(word => text.includes(word));

            const isRendered = (el) => {
                if (!(el instanceof HTMLElement)) return false;
                const { display, visibility } = window.getComputedStyle(el);
                if (display === 'none' || visibility === 'hidden') return false;
                const box = el.getBoundingClientRect();
                return box.width > 32 && box.height > 32;
            };

            const sourceOf = (img) => img.currentSrc || img.src || '';

            const isContentImage = (img) => {
                const source = sourceOf(img);
                const altText = (img.getAttribute('alt') || '').toLowerCase();
                const classText = (img.className || '').toLowerCase();
                const pixelWidth = img.naturalWidth || img.width || 0;
                const pixelHeight = img.naturalHeight || img.height || 0;

                if (!source) return false;
                if (mentionsAny(altText, altBlocklist)) return false;
                if (mentionsAny(classText, classBlocklist)) return false;
                return !(pixelWidth < 128 && pixelHeight < 128);
            };

            const sources = Array.from(document.querySelectorAll('img'))
                .filter(img => img instanceof HTMLImageElement && isRendered(img))
                .filter(isContentImage)
                .map(sourceOf);

            // A Set keeps first-seen order, so this de-duplicates without reordering.
            return [...new Set(sources)];
        })()
    `;
    return page.evaluate(collector);
}
180  
/**
 * Poll the page until newly generated images have appeared and settled.
 *
 * Every 3 time units (as passed to `page.wait`) the page is checked. Polls
 * taken while ChatGPT is still generating, or that show no URL missing from
 * `beforeUrls`, are skipped. The new URLs are returned once the same list has
 * been seen on two such polls in a row, or when the poll budget runs out.
 *
 * @param {object} page - Automation page handle providing `wait(duration)`;
 *   also handed to `isGenerating` and `getChatGPTVisibleImageUrls`.
 * @param {Iterable<string>} beforeUrls - Image URLs that were already visible
 *   before the prompt was sent; these are ignored.
 * @param {number} timeoutSeconds - Overall budget; converted to a poll count by
 *   dividing by the 3-unit interval. At least one poll always runs, even when
 *   the value is missing, non-numeric, zero or negative.
 * @returns {Promise<string[]>} The most recently observed list of new image
 *   URLs (empty when none were seen).
 */
export async function waitForChatGPTImages(page, beforeUrls, timeoutSeconds) {
    const pollIntervalSeconds = 3;
    const knownUrls = new Set(beforeUrls);

    // `Math.max(1, NaN)` is NaN, which would skip the loop entirely. The `>= 1`
    // comparison is false for NaN, so a bad timeout still yields a single poll.
    const requestedPolls = Math.ceil(Number(timeoutSeconds) / pollIntervalSeconds);
    const maxPolls = requestedPolls >= 1 ? requestedPolls : 1;

    let lastUrls = [];
    let lastKey = '';
    let stableCount = 0;

    for (let poll = 0; poll < maxPolls; poll++) {
        await page.wait(pollIntervalSeconds);

        // Images can still change while a response is being generated.
        if (await isGenerating(page)) continue;

        const visibleUrls = await getChatGPTVisibleImageUrls(page);
        const newUrls = (Array.isArray(visibleUrls) ? visibleUrls : []).filter(url => !knownUrls.has(url));
        if (newUrls.length === 0) continue;

        const key = newUrls.join('\n');
        if (key === lastKey) {
            stableCount += 1;
        } else {
            lastUrls = newUrls;
            lastKey = key;
            stableCount = 1;
        }

        if (stableCount >= 2) return lastUrls;
    }
    return lastUrls;
}
216  
/**
 * Export images by URL as base64 data URLs.
 *
 * For each URL, inside the page: a `data:` URL is passed through unchanged;
 * any other URL is fetched with credentials and converted via `FileReader`.
 * When that yields nothing and a matching `<img>` element exists, the element
 * is drawn to a canvas and exported as PNG. URLs that cannot be exported by
 * either route are omitted from the result.
 *
 * @param {object} page - Automation page handle providing `evaluate(script)`.
 * @param {Iterable<string>} urls - Image URLs to export; a missing value is
 *   treated as an empty list.
 * @returns {Promise<Array<{url: string, dataUrl: string, mimeType: string, width: number, height: number}>>}
 *   One entry per exported image. `width`/`height` come from the matching
 *   `<img>` element and are 0 when no such element is found.
 */
export async function getChatGPTImageAssets(page, urls) {
    // The URLs are inlined into the script as a JSON literal, so no extra
    // evaluate argument is needed.
    const urlsJson = JSON.stringify(Array.from(urls ?? []));
    return page.evaluate(`
        (async (targetUrls) => {
            const blobToDataUrl = (blob) => new Promise((resolve, reject) => {
                const reader = new FileReader();
                reader.onloadend = () => resolve(String(reader.result || ''));
                reader.onerror = () => reject(new Error('Failed to read blob'));
                reader.readAsDataURL(blob);
            });

            const inferMime = (value, fallbackUrl) => {
                if (value) return value;
                const lower = String(fallbackUrl || '').toLowerCase();
                if (lower.includes('.png')) return 'image/png';
                if (lower.includes('.webp')) return 'image/webp';
                if (lower.includes('.gif')) return 'image/gif';
                return 'image/jpeg';
            };

            const results = [];

            for (const targetUrl of targetUrls) {
                const url = String(targetUrl);
                let dataUrl = '';
                let mimeType = 'image/jpeg';
                let width = 0;
                let height = 0;

                // Look up the matching <img> element for size info and the canvas fallback.
                const img = Array.from(document.querySelectorAll('img')).find(el =>
                    (el.currentSrc || el.src || '') === targetUrl
                );
                if (img) {
                    width = img.naturalWidth || img.width || 0;
                    height = img.naturalHeight || img.height || 0;
                }

                try {
                    if (url.startsWith('data:')) {
                        dataUrl = url;
                        // The media type ends at the first ';' (parameters) or ',' (payload).
                        mimeType = (url.match(/^data:([^;,]+)[;,]/i) || [])[1] || 'image/png';
                    } else {
                        const res = await fetch(targetUrl, { credentials: 'include' });
                        if (res.ok) {
                            const blob = await res.blob();
                            mimeType = inferMime(blob.type, targetUrl);
                            dataUrl = await blobToDataUrl(blob);
                        }
                    }
                } catch (e) {
                    // Best effort: a failed fetch (e.g. CORS) falls through to the canvas route.
                }

                // Fallback: draw the <img> element to a canvas and export it as PNG.
                if (!dataUrl && img && img instanceof HTMLImageElement) {
                    try {
                        const canvas = document.createElement('canvas');
                        canvas.width = img.naturalWidth || img.width || 512;
                        canvas.height = img.naturalHeight || img.height || 512;
                        const ctx = canvas.getContext('2d');
                        if (ctx) {
                            ctx.drawImage(img, 0, 0);
                            dataUrl = canvas.toDataURL('image/png');
                            mimeType = 'image/png';
                        }
                    } catch (e) {
                        // Best effort: if the canvas export throws, the URL is skipped.
                    }
                }

                if (dataUrl) {
                    results.push({ url, dataUrl, mimeType, width, height });
                }
            }

            return results;
        })(${urlsJson})
    `);
}