/ clis / youtube / utils.js
utils.js
  1  /**
  2   * Extract a YouTube video ID from a URL or bare video ID string.
  3   * Supports: watch?v=, youtu.be/, /shorts/, /embed/, /live/, /v/
  4   */
  5  export function parseVideoId(input) {
  6      if (!input.startsWith('http'))
  7          return input;
  8      try {
  9          const parsed = new URL(input);
 10          if (parsed.searchParams.has('v')) {
 11              return parsed.searchParams.get('v');
 12          }
 13          if (parsed.hostname === 'youtu.be') {
 14              return parsed.pathname.slice(1).split('/')[0];
 15          }
 16          // Handle /shorts/xxx, /embed/xxx, /live/xxx, /v/xxx
 17          const pathMatch = parsed.pathname.match(/^\/(shorts|embed|live|v)\/([^/?]+)/);
 18          if (pathMatch)
 19              return pathMatch[2];
 20      }
 21      catch {
 22          // Not a valid URL — treat entire input as video ID
 23      }
 24      return input;
 25  }
 26  /**
 27   * Extract a JSON object assigned to a known bootstrap variable inside YouTube HTML.
 28   */
 29  export function extractJsonAssignmentFromHtml(html, keys) {
 30      const candidates = Array.isArray(keys) ? keys : [keys];
 31      for (const key of candidates) {
 32          const markers = [
 33              `var ${key} = `,
 34              `window["${key}"] = `,
 35              `window.${key} = `,
 36              `${key} = `,
 37          ];
 38          for (const marker of markers) {
 39              const markerIndex = html.indexOf(marker);
 40              if (markerIndex === -1)
 41                  continue;
 42              const jsonStart = html.indexOf('{', markerIndex + marker.length);
 43              if (jsonStart === -1)
 44                  continue;
 45              let depth = 0;
 46              let inString = false;
 47              let escaping = false;
 48              for (let i = jsonStart; i < html.length; i += 1) {
 49                  const ch = html[i];
 50                  if (inString) {
 51                      if (escaping) {
 52                          escaping = false;
 53                      }
 54                      else if (ch === '\\') {
 55                          escaping = true;
 56                      }
 57                      else if (ch === '"') {
 58                          inString = false;
 59                      }
 60                      continue;
 61                  }
 62                  if (ch === '"') {
 63                      inString = true;
 64                      continue;
 65                  }
 66                  if (ch === '{') {
 67                      depth += 1;
 68                      continue;
 69                  }
 70                  if (ch === '}') {
 71                      depth -= 1;
 72                      if (depth === 0) {
 73                          try {
 74                              return JSON.parse(html.slice(jsonStart, i + 1));
 75                          }
 76                          catch {
 77                              break;
 78                          }
 79                      }
 80                  }
 81              }
 82          }
 83      }
 84      return null;
 85  }
 86  /**
 87   * Prepare a quiet YouTube API-capable page without opening the watch UI.
 88   */
 89  export async function prepareYoutubeApiPage(page) {
 90      await page.goto('https://www.youtube.com', { waitUntil: 'none' });
 91      await page.wait(2);
 92  }
/**
 * Inline InnerTube browse API helper for use inside page.evaluate() strings.
 * Inject via FETCH_BROWSE_FN, then call: fetchBrowse(apiKey, body)
 *
 * The injected `fetchBrowse(apiKey, body)`:
 *  - POSTs `body` as JSON to the relative path `/youtubei/v1/browse`, passing
 *    `apiKey` as the `key` query parameter (appended as-is, not URL-encoded)
 *    and sending cookies (`credentials: 'include'`);
 *  - resolves to `{ error: string }` when the HTTP status is not OK;
 *  - otherwise resolves to the parsed JSON response body.
 *
 * The constant is source text, not a callable: everything between the
 * backticks is part of the injected string, so it must stay self-contained.
 */
export const FETCH_BROWSE_FN = `
async function fetchBrowse(apiKey, body) {
  const resp = await fetch('/youtubei/v1/browse?key=' + apiKey + '&prettyPrint=false', {
    method: 'POST',
    credentials: 'include',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });
  if (!resp.ok) return { error: 'InnerTube browse API returned HTTP ' + resp.status };
  return resp.json();
}
`;
/**
 * Extract video objects from playlistVideoRenderer items (playlists, watch-later).
 * Pure function — inject into page.evaluate() via: extractPlaylistVideos.toString()
 *
 * Because the function is injected as source text, it must stay self-contained:
 * it may not reference anything defined outside its own body.
 *
 * @param {Array<object>} items - Renderer entries; entries that are null or lack
 *   a `playlistVideoRenderer` are skipped. A non-array value yields `[]`.
 * @returns {Array<{rank: number, title: string, channel: string, duration: string,
 *   views: string, published: string, url: string}>} One record per video.
 *   `rank` is 0 when the index text is missing or not numeric; `url` is ''
 *   when the renderer carries no `videoId`.
 */
export function extractPlaylistVideos(items) {
    if (!Array.isArray(items)) {
        return [];
    }
    return items
        .filter((item) => item?.playlistVideoRenderer)
        .map((item) => {
        const v = item.playlistVideoRenderer;
        const infoRuns = v.videoInfo?.runs || [];
        const rank = parseInt(v.index?.simpleText || '0', 10);
        return {
            // Non-numeric index text would otherwise surface as NaN.
            rank: Number.isNaN(rank) ? 0 : rank,
            title: v.title?.runs?.[0]?.text || '',
            channel: v.shortBylineText?.runs?.[0]?.text || '',
            duration: v.lengthText?.simpleText || '',
            // videoInfo runs are read positionally: [0] views, [2] published.
            views: infoRuns[0]?.text || '',
            published: infoRuns[2]?.text || '',
            // Avoid emitting ".../watch?v=undefined" for renderers without an ID.
            url: v.videoId ? 'https://www.youtube.com/watch?v=' + v.videoId : '',
        };
    });
}
/**
 * Normalize a subscribed channel entry from YouTube's channelRenderer payload.
 *
 * The handle is taken from the first source that provides one: channelHandleText,
 * a canonicalBaseUrl of the form "/@handle", or a count field (subscriberCountText,
 * then videoCountText) whose text starts with "@". The subscriber text is the
 * first non-empty count field that is not such an "@handle".
 *
 * @param {object} [channelRenderer] - Raw channelRenderer; a falsy value is treated as empty.
 * @returns {{name: string, handle: string, subscribers: string, url: string}}
 *   Normalized fields; each is '' when the payload does not provide it. `url`
 *   prefers the canonical base URL and falls back to /channel/<channelId>.
 */
export function extractSubscriptionChannel(channelRenderer) {
    // Flatten a YouTube text node ({ simpleText } or { runs: [{ text }] }) to a trimmed string.
    function textOf(node) {
        if (node && typeof node.simpleText === 'string') {
            return node.simpleText.trim();
        }
        if (node && Array.isArray(node.runs)) {
            const pieces = [];
            for (const run of node.runs) {
                pieces.push(run?.text || '');
            }
            return pieces.join('').trim();
        }
        return '';
    }

    const renderer = channelRenderer || {};
    const browseEndpoint = renderer.navigationEndpoint?.browseEndpoint;
    const name = textOf(renderer.title);
    const basePath = browseEndpoint?.canonicalBaseUrl || '';
    const channelId = renderer.channelId || browseEndpoint?.browseId || '';
    const subscriberText = textOf(renderer.subscriberCountText);
    const videoText = textOf(renderer.videoCountText);

    const baseIsHandle = basePath.startsWith('/@');
    const subscriberIsHandle = subscriberText.startsWith('@');
    const videoIsHandle = videoText.startsWith('@');

    // Handle: explicit field first, then the base URL, then an overloaded count field.
    let handle = textOf(renderer.channelHandleText);
    if (!handle && baseIsHandle) {
        handle = basePath.slice(1);
    }
    if (!handle && subscriberIsHandle) {
        handle = subscriberText;
    }
    if (!handle && videoIsHandle) {
        handle = videoText;
    }

    // Subscribers: the first count field that holds real text rather than a handle.
    let subscribers = '';
    if (subscriberText && !subscriberIsHandle) {
        subscribers = subscriberText;
    }
    else if (videoText && !videoIsHandle) {
        subscribers = videoText;
    }

    let url = '';
    if (basePath) {
        url = 'https://www.youtube.com' + basePath;
    }
    else if (channelId) {
        url = 'https://www.youtube.com/channel/' + channelId;
    }

    return { name, handle, subscribers, url };
}
/**
 * Inline @handle → channelId resolver for use inside page.evaluate() strings.
 * Inject via RESOLVE_CHANNEL_HANDLE_FN, then call: resolveChannelHandle(input, apiKey, context)
 *
 * The injected `resolveChannelHandle(input, apiKey, context)`:
 *  - returns `input` unchanged when it does not start with "@";
 *  - otherwise POSTs `{ context, url: 'https://www.youtube.com/' + input }` to
 *    the relative path `/youtubei/v1/navigation/resolve_url`, sending cookies;
 *  - resolves to `endpoint.browseEndpoint.browseId` from the response, falling
 *    back to `input` when the HTTP status is not OK, the body is not valid
 *    JSON, or the response has no browseId.
 *
 * The constant is source text, not a callable: everything between the
 * backticks is part of the injected string, so it must stay self-contained.
 */
export const RESOLVE_CHANNEL_HANDLE_FN = `
async function resolveChannelHandle(input, apiKey, context) {
  if (!input.startsWith('@')) return input;
  const resp = await fetch('/youtubei/v1/navigation/resolve_url?key=' + apiKey + '&prettyPrint=false', {
    method: 'POST',
    credentials: 'include',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ context, url: 'https://www.youtube.com/' + input }),
  });
  if (!resp.ok) return input;
  const data = await resp.json().catch(() => ({}));
  return data.endpoint?.browseEndpoint?.browseId || input;
}
`;
/**
 * Inline SAPISIDHASH helper for use inside page.evaluate() strings.
 * YouTube write APIs (like, subscribe) require:
 *   Authorization: SAPISIDHASH {time}_{SHA1(time + " " + SAPISID + " " + origin)}
 *
 * The injected `getSapisidHash(origin)`:
 *  - reads `document.cookie` and takes the value of `__Secure-3PAPISID`,
 *    or of `SAPISID` when the former is absent (the scan stops as soon as
 *    `__Secure-3PAPISID` is seen, so it wins whenever both are present);
 *  - resolves to `null` when neither cookie is present;
 *  - otherwise resolves to `'SAPISIDHASH ' + time + '_' + hex`, where `time`
 *    is the current Unix time in whole seconds and `hex` is the lowercase
 *    hex SHA-1 digest (via `crypto.subtle`) of `time + ' ' + sapisid + ' ' + origin`.
 *
 * The constant is source text, not a callable: everything between the
 * backticks is part of the injected string, so it must stay self-contained.
 */
export const SAPISID_HASH_FN = `
async function getSapisidHash(origin) {
  const cookies = document.cookie.split('; ');
  let sapisid = '';
  for (const c of cookies) {
    const eq = c.indexOf('=');
    if (eq === -1) continue;
    const name = c.slice(0, eq);
    const val = c.slice(eq + 1);
    if (name === '__Secure-3PAPISID' || name === 'SAPISID') {
      sapisid = val;
      if (name === '__Secure-3PAPISID') break;
    }
  }
  if (!sapisid) return null;
  const time = Math.floor(Date.now() / 1000);
  const msgBuffer = new TextEncoder().encode(time + ' ' + sapisid + ' ' + origin);
  const hashBuffer = await crypto.subtle.digest('SHA-1', msgBuffer);
  const hashHex = Array.from(new Uint8Array(hashBuffer)).map(b => b.toString(16).padStart(2, '0')).join('');
  return 'SAPISIDHASH ' + time + '_' + hashHex;
}
`;