// clis/douyin/_shared/tos-upload.js
  1  /**
  2   * TOS (ByteDance Object Storage) multipart uploader with resume support.
  3   *
  4   * Uses AWS Signature V4 (HMAC-SHA256) with STS2 temporary credentials.
  5   * For the init multipart upload call, the pre-computed auth from TosUploadInfo is used.
  6   * For PUT part uploads and the final complete call, AWS4 is computed from STS2 credentials.
  7   */
  8  import * as crypto from 'node:crypto';
  9  import * as fs from 'node:fs';
 10  import * as os from 'node:os';
 11  import * as path from 'node:path';
 12  import { CommandExecutionError } from '@jackwener/opencli/errors';
 13  const PART_SIZE = 5 * 1024 * 1024; // 5 MB minimum per TOS/S3 spec
 14  const RESUME_DIR = path.join(os.homedir(), '.opencli', 'douyin-resume');
 15  // ── Resume file helpers ──────────────────────────────────────────────────────
 16  function getResumeFilePath(filePath) {
 17      const hash = crypto.createHash('sha256').update(filePath).digest('hex');
 18      return path.join(RESUME_DIR, `${hash}.json`);
 19  }
 20  function loadResumeState(resumePath, fileSize) {
 21      try {
 22          const raw = fs.readFileSync(resumePath, 'utf8');
 23          const state = JSON.parse(raw);
 24          if (state.fileSize === fileSize && state.uploadId && Array.isArray(state.parts)) {
 25              return state;
 26          }
 27      }
 28      catch {
 29          // no valid resume state
 30      }
 31      return null;
 32  }
 33  function saveResumeState(resumePath, state) {
 34      fs.mkdirSync(path.dirname(resumePath), { recursive: true });
 35      fs.writeFileSync(resumePath, JSON.stringify(state, null, 2), 'utf8');
 36  }
 37  function deleteResumeState(resumePath) {
 38      try {
 39          fs.unlinkSync(resumePath);
 40      }
 41      catch {
 42          // ignore if not found
 43      }
 44  }
 45  // ── AWS Signature V4 ─────────────────────────────────────────────────────────
 46  function hmacSha256(key, data) {
 47      return crypto.createHmac('sha256', key).update(data, 'utf8').digest();
 48  }
 49  function sha256Hex(data) {
 50      const hash = crypto.createHash('sha256');
 51      if (typeof data === 'string') {
 52          hash.update(data, 'utf8');
 53      }
 54      else {
 55          hash.update(data);
 56      }
 57      return hash.digest('hex');
 58  }
 59  function extractRegionFromHost(host) {
 60      // e.g. "tos-cn-i-alisg.volces.com" → "cn-i-alisg"
 61      // e.g. "tos-cn-beijing.ivolces.com" → "cn-beijing"
 62      const match = host.match(/^tos-([^.]+)\./);
 63      if (match)
 64          return match[1];
 65      return 'cn-north-1'; // fallback
 66  }
/**
 * Compute AWS Signature V4 headers for a TOS request.
 *
 * @param {object} opts
 * @param {string} opts.method - HTTP verb (any case; upper-cased for signing).
 * @param {string} opts.url - Full request URL; path and query are canonicalized from it.
 * @param {Record<string, string>} opts.headers - Caller headers, merged in and signed.
 * @param {string|Buffer} opts.body - Request payload; its SHA-256 becomes x-amz-content-sha256.
 * @param {{access_key_id: string, secret_access_key: string, session_token: string}} opts.credentials - STS2 temporary credentials.
 * @param {string} opts.service - Signing service name ('tos' for this uploader).
 * @param {string} opts.region - Signing region (see extractRegionFromHost).
 * @param {string} opts.datetime - AWS4 timestamp 'YYYYMMDDTHHMMSSZ' (see nowDatetime).
 * @returns {Record<string, string>} All headers to send, including Authorization, x-amz-date, etc.
 */
function computeAws4Headers(opts) {
    const { method, url, credentials, service, region, datetime } = opts;
    const date = datetime.slice(0, 8); // YYYYMMDD
    const parsedUrl = new URL(url);
    const canonicalUri = parsedUrl.pathname || '/';
    // Canonical query string: sort by name, encode
    // NOTE(review): encodeURIComponent leaves !'()* unencoded and localeCompare may
    // differ from byte order; fine for the partNumber/uploadId params used here, but
    // strict SigV4 requires RFC 3986 encoding and code-point sort — confirm before
    // signing URLs with unusual query parameters.
    const queryParams = [...parsedUrl.searchParams.entries()]
        .sort(([a], [b]) => a.localeCompare(b))
        .map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(v)}`)
        .join('&');
    // Payload hash is both part of the canonical request and sent as a header.
    const bodyHash = sha256Hex(opts.body);
    // Merge in required headers and compute canonical headers
    const allHeaders = {
        ...opts.headers,
        host: parsedUrl.host,
        'x-amz-content-sha256': bodyHash,
        'x-amz-date': datetime,
        // Temporary (STS) credentials must include the session token on every request.
        'x-amz-security-token': credentials.session_token,
    };
    const sortedHeaderKeys = Object.keys(allHeaders).sort((a, b) => a.toLowerCase().localeCompare(b.toLowerCase()));
    // Canonical headers: lowercased name, trimmed value, one per line, trailing newline.
    const canonicalHeaders = sortedHeaderKeys
        .map(k => `${k.toLowerCase()}:${allHeaders[k].trim()}`)
        .join('\n') + '\n';
    const signedHeadersList = sortedHeaderKeys.map(k => k.toLowerCase()).join(';');
    // Canonical request layout is fixed by the SigV4 spec — field order matters.
    const canonicalRequest = [
        method.toUpperCase(),
        canonicalUri,
        queryParams,
        canonicalHeaders,
        signedHeadersList,
        bodyHash,
    ].join('\n');
    const credentialScope = `${date}/${region}/${service}/aws4_request`;
    const stringToSign = [
        'AWS4-HMAC-SHA256',
        datetime,
        credentialScope,
        sha256Hex(canonicalRequest),
    ].join('\n');
    // Signing key chain
    // Derived per SigV4: HMAC("AWS4"+secret, date) → region → service → "aws4_request".
    const kDate = hmacSha256(`AWS4${credentials.secret_access_key}`, date);
    const kRegion = hmacSha256(kDate, region);
    const kService = hmacSha256(kRegion, service);
    const kSigning = hmacSha256(kService, 'aws4_request');
    const signature = hmacSha256(kSigning, stringToSign).toString('hex');
    const authorization = `AWS4-HMAC-SHA256 Credential=${credentials.access_key_id}/${credentialScope}, SignedHeaders=${signedHeadersList}, Signature=${signature}`;
    // Callers send the merged headers plus the computed Authorization verbatim.
    return {
        ...allHeaders,
        Authorization: authorization,
    };
}
122  // ── HTTP helpers ─────────────────────────────────────────────────────────────
/**
 * Thin fetch wrapper: performs the request and returns the status code,
 * lower-cased response headers, and the response body as text.
 *
 * @param {{method: string, url: string, headers?: Record<string, string>, body?: string|Buffer|null}} opts
 * @returns {Promise<{status: number, headers: Record<string, string>, body: string}>}
 */
async function tosRequest(opts) {
    const { method, url, headers, body } = opts;
    const res = await fetch(url, {
        method,
        headers,
        // `??` collapses null/undefined to null; the previous string/Buffer
        // ternary was a no-op (both branches returned `body` unchanged).
        body: body ?? null,
    });
    const responseBody = await res.text();
    // Normalize header names to lowercase so callers can look up 'etag' reliably.
    const responseHeaders = {};
    res.headers.forEach((value, key) => {
        responseHeaders[key.toLowerCase()] = value;
    });
    return { status: res.status, headers: responseHeaders, body: responseBody };
}
/** Current UTC time in AWS4 format: "20240102T030405Z". */
function nowDatetime() {
    // ISO "2024-01-02T03:04:05.678Z" → strip dashes, colons, and milliseconds.
    const iso = new Date().toISOString();
    return `${iso.slice(0, 4)}${iso.slice(5, 7)}${iso.slice(8, 13)}${iso.slice(14, 16)}${iso.slice(17, 19)}Z`;
}
// ── Phase 1: Init multipart upload ───────────────────────────────────────────
/**
 * Open a multipart upload session and return the TOS-issued UploadId.
 * Authenticates with the pre-computed `auth` header that ApplyVideoUpload
 * returned, rather than a locally computed AWS4 signature.
 */
async function initMultipartUpload(tosUrl, auth, credentials) {
    const response = await tosRequest({
        method: 'POST',
        url: `${tosUrl}?uploads`,
        headers: {
            Authorization: auth,
            'x-amz-date': nowDatetime(),
            'x-amz-security-token': credentials.session_token,
            'content-type': 'application/octet-stream',
        },
    });
    if (response.status !== 200) {
        throw new CommandExecutionError(`TOS init multipart upload failed with status ${response.status}: ${response.body}`, 'Check that TOS credentials are valid and not expired.');
    }
    // The response body is XML; pull the session id out of <UploadId>…</UploadId>.
    const uploadIdMatch = /<UploadId>([^<]+)<\/UploadId>/.exec(response.body);
    if (uploadIdMatch === null) {
        throw new CommandExecutionError(`TOS init response missing UploadId: ${response.body}`);
    }
    return uploadIdMatch[1];
}
// ── Phase 2: Upload a single part ────────────────────────────────────────────
/**
 * PUT one part of the multipart upload and return its ETag.
 * The request is AWS4-signed with the STS2 credentials.
 */
async function uploadPart(tosUrl, partNumber, uploadId, data, credentials, region) {
    const target = new URL(tosUrl);
    target.searchParams.set('partNumber', String(partNumber));
    target.searchParams.set('uploadId', uploadId);
    const url = target.toString();
    const signedHeaders = computeAws4Headers({
        method: 'PUT',
        url,
        headers: { 'content-type': 'application/octet-stream' },
        body: data,
        credentials,
        service: 'tos',
        region,
        datetime: nowDatetime(),
    });
    const res = await tosRequest({ method: 'PUT', url, headers: signedHeaders, body: data });
    if (res.status !== 200) {
        throw new CommandExecutionError(`TOS upload part ${partNumber} failed with status ${res.status}: ${res.body}`, 'Check that STS2 credentials are valid and not expired.');
    }
    // The part's ETag is required later by completeMultipartUpload.
    const etag = res.headers['etag'];
    if (!etag) {
        throw new CommandExecutionError(`TOS upload part ${partNumber} response missing ETag header`);
    }
    return etag;
}
// ── Phase 3: Complete multipart upload ───────────────────────────────────────
/**
 * Finalize the multipart upload by POSTing the full part manifest as XML.
 *
 * @param {string} tosUrl - Base object URL (no query string).
 * @param {string} uploadId - Session id from initMultipartUpload().
 * @param {{partNumber: number, etag: string}[]} parts - All uploaded parts; not mutated.
 * @param {object} credentials - STS2 credentials used for AWS4 signing.
 * @param {string} region - Signing region.
 * @throws {CommandExecutionError} when TOS responds with a non-200 status.
 */
async function completeMultipartUpload(tosUrl, uploadId, parts, credentials, region) {
    const parsedUrl = new URL(tosUrl);
    parsedUrl.searchParams.set('uploadId', uploadId);
    const url = parsedUrl.toString();
    // Copy before sorting: Array#sort mutates in place, and `parts` belongs to
    // the caller (tosUpload's resume bookkeeping array).
    const orderedParts = [...parts].sort((a, b) => a.partNumber - b.partNumber);
    const xmlBody = '<CompleteMultipartUpload>' +
        orderedParts
            .map(p => `<Part><PartNumber>${p.partNumber}</PartNumber><ETag>${p.etag}</ETag></Part>`)
            .join('') +
        '</CompleteMultipartUpload>';
    const datetime = nowDatetime();
    const headers = computeAws4Headers({
        method: 'POST',
        url,
        headers: { 'content-type': 'application/xml' },
        body: xmlBody,
        credentials,
        service: 'tos',
        region,
        datetime,
    });
    const res = await tosRequest({
        method: 'POST',
        url,
        headers,
        body: xmlBody,
    });
    if (res.status !== 200) {
        throw new CommandExecutionError(`TOS complete multipart upload failed with status ${res.status}: ${res.body}`, 'Check that all parts were uploaded successfully.');
    }
}
// Test seam: when non-null, replaces fs.readSync inside tosUpload so tests can
// supply synthetic bytes without touching the real filesystem.
let _readSyncOverride = null;
/** @internal — for testing only */
export function setReadSyncOverride(override) {
    _readSyncOverride = override;
}
// ── Public API ───────────────────────────────────────────────────────────────
/**
 * Upload a local file to TOS via multipart upload, with crash-safe resume.
 *
 * Progress is checkpointed to a JSON file under RESUME_DIR after every part,
 * so a re-run with the same path (and unchanged file size) skips parts that
 * already succeeded. The checkpoint is deleted once the upload completes.
 *
 * @param {object} options
 * @param {string} options.filePath - Path of the local video file to upload.
 * @param {{tos_upload_url: string, auth: string}} options.uploadInfo - From ApplyVideoUpload.
 * @param {object} options.credentials - STS2 temporary credentials for AWS4 signing.
 * @param {(uploaded: number, total: number) => void} [options.onProgress] - Byte-level progress callback.
 * @throws {CommandExecutionError} on missing/empty file or any failed TOS call.
 */
export async function tosUpload(options) {
    const { filePath, uploadInfo, credentials, onProgress } = options;
    // Validate file exists
    if (!fs.existsSync(filePath)) {
        throw new CommandExecutionError(`Video file not found: ${filePath}`, 'Ensure the file path is correct and accessible.');
    }
    const { size: fileSize } = fs.statSync(filePath);
    if (fileSize === 0) {
        throw new CommandExecutionError(`Video file is empty: ${filePath}`);
    }
    const { tos_upload_url: tosUrl, auth } = uploadInfo;
    const parsedTosUrl = new URL(tosUrl);
    const region = extractRegionFromHost(parsedTosUrl.host);
    const resumePath = getResumeFilePath(filePath);
    // A resume state is only honored when the recorded file size still matches.
    let resumeState = loadResumeState(resumePath, fileSize);
    let uploadId;
    let completedParts;
    if (resumeState) {
        // Resume from previous state
        uploadId = resumeState.uploadId;
        completedParts = resumeState.parts;
    }
    else {
        // Start fresh
        uploadId = await initMultipartUpload(tosUrl, auth, credentials);
        completedParts = [];
        // Persist immediately so even a crash before the first part can resume.
        saveResumeState(resumePath, { uploadId, fileSize, parts: completedParts });
    }
    // Determine which parts are already done
    const completedPartNumbers = new Set(completedParts.map(p => p.partNumber));
    // Calculate total parts
    const totalParts = Math.ceil(fileSize / PART_SIZE);
    // Track uploaded bytes for progress
    // NOTE(review): this over-counts when a resumed final part was smaller than
    // PART_SIZE; the Math.min below clamps to fileSize, so it is only cosmetic.
    let uploadedBytes = completedParts.length * PART_SIZE;
    if (onProgress)
        onProgress(Math.min(uploadedBytes, fileSize), fileSize);
    const fd = fs.openSync(filePath, 'r');
    try {
        for (let partNumber = 1; partNumber <= totalParts; partNumber++) {
            if (completedPartNumbers.has(partNumber)) {
                continue; // already uploaded
            }
            const offset = (partNumber - 1) * PART_SIZE;
            // The last part may be shorter than PART_SIZE.
            const chunkSize = Math.min(PART_SIZE, fileSize - offset);
            const buffer = Buffer.allocUnsafe(chunkSize);
            // _readSyncOverride is a test seam (see setReadSyncOverride).
            const readFn = _readSyncOverride ?? fs.readSync;
            const bytesRead = readFn(fd, buffer, 0, chunkSize, offset);
            if (bytesRead !== chunkSize) {
                throw new CommandExecutionError(`Short read on part ${partNumber}: expected ${chunkSize} bytes, got ${bytesRead}`);
            }
            const etag = await uploadPart(tosUrl, partNumber, uploadId, buffer, credentials, region);
            completedParts.push({ partNumber, etag });
            // Checkpoint after every successful part so an interruption loses at
            // most one part's worth of work.
            saveResumeState(resumePath, { uploadId, fileSize, parts: completedParts });
            uploadedBytes = Math.min(offset + chunkSize, fileSize);
            if (onProgress)
                onProgress(uploadedBytes, fileSize);
        }
    }
    finally {
        fs.closeSync(fd);
    }
    await completeMultipartUpload(tosUrl, uploadId, completedParts, credentials, region);
    // Success — the checkpoint is no longer needed.
    deleteResumeState(resumePath);
}
294  // ── Internal exports for testing ─────────────────────────────────────────────
295  export { PART_SIZE, RESUME_DIR, extractRegionFromHost, getResumeFilePath, loadResumeState, saveResumeState, deleteResumeState, computeAws4Headers, };