/ clis / chaoxing / utils.js
utils.js
  1  /**
  2   * Chaoxing (学习通) shared helpers.
  3   *
  4   * Flow: initSession → getCourses → enterCourse → getTabIframeUrl → navigate → parse DOM
  5   * Chaoxing has no flat "list all assignments" API; data is behind session-gated
  6   * course pages loaded as iframes.
  7   */
  8  // ── Utilities ────────────────────────────────────────────────────────
  9  /** Sleep for given milliseconds (anti-scraping delay). */
 10  export function sleep(ms) {
 11      return new Promise(resolve => setTimeout(resolve, ms));
 12  }
 13  /** Execute a credentialed fetch in the browser context, returning JSON or text. */
 14  export async function fetchChaoxing(page, url) {
 15      const urlJs = JSON.stringify(url);
 16      return page.evaluate(`
 17      async () => {
 18        const res = await fetch(${urlJs}, { credentials: "include" });
 19        const text = await res.text();
 20        try { return JSON.parse(text); } catch {}
 21        return text;
 22      }
 23    `);
 24  }
 25  /** Format a timestamp (seconds or milliseconds or date string) to YYYY-MM-DD HH:mm. */
 26  export function formatTimestamp(ts) {
 27      if (ts == null || ts === '' || ts === 0)
 28          return '';
 29      if (typeof ts === 'string' && !/^\d+$/.test(ts.trim()))
 30          return ts.trim();
 31      const num = Number(ts);
 32      if (Number.isNaN(num) || num <= 0)
 33          return String(ts);
 34      const millis = num > 1e12 ? num : num * 1000;
 35      const d = new Date(millis);
 36      if (Number.isNaN(d.getTime()))
 37          return String(ts);
 38      const yyyy = d.getFullYear();
 39      const mm = String(d.getMonth() + 1).padStart(2, '0');
 40      const dd = String(d.getDate()).padStart(2, '0');
 41      const hh = String(d.getHours()).padStart(2, '0');
 42      const mi = String(d.getMinutes()).padStart(2, '0');
 43      return `${yyyy}-${mm}-${dd} ${hh}:${mi}`;
 44  }
 45  /** Map numeric work status to Chinese label. */
 46  export function workStatusLabel(status) {
 47      if (status == null || status === '')
 48          return '未知';
 49      const s = Number(status);
 50      if (s === 0)
 51          return '未交';
 52      if (s === 1)
 53          return '已交';
 54      if (s === 2)
 55          return '已批阅';
 56      const str = String(status).trim();
 57      return str || '未知';
 58  }
 59  /** Fetch enrolled course list via backclazzdata JSON API. */
 60  export async function getCourses(page) {
 61      const resp = await fetchChaoxing(page, 'https://mooc1-api.chaoxing.com/mycourse/backclazzdata?view=json&rss=1');
 62      if (!resp || typeof resp !== 'object')
 63          return [];
 64      const channelList = resp.channelList ?? [];
 65      const courses = [];
 66      for (const channel of channelList) {
 67          const content = channel?.content;
 68          if (!content)
 69              continue;
 70          const courseData = content.course?.data;
 71          if (!Array.isArray(courseData))
 72              continue;
 73          for (const c of courseData) {
 74              courses.push({
 75                  courseId: String(c.id ?? ''),
 76                  classId: String(content.id ?? ''),
 77                  cpi: String(channel.cpi ?? ''),
 78                  title: String(c.name ?? ''),
 79              });
 80          }
 81      }
 82      return courses;
 83  }
 84  // ── Session & course entry ───────────────────────────────────────────
 85  /** Navigate to the interaction page to establish a Chaoxing session. */
 86  export async function initSession(page) {
 87      await page.goto('https://mooc2-ans.chaoxing.com/mooc2-ans/visit/interaction');
 88  }
 89  /**
 90   * Enter a course via stucoursemiddle redirect (establishes course session + enc).
 91   * After this call the browser is on the course page.
 92   */
 93  export async function enterCourse(page, course) {
 94      const url = `https://mooc1.chaoxing.com/visit/stucoursemiddle` +
 95          `?courseid=${course.courseId}&clazzid=${course.classId}&cpi=${course.cpi}&ismooc2=1&v=2`;
 96      await page.goto(url);
 97  }
 98  /**
 99   * On the course page, click a tab (作业 / 考试) and return the iframe src
100   * that gets loaded. Returns empty string if the tab is not found.
101   */
102  export async function getTabIframeUrl(page, tabName) {
103      const nameJs = JSON.stringify(tabName);
104      const result = await page.evaluate(`
105      async () => {
106        const tabs = document.querySelectorAll('a[data-url]');
107        let target = null;
108        for (const tab of tabs) {
109          if ((tab.innerText || '').trim() === ${nameJs}) { target = tab; break; }
110        }
111        if (!target) return '';
112        target.click();
113        await new Promise(r => setTimeout(r, 2000));
114        const iframe = document.getElementById('frame_content-hd') || document.querySelector('iframe');
115        return iframe?.src || '';
116      }
117    `);
118      return typeof result === 'string' ? result : '';
119  }
/**
 * Parse assignments from the DOM of the page currently loaded (the 作业列表 page).
 *
 * An in-page script collects the trimmed `innerText` of every candidate list
 * block (`.ulDiv li`, `.work-list-item`, `.listContent > div`, `ul > li`),
 * dropping very short entries and bare filter labels. When nothing matches,
 * it returns the whole body text as a single entry. Each text is then mined
 * here for:
 *   - title: first non-empty line, whitespace collapsed
 *   - status: first of 未交 / 待批阅 / 已完成 / 已批阅
 *   - deadline: a "剩余…" countdown if present, otherwise the first date
 *   - score: a number followed by 分 (but not 分钟)
 *
 * @param {object} page - Page handle exposing an async `evaluate(script)`.
 * @param {*} courseName - Copied unchanged into each row's `course` field.
 * @returns {Promise<Array<{course: *, title: string, deadline: string, status: string, score: string}>>}
 */
export async function parseAssignmentsFromDom(page, courseName) {
    const collectScript = `
    (() => {
      const items = [];
      // Each assignment is a li or div block; try multiple selectors
      const blocks = document.querySelectorAll('.ulDiv li, .work-list-item, .listContent > div, ul > li');
      for (const block of blocks) {
        const text = (block.innerText || '').trim();
        if (!text || text.length < 3) continue;
        // Skip filter buttons and headers
        if (/^(全部|已完成|未完成|筛选)$/.test(text)) continue;
        items.push(text);
      }
      // Fallback: split body text by common patterns
      if (items.length === 0) {
        const body = (document.body?.innerText || '').trim();
        return [body];
      }
      return items;
    })()
  `;
    const texts = (await page.evaluate(collectScript)) ?? [];

    // First capture group of `pattern` in `source`, or undefined when absent.
    const firstCapture = (source, pattern) => {
        const found = source.match(pattern);
        return found === null ? undefined : found[1];
    };

    const rows = [];
    for (const text of texts) {
        const usable = typeof text === 'string' && text.length >= 3;
        if (!usable) {
            continue;
        }
        // Entries that merely start with a filter label or an empty-state notice are noise.
        if (/^(全部|已完成|未完成|筛选|暂无|提交的作业将经过)/.test(text)) {
            continue;
        }
        const [headline] = text
            .split('\n')
            .map((piece) => piece.trim())
            .filter(Boolean);
        if (headline === undefined) {
            continue;
        }
        const title = headline.replace(/\s+/g, ' ').trim();
        if (!title || /^(全部|已完成|未完成|筛选)$/.test(title)) {
            continue;
        }
        const status = firstCapture(text, /(未交|待批阅|已完成|已批阅)/) ?? '';
        // A remaining-time countdown wins over an absolute date.
        const countdown = firstCapture(text, /(剩余[\d天小时分钟秒]+)/);
        const dated = firstCapture(text, /(\d{4}[-/.]\d{1,2}[-/.]\d{1,2}(?:\s+\d{1,2}:\d{2})?)/);
        const deadline = countdown ?? dated ?? '';
        // The negative lookahead keeps "N分钟" (minutes) from being read as a score.
        const score = firstCapture(text, /(\d+(?:\.\d+)?)\s*分(?!钟)/) ?? '';
        rows.push({ course: courseName, title, deadline, status, score });
    }
    return rows;
}
/**
 * Parse exams from the DOM of the page currently loaded (the 考试列表 page).
 *
 * An in-page script collects the trimmed `innerText` of every candidate list
 * block (`.ulDiv li`, `.exam-list-item`, `.listContent > div`, `ul > li`).
 * When nothing matches, it returns the whole body text as a single entry. A
 * lone entry containing 暂无考试 means there are no exams. Each remaining
 * text is mined for:
 *   - title: first non-empty line, whitespace collapsed
 *   - start / end: first and second "date time" occurrences
 *   - status: first of 未开始 / 进行中 / 已结束 / 已完成 / 未交 / 待批阅;
 *     with none present, a "剩余" countdown implies 进行中
 *   - score: a number followed by 分 (but not 分钟)
 *
 * @param {object} page - Page handle exposing an async `evaluate(script)`.
 * @param {*} courseName - Copied unchanged into each row's `course` field.
 * @returns {Promise<Array<{course: *, title: string, start: string, end: string, status: string, score: string}>>}
 */
export async function parseExamsFromDom(page, courseName) {
    const collectScript = `
    (() => {
      const items = [];
      const blocks = document.querySelectorAll('.ulDiv li, .exam-list-item, .listContent > div, ul > li');
      for (const block of blocks) {
        const text = (block.innerText || '').trim();
        if (!text || text.length < 3) continue;
        if (/^(全部|已完成|未完成|筛选|暂无)$/.test(text)) continue;
        items.push(text);
      }
      if (items.length === 0) {
        const body = (document.body?.innerText || '').trim();
        return [body];
      }
      return items;
    })()
  `;
    const texts = (await page.evaluate(collectScript)) ?? [];

    // A single blob mentioning "暂无考试" is the empty-state page, not an exam.
    const isEmptyState = texts.length === 1
        && typeof texts[0] === 'string'
        && texts[0].includes('暂无考试');
    if (isEmptyState) {
        return [];
    }

    const rows = [];
    for (const text of texts) {
        const usable = typeof text === 'string' && text.length >= 3;
        if (!usable) {
            continue;
        }
        if (/^(全部|已完成|未完成|筛选|暂无)/.test(text)) {
            continue;
        }
        const [headline] = text
            .split('\n')
            .map((piece) => piece.trim())
            .filter(Boolean);
        if (headline === undefined) {
            continue;
        }
        const title = headline.replace(/\s+/g, ' ').trim();
        if (!title || /^(全部|已完成|未完成|筛选)$/.test(title)) {
            continue;
        }
        // First two "date time" stamps are taken as the exam window.
        const [start = '', end = ''] = text.match(/\d{4}[-/.]\d{1,2}[-/.]\d{1,2}\s+\d{1,2}:\d{2}/g) ?? [];
        const labelled = text.match(/(未开始|进行中|已结束|已完成|未交|待批阅)/);
        let status;
        if (labelled !== null) {
            status = labelled[1];
        } else if (text.includes('剩余')) {
            // No explicit label, but a countdown means the exam is running.
            status = '进行中';
        } else {
            status = '';
        }
        // The negative lookahead keeps "N分钟" (minutes) from being read as a score.
        const graded = text.match(/(\d+(?:\.\d+)?)\s*分(?!钟)/);
        const score = graded === null ? '' : graded[1];
        rows.push({ course: courseName, title, start, end, status, score });
    }
    return rows;
}
223  }