/ clis / ke / utils.js
utils.js
  1  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
  2  
  3  const CAPTCHA_TEXT_PATTERNS = [
  4      '请拖动下方滑块完成验证',
  5      '请按住滑块',
  6      '验证码',
  7      '安全验证',
  8      '访问验证',
  9      '滑动验证',
 10  ];
 11  
 12  const LOGIN_TEXT_PATTERNS = [
 13      '请登录',
 14      '登录后',
 15      '账号登录',
 16      '手机登录',
 17      '立即登录',
 18      '扫码登录',
 19  ];
 20  
 21  function cleanText(value) {
 22      return typeof value === 'string'
 23          ? value.replace(/\u00a0/g, ' ').replace(/\s+/g, ' ').trim()
 24          : '';
 25  }
 26  
 27  export async function readPageState(page) {
 28      const result = await page.evaluate(`
 29      (() => {
 30        try {
 31          return {
 32            href: window.location.href || '',
 33            title: document.title || '',
 34            body_text: document.body ? (document.body.innerText || '').substring(0, 2000) : '',
 35          };
 36        } catch(e) {
 37          return { href: '', title: '', body_text: '' };
 38        }
 39      })()
 40    `);
 41      if (!result) {
 42          return { href: '', title: '', body_text: '' };
 43      }
 44      return {
 45          href: cleanText(result.href),
 46          title: cleanText(result.title),
 47          body_text: cleanText(result.body_text),
 48      };
 49  }
 50  
 51  export function assertNotBlocked(state) {
 52      const { href, title, body_text } = state;
 53      if (href.includes('hip.ke.com/captcha') || href.includes('/captcha')) {
 54          throw new AuthRequiredError('ke.com', '触发了验证码,请先在浏览器中完成验证');
 55      }
 56      if (CAPTCHA_TEXT_PATTERNS.some(p => title.includes(p) || body_text.includes(p))) {
 57          throw new AuthRequiredError('ke.com', '触发了验证码,请先在浏览器中完成滑块验证');
 58      }
 59      if (LOGIN_TEXT_PATTERNS.some(p => title.includes(p))) {
 60          throw new AuthRequiredError('ke.com', '未登录,请先在浏览器中登录贝壳找房');
 61      }
 62  }
 63  
 64  export async function gotoKe(page, url) {
 65      await page.goto(url, { settleMs: 2500 });
 66      await page.wait(2);
 67      const state = await readPageState(page);
 68      assertNotBlocked(state);
 69      return state;
 70  }
 71  
 72  /**
 73   * Fetch a ke.com JSON API from inside the browser context (credentials included).
 74   */
 75  export async function fetchKeJson(page, url) {
 76      const result = await page.evaluate(`(async () => {
 77      const res = await fetch(${JSON.stringify(url)}, { credentials: 'include' });
 78      if (!res.ok) return { __keErr: res.status };
 79      try {
 80        return await res.json();
 81      } catch {
 82        return { __keErr: 'parse' };
 83      }
 84    })()`);
 85      const r = result;
 86      if (r?.__keErr !== undefined) {
 87          const code = r.__keErr;
 88          if (code === 401 || code === 403) {
 89              throw new AuthRequiredError('ke.com', '未登录或登录已过期,请先在浏览器中登录贝壳找房');
 90          }
 91          if (code === 'parse') {
 92              throw new CommandExecutionError('响应不是有效 JSON', '可能触发了风控,请检查登录状态或稍后重试');
 93          }
 94          throw new CommandExecutionError(`HTTP ${code}`, '请检查网络连接或登录状态');
 95      }
 96      return result;
 97  }
 98  
 99  /**
100   * Build a ke.com city URL prefix. Default city is 'bj' (Beijing).
101   */
102  export function cityUrl(city) {
103      return `https://${city}.ke.com`;
104  }