/ clis / ke / ershoufang.js
ershoufang.js
  1  import { cli, Strategy } from '@jackwener/opencli/registry';
  2  import { cityUrl, gotoKe } from './utils.js';
  3  
  4  cli({
  5      site: 'ke',
  6      name: 'ershoufang',
  7      description: '贝壳找房二手房列表',
  8      domain: 'ke.com',
  9      strategy: Strategy.COOKIE,
 10      browser: true,
 11      args: [
 12          { name: 'city', default: 'bj', help: '城市代码,如 bj(北京), sh(上海), gz(广州), sz(深圳), zs(中山)' },
 13          { name: 'district', help: '区域拼音,如 chaoyang, haidian, tianhe' },
 14          { name: 'min-price', type: 'int', help: '最低总价(万元)' },
 15          { name: 'max-price', type: 'int', help: '最高总价(万元)' },
 16          { name: 'rooms', type: 'int', help: '几居室 (1-5)' },
 17          { name: 'limit', type: 'int', default: 20, help: '返回数量' },
 18      ],
 19      columns: ['title', 'community', 'layout', 'area', 'direction', 'total_price', 'unit_price', 'url'],
 20      func: async (page, kwargs) => {
 21          const city = kwargs.city || 'bj';
 22          const limit = Number(kwargs.limit) || 20;
 23          const base = cityUrl(city);
 24  
 25          let path = '/ershoufang/';
 26          if (kwargs.district) {
 27              path = `/ershoufang/${kwargs.district}/`;
 28          }
 29  
 30          const priceParts = [];
 31          if (kwargs['min-price'] || kwargs['max-price']) {
 32              const min = kwargs['min-price'] || '';
 33              const max = kwargs['max-price'] || '';
 34              priceParts.push(`p${min}t${max}`);
 35          }
 36  
 37          const roomParts = [];
 38          if (kwargs.rooms) {
 39              roomParts.push(`l${kwargs.rooms}`);
 40          }
 41  
 42          const filters = [...priceParts, ...roomParts].join('');
 43          const url = base + path + (filters ? filters + '/' : '');
 44  
 45          await gotoKe(page, url);
 46  
 47          const items = await page.evaluate(`(async () => {
 48    const cards = document.querySelectorAll('.sellListContent li.clear');
 49    const results = [];
 50    for (const card of cards) {
 51      const titleEl = card.querySelector('.title a');
 52      const communityEl = card.querySelector('.positionInfo a');
 53      const houseInfoEl = card.querySelector('.houseInfo');
 54      const priceEl = card.querySelector('.totalPrice span');
 55      const unitPriceEl = card.querySelector('.unitPrice span');
 56  
 57      if (!titleEl) continue;
 58  
 59      // houseInfo text varies:
 60      //   "中楼层 (共24层) 4室2厅 | 133.99平米 | 东南"
 61      //   "高楼层 (共32层) | 2022年 | 4室2厅 | 110平米"
 62      const houseText = (houseInfoEl ? houseInfoEl.textContent : '').replace(/\\s+/g, ' ').trim();
 63      const houseParts = houseText.split('|').map(s => s.trim());
 64  
 65      // Extract structured fields from all parts
 66      let layout = '', area = '', direction = '', floor = '';
 67      for (const part of houseParts) {
 68        if (/\\d室\\d厅/.test(part)) {
 69          layout = part.match(/(\\d室\\d厅)/)[1];
 70        } else if (/平米|㎡/.test(part)) {
 71          area = part;
 72        } else if (/^[东南西北]+$/.test(part.replace(/\\s/g, ''))) {
 73          direction = part;
 74        } else if (/楼层/.test(part)) {
 75          floor = part;
 76        }
 77      }
 78      // layout might be embedded in the floor part: "中楼层 (共24层) 4室2厅"
 79      if (!layout) {
 80        const m = houseText.match(/(\\d室\\d厅)/);
 81        if (m) layout = m[1];
 82      }
 83  
 84      results.push({
 85        title: (titleEl.textContent || '').trim(),
 86        url: titleEl.href || '',
 87        community: (communityEl ? communityEl.textContent : '').trim(),
 88        layout: layout,
 89        area: area,
 90        direction: direction,
 91        total_price: ((priceEl ? priceEl.textContent : '').trim() || '') + '万',
 92        unit_price: (unitPriceEl ? unitPriceEl.textContent : '').trim(),
 93      });
 94    }
 95    return results;
 96  })()`);
 97  
 98          return (items || []).slice(0, limit);
 99      },
100  });