/ support / ebsSupport / scrape.mjs
scrape.mjs
 1  import * as util from "./util.mjs"
 2  
 3  // https://emailregex.com/
 4  const EMAIL_RGX = new RegExp(/^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/, "g")
 5  
 6  function makeTokensFromMultilineText(text) {
 7    const tokens = text.split("\n").join(" ").split(" ")
 8    return tokens
 9  }
10  
/**
 * Find the token that immediately precedes the first "followers" token.
 *
 * @param {string[]} tokens - Tokens in document order.
 * @returns {string} The token just before "followers", or "" when no
 *   "followers" token with a predecessor exists.
 */
function getFollowers(tokens) {
  // Start at 1: a "followers" token at index 0 has nothing before it, and
  // reading tokens[-1] would leak `undefined` to the caller instead of "".
  for (let i = 1; i < tokens.length; i++) {
    if (tokens[i] === "followers") {
      return tokens[i - 1]
    }
  }
  return ""
}
20  
/**
 * Collect the distinct tokens that match EMAIL_RGX.
 *
 * @param {string[]} tokens - Candidate tokens to test.
 * @returns {Set<string>} Matched email strings, in first-seen order.
 */
function getEmail(tokens) {
  const found = new Set()
  tokens.forEach(candidate => {
    const hit = candidate.match(EMAIL_RGX)
    // `match` yields null for non-matching tokens; only real hits are kept.
    if (hit != null && hit[0] != null) {
      found.add(hit[0])
    }
  })
  return found
}
28  
/**
 * Scrape a Twitch channel's "about" page for contact details.
 *
 * Opens a new page on `browser`, navigates to the channel's /about URL, reads
 * the text of the about section and (when visible) the channel panels, and
 * collects the href of every anchor inside those sections. Scraping is
 * best-effort: if the sections cannot be read, the fields gathered so far are
 * returned (empty strings when nothing was gathered). The page is always
 * closed, including when navigation fails.
 *
 * @param {object} browser - Browser-like object exposing `newPage()`
 *   (presumably a Playwright Browser or BrowserContext; confirm with caller).
 * @param {string} userName - Twitch channel name, interpolated into the URL.
 * @returns {Promise<{email: string, links: string, followers: string}>}
 *   Comma-joined distinct emails, comma-joined distinct link hrefs, and the
 *   token preceding "followers" in the scraped text ("" if not found).
 * @throws Whatever `browser.newPage()` or `page.goto()` rejects with.
 */
export async function GetTwitchStreamerInfo(browser,userName) {
  const page = await browser.newPage()

  const links = new Set()
  let allText = ""

  try {
    await page.goto(`https://twitch.tv/${userName}/about`)

    try {
      const aboutSection = page.locator(".about-section")
      const aboutText = await aboutSection.innerText()

      // isVisible() returns a Promise and must be awaited; an un-awaited
      // Promise is always truthy, which forces a read of panels that may not
      // exist and discards the about text when that read times out.
      // NOTE(review): isVisible() does not wait for the element, so panels
      // that render late are skipped. Confirm that is acceptable.
      const panelsLocator = page.locator(".channel-panels-container")
      const panelsSection = (await panelsLocator.isVisible()) ? panelsLocator : null
      const panelsText = panelsSection ? await panelsSection.innerText() : ""

      allText = [aboutText, panelsText].filter(Boolean).join(" ")

      // Gather links from both sections (panels only when present).
      for (const section of [aboutSection, panelsSection]) {
        if (!section) continue
        for (const anchor of await section.locator("a").all()) {
          const href = await anchor.getAttribute("href")
          // Anchors without an href report null; skip them.
          if (href) links.add(href)
        }
      }
    } catch (e) {
      // Best-effort scrape: a missing or unreadable section is not fatal, but
      // leave a trace on stderr so failures are diagnosable.
      console.warn(`GetTwitchStreamerInfo: could not fully scrape "${userName}": ${e?.message ?? e}`)
    }
  } finally {
    // Always release the page, even when navigation throws.
    await page.close()
  }

  let email = new Set()
  let followers = ""

  if (allText) {
    const tokens = makeTokensFromMultilineText(allText)
    email = getEmail(tokens)
    followers = getFollowers(tokens)
  }

  return {
    email: Array.from(email).join(","),
    links: Array.from(links).join(","),
    followers
  }
}
81  
82