# plugins/URLMonitor.py
from __future__ import print_function

import re

# Python 2 ships urlparse as a top-level module; Python 3 moved it.
try:
    from urlparse import urlparse
except ImportError:
    from urllib.parse import urlparse

import requests
from gglsbl import SafeBrowsingList
from rtmbot.core import Plugin, Job
10  
class URLMonitor(Plugin):
    """Watch channel messages for links to blacklisted or unsafe hosts.

    Every URL found in a public-channel or private-group message is checked
    against a host blacklist downloaded at start-up and, when enabled,
    against Google Safe Browsing. On the first hit the moderators are
    notified in the moderation channel and an optional warning is queued
    for the channel the message came from.

    Configuration keys read from ``self.plugin_config``:

    * ``MODERATORS`` -- strings joined with spaces and prepended to the
      moderation notice (presumably Slack mentions; confirm with config).
    * ``MODERATE_CHAN`` -- channel that receives the moderation notice.
    * ``BLACKLISTS`` -- URLs of blacklist files; only ``.json`` is handled.
    * ``GOOGLE_SAFE_BROWSING`` -- truthy to enable Safe Browsing lookups.
    * ``GOOGLE_SAFE_BROWSING_API_KEY`` -- API key used when enabled.
    * ``WARNING_MESSAGE`` -- optional text posted back to the channel.
    """

    # Immutable class-level defaults; __init__ rebinds these per instance so
    # that no mutable state is ever shared between plugin instances.
    blacklist = frozenset()
    moderators = ()
    sbl = None

    # Compiled once: this pattern is applied to every incoming message.
    _URL_PATTERN = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )

    def __init__(self, *args, **kwargs):
        """Load the configuration, the blacklists and Safe Browsing.

        Raises:
            requests.RequestException: if a blacklist cannot be downloaded.
        """
        # Explicit two-argument form keeps the Python 2 compatibility that
        # the file's __future__ import and urlparse fallback aim for.
        super(URLMonitor, self).__init__(*args, **kwargs)

        self.moderators = list(self.plugin_config.get('MODERATORS') or [])
        # A set gives O(1) membership tests for every URL in every message.
        self.blacklist = set()
        self.sbl = None

        # Initialize Safe Browsing API
        if self.plugin_config.get('GOOGLE_SAFE_BROWSING'):
            self.sbl = SafeBrowsingList(
                self.plugin_config['GOOGLE_SAFE_BROWSING_API_KEY'])
            self.sbl.update_hash_prefix_cache()

        # Populate the blacklist from the configured URLs
        for url in self.plugin_config.get('BLACKLISTS') or []:
            url = url.strip()
            if url.endswith('.json'):
                # Timeout so an unresponsive host cannot hang start-up.
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                # Assuming MEW list format: a JSON array of objects whose
                # 'id' field is the host name -- TODO confirm per source.
                self.blacklist.update(
                    item['id'].lower() for item in response.json())
            elif url.endswith('.csv'):
                print('csv not implemented')  # TODO
            else:
                print('txt not implemented')  # TODO

        print(self.__class__.__name__, 'initialized')

    def process_message(self, data):
        """Inspect one message event and alert on the first bad URL.

        Args:
            data: message event dict; the 'channel' and 'text' keys are
                read, and events lacking either are ignored.
        """
        chan = data.get('channel') or ''
        text = data.get('text') or ''

        # Only private groups ('G...') and public channels ('C...').
        if not text or not chan.startswith(('C', 'G')):
            return

        for match in self._URL_PATTERN.findall(text):
            # Slack wraps links as <url> or <url|label>; the pattern stops
            # at '|' but can swallow the closing '>' of a bare <url>.
            url = match.rstrip('>')
            try:
                # .hostname drops any userinfo and port and lower-cases the
                # host, so 'user:pass@host:8080' cannot dodge the blacklist.
                host = urlparse(url).hostname
            except ValueError:
                # Malformed bracketed (IPv6-style) host; nothing to check.
                continue
            if not host:
                continue

            if host in self.blacklist or self._is_unsafe(url):
                self._alert(data, text)
                break

    def _is_unsafe(self, url):
        """Return True if Safe Browsing is enabled and flags ``url``."""
        return self.sbl is not None and bool(self.sbl.lookup_url(url))

    def _alert(self, data, text):
        """Notify the moderators and optionally warn the source channel.

        Args:
            data: the offending message event; its 'channel' is where the
                warning message is sent.
            text: the offending message text, forwarded to the moderators.
        """
        # TODO flag user
        # TODO early warning system
        self.slack_client.api_call(
            'chat.postMessage',
            channel=self.plugin_config['MODERATE_CHAN'],
            text=' '.join(self.moderators) + ' ' + text,
        )  # TODO can probably use outputs for this

        warning = self.plugin_config.get('WARNING_MESSAGE')
        if warning:
            self.outputs.append([data['channel'], warning])