# URLMonitor.py
from __future__ import print_function

import re

try:
    from urlparse import urlparse  # Python 2
except ImportError:
    from urllib.parse import urlparse  # Python 3

import requests
from gglsbl import SafeBrowsingList
from rtmbot.core import Plugin, Job

# Matches http(s) URLs embedded in free-form message text. Compiled once at
# import time so each incoming message does not pay for a pattern lookup.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)

# Seconds to wait for a remote blacklist before giving up; without a timeout
# a stalled server would block plugin start-up indefinitely.
_BLACKLIST_TIMEOUT = 10


class URLMonitor(Plugin):
    """Scan channel messages for URLs and alert moderators about bad ones.

    A URL is reported when its host appears in one of the configured
    blacklists or when Google Safe Browsing returns a match for it.

    Keys read from ``plugin_config``:
        MODERATORS: list of strings prepended to the moderator alert.
        GOOGLE_SAFE_BROWSING: truthy to enable Safe Browsing lookups.
        GOOGLE_SAFE_BROWSING_API_KEY: API key, read only when enabled.
        BLACKLISTS: iterable of blacklist URLs (only ``.json`` is loaded).
        MODERATE_CHAN: channel that receives moderator alerts.
        WARNING_MESSAGE: optional text posted back to the offending channel.
    """

    # Class-level defaults kept for backward compatibility; __init__ rebinds
    # ``blacklist`` per instance so instances never share (and keep growing)
    # the same list object.
    blacklist = []
    moderators = []
    sbl = None

    def __init__(self, *args, **kwargs):
        """Load the moderator list, the Safe Browsing cache and blacklists.

        Raises:
            KeyError: if a required configuration key is missing.
            requests.RequestException: if a ``.json`` blacklist cannot be
                downloaded (timeout, connection error, HTTP error status).
        """
        # Explicit two-argument form works on both Python 2 and 3, matching
        # the compatibility imports at the top of the file.
        super(URLMonitor, self).__init__(*args, **kwargs)

        self.blacklist = []
        self.moderators = self.plugin_config['MODERATORS']

        # Initialize Safe Browsing API
        if self.plugin_config['GOOGLE_SAFE_BROWSING']:
            self.sbl = SafeBrowsingList(
                self.plugin_config['GOOGLE_SAFE_BROWSING_API_KEY'])
            self.sbl.update_hash_prefix_cache()

        # Populate blacklist from URLs
        for url in self.plugin_config['BLACKLISTS']:
            url = url.strip()
            if url.endswith('.json'):
                response = requests.get(url, timeout=_BLACKLIST_TIMEOUT)
                response.raise_for_status()
                # Assuming MEW List format: a JSON array of objects whose
                # 'id' field holds the blacklisted host.
                self.blacklist.extend(
                    item['id'] for item in response.json())
            elif url.endswith('.csv'):
                print('csv not implemented')  # TODO
            else:
                print('txt not implement')  # TODO

        print(self.__class__.__name__, 'initialized')

    def process_message(self, data):
        """Inspect one message event and alert on the first bad URL found.

        Only events whose channel id starts with ``C`` or ``G`` are
        inspected. Events without a channel or without text are ignored.

        Args:
            data: message event dict; the ``channel`` and ``text`` keys
                are read.
        """
        chan = data.get('channel') or ''
        # Private (Groups) or Public Channels
        if not chan.startswith(('C', 'G')):
            return

        text = data.get('text')
        if not text:
            return

        # Find all URLs in the message text and stop at the first one that
        # is blacklisted or flagged, so a message triggers a single alert.
        for match in _URL_PATTERN.findall(text):
            # The pattern's `$-_` range admits '>', so a link wrapped as
            # <url> (Slack's link markup, per its formatting docs) is
            # captured with the closing bracket attached; trim it.
            url = match.rstrip('>')
            if self._is_flagged(url):
                self._alert(chan, text)
                break

    @staticmethod
    def _extract_host(url):
        """Return the lower-cased host of *url*, or '' if it has none."""
        try:
            host = urlparse(url).hostname
        except ValueError:
            # urlparse rejects malformed bracketed (IPv6-style) netlocs.
            return ''
        # `hostname` drops userinfo and port; a trailing dot (FQDN root, or
        # sentence punctuation swallowed by the pattern) is removed so the
        # host compares equal to its blacklist entry.
        return (host or '').rstrip('.')

    def _is_flagged(self, url):
        """Return True if *url* is blacklisted or matched by Safe Browsing."""
        # Check blacklist
        host = self._extract_host(url)
        if host and host in self.blacklist:
            return True
        # Check Google Safe Browsing (only when it was enabled in config)
        return self.sbl is not None and bool(self.sbl.lookup_url(url))

    def _alert(self, chan, text):
        """Notify moderators and optionally warn the originating channel.

        Args:
            chan: id of the channel the offending message was posted in.
            text: full text of the offending message.
        """
        # TODO flag user
        # TODO early warning system
        self.slack_client.api_call(
            'chat.postMessage',
            channel=self.plugin_config['MODERATE_CHAN'],
            text=' '.join(self.moderators) + ' ' + text,
        )  # TODO can probably use outputs for this
        warning = self.plugin_config.get('WARNING_MESSAGE')
        if warning:
            self.outputs.append([chan, warning])