/ rules / D-KW-01.py
D-KW-01.py
  1  """
  2  Rule: D-KW-01 - blacklist keyword match
  3  Type: moderation | Output: binary
  4  Description: Reject on regex keyword match after normalization.
  5  Spec reference: 8.1.5
  6  
  7  Checks title, content (description), and file names against a compiled
  8  set of regex patterns. Case-insensitive. Any match = reject.
  9  
 10  Patterns are grouped by category for easy maintenance. Add new patterns
 11  to the appropriate list or create a new category.
 12  """
 13  
 14  import re
 15  from simple_types import BinaryRuleResult, Nip35Kind2003Event
 16  
 17  # ---------------------------------------------------------------------------
 18  # Pattern lists by category (case-insensitive, matched against normalized text)
 19  # ---------------------------------------------------------------------------
 20  
 21  # Malware / trojan lures
MALWARE_PATTERNS = [
    # "crack" / "keygen" / "patch", then "by" or "from", then "virus" /
    # "trojan" / "malware" within the next 30 characters. No word boundaries,
    # so the keywords are also found inside longer words.
    r"(?:crack|keygen|patch)\s*(?:by|from)\s+.{0,30}(?:virus|trojan|malware)",
    # Stand-alone word "rat" followed by tool / server / client / builder
    # (presumably RAT = remote access trojan -- confirm with rule owner).
    r"\brat\s*(?:tool|server|client|builder)\b",
    # crypto / bitcoin / btc + miner / mining + hidden / stealth / silent,
    # in exactly that order.
    r"\b(?:crypto|bitcoin|btc)\s*(?:miner|mining)\s*(?:hidden|stealth|silent)\b",
    # "password" + ("is" | ":" | "=") + one of the values 1234 / password /
    # infected.
    r"\bpassword\s*(?:is|:|=)\s*(?:1234|password|infected)\b",
]
 28  
 29  # Scam / phishing
SCAM_PATTERNS = [
    # Optional leading "free", then a game currency ("vbucks", "v-bucks",
    # "v bucks", "robux", "riot points"), then generator / hack / cheat / free.
    r"\b(?:free\s+)?(?:v[- ]?bucks|robux|riot\s*points)\s*(?:generator|hack|cheat|free)\b",
    # "gift card" or "giftcard" followed by generator / hack / code / free.
    r"\b(?:gift\s*card|giftcard)\s*(?:generator|hack|code|free)\b",
    # "credit card" followed by generator / hack / dump / fullz.
    r"\bcredit\s*card\s*(?:generator|hack|dump|fullz)\b",
    # paypal / venmo / cashapp followed by hack, generator, "money adder"
    # or "money glitch".
    r"\b(?:paypal|venmo|cashapp)\s*(?:hack|generator|money\s*(?:adder|glitch))\b",
    # The literal phrase "get rich quick" (any amount of whitespace between
    # the words).
    r"\bget\s+rich\s+quick\b",
]
 37  
 38  # Spam / junk / SEO stuffing
SPAM_PATTERNS = [
    # "join" / "visit" / "go to" + "my" / "our" + telegram / discord /
    # whatsapp. No leading word boundary, so e.g. "rejoin our discord" also
    # matches.
    r"(?:join|visit|go\s+to)\s+(?:my|our)\s+(?:telegram|discord|whatsapp)\b",
    # "buy" / "order" / "cheap" directly followed by one of the listed drug
    # names.
    r"\b(?:buy|order|cheap)\s+(?:viagra|cialis|xanax|adderall|oxycodone|tramadol)\b",
    # Optional "online", then "casino" + hack / cheat / bot / trick.
    r"\b(?:online\s+)?casino\s+(?:hack|cheat|bot|trick)\b",
    # "slot" or "slots" + hack / cheat / bot / trick.
    r"\bslots?\s+(?:hack|cheat|bot|trick)\b",
    # "onlyfan" / "onlyfans" / "fansly" + leak / hack / "free access".
    r"\b(?:onlyfans?|fansly)\s*(?:leak|hack|free\s*access)\b",
    # "sex tape" or "sex video" + leak / hack.
    r"\bsex\s*(?:tape|video)\s*(?:leak|hack)\b",
]
 47  
 48  # Social engineering / password traps
TRAP_PATTERNS = [
    # "password" / "pass" / "pw" + ("in" | "inside" | ":" | "=") +
    # description / readme / nfo / txt.
    # NOTE(review): there is no leading \b, so this also fires when the
    # keyword is the tail of a longer word (e.g. "bypass in readme") --
    # confirm whether that is intended.
    r"(?:password|pass|pw)\s*(?:in|inside|:|=)\s*(?:description|readme|nfo|txt)\b",
    # "extract password" or "extract with password".
    r"\bextract\s+(?:with\s+)?password\b",
    # "disable" + optional "your" + antivirus / av / defender / firewall +
    # before / first / to.
    r"\bdisable\s+(?:your\s+)?(?:antivirus|av|defender|firewall)\s+(?:before|first|to)\b",
    # "turn off" + antivirus / av / defender / firewall.
    r"\bturn\s+off\s+(?:antivirus|av|defender|firewall)\b",
]
 55  
 56  # Survey / link scam
SURVEY_PATTERNS = [
    # "complete" + optional "a" + survey / offer + to / for +
    # download / unlock / access.
    r"\bcomplete\s+(?:a\s+)?(?:survey|offer)\s+(?:to|for)\s+(?:download|unlock|access)\b",
    # "unlock" + download / file / content + by / with / after.
    r"\bunlock\s+(?:download|file|content)\s+(?:by|with|after)\b",
    # The literal phrase "human verification required".
    r"\bhuman\s+verification\s+required\b",
]
 62  
 63  # ---------------------------------------------------------------------------
 64  # Compile all patterns into a single regex for performance
 65  # ---------------------------------------------------------------------------
 66  
 67  ALL_PATTERNS = (
 68      MALWARE_PATTERNS
 69      + SCAM_PATTERNS
 70      + SPAM_PATTERNS
 71      + TRAP_PATTERNS
 72      + SURVEY_PATTERNS
 73  )
 74  
 75  _COMPILED = re.compile("|".join(f"(?:{p})" for p in ALL_PATTERNS), re.IGNORECASE)
 76  
 77  
 78  def _extract_text(entry: dict) -> str:
 79      """Build a single searchable string from the event's text fields."""
 80      parts = []
 81  
 82      # Content / description
 83      content = entry.get("content", "")
 84      if content:
 85          parts.append(content)
 86  
 87      # Tags: title and file names
 88      for tag in entry.get("tags", []):
 89          if len(tag) >= 2:
 90              if tag[0] == "title":
 91                  parts.append(tag[1])
 92              elif tag[0] == "file":
 93                  parts.append(tag[1])
 94  
 95      return " ".join(parts)
 96  
 97  
 98  def main(entry: Nip35Kind2003Event) -> BinaryRuleResult:
 99      text = _extract_text(entry)
100      if not text.strip():
101          return {"passed": True}
102  
103      match = _COMPILED.search(text)
104      if match:
105          return {"passed": False}
106  
107      return {"passed": True}