"""filters.py

Tkinter GUI that loads JSON site definitions and builds Blackbird filter
strings (``field<op>value`` terms joined with ``and``) from them.
"""
import glob
import json
import os
import tkinter as tk
from collections import Counter
from datetime import datetime
from tkinter import ttk, filedialog, messagebox, scrolledtext
from typing import Dict, List, Any, Tuple, Optional


class BlackbirdFilterGeneratorGUI:
    """Main window: load JSON data, pick categories/sites, emit a filter string."""

    # Keys left out of the structure preview and of exported sample entries.
    EXCLUDED_FIELDS = frozenset({'url', 'status', 'metadata'})
    # Upper bound on entries shown in the preview / copied into an export.
    MAX_SAMPLE_ENTRIES = 1000
    # Label of the "no category restriction" choice in the website combobox.
    ALL_CATEGORIES = "All Categories"

    def __init__(self, root):
        """Initialise state and build the widgets inside *root*."""
        self.root = root
        self.root.title("Blackbird Filter Generator")
        self.root.geometry("1000x900")

        # Field mapping - using exact JSON field names
        self.field_mapping = {
            'category': 'cat',  # JSON 'category' becomes Blackbird 'cat'
            'name': 'name',  # JSON 'name' becomes Blackbird 'name'
            'uri_check': 'uri_check',
            'e_code': 'e_code',
            'e_string': 'e_string',
            'm_string': 'm_string',
            'm_code': 'm_code'
        }
        self.available_fields = ['name', 'cat', 'uri_check', 'e_code', 'e_string', 'm_string', 'm_code']
        self.operators = ['=', '~', '>', '<', '>=', '<=', '!=']

        self.filters = []
        self.loaded_data = []
        self.loaded_files = []

        # Store site-category relationships
        self.site_categories = {}  # site_name -> category
        self.category_sites = {}  # category -> list of site_names

        # Store file-source relationships
        self.site_sources = {}  # site_name -> list of source files
        self.category_sources = {}  # category -> list of source files
        self.file_entries = {}  # file_path -> list of entries

        # Store original website list for filtering
        self.all_websites = []
        self.all_websites_with_categories = []  # Store (website, category) tuples

        # Names behind each listbox row, index-aligned with the widget, so a
        # selection never has to be parsed back out of the display text.
        self._category_names = []
        self._website_names = []
        # (name, category, entry_count, source_info) per site, in display order.
        self._website_rows = []

        self.setup_gui()

    # ------------------------------------------------------------------
    # GUI construction
    # ------------------------------------------------------------------
    def setup_gui(self):
        """Create every widget and put the UI into its initial (no data) state."""
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Configure grid weights
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        main_frame.columnconfigure(1, weight=1)
        main_frame.rowconfigure(1, weight=1)  # This gives the JSON frame proper weight

        self._build_source_section(main_frame)
        self._build_structure_preview(main_frame)

        # Main content area
        content_frame = ttk.Frame(main_frame)
        content_frame.grid(row=2, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S))
        content_frame.columnconfigure(0, weight=1)
        content_frame.columnconfigure(1, weight=1)
        content_frame.rowconfigure(0, weight=1)

        self._build_filter_creation_panel(content_frame)
        self._build_output_panel(content_frame)

        # Initialize UI state
        self.update_ui_state()

    def _build_source_section(self, parent):
        """Build the "Data Source" row: browse, path, recursion, load, export."""
        file_frame = ttk.LabelFrame(parent, text="Data Source", padding="5")
        file_frame.grid(row=0, column=0, columnspan=2, sticky=(tk.W, tk.E), pady=(0, 10))
        file_frame.columnconfigure(1, weight=1)

        ttk.Button(file_frame, text="Browse File/Directory",
                   command=self.browse_file).grid(row=0, column=0, padx=(0, 10))

        self.file_path_var = tk.StringVar()
        ttk.Entry(file_frame, textvariable=self.file_path_var,
                  state='readonly').grid(row=0, column=1, sticky=(tk.W, tk.E))

        self.recursive_var = tk.BooleanVar(value=True)
        ttk.Checkbutton(file_frame, text="Search Subdirectories",
                        variable=self.recursive_var).grid(row=0, column=2, padx=(10, 0))

        ttk.Button(file_frame, text="Load Data",
                   command=self.load_data).grid(row=0, column=3, padx=(10, 0))

        ttk.Button(file_frame, text="Export JSON Analysis",
                   command=self.export_json_analysis).grid(row=0, column=4, padx=(10, 0))

    def _build_structure_preview(self, parent):
        """Build the scrollable "JSON Structure Preview" text area."""
        json_frame = ttk.LabelFrame(parent, text="JSON Structure Preview", padding="5")
        json_frame.grid(row=1, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))
        json_frame.columnconfigure(0, weight=1)
        json_frame.rowconfigure(0, weight=1)  # This makes the text widget expand

        self.json_structure_text = tk.Text(json_frame, wrap=tk.WORD, height=10)
        self.json_structure_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        scrollbar = ttk.Scrollbar(json_frame, orient=tk.VERTICAL,
                                  command=self.json_structure_text.yview)
        scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S, tk.E))
        self.json_structure_text.config(yscrollcommand=scrollbar.set)

        # Placeholder shown until data has been loaded.
        self.json_structure_text.insert(tk.END, "No data loaded - JSON structure will appear here")

    def _build_filter_creation_panel(self, parent):
        """Build the left panel: category, website and custom filter controls."""
        left_frame = ttk.LabelFrame(parent, text="Create Filters", padding="5")
        left_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(0, 5))
        left_frame.columnconfigure(0, weight=1)
        left_frame.rowconfigure(2, weight=1)

        # Category filters
        cat_frame = ttk.LabelFrame(left_frame, text="Category Filters", padding="5")
        cat_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
        cat_frame.columnconfigure(0, weight=1)

        self.category_listbox = tk.Listbox(cat_frame, selectmode=tk.MULTIPLE, height=6)
        self.category_listbox.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=(5, 0))

        cat_btn_frame = ttk.Frame(cat_frame)
        cat_btn_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(5, 0))

        ttk.Button(cat_btn_frame, text="Select All",
                   command=self.select_all_categories).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(cat_btn_frame, text="Clear Selection",
                   command=self.clear_category_selection).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(cat_btn_frame, text="Exclude Selected",
                   command=self.exclude_selected_categories).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(cat_btn_frame, text="Include Selected",
                   command=self.include_selected_categories).pack(side=tk.LEFT)

        # Website filters with category info
        website_frame = ttk.LabelFrame(left_frame, text="Website Filters", padding="5")
        website_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
        website_frame.columnconfigure(0, weight=1)

        # Search box for websites
        search_frame = ttk.Frame(website_frame)
        search_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=(0, 5))

        ttk.Label(search_frame, text="Search:").pack(side=tk.LEFT)
        self.website_search_var = tk.StringVar()
        # trace_add replaces the deprecated trace('w', ...) spelling.
        self.website_search_var.trace_add('write', self.filter_websites)
        ttk.Entry(search_frame, textvariable=self.website_search_var).pack(
            side=tk.LEFT, fill=tk.X, expand=True, padx=(5, 0))

        # Category filter for websites
        ttk.Label(search_frame, text="Category:").pack(side=tk.LEFT, padx=(10, 0))
        self.website_category_var = tk.StringVar(value=self.ALL_CATEGORIES)
        self.website_category_combo = ttk.Combobox(
            search_frame, textvariable=self.website_category_var, state="readonly")
        self.website_category_combo.pack(side=tk.LEFT, padx=(5, 0))
        self.website_category_combo.bind('<<ComboboxSelected>>', self.filter_websites_by_category)

        self.website_listbox = tk.Listbox(website_frame, selectmode=tk.MULTIPLE, height=6)
        self.website_listbox.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(5, 0))

        website_btn_frame = ttk.Frame(website_frame)
        website_btn_frame.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=(5, 0))

        ttk.Button(website_btn_frame, text="Select All",
                   command=self.select_all_websites).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(website_btn_frame, text="Clear Selection",
                   command=self.clear_website_selection).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(website_btn_frame, text="Exclude Selected",
                   command=self.exclude_selected_websites).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(website_btn_frame, text="Include Selected",
                   command=self.include_selected_websites).pack(side=tk.LEFT)

        # Custom filter section
        custom_frame = ttk.LabelFrame(left_frame, text="Custom Filters", padding="5")
        custom_frame.grid(row=2, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        custom_frame.columnconfigure(1, weight=1)

        ttk.Label(custom_frame, text="Field:").grid(row=0, column=0, sticky=tk.W, pady=(0, 5))
        self.custom_field_var = tk.StringVar()
        custom_field_combo = ttk.Combobox(custom_frame, textvariable=self.custom_field_var,
                                          values=self.available_fields, state="readonly")
        custom_field_combo.grid(row=0, column=1, sticky=(tk.W, tk.E), pady=(0, 5), padx=(5, 0))
        custom_field_combo.set('cat')

        ttk.Label(custom_frame, text="Operator:").grid(row=1, column=0, sticky=tk.W, pady=(0, 5))
        self.custom_operator_var = tk.StringVar()
        custom_operator_combo = ttk.Combobox(custom_frame, textvariable=self.custom_operator_var,
                                             values=self.operators, state="readonly")
        custom_operator_combo.grid(row=1, column=1, sticky=(tk.W, tk.E), pady=(0, 5), padx=(5, 0))
        custom_operator_combo.set('!=')

        ttk.Label(custom_frame, text="Value:").grid(row=2, column=0, sticky=tk.W, pady=(0, 5))
        self.custom_value_var = tk.StringVar()
        ttk.Entry(custom_frame, textvariable=self.custom_value_var).grid(
            row=2, column=1, sticky=(tk.W, tk.E), pady=(0, 5), padx=(5, 0))

        ttk.Button(custom_frame, text="Add Custom Filter",
                   command=self.add_custom_filter).grid(row=3, column=0, columnspan=2, pady=(5, 0))

    def _build_output_panel(self, parent):
        """Build the right panel: current filter list, reorder and output actions."""
        right_frame = ttk.LabelFrame(parent, text="Current Filters & Output", padding="5")
        right_frame.grid(row=0, column=1, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(5, 0))
        right_frame.columnconfigure(0, weight=1)
        right_frame.rowconfigure(1, weight=1)

        # Label (left) and button frame (right) deliberately share row 0.
        ttk.Label(right_frame, text="Current Filters (drag to reorder):").grid(
            row=0, column=0, sticky=tk.W)

        filter_btn_frame = ttk.Frame(right_frame)
        filter_btn_frame.grid(row=0, column=0, sticky=(tk.E), pady=(0, 5))

        ttk.Button(filter_btn_frame, text="Clear All",
                   command=self.clear_all_filters).pack(side=tk.RIGHT, padx=(5, 0))
        ttk.Button(filter_btn_frame, text="Remove Selected",
                   command=self.remove_selected_filter).pack(side=tk.RIGHT)

        # Use a Listbox for filters but we'll handle reordering manually
        self.filters_listbox = tk.Listbox(right_frame, height=10)
        self.filters_listbox.grid(row=1, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))

        # Add move up/down buttons for reordering
        reorder_frame = ttk.Frame(right_frame)
        reorder_frame.grid(row=2, column=0, sticky=(tk.W, tk.E))

        ttk.Button(reorder_frame, text="Move Up",
                   command=self.move_filter_up).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(reorder_frame, text="Move Down",
                   command=self.move_filter_down).pack(side=tk.LEFT)

        # Generated filter string
        ttk.Label(right_frame, text="Generated Filter String:").grid(
            row=3, column=0, sticky=tk.W, pady=(10, 0))

        self.filter_output_text = scrolledtext.ScrolledText(right_frame, height=6, width=50)
        self.filter_output_text.grid(row=4, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(5, 0))

        # Action buttons
        action_frame = ttk.Frame(right_frame)
        action_frame.grid(row=5, column=0, sticky=(tk.W, tk.E), pady=(10, 0))

        ttk.Button(action_frame, text="Generate Filter",
                   command=self.generate_filter).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(action_frame, text="Save to File",
                   command=self.save_to_file).pack(side=tk.LEFT, padx=(0, 5))
        ttk.Button(action_frame, text="Copy to Clipboard",
                   command=self.copy_to_clipboard).pack(side=tk.LEFT)

    @staticmethod
    def _fill_listbox(listbox, rows):
        """Replace the contents of *listbox* with *rows*.

        A Tk listbox in the ``disabled`` state ignores insert/delete, so the
        widget is switched to ``normal`` for the update and then restored.
        """
        previous_state = str(listbox.cget('state'))
        listbox.config(state=tk.NORMAL)
        listbox.delete(0, tk.END)
        for row in rows:
            listbox.insert(tk.END, row)
        listbox.config(state=previous_state)

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------
    @staticmethod
    def _analysis_filename(relative_path):
        """Return the export file name for a source at *relative_path*.

        Directory separators are folded into ``__`` so that two source files
        with the same base name in different sub-directories do not overwrite
        each other's analysis.
        """
        stem = os.path.splitext(relative_path)[0]
        for separator in (os.sep, os.altsep):
            if separator:
                stem = stem.replace(separator, "__")
        return f"{stem}_analysis.json"

    def export_single_file_analysis(self, file_path, export_dir):
        """Write ``<name>_analysis.json`` for one loaded file into *export_dir*.

        Does nothing when *file_path* has no loaded entries.
        """
        entries = self.file_entries.get(file_path)
        if not entries:
            return

        relative_path = self.get_relative_source_path(file_path)
        export_path = os.path.join(export_dir, self._analysis_filename(relative_path))

        unique_fields = set()
        category_counts = Counter()
        website_counts = Counter()
        for entry in entries:
            unique_fields.update(entry.keys())
            if entry.get('category'):
                category_counts[str(entry['category'])] += 1
            if entry.get('name'):
                website_counts[str(entry['name'])] += 1

        # Sample entries, minus the verbose/sensitive fields.
        sample_entries = [
            {key: value for key, value in entry.items() if key not in self.EXCLUDED_FIELDS}
            for entry in entries[:self.MAX_SAMPLE_ENTRIES]
        ]

        analysis = {
            "relative_path": relative_path,
            "total_entries": len(entries),
            "export_timestamp": str(datetime.now()),
            # most_common() orders by count descending, ties in first-seen order.
            "categories": dict(category_counts.most_common()),
            "websites": dict(website_counts.most_common()),
            # Sorted so repeated exports of the same data are identical.
            "unique_fields": sorted(unique_fields, key=str),
            "sample_entries": sample_entries,
        }

        with open(export_path, 'w', encoding='utf-8') as f:
            json.dump(analysis, f, indent=2, ensure_ascii=False)

    def generate_summary_report(self, export_dir):
        """Write ``summary_report.json`` covering all loaded files."""
        summary = {
            "export_timestamp": str(datetime.now()),
            "total_files_analyzed": len(self.loaded_files),
            "total_entries": len(self.loaded_data),
            "files": []
        }

        for file_path in self.loaded_files:
            if file_path not in self.file_entries:
                continue
            entries = self.file_entries[file_path]
            category_counts = Counter(
                str(entry['category']) for entry in entries if entry.get('category'))
            categories = dict(category_counts.most_common())
            summary["files"].append({
                "relative_path": self.get_relative_source_path(file_path),
                "entry_count": len(entries),
                "categories_count": len(categories),
                "categories": categories,  # Include category breakdown
                "websites_count": len(self.get_unique_values(entries, 'name'))
            })

        summary_path = os.path.join(export_dir, "summary_report.json")
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

    def export_json_analysis(self):
        """Export a per-file analysis plus a summary report to a chosen directory."""
        if not self.loaded_files:
            messagebox.showwarning("Warning", "No data loaded to export")
            return

        export_dir = filedialog.askdirectory(title="Select Export Directory")
        if not export_dir:
            return

        try:
            for file_path in self.loaded_files:
                self.export_single_file_analysis(file_path, export_dir)

            self.generate_summary_report(export_dir)

            messagebox.showinfo("Success", f"JSON analysis exported to:\n{export_dir}\n\n"
                                           f"• Individual file analysis: {len(self.loaded_files)} files\n"
                                           f"• Summary report: summary_report.json")
        except Exception as e:
            # UI boundary: report the failure instead of crashing the event loop.
            messagebox.showerror("Error", f"Failed to export analysis: {str(e)}")

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def browse_file(self):
        """Ask for a directory; if that is cancelled, ask for a single JSON file."""
        path = filedialog.askdirectory(title="Select Directory with JSON Files")
        if not path:
            path = filedialog.askopenfilename(
                title="Select JSON File",
                filetypes=[("JSON files", "*.json"), ("All files", "*.*")]
            )
        if path:
            self.file_path_var.set(path)

    def load_data(self):
        """Load JSON from the selected path and refresh every view."""
        path = self.file_path_var.get()
        if not path or not os.path.exists(path):
            messagebox.showerror("Error", "Please select a valid file or directory")
            return

        try:
            # Forget entries from any previous load; otherwise stale files
            # leak into the relationships, the preview and the exports.
            self.file_entries = {}
            self.loaded_data, self.loaded_files = self.load_json_files(path, self.recursive_var.get())

            # Refresh even when nothing was loaded so no stale data stays visible.
            self.build_site_category_relationships()
            self.update_json_structure_display()
            self.populate_category_list()
            self.populate_website_list()
            self.update_ui_state()

            if not self.loaded_data:
                messagebox.showwarning("Warning", "No data loaded from the selected path")
        except Exception as e:
            # UI boundary: report the failure instead of crashing the event loop.
            messagebox.showerror("Error", f"Failed to load data: {str(e)}")

    def update_json_structure_display(self):
        """Show sample entries and the field mapping found in the loaded files."""
        self.json_structure_text.delete("1.0", tk.END)

        if not self.loaded_data:
            self.json_structure_text.insert(tk.END, "No data loaded")
            return

        # Map each entry object to its file once. Identity (not equality) is
        # used so equal entries from different files keep their own source.
        source_of = {}
        for file_path, entries in self.file_entries.items():
            for entry in entries:
                source_of.setdefault(id(entry), file_path)

        parts = ["JSON Structure Found:\n\n"]
        for i, entry in enumerate(self.loaded_data[:self.MAX_SAMPLE_ENTRIES]):
            # Label the sample with the last URL segment (e.g. the user name in
            # "https://t.me/cssunshine"), falling back to the entry index.
            username = f"Entry {i+1}"
            if entry.get('url'):
                url = str(entry['url'])
                if '/' in url:
                    username = url.rstrip('/').rsplit('/', 1)[-1] or username

            file_source = "Unknown source"
            if id(entry) in source_of:
                file_source = self.get_relative_source_path(source_of[id(entry)])

            parts.append(f"Sample {username} (from: {file_source}):\n")
            for key, value in entry.items():
                if key in self.EXCLUDED_FIELDS:
                    continue
                parts.append(f" \"{key}\": \"{value}\"\n")
            parts.append("\n")

        parts.append("Field Mapping (JSON → Blackbird):\n")
        for json_field, blackbird_field in self.field_mapping.items():
            if any(json_field in entry for entry in self.loaded_data):
                unique_count = len(self.get_unique_values(self.loaded_data, json_field))
                parts.append(f" \"{json_field}\" → {blackbird_field} ({unique_count} unique values)\n")

        self.json_structure_text.insert(tk.END, "".join(parts))

    def build_site_category_relationships(self):
        """Rebuild the site/category/source-file mappings from ``file_entries``."""
        self.site_categories = {}
        self.category_sites = {}
        self.site_sources = {}
        self.category_sources = {}

        for file_path, entries in self.file_entries.items():
            for item in entries:
                # Use exact JSON field names
                if not item.get('name'):
                    continue
                site_name = str(item['name'])

                sources = self.site_sources.setdefault(site_name, [])
                if file_path not in sources:
                    sources.append(file_path)

                if not item.get('category'):
                    continue
                category = str(item['category'])

                # A site seen with several categories keeps the last one.
                self.site_categories[site_name] = category

                sites = self.category_sites.setdefault(category, [])
                if site_name not in sites:
                    sites.append(site_name)

                category_files = self.category_sources.setdefault(category, [])
                if file_path not in category_files:
                    category_files.append(file_path)

    def load_json_files(self, path: str, recursive: bool = True) -> Tuple[List[Dict[str, Any]], List[str]]:
        """Load entries from a JSON file or from every ``.json`` file in a directory.

        Each file that yields entries is also recorded in ``self.file_entries``.

        Returns:
            ``(entries, files)``: all entries in load order and the paths of
            the files that contributed at least one entry.
        """
        data = []
        loaded_files = []

        if os.path.isfile(path):
            json_files = [path]
        elif os.path.isdir(path):
            if recursive:
                json_files = self._find_json_files_recursive(path)
            else:
                # Escape the directory so '[' or '*' in its name are literal,
                # and match the extension case-insensitively like the
                # recursive search does.
                candidates = glob.glob(os.path.join(glob.escape(path), "*"))
                json_files = [
                    candidate for candidate in candidates
                    if candidate.lower().endswith('.json') and os.path.isfile(candidate)
                ]
            json_files = sorted(json_files)  # deterministic load order
        else:
            json_files = []

        for json_file in json_files:
            file_data = self._load_single_file(json_file)
            if file_data:
                data.extend(file_data)
                loaded_files.append(json_file)
                # Track source for this file
                self.file_entries[json_file] = file_data

        return data, loaded_files

    def _find_json_files_recursive(self, directory: str) -> List[str]:
        """Return the paths of all ``.json`` files (any case) below *directory*."""
        return [
            os.path.join(root, file_name)
            for root, _dirs, files in os.walk(directory)
            for file_name in files
            if file_name.lower().endswith('.json')
        ]

    def _load_single_file(self, file_path: str) -> List[Dict[str, Any]]:
        """Parse one JSON file into a list of entry dicts.

        A top-level object becomes a one-element list. Non-dict items in a
        top-level list are dropped because every consumer indexes entries by
        key. Unreadable or invalid files are reported on stdout and yield
        ``[]`` (best effort: one bad file must not abort the whole load).
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except (OSError, ValueError) as e:  # ValueError covers JSON and decode errors
            print(f"Error loading {file_path}: {e}")
            return []

        if isinstance(data, dict):
            return [data]
        if isinstance(data, list):
            return [item for item in data if isinstance(item, dict)]
        return []

    def get_unique_values(self, data: List[Dict[str, Any]], json_field: str) -> List[str]:
        """Return the sorted distinct non-``None`` values of *json_field*, as strings."""
        values = {
            str(item[json_field])
            for item in data
            if json_field in item and item[json_field] is not None
        }
        return sorted(values)

    def get_relative_source_path(self, full_path):
        """Convert a full file path to a shorter, readable form.

        Paths inside the selected base directory are returned relative to it;
        when the base is the file itself its name is returned. Anything else
        is shortened to its last two directory components plus the file name.
        """
        base_path = self.file_path_var.get()
        if base_path:
            normalized_full = os.path.normpath(full_path)
            normalized_base = os.path.normpath(base_path)
            if normalized_full == normalized_base:
                # A single file was selected; relpath would give ".".
                return os.path.basename(full_path)
            # Compare against "base/" so a sibling such as "data_other" is not
            # mistaken for something inside "data".
            if normalized_full.startswith(os.path.join(normalized_base, "")):
                return os.path.relpath(full_path, base_path)

        # If no base path or path doesn't match, show the last 2 directory components
        dirname = os.path.dirname(full_path)
        basename = os.path.basename(full_path)
        parent_dir = os.path.basename(os.path.dirname(dirname))
        current_dir = os.path.basename(dirname)

        if parent_dir and parent_dir != current_dir:
            return os.path.join(parent_dir, current_dir, basename)
        return os.path.join(current_dir, basename)

    # ------------------------------------------------------------------
    # Category / website lists
    # ------------------------------------------------------------------
    def populate_category_list(self):
        """Fill the category listbox, most frequent category first."""
        cat_count = Counter(
            str(item['category']) for item in self.loaded_data if item.get('category'))
        categories = self.get_unique_values(self.loaded_data, 'category')

        # Sort by count descending, then by name
        self._category_names = sorted(categories, key=lambda cat: (-cat_count.get(cat, 0), cat))

        rows = []
        for cat in self._category_names:
            count = cat_count.get(cat, 0)
            site_count = len(self.category_sites.get(cat, []))
            source_count = len(self.category_sources.get(cat, []))
            source_info = f" [from {source_count} sources]" if source_count > 1 else ""
            rows.append(f"{cat} ({count} entries, {site_count} sites{source_info})")

        self._fill_listbox(self.category_listbox, rows)

    def populate_website_list(self):
        """Recompute per-site display data and show it, most frequent site first."""
        self.all_websites_with_categories = []
        site_count = Counter()

        for item in self.loaded_data:
            if item.get('name'):
                name = str(item['name'])
                site_count[name] += 1
                self.all_websites_with_categories.append(
                    (name, self.site_categories.get(name, "Unknown")))

        rows = []
        for name, count in site_count.items():
            source_count = len(self.site_sources.get(name, []))
            source_info = f" [{source_count} sources]" if source_count > 1 else ""
            rows.append((name, self.site_categories.get(name, "Unknown"), count, source_info))

        # Count descending, then name; cached so filtering never recounts.
        self._website_rows = sorted(rows, key=lambda row: (-row[2], row[0]))
        self.all_websites = [row[0] for row in self._website_rows]

        # Populate category filter for websites
        categories = [self.ALL_CATEGORIES] + sorted(self.get_unique_values(self.loaded_data, 'category'))
        self.website_category_combo['values'] = categories
        self.website_category_combo.set(self.ALL_CATEGORIES)

        # Render through the filter so an existing search term is honoured.
        self.filter_websites()

    def filter_websites(self, *args):
        """Show only the sites matching the search text and selected category.

        Accepts and ignores the arguments Tk passes to variable-trace callbacks.
        """
        search_term = self.website_search_var.get().lower()
        selected_category = self.website_category_var.get()

        visible = [
            row for row in self._website_rows
            if search_term in row[0].lower()
            and selected_category in (self.ALL_CATEGORIES, row[1])
        ]

        self._website_names = [row[0] for row in visible]
        self._fill_listbox(self.website_listbox, [
            f"{website} [{category}] ({count} entries{source_info})"
            for website, category, count, source_info in visible
        ])

    def filter_websites_by_category(self, event: Optional[tk.Event] = None):
        """Filter websites when category selection changes"""
        self.filter_websites()

    def select_all_categories(self):
        """Select every row in the category listbox."""
        self.category_listbox.select_set(0, tk.END)

    def clear_category_selection(self):
        """Deselect every row in the category listbox."""
        self.category_listbox.selection_clear(0, tk.END)

    def select_all_websites(self):
        """Select every row in the website listbox."""
        self.website_listbox.select_set(0, tk.END)

    def clear_website_selection(self):
        """Deselect every row in the website listbox."""
        self.website_listbox.selection_clear(0, tk.END)

    def _add_filters_for_selection(self, listbox, names, separator, filter_field, operator):
        """Add one ``filter_field<operator>name`` filter per selected listbox row.

        The name comes from *names* (index-aligned with the listbox). Only if
        that list is out of step is the display text split on *separator*.
        """
        for idx in listbox.curselection():
            if 0 <= idx < len(names):
                name = names[idx]
            else:
                name = listbox.get(idx).split(separator)[0]
            self.add_filter(filter_field, operator, name)
        self.update_filters_display()

    def exclude_selected_categories(self):
        """Add a ``cat!=`` filter for each selected category."""
        self._add_filters_for_selection(
            self.category_listbox, self._category_names, ' (', 'cat', '!=')

    def include_selected_categories(self):
        """Add a ``cat=`` filter for each selected category."""
        self._add_filters_for_selection(
            self.category_listbox, self._category_names, ' (', 'cat', '=')

    def exclude_selected_websites(self):
        """Add a ``name!=`` filter for each selected website."""
        self._add_filters_for_selection(
            self.website_listbox, self._website_names, ' [', 'name', '!=')

    def include_selected_websites(self):
        """Add a ``name=`` filter for each selected website."""
        self._add_filters_for_selection(
            self.website_listbox, self._website_names, ' [', 'name', '=')

    # ------------------------------------------------------------------
    # Filter list management
    # ------------------------------------------------------------------
    def add_custom_filter(self):
        """Add the filter described by the custom field/operator/value inputs."""
        field = self.custom_field_var.get()
        operator = self.custom_operator_var.get()
        value = self.custom_value_var.get()

        # A whitespace-only value is treated as missing.
        if not field or not operator or not value.strip():
            messagebox.showwarning("Warning", "Please fill in all custom filter fields")
            return

        self.add_filter(field, operator, value)
        self.custom_value_var.set('')  # Clear value field
        self.update_filters_display()

    def add_filter(self, filter_field: str, operator: str, value: str):
        """Append ``filter_field<operator>value`` to ``self.filters``.

        For ``=``, ``~`` and ``!=`` a value containing a space, a quote or a
        backslash is wrapped in single quotes with embedded single quotes
        escaped; every other value is appended verbatim.
        """
        needs_quotes = ' ' in value or any(char in value for char in ['"', "'", '\\'])
        if operator in ['=', '~', '!='] and needs_quotes:
            escaped_value = value.replace("'", "\\'")
            filter_str = f"{filter_field}{operator}'{escaped_value}'"
        else:
            # For numeric operators or values without special characters
            filter_str = f"{filter_field}{operator}{value}"

        self.filters.append(filter_str)

    def remove_selected_filter(self):
        """Remove the filters currently selected in the filter listbox."""
        # Highest index first so earlier removals do not shift later ones.
        for idx in reversed(self.filters_listbox.curselection()):
            if 0 <= idx < len(self.filters):
                self.filters.pop(idx)
        self.update_filters_display()

    def _move_selected_filter(self, offset):
        """Swap the first selected filter with its neighbour at *offset* (-1/+1)."""
        selection = self.filters_listbox.curselection()
        if not selection:
            return

        idx = selection[0]
        target = idx + offset
        if 0 <= idx < len(self.filters) and 0 <= target < len(self.filters):
            self.filters[idx], self.filters[target] = self.filters[target], self.filters[idx]
            self.update_filters_display()
            self.filters_listbox.select_set(target)

    def move_filter_up(self):
        """Move the selected filter one position up."""
        self._move_selected_filter(-1)

    def move_filter_down(self):
        """Move the selected filter one position down."""
        self._move_selected_filter(1)

    def clear_all_filters(self):
        """Remove every filter."""
        self.filters.clear()
        self.update_filters_display()

    def update_filters_display(self):
        """Redraw the filter listbox from ``self.filters``."""
        self.filters_listbox.delete(0, tk.END)
        for filter_str in self.filters:
            self.filters_listbox.insert(tk.END, filter_str)

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    def generate_filter(self):
        """Show the joined filter string and warn about suspicious filters."""
        filter_string = self.join_filters_safely()
        self.filter_output_text.delete("1.0", tk.END)
        self.filter_output_text.insert("1.0", filter_string)

        # Show a warning if there are potential issues
        self.validate_filters()

    def join_filters_safely(self) -> str:
        """Return all filters joined with ``' and '`` (empty string if none).

        The individual filters are already quoted by ``add_filter``.
        """
        if not self.filters:
            return ""
        return " and ".join(self.filters)

    def _collect_filter_warnings(self) -> List[str]:
        """Return human-readable warnings about the current filters."""
        warnings = []

        # Check for duplicate filters
        counts = Counter(self.filters)
        duplicates = sorted(filter_str for filter_str, count in counts.items() if count > 1)
        if duplicates:
            warnings.append(f"Duplicate filters found: {', '.join(duplicates)}")

        # add_filter only quotes for =, ~ and !=, so a value with spaces under
        # any other operator ends up as an unquoted filter containing a space.
        for filter_str in self.filters:
            if ' ' in filter_str and "'" not in filter_str and '"' not in filter_str:
                warnings.append(f"Filter '{filter_str}' might need quotes around values with spaces")

        # Check for mixed quote types (though single quotes are preferred now)
        for filter_str in self.filters:
            if '"' in filter_str and "'" in filter_str:
                warnings.append(f"Filter '{filter_str}' uses both single and double quotes")

        return warnings

    def validate_filters(self):
        """Check for potential issues in the generated filter"""
        warnings = self._collect_filter_warnings()
        if warnings:
            messagebox.showwarning("Filter Validation", "\n".join(warnings))

    def save_to_file(self):
        """Save the joined filter string to a file chosen by the user."""
        filter_string = self.join_filters_safely()
        if not filter_string:
            messagebox.showwarning("Warning", "No filters to save")
            return

        filename = filedialog.asksaveasfilename(
            title="Save Filter",
            defaultextension=".txt",
            filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
        )
        if not filename:
            return

        try:
            with open(filename, 'w', encoding='utf-8') as file:
                file.write(filter_string)
        except OSError as e:
            messagebox.showerror("Error", f"Failed to save file: {str(e)}")
        else:
            messagebox.showinfo("Success", f"Filter saved to:\n{filename}")

    def copy_to_clipboard(self):
        """Copy the joined filter string to the system clipboard."""
        filter_string = self.join_filters_safely()
        if not filter_string:
            messagebox.showwarning("Warning", "No filters to copy")
            return

        self.root.clipboard_clear()
        self.root.clipboard_append(filter_string)
        messagebox.showinfo("Success", "Filter copied to clipboard!")

    def update_ui_state(self):
        """Enable the selection widgets only while data is loaded."""
        has_data = bool(self.loaded_data)

        list_state = tk.NORMAL if has_data else tk.DISABLED
        self.category_listbox.config(state=list_state)
        self.website_listbox.config(state=list_state)
        # "readonly" (not "normal") keeps the combobox a pure drop-down.
        self.website_category_combo.config(state="readonly" if has_data else tk.DISABLED)


def main():
    """Create the root window and run the application."""
    root = tk.Tk()
    app = BlackbirdFilterGeneratorGUI(root)
    root.mainloop()


if __name__ == "__main__":
    main()