/ filters.py
filters.py
  1  import json
  2  import os
  3  import glob
  4  from datetime import datetime
  5  import tkinter as tk
  6  from tkinter import ttk, filedialog, messagebox, scrolledtext
  7  from typing import Dict, List, Any, Tuple, Optional
  8  
  9  class BlackbirdFilterGeneratorGUI:
 10      def __init__(self, root):
 11          self.root = root
 12          self.root.title("Blackbird Filter Generator")
 13          self.root.geometry("1000x900")
 14          
 15          # Field mapping - using exact JSON field names
 16          self.field_mapping = {
 17              'category': 'cat',  # JSON 'category' becomes Blackbird 'cat'
 18              'name': 'name',     # JSON 'name' becomes Blackbird 'name'
 19              'uri_check': 'uri_check', 
 20              'e_code': 'e_code',
 21              'e_string': 'e_string',
 22              'm_string': 'm_string',
 23              'm_code': 'm_code'
 24          }
 25          self.available_fields = ['name', 'cat', 'uri_check', 'e_code', 'e_string', 'm_string', 'm_code']
 26          self.operators = ['=', '~', '>', '<', '>=', '<=', '!=']
 27          
 28          self.filters = []
 29          self.loaded_data = []
 30          self.loaded_files = []
 31          
 32          # Store site-category relationships
 33          self.site_categories = {}  # site_name -> category
 34          self.category_sites = {}   # category -> list of site_names
 35          
 36          # Store file-source relationships
 37          self.site_sources = {}  # site_name -> list of source files
 38          self.category_sources = {}  # category -> list of source files
 39          self.file_entries = {}  # file_path -> list of entries
 40          
 41          self.setup_gui()
 42      
 43      def setup_gui(self):
 44          # Create main frame
 45          main_frame = ttk.Frame(self.root, padding="10")
 46          main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
 47          
 48          # Configure grid weights
 49          self.root.columnconfigure(0, weight=1)
 50          self.root.rowconfigure(0, weight=1)
 51          main_frame.columnconfigure(1, weight=1)
 52          main_frame.rowconfigure(1, weight=1)  # This gives the JSON frame proper weight
 53          
 54          # File selection section
 55          file_frame = ttk.LabelFrame(main_frame, text="Data Source", padding="5")
 56          file_frame.grid(row=0, column=0, columnspan=2, sticky=(tk.W, tk.E), pady=(0, 10))
 57          file_frame.columnconfigure(1, weight=1)
 58          
 59          ttk.Button(file_frame, text="Browse File/Directory", 
 60                    command=self.browse_file).grid(row=0, column=0, padx=(0, 10))
 61          
 62          self.file_path_var = tk.StringVar()
 63          ttk.Entry(file_frame, textvariable=self.file_path_var, state='readonly').grid(row=0, column=1, sticky=(tk.W, tk.E))
 64          
 65          self.recursive_var = tk.BooleanVar(value=True)
 66          ttk.Checkbutton(file_frame, text="Search Subdirectories", 
 67                         variable=self.recursive_var).grid(row=0, column=2, padx=(10, 0))
 68          
 69          ttk.Button(file_frame, text="Load Data", 
 70                    command=self.load_data).grid(row=0, column=3, padx=(10, 0))
 71          
 72  
 73          ttk.Button(file_frame, text="Export JSON Analysis", 
 74                    command=self.export_json_analysis).grid(row=0, column=4, padx=(10, 0))
 75  
 76          # JSON Structure Display
 77          json_frame = ttk.LabelFrame(main_frame, text="JSON Structure Preview", padding="5")
 78          json_frame.grid(row=1, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))
 79          json_frame.columnconfigure(0, weight=1)
 80          json_frame.rowconfigure(0, weight=1)  # This makes the text widget expand
 81  
 82          # Create a Text widget for JSON preview
 83          self.json_structure_text = tk.Text(json_frame, wrap=tk.WORD, height=10)
 84          self.json_structure_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
 85  
 86          # Create a Scrollbar
 87          scrollbar = ttk.Scrollbar(json_frame, orient=tk.VERTICAL, command=self.json_structure_text.yview)
 88          scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S, tk.E))
 89  
 90          # Link the Text widget to the Scrollbar
 91          self.json_structure_text.config(yscrollcommand=scrollbar.set)
 92  
 93          # Insert sample text (replace with your JSON string)
 94          self.json_structure_text.insert(tk.END, "No data loaded - JSON structure will appear here")
 95          
 96          # Main content area
 97          content_frame = ttk.Frame(main_frame)
 98          content_frame.grid(row=2, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S))
 99          content_frame.columnconfigure(0, weight=1)
100          content_frame.columnconfigure(1, weight=1)
101          content_frame.rowconfigure(0, weight=1)
102          
103          # Left panel - Filter creation
104          left_frame = ttk.LabelFrame(content_frame, text="Create Filters", padding="5")
105          left_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(0, 5))
106          left_frame.columnconfigure(0, weight=1)
107          left_frame.rowconfigure(2, weight=1)
108          
109          # Category filters
110          cat_frame = ttk.LabelFrame(left_frame, text="Category Filters", padding="5")
111          cat_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
112          cat_frame.columnconfigure(0, weight=1)
113  
114          self.category_listbox = tk.Listbox(cat_frame, selectmode=tk.MULTIPLE, height=6)
115          self.category_listbox.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=(5, 0))
116  
117          cat_btn_frame = ttk.Frame(cat_frame)
118          cat_btn_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(5, 0))
119  
120          ttk.Button(cat_btn_frame, text="Select All", 
121                    command=self.select_all_categories).pack(side=tk.LEFT, padx=(0, 5))
122          ttk.Button(cat_btn_frame, text="Clear Selection", 
123                    command=self.clear_category_selection).pack(side=tk.LEFT, padx=(0, 5))
124          ttk.Button(cat_btn_frame, text="Exclude Selected", 
125                    command=self.exclude_selected_categories).pack(side=tk.LEFT, padx=(0, 5))
126          ttk.Button(cat_btn_frame, text="Include Selected", 
127                    command=self.include_selected_categories).pack(side=tk.LEFT)
128          
129          # Website filters with category info
130          website_frame = ttk.LabelFrame(left_frame, text="Website Filters", padding="5")
131          website_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
132          website_frame.columnconfigure(0, weight=1)
133          
134          # Search box for websites
135          search_frame = ttk.Frame(website_frame)
136          search_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=(0, 5))
137          
138          ttk.Label(search_frame, text="Search:").pack(side=tk.LEFT)
139          self.website_search_var = tk.StringVar()
140          self.website_search_var.trace('w', self.filter_websites)
141          ttk.Entry(search_frame, textvariable=self.website_search_var).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(5, 0))
142          
143          # Category filter for websites
144          ttk.Label(search_frame, text="Category:").pack(side=tk.LEFT, padx=(10, 0))
145          self.website_category_var = tk.StringVar(value="All Categories")
146          self.website_category_combo = ttk.Combobox(search_frame, textvariable=self.website_category_var, state="readonly")
147          self.website_category_combo.pack(side=tk.LEFT, padx=(5, 0))
148          self.website_category_combo.bind('<<ComboboxSelected>>', self.filter_websites_by_category)
149          
150          self.website_listbox = tk.Listbox(website_frame, selectmode=tk.MULTIPLE, height=6)
151          self.website_listbox.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(5, 0))
152          
153          website_btn_frame = ttk.Frame(website_frame)
154          website_btn_frame.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=(5, 0))
155          
156          ttk.Button(website_btn_frame, text="Select All", 
157                    command=self.select_all_websites).pack(side=tk.LEFT, padx=(0, 5))
158          ttk.Button(website_btn_frame, text="Clear Selection", 
159                    command=self.clear_website_selection).pack(side=tk.LEFT, padx=(0, 5))
160          ttk.Button(website_btn_frame, text="Exclude Selected", 
161                    command=self.exclude_selected_websites).pack(side=tk.LEFT)
162          ttk.Button(website_btn_frame, text="Include Selected", 
163                    command=self.include_selected_websites).pack(side=tk.LEFT)
164          
165          # Custom filter section
166          custom_frame = ttk.LabelFrame(left_frame, text="Custom Filters", padding="5")
167          custom_frame.grid(row=2, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
168          custom_frame.columnconfigure(1, weight=1)
169          
170          ttk.Label(custom_frame, text="Field:").grid(row=0, column=0, sticky=tk.W, pady=(0, 5))
171          self.custom_field_var = tk.StringVar()
172          custom_field_combo = ttk.Combobox(custom_frame, textvariable=self.custom_field_var, 
173                                           values=self.available_fields, state="readonly")
174          custom_field_combo.grid(row=0, column=1, sticky=(tk.W, tk.E), pady=(0, 5), padx=(5, 0))
175          custom_field_combo.set('cat')
176          
177          ttk.Label(custom_frame, text="Operator:").grid(row=1, column=0, sticky=tk.W, pady=(0, 5))
178          self.custom_operator_var = tk.StringVar()
179          custom_operator_combo = ttk.Combobox(custom_frame, textvariable=self.custom_operator_var,
180                                              values=self.operators, state="readonly")
181          custom_operator_combo.grid(row=1, column=1, sticky=(tk.W, tk.E), pady=(0, 5), padx=(5, 0))
182          custom_operator_combo.set('!=')
183          
184          ttk.Label(custom_frame, text="Value:").grid(row=2, column=0, sticky=tk.W, pady=(0, 5))
185          self.custom_value_var = tk.StringVar()
186          ttk.Entry(custom_frame, textvariable=self.custom_value_var).grid(row=2, column=1, sticky=(tk.W, tk.E), 
187                                                                          pady=(0, 5), padx=(5, 0))
188          
189          ttk.Button(custom_frame, text="Add Custom Filter", 
190                    command=self.add_custom_filter).grid(row=3, column=0, columnspan=2, pady=(5, 0))
191          
192          # Right panel - Current filters and output
193          right_frame = ttk.LabelFrame(content_frame, text="Current Filters & Output", padding="5")
194          right_frame.grid(row=0, column=1, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(5, 0))
195          right_frame.columnconfigure(0, weight=1)
196          right_frame.rowconfigure(1, weight=1)
197          
198          # Current filters with reordering
199          ttk.Label(right_frame, text="Current Filters (drag to reorder):").grid(row=0, column=0, sticky=tk.W)
200          
201          filter_btn_frame = ttk.Frame(right_frame)
202          filter_btn_frame.grid(row=0, column=0, sticky=(tk.E), pady=(0, 5))
203          
204          ttk.Button(filter_btn_frame, text="Clear All", 
205                    command=self.clear_all_filters).pack(side=tk.RIGHT, padx=(5, 0))
206          ttk.Button(filter_btn_frame, text="Remove Selected", 
207                    command=self.remove_selected_filter).pack(side=tk.RIGHT)
208          
209          # Use a Listbox for filters but we'll handle reordering manually
210          self.filters_listbox = tk.Listbox(right_frame, height=10)
211          self.filters_listbox.grid(row=1, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))
212          
213          # Add move up/down buttons for reordering
214          reorder_frame = ttk.Frame(right_frame)
215          reorder_frame.grid(row=2, column=0, sticky=(tk.W, tk.E))
216              
217          ttk.Button(reorder_frame, text="Move Up", 
218                    command=self.move_filter_up).pack(side=tk.LEFT, padx=(0, 5))
219          ttk.Button(reorder_frame, text="Move Down", 
220                    command=self.move_filter_down).pack(side=tk.LEFT)
221          
222          # Generated filter string
223          ttk.Label(right_frame, text="Generated Filter String:").grid(row=3, column=0, sticky=tk.W, pady=(10, 0))
224          
225          self.filter_output_text = scrolledtext.ScrolledText(right_frame, height=6, width=50)
226          self.filter_output_text.grid(row=4, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(5, 0))
227          
228          # Action buttons
229          action_frame = ttk.Frame(right_frame)
230          action_frame.grid(row=5, column=0, sticky=(tk.W, tk.E), pady=(10, 0))
231          
232          ttk.Button(action_frame, text="Generate Filter", 
233                    command=self.generate_filter).pack(side=tk.LEFT, padx=(0, 5))
234          ttk.Button(action_frame, text="Save to File", 
235                    command=self.save_to_file).pack(side=tk.LEFT, padx=(0, 5))
236          ttk.Button(action_frame, text="Copy to Clipboard", 
237                    command=self.copy_to_clipboard).pack(side=tk.LEFT)
238          
239          # Store original website list for filtering
240          self.all_websites = []
241          self.all_websites_with_categories = []  # Store (website, category) tuples
242          
243          # Initialize UI state
244          self.update_ui_state()
245      def export_json_analysis(self):
246          """Export analysis for each JSON file"""
247          if not self.loaded_files:
248              messagebox.showwarning("Warning", "No data loaded to export")
249              return
250          
251          # Ask for export directory
252          export_dir = filedialog.askdirectory(title="Select Export Directory")
253          if not export_dir:
254              return
255          
256          try:
257              # Create analysis for each file
258              for file_path in self.loaded_files:
259                  self.export_single_file_analysis(file_path, export_dir)
260              
261              messagebox.showinfo("Success", f"JSON analysis exported to:\n{export_dir}")
262              
263          except Exception as e:
264              messagebox.showerror("Error", f"Failed to export analysis: {str(e)}")
265  
266      def export_single_file_analysis(self, file_path, export_dir):
267          """Export analysis for a single JSON file"""
268          if file_path not in self.file_entries:
269              return
270          
271          entries = self.file_entries[file_path]
272          if not entries:
273              return
274          
275          # Create filename for export
276          base_name = os.path.splitext(os.path.basename(file_path))[0]
277          export_filename = f"{base_name}_analysis.json"
278          export_path = os.path.join(export_dir, export_filename)
279          
280          # Analyze the file data
281          analysis = {
282          # Kept for a simple debug
283              # "source_file": file_path,
284              "relative_path": self.get_relative_source_path(file_path),
285              "total_entries": len(entries),
286              "export_timestamp": str(datetime.now()),
287              "categories": {},
288              "websites": {},
289              "unique_fields": set(),
290              "sample_entries": []
291          }
292          
293          # Analyze categories and websites
294          for entry in entries:
295              # Track unique fields
296              analysis["unique_fields"].update(entry.keys())
297              
298              # Analyze categories
299              if 'category' in entry and entry['category']:
300                  category = str(entry['category'])
301                  if category not in analysis["categories"]:
302                      analysis["categories"][category] = 0
303                  analysis["categories"][category] += 1
304              
305              # Analyze websites
306              if 'name' in entry and entry['name']:
307                  website = str(entry['name'])
308                  if website not in analysis["websites"]:
309                      analysis["websites"][website] = 0
310                  analysis["websites"][website] += 1
311          
312          # Convert set to list for JSON serialization
313          analysis["unique_fields"] = list(analysis["unique_fields"])
314          
315          # Add sample entries (first 10)
316          for entry in entries[:1000]:
317              sample_entry = {}
318              for key, value in entry.items():
319                  if key not in {'url', 'status', 'metadata'}:  # Exclude sensitive/verbose fields
320                      sample_entry[key] = value
321              analysis["sample_entries"].append(sample_entry)
322          
323          # Sort categories and websites by count
324          analysis["categories"] = dict(sorted(
325              analysis["categories"].items(), 
326              key=lambda x: x[1], 
327              reverse=True
328          ))
329          analysis["websites"] = dict(sorted(
330              analysis["websites"].items(), 
331              key=lambda x: x[1], 
332              reverse=True
333          ))
334          
335          # Write analysis to file
336          with open(export_path, 'w', encoding='utf-8') as f:
337              json.dump(analysis, f, indent=2, ensure_ascii=False)
338  
339      def generate_summary_report(self, export_dir):
340          """Generate a summary report of all files"""
341          summary = {
342              "export_timestamp": str(datetime.now()),
343              "total_files_analyzed": len(self.loaded_files),
344              "total_entries": len(self.loaded_data),
345              "files": []
346          }
347  
348          for file_path in self.loaded_files:
349              if file_path in self.file_entries:
350                  entries = self.file_entries[file_path]
351                  # Count categories
352                  categories = {}
353                  for entry in entries:
354                      if 'category' in entry and entry['category']:
355                          category = str(entry['category'])
356                          categories[category] = categories.get(category, 0) + 1
357                  # Sort categories by count
358                  categories = dict(sorted(categories.items(), key=lambda x: x[1], reverse=True))
359                  file_info = {
360                      "relative_path": self.get_relative_source_path(file_path),
361                      "entry_count": len(entries),
362                      "categories_count": len(categories),
363                      "categories": categories,  # Include category breakdown
364                      "websites_count": len(self.get_unique_values(entries, 'name'))
365                  }
366                  summary["files"].append(file_info)
367  
368          # Write summary report
369          summary_path = os.path.join(export_dir, "summary_report.json")
370          with open(summary_path, 'w', encoding='utf-8') as f:
371              json.dump(summary, f, indent=2, ensure_ascii=False)
372  
    # This definition overrides the earlier export_json_analysis in this class; it also writes the summary report.
374      def export_json_analysis(self):
375          """Export analysis for each JSON file"""
376          if not self.loaded_files:
377              messagebox.showwarning("Warning", "No data loaded to export")
378              return
379          
380          # Ask for export directory
381          export_dir = filedialog.askdirectory(title="Select Export Directory")
382          if not export_dir:
383              return
384          
385          try:
386              # Create analysis for each file
387              for file_path in self.loaded_files:
388                  self.export_single_file_analysis(file_path, export_dir)
389              
390              # Generate summary report
391              self.generate_summary_report(export_dir)
392              
393              messagebox.showinfo("Success", f"JSON analysis exported to:\n{export_dir}\n\n"
394                                            f"• Individual file analysis: {len(self.loaded_files)} files\n"
395                                            f"• Summary report: summary_report.json")
396              
397          except Exception as e:
398              messagebox.showerror("Error", f"Failed to export analysis: {str(e)}")
399      
400      def browse_file(self):
401          path = filedialog.askdirectory(title="Select Directory with JSON Files")
402          if not path:
403              path = filedialog.askopenfilename(
404                  title="Select JSON File",
405                  filetypes=[("JSON files", "*.json"), ("All files", "*.*")]
406              )
407          if path:
408              self.file_path_var.set(path)
409      
410      def load_data(self):
411          path = self.file_path_var.get()
412          if not path or not os.path.exists(path):
413              messagebox.showerror("Error", "Please select a valid file or directory")
414              return
415          
416          try:
417              self.loaded_data, self.loaded_files = self.load_json_files(path, self.recursive_var.get())
418              if not self.loaded_data:
419                  messagebox.showwarning("Warning", "No data loaded from the selected path")
420                  return
421              
422              # Build site-category relationships with sources
423              self.build_site_category_relationships()
424              
425              # Update JSON structure display
426              self.update_json_structure_display()
427              
428              self.populate_category_list()
429              self.populate_website_list()
430              self.update_ui_state()
431              
432          except Exception as e:
433              messagebox.showerror("Error", f"Failed to load data: {str(e)}")
434      
435      def update_json_structure_display(self):
436          """Show the actual JSON structure found in the files"""
437          # Clear the text widget first
438          self.json_structure_text.delete(1.0, tk.END)
439          
440          if not self.loaded_data:
441              self.json_structure_text.insert(tk.END, "No data loaded")
442              return
443          
444          # Fields to exclude from display
445          exclude_fields = {'url', 'status', 'metadata'}
446          
447          # Analyze the first few entries to show JSON structure
448          sample_entries = self.loaded_data[:1000]  # Show first 1000 entries as samples
449          
450          structure_info = "JSON Structure Found:\n\n"
451          
452          for i, entry in enumerate(sample_entries):
453              # Extract username from URL if available, otherwise use index
454              username = f"Entry {i+1}"
455              if 'url' in entry and entry['url']:
456                  # Extract username from URL like "https://t.me/cssunshine"
457                  url = entry['url']
458                  if '/' in url:
459                      # Get the last part of the URL after the last slash
460                      username = url.split('/')[-1]
461              
462              # Find which file this entry came from
463              file_source = "Unknown source"
464              for file_path, entries in self.file_entries.items():
465                  if entry in entries:
466                      # Get relative path from the base directory
467                      rel_path = self.get_relative_source_path(file_path)
468                      file_source = rel_path
469                      break
470              
471              structure_info += f"Sample {username} (from: {file_source}):\n"
472              for key, value in entry.items():
473                  # Skip excluded fields
474                  if key in exclude_fields:
475                      continue
476                  structure_info += f"  \"{key}\": \"{value}\"\n"
477              structure_info += "\n"
478          
479          # Show field mapping
480          structure_info += "Field Mapping (JSON → Blackbird):\n"
481          for json_field, blackbird_field in self.field_mapping.items():
482              if any(json_field in entry for entry in self.loaded_data):
483                  unique_count = len(self.get_unique_values(self.loaded_data, json_field))
484                  structure_info += f"  \"{json_field}\" → {blackbird_field} ({unique_count} unique values)\n"
485          
486          # Insert the structure info into the Text widget
487          self.json_structure_text.insert(tk.END, structure_info)
488      
489      def build_site_category_relationships(self):
490          """Build mappings between sites and their categories, and track sources"""
491          self.site_categories = {}
492          self.category_sites = {}
493          self.site_sources = {}
494          self.category_sources = {}
495          
496          for file_path, entries in self.file_entries.items():
497              for item in entries:
498                  # Use exact JSON field names
499                  if 'name' in item and item['name']:
500                      site_name = str(item['name'])
501                      
502                      # Map site to sources
503                      if site_name not in self.site_sources:
504                          self.site_sources[site_name] = []
505                      if file_path not in self.site_sources[site_name]:
506                          self.site_sources[site_name].append(file_path)
507                      
508                      if 'category' in item and item['category']:
509                          category = str(item['category'])
510                          
511                          # Map site to category
512                          self.site_categories[site_name] = category
513                          
514                          # Map category to sites
515                          if category not in self.category_sites:
516                              self.category_sites[category] = []
517                          if site_name not in self.category_sites[category]:
518                              self.category_sites[category].append(site_name)
519                          
520                          # Map category to sources
521                          if category not in self.category_sources:
522                              self.category_sources[category] = []
523                          if file_path not in self.category_sources[category]:
524                              self.category_sources[category].append(file_path)
525      
526      def load_json_files(self, path: str, recursive: bool = True) -> Tuple[List[Dict[str, Any]], List[str]]:
527          data = []
528          loaded_files = []
529          
530          if os.path.isfile(path):
531              file_data = self._load_single_file(path)
532              if file_data:
533                  data.extend(file_data)
534                  loaded_files.append(path)
535                  # Track source for this file
536                  self.file_entries[path] = file_data
537          elif os.path.isdir(path):
538              if recursive:
539                  json_files = self._find_json_files_recursive(path)
540              else:
541                  json_pattern = os.path.join(path, "*.json")
542                  json_files = glob.glob(json_pattern)
543              
544              for json_file in json_files:
545                  file_data = self._load_single_file(json_file)
546                  if file_data:
547                      data.extend(file_data)
548                      loaded_files.append(json_file)
549                      # Track source for this file
550                      self.file_entries[json_file] = file_data
551          
552          return data, loaded_files
553      
554      def _find_json_files_recursive(self, directory: str) -> List[str]:
555          json_files = []
556          for root, dirs, files in os.walk(directory):
557              for file in files:
558                  if file.lower().endswith('.json'):
559                      full_path = os.path.join(root, file)
560                      json_files.append(full_path)
561          return json_files
562      
563      def _load_single_file(self, file_path: str) -> List[Dict[str, Any]]:
564          try:
565              with open(file_path, 'r', encoding='utf-8') as file:
566                  data = json.load(file)
567              if isinstance(data, list):
568                  return data
569              elif isinstance(data, dict):
570                  return [data]
571              return []
572          except Exception as e:
573              print(f"Error loading {file_path}: {e}")
574              return []
575      
576      def get_unique_values(self, data: List[Dict[str, Any]], json_field: str) -> List[str]:
577          values = set()
578          for item in data:
579              if json_field in item and item[json_field] is not None:
580                  values.add(str(item[json_field]))
581          return sorted(list(values))
582      
583      def get_relative_source_path(self, full_path):
584          """Convert full file path to a more readable relative format"""
585          base_path = self.file_path_var.get()
586          if base_path and full_path.startswith(base_path):
587              return os.path.relpath(full_path, base_path)
588          
589          # If no base path or path doesn't match, show the last 2 directory components
590          dirname = os.path.dirname(full_path)
591          basename = os.path.basename(full_path)
592          parent_dir = os.path.basename(os.path.dirname(dirname))
593          current_dir = os.path.basename(dirname)
594          
595          if parent_dir and parent_dir != current_dir:
596              return os.path.join(parent_dir, current_dir, basename)
597          else:
598              return os.path.join(current_dir, basename)
599  
600      def populate_category_list(self):
601          self.category_listbox.delete(0, tk.END)
602          categories = self.get_unique_values(self.loaded_data, 'category')
603          
604          # Count occurrences and sites per category
605          cat_count = {}
606          for item in self.loaded_data:
607              if 'category' in item and item['category']:
608                  cat = str(item['category'])
609                  cat_count[cat] = cat_count.get(cat, 0) + 1
610          
611          # Sort by count descending
612          sorted_categories = sorted(categories, key=lambda x: (-cat_count.get(x, 0), x))
613          
614          for cat in sorted_categories:
615              count = cat_count.get(cat, 0)
616              site_count = len(self.category_sites.get(cat, []))
617              source_count = len(self.category_sources.get(cat, []))
618              
619              source_info = f" [from {source_count} sources]" if source_count > 1 else ""
620              self.category_listbox.insert(tk.END, f"{cat} ({count} entries, {site_count} sites{source_info})")
621      
622      def populate_website_list(self):
623          self.website_listbox.delete(0, tk.END)
624          
625          # Build list of websites with their categories and sources
626          self.all_websites_with_categories = []
627          site_count = {}
628          
629          for item in self.loaded_data:
630              if 'name' in item and item['name']:
631                  name = str(item['name'])
632                  category = self.site_categories.get(name, "Unknown")
633                  site_count[name] = site_count.get(name, 0) + 1
634                  self.all_websites_with_categories.append((name, category))
635          
636          # Remove duplicates and sort by count descending
637          unique_sites = {}
638          for name, category in self.all_websites_with_categories:
639              if name not in unique_sites:
640                  source_count = len(self.site_sources.get(name, []))
641                  source_info = f" [{source_count} sources]" if source_count > 1 else ""
642                  unique_sites[name] = (category, site_count.get(name, 0), source_info)
643          
644          sorted_websites = sorted(unique_sites.items(), key=lambda x: (-x[1][1], x[0]))
645          self.all_websites = [site[0] for site in sorted_websites]
646          
647          # Populate website list with category and source info
648          for website, (category, count, source_info) in sorted_websites:
649              self.website_listbox.insert(tk.END, f"{website} [{category}] ({count} entries{source_info})")
650          
651          # Populate category filter for websites
652          categories = ["All Categories"] + sorted(self.get_unique_values(self.loaded_data, 'category'))
653          self.website_category_combo['values'] = categories
654          self.website_category_combo.set("All Categories")
655      
656      def filter_websites(self, *args):
657          search_term = self.website_search_var.get().lower()
658          selected_category = self.website_category_var.get()
659          self.website_listbox.delete(0, tk.END)
660          
661          # Rebuild the display list with source information
662          display_data = {}
663          for website, category in self.all_websites_with_categories:
664              if website not in display_data:
665                  count = 0
666                  for item in self.loaded_data:
667                      if 'name' in item and item['name'] and str(item['name']) == website:
668                          count += 1
669                  source_count = len(self.site_sources.get(website, []))
670                  source_info = f" [{source_count} sources]" if source_count > 1 else ""
671                  display_data[website] = (category, count, source_info)
672          
673          for website, (category, count, source_info) in display_data.items():
674              # Apply search filter
675              matches_search = search_term in website.lower()
676              
677              # Apply category filter
678              matches_category = (selected_category == "All Categories" or selected_category == category)
679              
680              if matches_search and matches_category:
681                  self.website_listbox.insert(tk.END, f"{website} [{category}] ({count} entries{source_info})")
682      
683      def filter_websites_by_category(self, event=None):
684          """Filter websites when category selection changes"""
685          self.filter_websites()
686      
687      def select_all_categories(self):
688          self.category_listbox.select_set(0, tk.END)
689      
690      def clear_category_selection(self):
691          self.category_listbox.selection_clear(0, tk.END)
692      
693      def select_all_websites(self):
694          self.website_listbox.select_set(0, tk.END)
695      
696      def clear_website_selection(self):
697          self.website_listbox.selection_clear(0, tk.END)
698      
699      def exclude_selected_categories(self):
700          selected_indices = self.category_listbox.curselection()
701          for idx in selected_indices:
702              item_text = self.category_listbox.get(idx)
703              # Extract category name (remove count part)
704              category = item_text.split(' (')[0]
705              # Exclude the entire category - using 'cat' for Blackbird filter
706              self.add_filter('cat', '!=', category)
707          self.update_filters_display()
708  
709      def include_selected_categories(self):
710          selected_indices = self.category_listbox.curselection()
711          for idx in selected_indices:
712              item_text = self.category_listbox.get(idx)
713              # Extract category name (remove count part)
714              category = item_text.split(' (')[0]
715              # Include the entire category - using 'cat' for Blackbird filter
716              self.add_filter('cat', '=', category)
717          self.update_filters_display()
718      
719      def exclude_selected_websites(self):
720          selected_indices = self.website_listbox.curselection()
721          for idx in selected_indices:
722              item_text = self.website_listbox.get(idx)
723              # Extract website name (remove category and count parts)
724              website = item_text.split(' [')[0]
725              # Exclude website - using 'name' for Blackbird filter
726              self.add_filter('name', '!=', website)
727          self.update_filters_display()
728      
729      def include_selected_websites(self):
730          selected_indices = self.website_listbox.curselection()
731          for idx in selected_indices:
732              item_text = self.website_listbox.get(idx)
733              # Extract website name (remove category and count parts)
734              website = item_text.split(' [')[0]
735              # Include website - using 'name' for Blackbird filter
736              self.add_filter('name', '=', website)
737          self.update_filters_display()
738      
739      def add_custom_filter(self):
740          field = self.custom_field_var.get()
741          operator = self.custom_operator_var.get()
742          value = self.custom_value_var.get()
743          
744          if not field or not operator or not value:
745              messagebox.showwarning("Warning", "Please fill in all custom filter fields")
746              return
747          
748          self.add_filter(field, operator, value)
749          self.custom_value_var.set('')  # Clear value field
750          self.update_filters_display()
751      
752      def add_filter(self, filter_field: str, operator: str, value: str):
753          # Use single quotes for values to avoid escaping issues with Blackbird
754          if operator in ['=', '~', '!='] and (' ' in value or any(char in value for char in ['"', "'", '\\'])):
755              # Use single quotes and escape any existing single quotes
756              escaped_value = value.replace("'", "\\'")
757              filter_str = f"{filter_field}{operator}'{escaped_value}'"
758          else:
759              # For numeric operators or values without special characters
760              filter_str = f"{filter_field}{operator}{value}"
761          
762          self.filters.append(filter_str)
763      
764      def remove_selected_filter(self):
765          selected_indices = self.filters_listbox.curselection()
766          for idx in selected_indices[::-1]:  # Reverse to maintain indices
767              if 0 <= idx < len(self.filters):
768                  self.filters.pop(idx)
769          self.update_filters_display()
770      
771      def move_filter_up(self):
772          selected_indices = self.filters_listbox.curselection()
773          if not selected_indices:
774              return
775          
776          idx = selected_indices[0]
777          if idx > 0:
778              # Swap with previous filter
779              self.filters[idx], self.filters[idx-1] = self.filters[idx-1], self.filters[idx]
780              self.update_filters_display()
781              self.filters_listbox.select_set(idx-1)
782      
783      def move_filter_down(self):
784          selected_indices = self.filters_listbox.curselection()
785          if not selected_indices:
786              return
787          
788          idx = selected_indices[0]
789          if idx < len(self.filters) - 1:
790              # Swap with next filter
791              self.filters[idx], self.filters[idx+1] = self.filters[idx+1], self.filters[idx]
792              self.update_filters_display()
793              self.filters_listbox.select_set(idx+1)
794      
795      def clear_all_filters(self):
796          self.filters.clear()
797          self.update_filters_display()
798      
799      def update_filters_display(self):
800          self.filters_listbox.delete(0, tk.END)
801          for filter_str in self.filters:
802              self.filters_listbox.insert(tk.END, filter_str)
803      
804      def generate_filter(self):
805          # Use a proper joining method that handles the logical operators correctly
806          filter_string = self.join_filters_safely()
807          self.filter_output_text.delete(1.0, tk.END)
808          self.filter_output_text.insert(1.0, filter_string)
809          
810          # Show a warning if there are potential issues
811          self.validate_filters()
812      
813      def join_filters_safely(self) -> str:
814          """
815          Safely join filters with 'and' operators, ensuring proper formatting
816          that won't break Blackbird's parser
817          """
818          if not self.filters:
819              return ""
820          
821          # Simply join with ' and ' - Blackbird should handle this correctly
822          # The individual filters are already properly formatted with quotes
823          return " and ".join(self.filters)
824      
825      def validate_filters(self):
826          """Check for potential issues in the generated filter"""
827          warnings = []
828          
829          # Check for duplicate filters
830          seen = set()
831          duplicates = set()
832          for filter_str in self.filters:
833              if filter_str in seen:
834                  duplicates.add(filter_str)
835              seen.add(filter_str)
836          
837          if duplicates:
838              warnings.append(f"Duplicate filters found: {', '.join(duplicates)}")
839          
840          # Check for proper quoting
841          for i, filter_str in enumerate(self.filters):
842              # Look for unquoted values with spaces
843              if ' ' in filter_str and "'" not in filter_str and '"' not in filter_str:
844                  parts = filter_str.split(' ', 1)
845                  if len(parts) == 2:
846                      field_op = parts[0]
847                      value = parts[1]
848                      if any(op in field_op for op in ['=', '~', '!=']) and ' ' in value:
849                          warnings.append(f"Filter '{filter_str}' might need quotes around values with spaces")
850          
851          # Check for mixed quote types (though single quotes are preferred now)
852          for filter_str in self.filters:
853              if '"' in filter_str and "'" in filter_str:
854                  warnings.append(f"Filter '{filter_str}' uses both single and double quotes")
855          
856          if warnings:
857              messagebox.showwarning("Filter Validation", "\n".join(warnings))
858      
859      def save_to_file(self):
860          filter_string = self.join_filters_safely()
861          if not filter_string:
862              messagebox.showwarning("Warning", "No filters to save")
863              return
864          
865          filename = filedialog.asksaveasfilename(
866              title="Save Filter",
867              defaultextension=".txt",
868              filetypes=[("Text files", "*.txt"), ("All files", "*.*")]
869          )
870          
871          if filename:
872              try:
873                  with open(filename, 'w', encoding='utf-8') as file:
874                      file.write(filter_string)
875                  messagebox.showinfo("Success", f"Filter saved to:\n{filename}")
876              except Exception as e:
877                  messagebox.showerror("Error", f"Failed to save file: {str(e)}")
878      
879      def copy_to_clipboard(self):
880          filter_string = self.join_filters_safely()
881          if not filter_string:
882              messagebox.showwarning("Warning", "No filters to copy")
883              return
884          
885          self.root.clipboard_clear()
886          self.root.clipboard_append(filter_string)
887          messagebox.showinfo("Success", "Filter copied to clipboard!")
888      
889      def update_ui_state(self):
890          has_data = len(self.loaded_data) > 0
891          state = tk.NORMAL if has_data else tk.DISABLED
892          
893          # Enable/disable widgets based on data availability
894          self.category_listbox.config(state=state)
895          self.website_listbox.config(state=state)
896          self.website_category_combo.config(state=state)
897  
def main():
    """Create the Tk root window, build the GUI in it and run the event loop."""
    window = tk.Tk()
    # The constructor builds the whole interface inside the window.
    gui = BlackbirdFilterGeneratorGUI(window)
    window.mainloop()

if __name__ == "__main__":
    # Start the application only when this file is run as a script.
    main()