# video_to_text.py
import os
import subprocess
import tempfile
 3  
 4  def extract_video_text(video_path):
 5      """Extract text from video using multiple methods"""
 6      
 7      if not os.path.exists(video_path):
 8          print(f"❌ Video not found: {video_path}")
 9          return None
10      
11      print(f"🎬 Processing: {os.path.basename(video_path)}")
12      
13      # Method 1: Extract embedded subtitles
14      print("📝 Extracting subtitles...")
15      subtitle_cmd = f"ffmpeg -i '{video_path}' -map 0:s:0 subtitles.srt 2>/dev/null"
16      os.system(subtitle_cmd)
17      
18      if os.path.exists("subtitles.srt"):
19          with open("subtitles.srt", "r") as f:
20              subtitles = f.read()
21          print(f"   ✅ Found {len(subtitles)} characters of subtitles")
22      else:
23          subtitles = ""
24          print("   ⚠️ No embedded subtitles found")
25      
26      # Method 2: Extract metadata
27      print("📊 Extracting metadata...")
28      metadata_cmd = f"ffprobe -v quiet -print_format json -show_format -show_streams '{video_path}'"
29      result = subprocess.run(metadata_cmd, shell=True, capture_output=True, text=True)
30      
31      # Ask AI to analyze what we found
32      prompt = f"""Analyze this video file information:
33      
34      VIDEO: {os.path.basename(video_path)}
35      
36      METADATA:
37      {result.stdout[:1000] if result.returncode == 0 else "No metadata"}
38      
39      SUBTITLES (if any):
40      {subtitles[:1500]}
41      
42      Provide analysis:
43      1. What type of video is this? (duration, format, resolution)
44      2. Key information from subtitles/metadata
45      3. Recommended next steps for video analysis"""
46      
47      ai_result = subprocess.run(
48          ['ollama', 'run', 'tinyllama', prompt],
49          capture_output=True,
50          text=True,
51          timeout=60
52      )
53      
54      if ai_result.returncode == 0:
55          return ai_result.stdout
56      else:
57          return "AI analysis failed"
58  
# Example of a single, explicit video path (edit to match your drive).
video_path = "/run/media/unknown/ADATA HD710 PRO/VIDEO/sample.mp4"  # CHANGE THIS

# Directory scanned for videos when the script is run directly.
SEARCH_ROOT = "/run/media/unknown/ADATA HD710 PRO"

# Lower-case suffixes treated as video files.
VIDEO_EXTENSIONS = (".mp4", ".mov", ".avi")


def find_videos(root, limit=5, extensions=VIDEO_EXTENSIONS):
    """Return up to ``limit`` video file paths found under ``root``.

    The tree is walked top-down in sorted order so results are
    deterministic, and the walk stops as soon as ``limit`` files have been
    collected. Extension matching is case-insensitive, so ``CLIP.MP4`` is
    found as well as ``clip.mp4``. Only regular directory entries listed as
    files are considered; directories whose names end in a video extension
    are ignored.

    Args:
        root: Directory to search. A missing or unreadable directory yields
            an empty list.
        limit: Maximum number of paths to return.
        extensions: Tuple of lower-case filename suffixes to accept.

    Returns:
        A list of matching file paths (each prefixed with ``root``).
    """
    matches = []
    if limit <= 0:
        return matches
    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting in place makes os.walk descend in a stable order.
        dirnames.sort()
        for filename in sorted(filenames):
            if filename.lower().endswith(extensions):
                matches.append(os.path.join(dirpath, filename))
                if len(matches) >= limit:
                    return matches
    return matches


def main():
    """Analyse the first video found under SEARCH_ROOT and print the result."""
    print("🔍 Looking for video files...")
    videos = find_videos(SEARCH_ROOT)

    if videos:
        analysis = extract_video_text(videos[0])
        print("\n" + "="*60)
        print("📹 VIDEO ANALYSIS RESULT:")
        print("="*60)
        print(analysis)
    else:
        print("No video files found. Try a specific path.")


# Guarded so importing this module does not scan the drive or invoke the model.
if __name__ == "__main__":
    main()