import yt_dlp
import requests
import json

# Upper bound (seconds) on the subtitle download. requests waits
# indefinitely when no timeout is given, so a stalled connection would
# otherwise hang the script.
REQUEST_TIMEOUT = 30

video_id = "2dGB9Fo4hnU"
url = f"https://www.youtube.com/watch?v={video_id}"


def _pick_track(tracks):
    """Return the preferred subtitle track from *tracks*, or None.

    *tracks* is the list of format dicts yt-dlp reports for one language.
    Entries without a 'url' are ignored. A track whose 'ext' is 'json3' is
    preferred; otherwise the first usable track is returned. Returns None
    when *tracks* is None, empty, or has no entry with a URL.
    """
    usable = [track for track in (tracks or []) if track.get('url')]
    for track in usable:
        if track.get('ext') == 'json3':
            return track
    return usable[0] if usable else None


def _json3_to_text(data):
    """Flatten a parsed json3 caption payload into a single string.

    Concatenates the 'utf8' field of every segment of every event that has
    a 'segs' list, appending one space after each such event.
    """
    parts = []
    for event in data.get('events', []):
        if 'segs' not in event:
            continue
        parts.extend(seg['utf8'] for seg in event['segs'] if 'utf8' in seg)
        parts.append(" ")
    return "".join(parts)


print("\n--- Method 2: yt-dlp ---")
try:
    ydl_opts = {
        'skip_download': True,
        'writesubtitles': True,
        'writeautomaticsub': True,
        'subtitleslangs': ['en'],
        'quiet': True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    # `or {}` also covers the key being present with a None value.
    subtitles = info.get('subtitles') or {}
    auto_captions = info.get('automatic_captions') or {}

    # Manual subtitles win; fall back to auto captions when there is no
    # usable manual track (missing language, empty list, or no URL).
    track = _pick_track(subtitles.get('en'))
    if track:
        print("Found manual English subtitles")
    else:
        track = _pick_track(auto_captions.get('en'))
        if track:
            print("Found auto English captions")

    sub_url = track['url'] if track else None

    if sub_url:
        print(f"Fetching: {sub_url}")
        r = requests.get(sub_url, timeout=REQUEST_TIMEOUT)
        # Surface HTTP errors here instead of as a confusing JSON decode
        # failure on an error page.
        r.raise_for_status()

        if track.get('ext') == 'json3':
            text = _json3_to_text(r.json())
        else:
            # The fallback track is not JSON (e.g. vtt/xml), so parsing it
            # as json3 would always fail; keep the raw body instead.
            print(f"Track format is {track.get('ext')!r}, not json3; using raw body")
            text = r.text

        print(f"Extracted text length: {len(text)}")
        print(f"Preview: {text[:100]}")
    else:
        print("No English subtitles found")
except Exception as e:
    # Top-level boundary of a best-effort script: report and carry on.
    print(f"Method 2 failed: {e}")