Spaces:

jeevzz
/

deep-research-ai

Running

deep-research-ai / check_yt.py

Upload 9 files

5263a14 verified 12 days ago

2 kB


	import yt_dlp
	import requests
	import json

	video_id = "2dGB9Fo4hnU"
	url = f"https://www.youtube.com/watch?v={video_id}"

	# Seconds to wait for the caption download; without a timeout
	# requests.get() can block forever on a stalled connection.
	REQUEST_TIMEOUT = 15


	def pick_track(tracks):
	    """Choose a caption track, preferring the json3 format.

	    Args:
	        tracks: List of track dicts carrying 'ext' and 'url' keys, as
	            found under a language key of the info dict's 'subtitles' /
	            'automatic_captions' mappings.

	    Returns:
	        A ``(url, ext)`` tuple for the first json3 track, else for the
	        first track of any format, else ``(None, None)`` when the list
	        is empty or missing.
	    """
	    if not tracks:
	        return None, None
	    for track in tracks:
	        if track.get('ext') == 'json3':
	            return track.get('url'), 'json3'
	    fallback = tracks[0]
	    return fallback.get('url'), fallback.get('ext')


	def json3_to_text(data):
	    """Flatten a parsed json3 caption payload into plain text.

	    Concatenates the 'utf8' value of every segment in every event that
	    has a 'segs' list, and appends a single space after each such event.
	    NOTE(review): the per-event placement of that space is assumed; the
	    original nesting of this statement was not recoverable.

	    Args:
	        data: Dict decoded from the json3 response body.

	    Returns:
	        The concatenated caption text ("" when there are no events).
	    """
	    pieces = []
	    for event in data.get('events') or []:
	        if 'segs' not in event:
	            continue
	        pieces.extend(seg['utf8'] for seg in event['segs'] if 'utf8' in seg)
	        pieces.append(" ")
	    return "".join(pieces)


	print("\n--- Method 2: yt-dlp ---")
	try:
	    ydl_opts = {
	        'skip_download': True,
	        'writesubtitles': True,
	        'writeautomaticsub': True,
	        'subtitleslangs': ['en'],
	        'quiet': True,
	    }

	    # Metadata only: download=False returns the info dict without media.
	    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	        info = ydl.extract_info(url, download=False)

	    # `or {}` also covers a key that is present but set to None.
	    subtitles = info.get('subtitles') or {}
	    auto_captions = info.get('automatic_captions') or {}

	    # Manual subtitles win over auto-generated captions.
	    sub_url = sub_ext = None
	    if 'en' in subtitles:
	        print("Found manual English subtitles")
	        sub_url, sub_ext = pick_track(subtitles['en'])
	    elif 'en' in auto_captions:
	        print("Found auto English captions")
	        sub_url, sub_ext = pick_track(auto_captions['en'])

	    if sub_url:
	        print(f"Fetching: {sub_url}")
	        r = requests.get(sub_url, timeout=REQUEST_TIMEOUT)
	        # Surface HTTP errors instead of trying to parse an error page.
	        r.raise_for_status()

	        if sub_ext == 'json3':
	            text = json3_to_text(r.json())
	        else:
	            # The fallback track is not json3 (e.g. vtt), so it cannot
	            # be decoded as JSON; report the raw body instead.
	            text = r.text
	        print(f"Extracted text length: {len(text)}")
	        print(f"Preview: {text[:100]}")
	    else:
	        print("No English subtitles found")

	# Top-level boundary of a diagnostic script: report any failure and exit
	# normally rather than dumping a traceback.
	except Exception as e:
	    print(f"Method 2 failed: {e}")