import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone
from typing import List, Optional

import dropbox
from fastapi import HTTPException

from modules.dropbox.client import dbx

# Logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Cache: key = folder_path, value = {"timestamp": datetime, "data": List[dict]}
_audible_cache: dict[str, dict] = {}
CACHE_TTL = timedelta(hours=1)
FOLDER_PATH = "/_audibles"

async def fetch_audibles_from_dropbox() -> List[dict]:
    """
    Fetch all audible JSONs from Dropbox with in-memory caching.
    Expects files in "/_audibles/".
    """
    loop = asyncio.get_running_loop()
    folder_path = FOLDER_PATH

    # Check cache
    cache_entry = _audible_cache.get(folder_path)
    if cache_entry:
        age = datetime.now(timezone.utc) - cache_entry["timestamp"]
        if age < CACHE_TTL:
            logger.info(f"Using cached audibles for '{folder_path}' (age={age})")
            return cache_entry["data"]

    logger.info(f"Fetching audibles from Dropbox folder '{folder_path}'")
    audibles: List[dict] = []
    try:
        # List folder contents (synchronously in an executor), following the
        # cursor so folders with more than one page of entries are fully read
        res = await loop.run_in_executor(None, dbx.files_list_folder, folder_path)
        entries = list(res.entries)
        while res.has_more:
            res = await loop.run_in_executor(None, dbx.files_list_folder_continue, res.cursor)
            entries.extend(res.entries)

        for entry in entries:
            if isinstance(entry, dropbox.files.FileMetadata) and entry.name.lower().endswith(".json"):
                metadata, fres = await loop.run_in_executor(
                    None, dbx.files_download, f"{folder_path}/{entry.name}"
                )
                audibles.append(json.loads(fres.content.decode("utf-8")))

        # Update cache
        _audible_cache[folder_path] = {"timestamp": datetime.now(timezone.utc), "data": audibles}
        logger.info(f"Cached {len(audibles)} audibles for '{folder_path}'")
        return audibles
    except Exception as e:
        logger.error(f"Error fetching audibles from '{folder_path}'", exc_info=e)
        # Fall back to stale cached data if available
        if cache_entry:
            logger.warning(f"Returning stale cached audibles for '{folder_path}'")
            return cache_entry["data"]
        logger.warning(f"No cached audibles available for '{folder_path}'")
        return []
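
# Illustrative usage (a sketch, not part of this module): the first call hits
# Dropbox; subsequent calls within CACHE_TTL are served from memory.
#
#     audibles = await fetch_audibles_from_dropbox()  # network fetch
#     audibles = await fetch_audibles_from_dropbox()  # cached for up to 1 hour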

async def get_audible_summaries(page: int = 1, per_page: int = 10):
    """
    Return paginated summaries: id, topic_name, artwork_url.
    Sorted by topic_name (case-insensitive).
    """
    all_audibles = await fetch_audibles_from_dropbox()

    # Build summaries
    summaries = [
        {
            "id": d.get("id"),
            "topic_name": d.get("topic_name"),
            "artwork_url": d.get("artwork_url"),
        }
        for d in all_audibles
    ]
    summaries.sort(key=lambda x: (x.get("topic_name") or "").lower())

    # Pagination (always report at least one page, even with zero items)
    total_items = len(summaries)
    total_pages = max(1, (total_items + per_page - 1) // per_page)
    if page < 1 or page > total_pages:
        logger.warning(f"Invalid page {page}. Must be between 1 and {total_pages}")
        return {
            "page": page,
            "per_page": per_page,
            "total_pages": total_pages,
            "total_items": total_items,
            "data": [],
        }

    start = (page - 1) * per_page
    paginated = summaries[start:start + per_page]
    logger.debug(f"Returning {len(paginated)} audible summaries for page {page}")
    return {
        "page": page,
        "per_page": per_page,
        "total_pages": total_pages,
        "total_items": total_items,
        "data": paginated,
    }
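
# Example response shape (values are illustrative; field names assume each
# audible JSON carries "id", "topic_name", and "artwork_url" keys):
#
#     {
#         "page": 1,
#         "per_page": 10,
#         "total_pages": 3,
#         "total_items": 27,
#         "data": [{"id": 1, "topic_name": "...", "artwork_url": "..."}, ...],
#     }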

async def get_audible_by_id(topic_id: int) -> Optional[dict]:
    """
    Fetch a single audible JSON by topic_id from Dropbox.
    Uses in-memory caching per file.
    """
    loop = asyncio.get_running_loop()
    file_path = f"{FOLDER_PATH}/{topic_id}.json"

    # Check cache
    cache_entry = _audible_cache.get(file_path)
    if cache_entry:
        age = datetime.now(timezone.utc) - cache_entry["timestamp"]
        if age < CACHE_TTL:
            logger.info(f"Using cached audible for topic {topic_id} (age={age})")
            return cache_entry["data"]

    try:
        logger.info(f"Fetching audible {topic_id} from Dropbox: {file_path}")
        metadata, res = await loop.run_in_executor(None, dbx.files_download, file_path)
        audible = json.loads(res.content.decode("utf-8"))

        # Update cache
        _audible_cache[file_path] = {"timestamp": datetime.now(timezone.utc), "data": audible}
        return audible
    except dropbox.exceptions.ApiError as e:
        # files_download raises ApiError when the path does not exist
        logger.error(f"Dropbox file not found: {file_path}", exc_info=e)
        return None
    except Exception as e:
        logger.error(f"Error fetching audible {topic_id}", exc_info=e)
        # Fall back to stale cached data if available
        if cache_entry:
            logger.warning(f"Returning stale cached audible for topic {topic_id}")
            return cache_entry["data"]
        return None
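
# A minimal sketch of exposing this through a FastAPI route; the router and
# URL prefix here are assumptions, not something this module defines:
#
#     from fastapi import APIRouter
#
#     router = APIRouter()
#
#     @router.get("/audibles/{topic_id}")
#     async def read_audible(topic_id: int):
#         audible = await get_audible_by_id(topic_id)
#         if audible is None:
#             raise HTTPException(status_code=404, detail="Audible not found")
#         return audible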

# cache = { audible_path: {"url": ..., "expiry": ...} }
audible_audio_cache: dict[str, dict] = {}
AUDIBLE_CACHE_TTL = timedelta(hours=3, minutes=30)
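# Note: Dropbox temporary links stay valid for four hours, so the 3.5-hour TTL
# above keeps cached links safely inside their lifetime.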

async def get_audible_audio_url(audio_path: str):
    """
    Return a temporary Dropbox download URL for an audible audio file.
    Uses in-memory caching to avoid regenerating links too frequently.
    """
    if not audio_path:
        raise HTTPException(status_code=400, detail="audio_path is required")

    # Normalize path (ensure leading slash)
    dropbox_path = audio_path if audio_path.startswith("/") else f"/{audio_path}"
    now = datetime.now(timezone.utc)

    # 1️⃣ Check cache
    cached = audible_audio_cache.get(dropbox_path)
    if cached and cached["expiry"] > now:
        return {"audio_url": cached["url"]}

    # 2️⃣ Generate a fresh Dropbox temp link (in an executor, so the
    # synchronous SDK call does not block the event loop)
    loop = asyncio.get_running_loop()
    try:
        result = await loop.run_in_executor(None, dbx.files_get_temporary_link, dropbox_path)
        temp_link = result.link
    except dropbox.exceptions.ApiError:
        raise HTTPException(status_code=404, detail="Audible audio not found")

    # 3️⃣ Cache it
    audible_audio_cache[dropbox_path] = {
        "url": temp_link,
        "expiry": now + AUDIBLE_CACHE_TTL,
    }
    return {"audio_url": temp_link}
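
# Illustrative call (the audio path is a made-up example):
#
#     result = await get_audible_audio_url("_audibles/audio/42.mp3")
#     result["audio_url"]  # short-lived direct-download link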

async def cleanup_audible_audio_cache(interval_seconds: int = 600):
    """Periodically evict expired temporary-link entries from the cache."""
    while True:
        now = datetime.now(timezone.utc)
        expired = [k for k, v in audible_audio_cache.items() if v["expiry"] <= now]
        for k in expired:
            del audible_audio_cache[k]
        await asyncio.sleep(interval_seconds)
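
# A minimal sketch of scheduling the cleanup loop at application startup,
# assuming the FastAPI app is created elsewhere (this lifespan wiring is an
# assumption, not something this module sets up):
#
#     from contextlib import asynccontextmanager
#     from fastapi import FastAPI
#
#     @asynccontextmanager
#     async def lifespan(app: FastAPI):
#         task = asyncio.create_task(cleanup_audible_audio_cache())
#         yield
#         task.cancel()
#
#     app = FastAPI(lifespan=lifespan)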