"""PodQuery: turn a research-paper PDF into a short, voiced two-host dialogue.

The app extracts text from an uploaded PDF, asks Gemini for a dialogue script
in JSON, and voices the script one line at a time with ElevenLabs. A Gradio
UI drives the three callbacks defined below.
"""

import json
import os

import google.generativeai as genai
import gradio as gr
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from pypdf import PdfReader

# 1️⃣ Load API Keys
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# Always bind `client` so callbacks can test for None instead of hitting a
# NameError when the ElevenLabs key is absent.
client = ElevenLabs(api_key=ELEVEN_API_KEY) if ELEVEN_API_KEY else None

GEMINI_MODEL = "gemini-2.0-flash"
TTS_MODEL = "eleven_monolingual_v1"
DEFAULT_VOICE_ID = "nPczCjz82tPNOwVbpGE2"
GOSSIP_HOST_B_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"

MAX_PDF_PAGES = 5  # only the first pages are read
SCRIPT_CONTEXT_CHARS = 4000  # paper excerpt size for script generation
QUESTION_CONTEXT_CHARS = 1000  # paper excerpt size for interrupt questions

DEFAULT_PERSONA = "Serious Academic"
PERSONA_PROMPTS = {
    "Serious Academic": "Two professors discussing the paper. Tone: Intellectual.",
    "Gossip Columnist": "Two gossip hosts reacting dramatically.",
}
KEYS_MISSING_MESSAGE = "⚠️ API Keys missing in HF Secrets!"


# 2️⃣ Podcast State
class PodcastState:
    """Mutable playback state shared by the Gradio callbacks."""

    def __init__(self):
        self.script = []  # validated dialogue turns: {"speaker": ..., "text": ...}
        self.current_index = 0  # index of the next turn to voice
        self.persona = DEFAULT_PERSONA
        self.full_text = ""  # extracted PDF text, reused for interrupt questions


# NOTE(review): this is a single module-level instance, so concurrent browser
# sessions would share one script/position — confirm that is acceptable.
state = PodcastState()


# 3️⃣ PDF Extraction
def _read_pdf_text(pdf_file) -> str:
    """Return the text of the first ``MAX_PDF_PAGES`` pages of ``pdf_file``.

    Pages without extractable text are skipped; each kept page is followed by
    a newline. Any exception raised by ``PdfReader`` propagates to the caller.
    """
    reader = PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages[:MAX_PDF_PAGES])
    return "".join(f"{page_text}\n" for page_text in page_texts if page_text)


def extract_text_from_pdf(pdf_file) -> str:
    """Extract text from the first pages of a PDF.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts (here: the value supplied by
            the ``gr.File`` input).

    Returns:
        The extracted text, or a string starting with ``"Error reading PDF:"``
        if reading failed. Callers that must distinguish failure from content
        should use ``_read_pdf_text`` and handle the exception instead.
    """
    try:
        return _read_pdf_text(pdf_file)
    except Exception as e:
        return f"Error reading PDF: {e}"


# 4️⃣ Generate Script
def _parse_script(raw_text: str) -> list:
    """Parse and validate the model's reply as a list of dialogue turns.

    Markdown code fences are stripped before parsing.

    Raises:
        ValueError: If the reply is not valid JSON, is not a non-empty list,
            or contains a turn without string ``speaker`` and ``text`` values.
    """
    clean_json = raw_text.replace("```json", "").replace("```", "").strip()
    script = json.loads(clean_json)  # JSONDecodeError is a ValueError
    if not isinstance(script, list) or not script:
        raise ValueError("model did not return a non-empty JSON list")
    for turn in script:
        is_valid_turn = (
            isinstance(turn, dict)
            and isinstance(turn.get("speaker"), str)
            and isinstance(turn.get("text"), str)
        )
        if not is_valid_turn:
            raise ValueError(f"malformed dialogue turn: {turn!r}")
    return script


def generate_script(pdf_file, persona_style):
    """Build a dialogue script from the uploaded PDF and store it in ``state``.

    Args:
        pdf_file: Uploaded PDF from the ``gr.File`` input; falsy when nothing
            has been uploaded.
        persona_style: Key into ``PERSONA_PROMPTS``; unknown values fall back
            to the default persona's prompt.

    Returns:
        A ``(status_message, script)`` tuple. ``script`` is the validated list
        of turns on success and ``{}`` on any failure.
    """
    if not pdf_file:
        return "⚠️ Upload PDF first.", {}
    if not GEMINI_API_KEY or not ELEVEN_API_KEY:
        return KEYS_MISSING_MESSAGE, {}

    # Read via the raising helper so an extraction failure is reported to the
    # user rather than being sent to the model as if it were the paper.
    try:
        pdf_text = _read_pdf_text(pdf_file)
    except Exception as e:
        return f"Error reading PDF: {e}", {}
    if not pdf_text.strip():
        return "⚠️ No extractable text found in the PDF.", {}

    state.full_text = pdf_text
    state.persona = persona_style

    persona_prompt = PERSONA_PROMPTS.get(
        persona_style, PERSONA_PROMPTS[DEFAULT_PERSONA]
    )
    system_prompt = f"""
    {persona_prompt}
    Based on the paper: "{pdf_text[:SCRIPT_CONTEXT_CHARS]}..."
    Generate a short 4-turn dialogue script.
    Return JSON ONLY:
    [
        {{"speaker":"Host A","text":"..."}},
        {{"speaker":"Host B","text":"..."}}
    ]
    """

    try:
        model = genai.GenerativeModel(GEMINI_MODEL)
        response = model.generate_content(system_prompt)
        script = _parse_script(response.text)
    except Exception as e:
        return f"Error generating script: {e}", {}

    state.script = script
    state.current_index = 0
    return "✅ Script ready!", script


def _synthesize_to_file(text: str, voice_id: str, save_path: str) -> str:
    """Voice ``text`` with ElevenLabs, write the audio to ``save_path``, return it.

    Raises:
        RuntimeError: If no ElevenLabs client was configured at import time.
    """
    if client is None:
        raise RuntimeError("ElevenLabs API key is not configured.")
    audio_stream = client.generate(text=text, voice=voice_id, model=TTS_MODEL)
    with open(save_path, "wb") as f:
        for chunk in audio_stream:
            f.write(chunk)
    return save_path


# 5️⃣ Play next line
def play_next_chunk():
    """Voice the next script line and advance the playback position.

    Returns:
        ``(audio_path, transcript)``. ``audio_path`` is ``None`` when there is
        no script, the script is finished, or synthesis failed; the position
        only advances after the audio file has been written.
    """
    if not state.script:
        return None, "⚠️ Generate a script first."
    if state.current_index >= len(state.script):
        return None, "🎉 Podcast finished."

    line = state.script[state.current_index]
    voice_id = DEFAULT_VOICE_ID
    if state.persona == "Gossip Columnist" and line["speaker"] == "Host B":
        voice_id = GOSSIP_HOST_B_VOICE_ID

    try:
        save_path = _synthesize_to_file(
            line["text"], voice_id, f"temp_{state.current_index}.mp3"
        )
    except Exception as e:
        return None, f"Audio error: {e}"

    state.current_index += 1
    return save_path, f"{line['speaker']}: {line['text']}"


# 6️⃣ Interrupt & Ask
def interrupt_and_ask(question):
    """Answer a user question about the paper in the current persona, with audio.

    Args:
        question: Free-text question from the UI textbox.

    Returns:
        ``(audio_path, answer_text)``; ``audio_path`` is ``None`` and the text
        carries a message when a precondition is unmet or a call fails.
    """
    if not GEMINI_API_KEY or client is None:
        return None, KEYS_MISSING_MESSAGE
    if not state.full_text:
        return None, "Upload a PDF first."
    if not question or not question.strip():
        return None, "⚠️ Type a question first."

    prompt = f"""
    Persona: {state.persona}
    Context: {state.full_text[:QUESTION_CONTEXT_CHARS]}
    User Question: {question}
    Answer briefly, then say "Anyway, back to the paper..."
    """

    try:
        model = genai.GenerativeModel(GEMINI_MODEL)
        answer = model.generate_content(prompt).text
        save_path = _synthesize_to_file(answer, DEFAULT_VOICE_ID, "interrupt.mp3")
    except Exception as e:
        return None, f"Error: {e}"
    return save_path, answer


# 7️⃣ Build Gradio 5.7 UI
with gr.Blocks() as demo:
    gr.Markdown("# 🎧 PodQuery — Research Paper Podcast Generator")

    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            persona = gr.Dropdown(
                list(PERSONA_PROMPTS),
                value=DEFAULT_PERSONA,
                label="Persona Style",
            )
            btn_gen = gr.Button("Generate Podcast Script")
            status = gr.Textbox(label="Status")
            script_display = gr.JSON(label="Generated Script")

        with gr.Column():
            player = gr.Audio(label="Audio Output", autoplay=True)
            transcript = gr.Textbox(label="Transcript")
            btn_play = gr.Button("▶️ Play Next Line")
            q_input = gr.Textbox(label="Ask a Question")
            btn_interrupt = gr.Button("✋ Interrupt Podcast")

    # Bind events
    btn_gen.click(generate_script, [pdf_input, persona], [status, script_display])
    btn_play.click(play_next_chunk, [], [player, transcript])
    btn_interrupt.click(interrupt_and_ask, [q_input], [player, transcript])

# 8️⃣ Launch
if __name__ == "__main__":
    demo.launch()