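# NOTE: the three lines below are residue from the web page this file was
# copied from; they are kept as comments so the module remains valid Python.
# Emeritus-21's picture
# Update app.py
# 198f8e5 verified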
import gradio as gr
import google.generativeai as genai
from elevenlabs.client import ElevenLabs
import os
import json
from pypdf import PdfReader
from dotenv import load_dotenv
# 1️⃣ Load API Keys
# Read secrets from a local .env file (if any) and the process environment.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")

# Gemini is configured only when a key is available.
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# Always bind `client` so later references never raise NameError; it is None
# when no ElevenLabs key is configured.
client = ElevenLabs(api_key=ELEVEN_API_KEY) if ELEVEN_API_KEY else None
# 2️⃣ Podcast State
class PodcastState:
    """Mutable playback state for the podcast currently loaded in the app.

    Fields set by ``__init__``:
        script: list of dialogue turns (starts empty).
        current_index: position of the next turn to play (starts at 0).
        persona: selected persona label (starts as "Serious Academic").
        full_text: text extracted from the uploaded PDF (starts empty).
    """

    def __init__(self):
        # Nothing loaded yet: empty script, playback at the first turn.
        self.script, self.current_index = [], 0
        self.persona, self.full_text = "Serious Academic", ""


# Single module-level instance used by the callbacks in this file.
state = PodcastState()
# 3️⃣ PDF Extraction
def extract_text_from_pdf(pdf_file, max_pages=5):
    """Extract plain text from the leading pages of a PDF.

    Args:
        pdf_file: Passed unchanged to ``PdfReader``.
        max_pages: How many leading pages to read; ``None`` reads every page.
            Defaults to 5, the limit that used to be hard-coded.

    Returns:
        The text of each non-empty page followed by a newline, concatenated
        in page order. On any failure the string
        ``"Error reading PDF: <exception>"`` is returned instead of raising,
        so callers that need to distinguish failure must check that prefix.
    """
    try:
        reader = PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages[:max_pages])
        # Pages whose extracted text is empty are skipped entirely.
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
    except Exception as e:
        # Best-effort boundary: report the problem as text, never raise.
        return f"Error reading PDF: {e}"
# 4️⃣ Generate Script
def _parse_script(raw_text):
    """Pull the JSON array of dialogue turns out of a model reply and validate it."""
    # Slice from the first "[" to the last "]" so Markdown fences or stray
    # prose around the array do not break parsing.
    start, end = raw_text.find("["), raw_text.rfind("]")
    if start == -1 or end < start:
        raise ValueError("model reply did not contain a JSON array")
    turns = json.loads(raw_text[start:end + 1])
    if not isinstance(turns, list) or not turns:
        raise ValueError("model reply was not a non-empty JSON array")
    for turn in turns:
        if not (isinstance(turn, dict) and "speaker" in turn and "text" in turn):
            raise ValueError(f"malformed dialogue turn: {turn!r}")
    return turns


def generate_script(pdf_file, persona_style):
    """Generate a short two-host dialogue script from an uploaded PDF.

    Args:
        pdf_file: Uploaded PDF, forwarded to ``extract_text_from_pdf``.
        persona_style: Persona label; unknown labels fall back to the
            "Serious Academic" prompt.

    Returns:
        ``(status_message, script)``. On success ``script`` is a list of
        ``{"speaker": ..., "text": ...}`` dicts and the module-level ``state``
        is updated (script, index reset to 0, persona, full text). On any
        failure ``script`` is ``{}`` and ``state`` is left untouched.
    """
    if not pdf_file:
        return "⚠️ Upload PDF first.", {}
    if not GEMINI_API_KEY or not ELEVEN_API_KEY:
        return "⚠️ API Keys missing in HF Secrets!", {}

    pdf_text = extract_text_from_pdf(pdf_file)
    # extract_text_from_pdf reports failures as text; surface them instead of
    # sending the error message to the model as if it were the paper.
    if pdf_text.startswith("Error reading PDF:"):
        return pdf_text, {}
    if not pdf_text.strip():
        return "⚠️ No extractable text found in the PDF.", {}

    prompts = {
        "Serious Academic": "Two professors discussing the paper. Tone: Intellectual.",
        "Gossip Columnist": "Two gossip hosts reacting dramatically.",
    }
    # Fall back to a known prompt so the text "None" never reaches the model.
    persona_prompt = prompts.get(persona_style, prompts["Serious Academic"])

    # Kept flush-left so no source indentation leaks into the prompt text.
    system_prompt = f"""
{persona_prompt}
Based on the paper:
"{pdf_text[:4000]}..."
Generate a short 4-turn dialogue script.
Return JSON ONLY:
[
{{"speaker":"Host A","text":"..."}},
{{"speaker":"Host B","text":"..."}}
]
"""
    try:
        model = genai.GenerativeModel("gemini-2.0-flash")
        response = model.generate_content(system_prompt)
        script = _parse_script(response.text)
    except Exception as e:
        # UI boundary: report the failure in the status box rather than raise.
        return f"Error generating script: {e}", {}

    # Commit to shared state only once a valid script exists.
    state.full_text = pdf_text
    state.persona = persona_style
    state.script = script
    state.current_index = 0
    return "✅ Script ready!", script
# 5️⃣ Play next line
def play_next_chunk():
    """Synthesize the next scripted line to an MP3 file and advance playback.

    Reads the module-level ``state`` and ``client``.

    Returns:
        ``(path, transcript)`` on success, where ``path`` is
        ``temp_<index>.mp3`` and ``transcript`` is ``"<speaker>: <text>"``.
        ``(None, message)`` when the script is finished, the current line is
        malformed, or audio generation fails.
    """
    if state.current_index >= len(state.script):
        return None, "🎉 Podcast finished."

    line = state.script[state.current_index]
    # The script originates from an LLM; guard against a malformed turn so a
    # KeyError/TypeError cannot escape into the UI callback.
    if not isinstance(line, dict) or "speaker" not in line or "text" not in line:
        state.current_index += 1  # skip it so playback cannot get stuck here
        return None, "Audio error: skipped a malformed script line."

    speaker, text = line["speaker"], line["text"]
    # Default voice; "Host B" gets a second voice only in the gossip persona.
    voice_id = "nPczCjz82tPNOwVbpGE2"
    if state.persona == "Gossip Columnist" and speaker == "Host B":
        voice_id = "21m00Tcm4TlvDq8ikWAM"

    try:
        audio_stream = client.generate(
            text=text,
            voice=voice_id,
            model="eleven_monolingual_v1",
        )
        save_path = f"temp_{state.current_index}.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)
    except Exception as e:
        # Index is not advanced, so the same line can be retried.
        return None, f"Audio error: {e}"

    state.current_index += 1
    return save_path, f"{speaker}: {text}"
# 6️⃣ Interrupt & Ask
def interrupt_and_ask(question):
    """Answer a listener question in the current persona and voice the reply.

    Reads the module-level ``state`` and ``client``.

    Args:
        question: Free-text question typed by the user.

    Returns:
        ``(path, answer)`` with ``path == "interrupt.mp3"`` on success;
        ``(None, message)`` when the question is blank, no PDF text is
        loaded, or a model/audio call fails.
    """
    # Reject blank input up front instead of spending API calls on it.
    if not question or not question.strip():
        return None, "⚠️ Type a question first."
    if not state.full_text:
        return None, "Upload a PDF first."

    # Kept flush-left so no source indentation leaks into the prompt text.
    prompt = f"""
Persona: {state.persona}
Context: {state.full_text[:1000]}
User Question: {question}
Answer briefly, then say "Anyway, back to the paper..."
"""
    try:
        # Model construction sits inside the try so its failures are reported
        # the same way as generation failures.
        model = genai.GenerativeModel("gemini-2.0-flash")
        response = model.generate_content(prompt)
        answer = response.text
        audio_stream = client.generate(
            text=answer,
            voice="nPczCjz82tPNOwVbpGE2",
            model="eleven_monolingual_v1",
        )
        save_path = "interrupt.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)
        return save_path, answer
    except Exception as e:
        return None, f"Error: {e}"
# 7️⃣ Build Gradio 5.7 UI
# NOTE(review): the Row/Column nesting below is reconstructed from a copy of
# this file whose indentation was lost — confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🎧 PodQuery — Research Paper Podcast Generator")

    with gr.Row():
        # Left column: upload, persona choice and script generation.
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            persona = gr.Dropdown(
                choices=["Serious Academic", "Gossip Columnist"],
                value="Serious Academic",
                label="Persona Style",
            )
            btn_gen = gr.Button("Generate Podcast Script")
            status = gr.Textbox(label="Status")
            script_display = gr.JSON(label="Generated Script")

        # Right column: playback, transcript and the question box.
        with gr.Column():
            player = gr.Audio(label="Audio Output", autoplay=True)
            transcript = gr.Textbox(label="Transcript")
            btn_play = gr.Button("▶️ Play Next Line")
            q_input = gr.Textbox(label="Ask a Question")
            btn_interrupt = gr.Button("✋ Interrupt Podcast")

    # Wire each button to its callback.
    btn_gen.click(
        fn=generate_script,
        inputs=[pdf_input, persona],
        outputs=[status, script_display],
    )
    btn_play.click(fn=play_next_chunk, inputs=[], outputs=[player, transcript])
    btn_interrupt.click(
        fn=interrupt_and_ask,
        inputs=[q_input],
        outputs=[player, transcript],
    )

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()