Ashkchamp committed on
Commit
d60d78b
·
verified ·
1 Parent(s): 7abc8ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -43
app.py CHANGED
@@ -1,8 +1,4 @@
1
- import os
2
- import re
3
- import validators
4
- import streamlit as st
5
- from dotenv import load_dotenv
6
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
7
  from langchain.prompts import PromptTemplate
8
  from langchain.chains.summarize import load_summarize_chain
@@ -10,18 +6,24 @@ from langchain_groq import ChatGroq
10
  from langchain.schema import Document
11
  from langchain_community.document_loaders import UnstructuredURLLoader
12
  from langchain.document_loaders import PyPDFLoader
13
- from groq._base_client import APIConnectionError
14
-
15
- load_dotenv()
16
- GROQ_KEY = os.getenv("GROQ_API_KEY")
17
 
 
18
  st.set_page_config(page_title="LangChain Summarizer", page_icon="🦜")
19
  st.title("🦜 LangChain: Summarize YT / Webpage / PDF")
20
 
21
- url_input = st.text_input("Paste a YouTube / web URL here:")
22
- file_input = st.file_uploader("…or upload a PDF", type=["pdf"])
 
 
 
 
 
 
 
 
23
 
24
- def get_video_id(url):
 
25
  m = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
26
  return m.group(1) if m else None
27
 
@@ -30,57 +32,66 @@ SUMMARY_PROMPT = PromptTemplate(
30
  input_variables=["text"],
31
  )
32
 
33
- def build_llm():
 
34
  if "llm" not in st.session_state:
35
- if not GROQ_KEY:
36
- raise RuntimeError("Missing GROQ_API_KEY")
37
- st.session_state.llm = ChatGroq(model="deepseek-r1-distill-llama-70b", groq_api_key=GROQ_KEY)
 
38
  return st.session_state.llm
39
 
40
  def summarize(docs):
41
- llm = build_llm()
42
  chain = load_summarize_chain(llm, chain_type="stuff", prompt=SUMMARY_PROMPT)
43
  return chain({"input_documents": docs})["output_text"]
44
 
 
45
  if st.button("Summarize"):
46
- if not GROQ_KEY:
47
- st.error("Set GROQ_API_KEY in .env")
48
- elif not url_input and not file_input:
49
- st.error("Provide a URL or upload a PDF")
50
  else:
51
  try:
52
  with st.spinner("Fetching and summarizing…"):
53
- if file_input:
54
- tmp = f"/tmp/{file_input.name}"
55
- with open(tmp, "wb") as f:
56
- f.write(file_input.read())
57
- docs = PyPDFLoader(tmp).load()
58
- summary = summarize(docs)
59
- os.remove(tmp)
60
- st.success(summary)
61
- elif "youtube" in url_input or "youtu.be" in url_input:
62
- vid = get_video_id(url_input)
 
 
 
63
  if not vid:
64
- st.error("Couldn’t extract YouTube ID")
65
  else:
66
  transcript = YouTubeTranscriptApi.get_transcript(vid)
67
  text = " ".join(t["text"] for t in transcript)
68
- summary = summarize([Document(page_content=text)])
69
- st.success(summary)
 
70
  else:
71
- if not validators.url(url_input):
72
- st.error("Invalid URL")
73
  else:
74
  docs = UnstructuredURLLoader(
75
- urls=[url_input],
76
  ssl_verify=False,
77
- headers={"User-Agent": "Mozilla/5.0"}
 
 
 
 
78
  ).load()
79
- summary = summarize(docs)
80
- st.success(summary)
81
  except (TranscriptsDisabled, VideoUnavailable) as yt_err:
82
  st.error(str(yt_err))
83
- except APIConnectionError:
84
- st.error("Connection to Groq API failed. Check network and API key.")
85
  except Exception as e:
86
- st.error(f"Unexpected error: {e}")
 
1
+ import os, re, validators, streamlit as st
 
 
 
 
2
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
3
  from langchain.prompts import PromptTemplate
4
  from langchain.chains.summarize import load_summarize_chain
 
6
  from langchain.schema import Document
7
  from langchain_community.document_loaders import UnstructuredURLLoader
8
  from langchain.document_loaders import PyPDFLoader
 
 
 
 
9
 
10
+ # ───────────────────────── STREAMLIT CONFIG ──────────────────────────
11
  st.set_page_config(page_title="LangChain Summarizer", page_icon="🦜")
12
  st.title("🦜 LangChain: Summarize YT / Webpage / PDF")
13
 
14
+ # ──────────────────────────── API KEY INPUT ──────────────────────────
15
+ with st.sidebar:
16
+ st.header("API keys")
17
+ groq_api_key = st.text_input("Groq API Key", type="password")
18
+ if groq_api_key:
19
+ os.environ["GROQ_API_KEY"] = groq_api_key # for libraries
20
+
21
+ # ───────────────────── PLACEHOLDERS / FILE & URL INPUT ───────────────
22
+ generic_url = st.text_input("Paste a YouTube / web URL here:")
23
+ uploaded_file = st.file_uploader("…or upload a PDF", type=["pdf"])
24
 
25
+ # ────────────────────────── UTILITY FUNCTIONS ────────────────────────
26
+ def get_video_id(url: str) -> str | None:
27
  m = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
28
  return m.group(1) if m else None
29
 
 
32
  input_variables=["text"],
33
  )
34
 
35
+ def build_llm() -> ChatGroq:
36
+ """Instantiate ChatGroq once and cache it in session_state."""
37
  if "llm" not in st.session_state:
38
+ st.session_state.llm = ChatGroq(
39
+ model="llama3-70b-8192",
40
+ groq_api_key=os.environ["GROQ_API_KEY"],
41
+ )
42
  return st.session_state.llm
43
 
44
  def summarize(docs):
45
+ llm = build_llm()
46
  chain = load_summarize_chain(llm, chain_type="stuff", prompt=SUMMARY_PROMPT)
47
  return chain({"input_documents": docs})["output_text"]
48
 
49
+ # ───────────────────────────── MAIN ACTION ───────────────────────────
50
  if st.button("Summarize"):
51
+ if not groq_api_key:
52
+ st.error("Please enter your Groq API key in the sidebar.")
53
+ elif not generic_url and not uploaded_file:
54
+ st.error("Provide a URL or upload a PDF, then press Summarize.")
55
  else:
56
  try:
57
  with st.spinner("Fetching and summarizing…"):
58
+
59
+ # ---------- PDF ----------
60
+ if uploaded_file:
61
+ tmp_path = f"/tmp/{uploaded_file.name}"
62
+ with open(tmp_path, "wb") as f:
63
+ f.write(uploaded_file.read())
64
+ docs = PyPDFLoader(tmp_path).load()
65
+ st.success(summarize(docs))
66
+ os.remove(tmp_path)
67
+
68
+ # ---------- YouTube ----------
69
+ elif "youtube" in generic_url or "youtu.be" in generic_url:
70
+ vid = get_video_id(generic_url)
71
  if not vid:
72
+ st.error("Couldn’t extract a YouTube video ID 🤔")
73
  else:
74
  transcript = YouTubeTranscriptApi.get_transcript(vid)
75
  text = " ".join(t["text"] for t in transcript)
76
+ st.success(summarize([Document(page_content=text)]))
77
+
78
+ # ---------- Plain Webpage ----------
79
  else:
80
+ if not validators.url(generic_url):
81
+ st.error("That doesn’t look like a valid URL.")
82
  else:
83
  docs = UnstructuredURLLoader(
84
+ urls=[generic_url],
85
  ssl_verify=False,
86
+ headers={
87
+ "User-Agent":
88
+ "Mozilla/5.0 (X11; Linux) AppleWebKit/537.36 "
89
+ "(KHTML, like Gecko) Chrome/121.0 Safari/537.36"
90
+ },
91
  ).load()
92
+ st.success(summarize(docs))
93
+
94
  except (TranscriptsDisabled, VideoUnavailable) as yt_err:
95
  st.error(str(yt_err))
 
 
96
  except Exception as e:
97
+ st.exception(e)