PranavReddy18 committed on
Commit
2007759
·
verified ·
1 Parent(s): 99389f0

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +73 -0
  2. requirements.txt +46 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import validators
2
+ import streamlit as st
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain_groq import ChatGroq
5
+ from langchain.chains.summarize import load_summarize_chain
6
+ from langchain.docstore.document import Document
7
+ from langchain_community.document_loaders import UnstructuredURLLoader
8
+ import yt_dlp
9
+
10
import os

# Streamlit App Configuration
st.set_page_config(page_title="LangChain: Summarize Text From YT or Website", page_icon="🦜")
st.title("🦜 LangChain: Summarize Text From YT or Website")
st.subheader('Summarize URL')

# SECURITY FIX: the original hardcoded a live Groq API key in source control.
# A committed key is effectively public and must be rotated; read it from the
# environment instead (set GROQ_API_KEY in the Space/host configuration).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    st.warning("GROQ_API_KEY is not set; summarization requests will fail.")

# URL input field (label collapsed so only the title/subheader show)
generic_url = st.text_input("URL", label_visibility="collapsed")

# Mixtral model served through the Groq API.
# NOTE(review): the original comment said "Gemma" but the model id is
# mixtral-8x7b-32768 — the id is kept, the comment corrected.
llm = ChatGroq(model="mixtral-8x7b-32768", groq_api_key=GROQ_API_KEY)

# Prompt for the "stuff" summarization chain; {text} is filled with the
# concatenated document content.
prompt_template = """
Provide a summary of the following content in 300 words:
Content:{text}

"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
30
+
31
def load_youtube_data(url):
    """Fetch a YouTube video's title and description and wrap them in a Document.

    Uses yt-dlp in metadata-only mode (no media download). Returns a
    single-element list so callers can feed it straight to a summarize chain.

    Raises:
        ValueError: if yt-dlp fails to extract metadata for the URL.
    """
    try:
        with yt_dlp.YoutubeDL({}) as downloader:
            meta = downloader.extract_info(url, download=False)
            video_title = meta.get('title', 'No Title')
            video_description = meta.get('description', 'No Description')
            body = f"Title: {video_title}\n\nDescription: {video_description}"
            return [Document(page_content=body, metadata={"title": video_title})]
    except Exception as err:
        # Surface a uniform error type to the caller's except handler.
        raise ValueError(f"Failed to extract YouTube data: {str(err)}")
42
+
43
def load_website_data(url):
    """Scrape a web page and return its text as a list of Document objects.

    ssl_verify=False tolerates sites with broken certificates; the User-Agent
    header mimics a desktop browser to avoid trivial bot blocking.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    loader = UnstructuredURLLoader(urls=[url], ssl_verify=False, headers=browser_headers)
    # Re-wrap into plain Documents so downstream chains see a uniform type.
    return [
        Document(page_content=page.page_content, metadata=page.metadata)
        for page in loader.load()
    ]
51
+
52
if st.button("Summarize the Content from YT or Website"):
    # Validate URL input before doing any network work.
    if not generic_url.strip():
        st.error("Please provide a URL to get started")
    elif not validators.url(generic_url):
        st.error("Please enter a valid URL. It can be a YouTube video URL or website URL.")
    else:
        try:
            with st.spinner("Processing..."):
                # YouTube URLs get metadata via yt-dlp; anything else is scraped.
                is_youtube = "youtube.com" in generic_url or "youtu.be" in generic_url
                docs = (
                    load_youtube_data(generic_url)
                    if is_youtube
                    else load_website_data(generic_url)
                )

                # "stuff" chain: all document text is packed into one prompt.
                chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
                output_summary = chain.run(docs)

                st.success(output_summary)
        except Exception as e:
            # Show the full traceback in the app for easier debugging.
            st.exception(f"Exception: {e}")
requirements.txt ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain_astradb
3
+ python-dotenv
4
+ ipykernel
5
+ langchain-community
6
+ pypdf
7
+ bs4
8
+ arxiv
9
+ pymupdf
10
+ wikipedia
11
+ langchain-text-splitters
12
+ langchain-openai
13
+ chromadb
14
+ sentence_transformers
15
+ langchain_huggingface
16
+ faiss-cpu
17
+ langchain_chroma
18
+ streamlit
19
+ langchain_groq
20
+ fastapi
21
+ uvicorn
22
+ langserve[all]
23
+ sse_starlette
24
+ streamlit
25
+ PyPDF2
26
+ google.generativeai
27
+ arxiv
28
+ wikipedia
29
+ streamlit-pydantic
30
+ validators
31
+ youtube_transcript_api
32
+ pytube
33
+ Unstructured
34
+ yt-dlp
35
+ numexpr
36
+ langchain_huggingface
37
+ huggingface_hub
38
+ duckduckgo-search
39
+ langchain_nvidia_ai_endpoints
40
+ crewai
41
+ crewai_tools
42
+ pinecone
43
+ pinecone-client
44
+ neo4j==5.14
45
+ llama-index
46
+