Spaces: Sleeping
Ayush Mangal committed on
Commit • c1f8ca0
1 Parent(s): 9d79b22
Add initial
Browse files
- app.py +86 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,86 @@
import os
import re

import requests
import streamlit as st
from langchain.llms import Replicate
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.vectorstores.base import VectorStoreRetriever
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

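# --- Streamlit UI: page config, title, and a sidebar field for the Replicate API key ---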
st.set_page_config(page_title="🦜🔗 YT Summarizer")
st.title('🦜🔗 YT Summarizer')
replicate_api_key = st.sidebar.text_input('Replicate API Key', type='password')

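# Build the retrieval-QA chain once and cache it across Streamlit reruns.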
@st.cache_resource
def get_query_chain():
    model_name = "sentence-transformers/all-mpnet-base-v2"
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    hf = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )
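    # Scrape the channel's videos page and pull every video id out of the raw HTML.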
    my_url = "https://www.youtube.com/@rrwithdeku8677/videos"
    r = requests.get(my_url)
    page = r.text
    pattern = r'watch\?v=([^"]+)'
    matches = re.findall(pattern, page, re.IGNORECASE)
    ids = [x.split('=')[-1] for x in matches]
    base_url = "https://www.youtube.com/watch?v="

    video_data = []

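    # Load each video's transcript, split it into chunks, and index the chunks in FAISS.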
    # TODO - Cache this and only do this if there is a new video
    for video_id in ids:
        loader = YoutubeLoader.from_youtube_url(
            base_url + video_id, add_video_info=True
        )
        print("got loader")
        data = loader.load()
        video_data.extend(data)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
    all_splits = text_splitter.split_documents(video_data)
    vectorstore = FAISS.from_documents(documents=all_splits, embedding=hf)
    retriever = VectorStoreRetriever(vectorstore=vectorstore)
    print("got retriever")
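    # Prompt that keeps answers short and grounded in the retrieved transcript chunks.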
    template = """Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Use three sentences maximum and keep the answer as concise as possible.
    Always say "thanks for asking!" at the end of the answer.
    {context}
    Question: {question}
    Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
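    # Llama 2 13B chat hosted on Replicate, wired into a RetrievalQA chain with the prompt above.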
    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        input={"temperature": 0.75, "max_length": 500, "top_p": 1},
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
    )

    return qa_chain

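# Run the chain for a user query and render the answer in the Streamlit app.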
def generate_response(topic, query_chain):
    result = query_chain({"query": topic})
    print(result)
    return st.info(result["result"])

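# Simple form: take a keyword, set the Replicate token from the sidebar, then build and query the chain.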
with st.form('myform'):
    topic_text = st.text_input('Enter keyword:', '')
    submitted = st.form_submit_button('Submit')
    if submitted:
        os.environ["REPLICATE_API_TOKEN"] = replicate_api_key
        query_chain = get_query_chain()
        generate_response(topic_text, query_chain)
requirements.txt
ADDED
@@ -0,0 +1,7 @@
langchain
Requests
streamlit
replicate
faiss-cpu
huggingface_hub
sentence_transformers