Ayush Mangal committed on
Commit
c1f8ca0
•
1 Parent(s): 9d79b22

Add initial

Browse files
Files changed (2) hide show
  1. app.py +86 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain import PromptTemplate
3
+ from langchain.llms import Replicate
4
+ import os
5
+ from langchain.document_loaders import YoutubeLoader
6
+ import requests
7
+ import re
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.vectorstores import FAISS
10
+ from langchain.embeddings import HuggingFaceEmbeddings
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.vectorstores.base import VectorStoreRetriever
13
+ from langchain.prompts import PromptTemplate
14
+
15
# Page chrome: browser-tab title and on-page heading.
# The emoji prefix had been mangled into mojibake by a bad encoding
# round-trip; it is restored to the intended parrot + link glyphs.
st.set_page_config(page_title="🦜🔗 YT Summarizer")
st.title('🦜🔗 YT Summarizer')

# NOTE: despite its name, this variable holds the *Replicate* API token typed
# into the sidebar (see the widget label). The name is kept unchanged because
# the form handler further down reads it.
jina_api_key = st.sidebar.text_input('Replicate API Key', type='password')
18
+
19
def _extract_video_ids(page):
    """Return the unique YouTube video IDs referenced in *page*, in first-seen order.

    The previous extraction captured everything after ``watch?v=`` up to the
    next double quote and then kept whatever followed the *last* ``=``, so a
    link carrying extra query parameters yielded that parameter's value
    instead of the video ID. Here only the ID itself is captured.
    """
    # Assumes the 11-character [A-Za-z0-9_-] ID format YouTube currently uses.
    found = re.findall(r'watch\?v=([A-Za-z0-9_-]{11})', page, re.IGNORECASE)
    # dict.fromkeys de-duplicates while preserving order, so each video is
    # downloaded and embedded only once.
    return list(dict.fromkeys(found))


@st.cache_resource
def get_query_chain(channel_url="https://www.youtube.com/@rrwithdeku8677/videos"):
    """Build a RetrievalQA chain over the transcripts of a channel's videos.

    Steps: fetch the channel's videos page, pull the video IDs out of the
    HTML, load each video through ``YoutubeLoader``, split the documents into
    chunks, index them in a FAISS vector store using HuggingFace sentence
    embeddings, and wire the resulting retriever to a Replicate-hosted Llama 2
    chat model with a concise-answer prompt.

    The function is wrapped in ``st.cache_resource`` so the chain is built
    once per distinct ``channel_url`` rather than on every Streamlit rerun.

    Args:
        channel_url: URL of the channel page to scrape for video links.
            Defaults to the channel that was previously hard-coded.

    Returns:
        The ``RetrievalQA`` chain; call it with ``{"query": "<question>"}``.

    Raises:
        requests.HTTPError: If the channel page responds with an error status.
        ValueError: If no documents could be loaded from the channel.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
    )

    # A timeout keeps the app from hanging forever on a stalled connection;
    # raise_for_status surfaces HTTP errors instead of scraping an error page.
    response = requests.get(channel_url, timeout=30)
    response.raise_for_status()
    video_ids = _extract_video_ids(response.text)

    base_url = "https://www.youtube.com/watch?v="
    video_data = []

    # TODO - Cache this and only do this if there is a new video
    for video_id in video_ids:
        loader = YoutubeLoader.from_youtube_url(
            base_url + video_id, add_video_info=True
        )
        print("got loader")
        video_data.extend(loader.load())

    if not video_data:
        # Fail fast with a clear message rather than handing an empty corpus
        # to the vector store.
        raise ValueError(f"No video transcripts could be loaded from {channel_url}")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=0)
    all_splits = text_splitter.split_documents(video_data)
    vectorstore = FAISS.from_documents(documents=all_splits, embedding=embeddings)
    retriever = VectorStoreRetriever(vectorstore=vectorstore)
    print("got retriever")

    template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
    QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        input={"temperature": 0.75, "max_length": 500, "top_p": 1},
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )

    return qa_chain
74
+
75
def generate_response(topic, query_chain):
    """Run *topic* through *query_chain* and show the answer in an info box.

    Args:
        topic: The user's question or keyword.
        query_chain: A callable accepting ``{"query": topic}``, such as the
            chain returned by ``get_query_chain``.

    Returns:
        Whatever ``st.info`` returns for the rendered element.
    """
    result = query_chain({"query": topic})
    print(result)
    # Previously the whole chain output was passed to st.info. When the output
    # is a mapping with a "result" entry, show just that answer text; any
    # other return shape is displayed unchanged, as before.
    if isinstance(result, dict):
        answer = result.get("result", result)
    else:
        answer = result
    return st.info(answer)
79
+
80
# Main form: one text field plus a submit button. Nothing runs until the user
# submits, and the chain is only built once the inputs are usable.
with st.form('myform'):
    topic_text = st.text_input('Enter keyword:', '')
    submitted = st.form_submit_button('Submit')
    if submitted:
        if not jina_api_key:
            # Without a token the LLM call cannot authenticate; say so up
            # front instead of exporting an empty token and failing later.
            st.warning('Please enter your Replicate API key!')
        elif not topic_text.strip():
            st.warning('Please enter a keyword or question.')
        else:
            # The token is exported through the environment before the chain
            # (and its Replicate LLM) is created or invoked.
            os.environ["REPLICATE_API_TOKEN"] = jina_api_key
            query_chain = get_query_chain()
            generate_response(topic_text, query_chain)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ langchain
2
+ Requests
3
+ streamlit
4
+ replicate
5
+ faiss-cpu
6
+ huggingface_hub
7
+ sentence_transformers