chai182 committed on
Commit
12b0fae
•
1 Parent(s): 459022c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +266 -0
  2. requirements.txt +12 -0
app.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
# CSS injected into the page to hide Streamlit's default main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as HTML
# instead of being escaped and shown as text.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
10
def paid_version():
    """Render the premium page: question answering over a YouTube transcript.

    The page asks for an OpenAI API key in the sidebar, then for a video URL
    and a question. On submit it loads the video transcript, splits it into
    chunks, indexes the chunks in a Chroma store persisted under ``./data``
    and answers the question with a ``RetrievalQA`` chain backed by
    ``gpt-3.5-turbo``. The raw chain response is written to the page.
    """
    import os
    import shutil
    from urllib.parse import parse_qs, urlparse

    import streamlit as st
    from langchain.chains import RetrievalQA
    from langchain.chat_models import ChatOpenAI
    from langchain.document_loaders import YoutubeLoader
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import Chroma

    def extract_video_id(youtube_url):
        """Return the video ID found in *youtube_url*, or ``None``.

        Handles ``watch?v=<id>`` URLs as well as ``youtu.be/<id>`` and
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>`` paths.
        """
        try:
            parsed_url = urlparse((youtube_url or "").strip())
        except ValueError as e:
            # urlparse rejects a few malformed inputs (e.g. a bad IPv6 host).
            print(f"Error extracting video ID: {e}")
            return None

        video_id = parse_qs(parsed_url.query).get('v', [None])[0]
        if video_id:
            return video_id

        host = (parsed_url.hostname or "").lower()
        segments = [part for part in parsed_url.path.split("/") if part]
        if host in ("youtu.be", "www.youtu.be") and segments:
            return segments[0]
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]
        return None

    def set_openAi_api_key(api_key: str):
        """Store the key in the session state and in the process environment."""
        api_key = api_key or ""  # os.environ only accepts strings
        st.session_state["OPENAI_API_KEY"] = api_key
        os.environ['OPENAI_API_KEY'] = api_key

    def openai_api_insert_component():
        """Draw the sidebar guide and the API-key input, then record the key."""
        with st.sidebar:
            st.markdown(
                """
                ## Quick Guide 🚀
                1. Get started by adding your [OpenAI API key](https://platform.openai.com/account/api-keys) below🔑
                2. Easily input the video url
                3. Engage with the content - ask questions, seek answers💬
                """
            )

            api_key_input = st.text_input(
                "Input your OpenAI API Key",
                type="password",
                placeholder="Format: sk-...",
                help="You can get your API key from https://platform.openai.com/account/api-keys.",
            )

            if not api_key_input:
                st.sidebar.caption("👆 :red[Please set your OpenAI API Key here]")

            st.caption(":green[Your API is not stored anywhere. It is only used to generate answers to your questions.]")

            set_openAi_api_key(api_key_input)

    def launchpaidversion():
        """Draw the main form and answer the question when it is submitted."""
        openai_api_insert_component()
        api_key = st.session_state.get('OPENAI_API_KEY') or ""
        os.environ['OPENAI_API_KEY'] = api_key
        st.title('MKG: Your Chat with Youtube Assistant')

        videourl = st.text_input("Insert The video URL")
        query = st.text_input("Ask any question about the video")
        if not st.button("Submit Question", type="primary"):
            return

        # Validate the inputs up front so the user gets a readable message
        # instead of a stack trace from the loader or the OpenAI client.
        if not api_key:
            st.error("Please enter your OpenAI API key in the sidebar first.")
            return
        video_id = extract_video_id(videourl)
        if not video_id:
            st.error("Could not find a video ID in that URL. Please enter a valid YouTube link.")
            return
        if not query or not query.strip():
            st.error("Please enter a question about the video.")
            return

        try:
            documents = YoutubeLoader(video_id).load()
        except Exception as exc:  # UI boundary: report the failure, don't crash the page
            st.error(f"Could not load the transcript for this video: {exc}")
            return
        if not documents:
            st.error("No transcript is available for this video.")
            return

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        documents = text_splitter.split_documents(documents)

        # Start from an empty index for every question. ignore_errors=True
        # because the directory does not exist yet on the very first run.
        # NOTE(review): './data' is shared by every session of the app;
        # confirm that concurrent users cannot clobber each other's index.
        shutil.rmtree('./data', ignore_errors=True)
        vectordb = Chroma.from_documents(
            documents,
            embedding=OpenAIEmbeddings(),
            persist_directory='./data'
        )
        vectordb.persist()

        qa_chain = RetrievalQA.from_chain_type(
            llm=ChatOpenAI(model_name='gpt-3.5-turbo'),
            retriever=vectordb.as_retriever(),
            return_source_documents=True,
            verbose=False
        )
        response = qa_chain(query)
        st.write(response)

    launchpaidversion()
96
def free_version():
    """Render the free page: question answering over a YouTube transcript.

    On submit the page loads the video transcript, splits it into chunks,
    embeds them with the ``BAAI/bge-base-en`` model into a Chroma store and
    answers the question with a ``RetrievalQA`` chain backed by
    ``tiiuae/falcon-7b-instruct`` on the Hugging Face Hub. The answer is
    revealed character by character.

    The Hugging Face token is read from the ``access_code`` environment
    variable.
    """
    import os
    import time
    from urllib.parse import parse_qs, urlparse

    import streamlit as st
    import torch
    from langchain import HuggingFaceHub
    from langchain.chains import RetrievalQA
    from langchain.document_loaders import YoutubeLoader
    from langchain.embeddings import HuggingFaceBgeEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import Chroma

    def typewriter(text: str, speed: float):
        """Reveal *text* one character at a time, *speed* characters per second."""
        container = st.empty()
        if speed <= 0:
            # No meaningful delay can be derived; show everything at once
            # rather than dividing by zero.
            container.markdown(text)
            return

        delay = 1 / speed
        for end in range(1, len(text) + 1):
            container.markdown(text[:end])
            time.sleep(delay)

    def process_llm_response(llm_originalresponse2):
        """Display the ``"result"`` entry of a chain response."""
        typewriter(llm_originalresponse2["result"], speed=40)

    def extract_video_id(youtube_url):
        """Return the video ID found in *youtube_url*, or ``None``.

        Handles ``watch?v=<id>`` URLs as well as ``youtu.be/<id>`` and
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>`` paths.
        """
        try:
            parsed_url = urlparse((youtube_url or "").strip())
        except ValueError as e:
            # urlparse rejects a few malformed inputs (e.g. a bad IPv6 host).
            print(f"Error extracting video ID: {e}")
            return None

        video_id = parse_qs(parsed_url.query).get('v', [None])[0]
        if video_id:
            return video_id

        host = (parsed_url.hostname or "").lower()
        segments = [part for part in parsed_url.path.split("/") if part]
        if host in ("youtu.be", "www.youtu.be") and segments:
            return segments[0]
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]
        return None

    def launchfreeversion():
        """Draw the main form and answer the question when it is submitted."""
        model_name = "BAAI/bge-base-en"
        encode_kwargs = {'normalize_embeddings': True}

        st.title('MKG: Your Chat with Youtube Assistant')

        # Read the token with .get() so a missing secret produces a readable
        # message instead of a KeyError that takes the whole page down.
        HUGGINGFACE_API_TOKEN = os.environ.get('access_code')
        if not HUGGINGFACE_API_TOKEN:
            st.error("The free version is not configured: the 'access_code' environment variable (Hugging Face API token) is missing.")
            return

        videourl = st.text_input("Insert The video URL", placeholder="Format should be like: https://www.youtube.com/watch?v=pSLeYvld8Mk")
        query = st.text_input("Ask any question about the video", help="Suggested queries: Summarize the key points of this video - What is this video about - Ask about a specific thing in the video ")
        st.warning("⚠️ Please Keep in mind that the accuracy of the response relies on the :red[Video's quality] and the :red[prompt's Quality]. Occasionally, the response may not be entirely accurate. Consider using the response as a reference rather than a definitive answer.")

        if not st.button("Submit Question", type="primary"):
            return

        # Validate the inputs before doing any expensive work.
        video_id = extract_video_id(videourl)
        if not video_id:
            st.error("Could not find a video ID in that URL. Please enter a valid YouTube link.")
            return
        if not query or not query.strip():
            st.error("Please enter a question about the video.")
            return

        with st.spinner('Processing the Video...'):
            try:
                documents = YoutubeLoader(video_id).load()
            except Exception as exc:  # UI boundary: report the failure, don't crash the page
                st.error(f"Could not load the transcript for this video: {exc}")
                return
            if not documents:
                st.error("No transcript is available for this video.")
                return

            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
            documents = text_splitter.split_documents(documents)

            # Use the GPU for embeddings when one is present, otherwise the CPU.
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
            vectordb = Chroma.from_documents(
                documents,
                embedding=HuggingFaceBgeEmbeddings(
                    model_name=model_name,
                    model_kwargs={'device': device},
                    encode_kwargs=encode_kwargs,
                ),
            )

            repo_id = "tiiuae/falcon-7b-instruct"
            qa_chain = RetrievalQA.from_chain_type(
                llm=HuggingFaceHub(
                    huggingfacehub_api_token=HUGGINGFACE_API_TOKEN,
                    repo_id=repo_id,
                    model_kwargs={"temperature": 0.1, "max_new_tokens": 1000},
                ),
                retriever=vectordb.as_retriever(),
                return_source_documents=False,
                verbose=False
            )

        with st.spinner('Generating Answer...'):
            llm_response = qa_chain(query)

        process_llm_response(llm_response)

    launchfreeversion()
223
+
224
+
225
def intro():
    """Render the landing page: app description and getting-started steps."""
    # Every numbered step needs a space after its marker ("1. "), otherwise
    # Markdown does not treat the line as a list item and the list breaks.
    st.markdown("""
    # MKG: Your Chat with Youtube Assistant 🎬🤖

    Welcome to MKG-Assistant, where AI meets Youtube! 🚀🔍

    ## Base Models

    Q&A-Assistant is built on OpenAI's GPT 3.5 for the premium version and Falcon 7B instruct Model for the free version to enhance your websites browsing experience. Whether you're a student, researcher, or professional, we're here to simplify your interactions with the web. 💡📚

    ## How to Get Started

    1. Enter the Video URL.
    2. Enter your API key.(Only if you chose the premium version. Key is not needed in the free version)
    3. Ask questions using everyday language.
    4. Get detailed, AI-generated answers.
    5. Enjoy a smarter way to Interact with Youtube!

    ## It is Time to Dive in!

    """)
250
# Sidebar label -> function that renders the corresponding page.
page_names_to_funcs = {
    "Main Page": intro,
    "Open Source Edition (Free version)": free_version,
    "Premium edition (Requires Open AI API Key )": paid_version,
}

# Let the user pick a page in the sidebar, then render the chosen page.
demo_name = st.sidebar.selectbox("Choose a version", tuple(page_names_to_funcs))
page_names_to_funcs[demo_name]()

# Author links shown at the bottom of the sidebar. They are raw HTML, so
# unsafe_allow_html is needed for the anchors and icons to render.
for _link_html in (
    '<a href="https://www.linkedin.com/in/mohammed-khalil-ghali-11305119b/"> Connect on LinkedIn <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/linkedin/linkedin-original.svg" alt="LinkedIn" width="30" height="30"></a>',
    '<a href="https://github.com/khalil-ghali"> Check out my GitHub <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/github/github-original.svg" alt="GitHub" width="30" height="30"></a>',
):
    st.sidebar.markdown(_link_html, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pip options must sit on their own line; a requirement written after an
# option on the same line is not installed as a requirement.
# CPU-only PyTorch wheels are served from the official PyTorch index.
--extra-index-url https://download.pytorch.org/whl/cpu
torch
langchain
# Needed by OpenAIEmbeddings / ChatOpenAI on the premium page.
# NOTE(review): langchain is unpinned; confirm that the installed langchain
# and openai versions are compatible with the import paths used in app.py.
openai
tiktoken
chromadb
transformers
sentence-transformers
InstructorEmbedding
streamlit
youtube-transcript-api
deep_translator
langdetect
pyPDF
#FAISS