chai182 committed on
Commit 10dba5c
Parent: 49d09fc

Update app.py

Files changed (1)
  1. app.py +71 -179
app.py CHANGED
@@ -1,179 +1,71 @@
-
- import streamlit as st
- hide_streamlit_style = """
- <style>
- #MainMenu {visibility: hidden;}
- footer {visibility: hidden;}
- </style>
- """
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
-
- def free_version():
- import torch
- import os
- import argparse
- import shutil
- from langchain.document_loaders import YoutubeLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.vectorstores import Chroma
- from langchain.embeddings import OpenAIEmbeddings
- from langchain.chains import RetrievalQA
- from langchain.llms import OpenAI
- import streamlit as st
- from langchain.chat_models import ChatOpenAI
- from langchain import HuggingFaceHub
- from langchain.embeddings import HuggingFaceInstructEmbeddings
- from urllib.parse import urlparse, parse_qs
- from langchain.embeddings import HuggingFaceBgeEmbeddings
- from transformers import pipeline
- import textwrap
- import time
- from deep_translator import GoogleTranslator
- from langdetect import detect
-
-
- def typewriter(text: str, speed: float):
- container = st.empty()
- displayed_text = ""
-
- for char in text:
- displayed_text += char
- container.markdown(displayed_text)
- time.sleep(1/speed)
- def wrap_text_preserve_newlines(text, width=110):
- # Split the input text into lines based on newline characters
- lines = text.split('\n')
-
- # Wrap each line individually
- wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
-
- # Join the wrapped lines back together using newline characters
- wrapped_text = '\n'.join(wrapped_lines)
- return wrapped_text
- def process_llm_response(llm_originalresponse2):
- #result_text = wrap_text_preserve_newlines(llm_originalresponse2["result"])
- typewriter(llm_originalresponse2["result"], speed=40)
-
- def extract_video_id(youtube_url):
- try:
- parsed_url = urlparse(youtube_url)
- query_params = parse_qs(parsed_url.query)
- video_id = query_params.get('v', [None])[0]
-
- return video_id
- except Exception as e:
- print(f"Error extracting video ID: {e}")
- return None
- def set_openAi_api_key(api_key: str):
- st.session_state["OPENAI_API_KEY"] = api_key
- os.environ['OPENAI_API_KEY'] = api_key
- def openai_api_insert_component():
- with st.sidebar:
- st.markdown(
- """
- ## Quick Guide 🚀
- 1. Get started by adding your [OpenAI API key](https://platform.openai.com/account/api-keys) below🔑
- 2. Easily input the video url
- 3. Engage with the content - ask questions, seek answers💬
- """
- )
-
- api_key_input = st.text_input("Input your OpenAI API Key",
- type="password",
- placeholder="Format: sk-...",
- help="You can get your API key from https://platform.openai.com/account/api-keys.")
-
-
- if api_key_input == "" or api_key_input is None:
- st.sidebar.caption("👆 :red[Please set your OpenAI API Key here]")
-
-
- st.caption(":green[Your API is not stored anywhere. It is only used to generate answers to your questions.]")
-
- set_openAi_api_key(api_key_input)
-
- def launchfreeversion():
- HUGGINGFACE_API_TOKEN = os.environ['access_code']
- model_name = "BAAI/bge-base-en"
- encode_kwargs = {'normalize_embeddings': True}
-
- st.title('MKG: Your Chat with Youtube Assistant')
-
- videourl = st.text_input("Insert The video URL", placeholder="Format should be like: https://www.youtube.com/watch?v=pSLeYvld8Mk")
- query = st.text_input("Ask any question about the video",help="Suggested queries: Summarize the key points of this video - What is this video about - Ask about a specific thing in the video ")
- st.warning("⚠️ Please Keep in mind that the accuracy of the response relies on the :red[Video's quality] and the :red[prompt's Quality]. Occasionally, the response may not be entirely accurate. Consider using the response as a reference rather than a definitive answer.")
-
- if st.button("Submit Question", type="primary"):
- with st.spinner('Processing the Video...'):
- video_id = extract_video_id(videourl)
- loader = YoutubeLoader(video_id)
- documents = loader.load()
-
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
- documents = text_splitter.split_documents(documents)
-
-
- vectordb = Chroma.from_documents(
- documents,
- #embedding = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
- # model_kwargs={"device": "cuda"})
- embedding= HuggingFaceBgeEmbeddings( model_name=model_name, model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}, encode_kwargs=encode_kwargs)
- )
-
- repo_id = "tiiuae/falcon-7b-instruct"
- qa_chain = RetrievalQA.from_chain_type(
-
- llm=HuggingFaceHub(huggingfacehub_api_token=HUGGINGFACE_API_TOKEN,
- repo_id=repo_id,
- model_kwargs={"temperature":0.1, "max_new_tokens":1000}),
- retriever=vectordb.as_retriever(),
- return_source_documents=False,
- verbose=False
- )
- with st.spinner('Generating Answer...'):
- llm_response = qa_chain(query)
- #llm_originalresponse2=llm_response['result']
- process_llm_response(llm_response)
- launchfreeversion()
-
-
- def intro():
- st.markdown("""
- # MKG: Your Chat with Youtube Assistant 🎬🤖
-
- Welcome to MKG-Assistant, where AI meets Youtube! 🚀🔍
-
- ## Base Models
-
- Q&A-Assistant is built on OpenAI's GPT 3.5 for the premium version and Falcon 7B instruct Model for the free version to enhance your websites browsing experience. Whether you're a student, researcher, or professional, we're here to simplify your interactions with the web. 💡📚
-
- ## How to Get Started
-
- 1.Enter the Video URL.
- 2. Enter your API key.(Only if you chose the premium version. Key is not needed in the free version)
- 3. Ask questions using everyday language.
- 4. Get detailed, AI-generated answers.
-
- 5. Enjoy a smarter way to Interact with Youtube!
-
-
-
- ## It is Time to Dive in!
-
-
- """)
- page_names_to_funcs = {
- "Main Page": intro,
- "Open Source Edition (Free version)": free_version
- }
-
-
-
-
-
-
- #test
- demo_name = st.sidebar.selectbox("Choose a version", page_names_to_funcs.keys())
- page_names_to_funcs[demo_name]()
- st.sidebar.markdown('<a href="https://www.linkedin.com/in/mohammed-khalil-ghali-11305119b/"> Connect on LinkedIn <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/linkedin/linkedin-original.svg" alt="LinkedIn" width="30" height="30"></a>', unsafe_allow_html=True)
- st.sidebar.markdown('<a href="https://github.com/khalil-ghali"> Check out my GitHub <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/github/github-original.svg" alt="GitHub" width="30" height="30"></a>', unsafe_allow_html=True)
 
+ import os
+ import gradio as gr
+ import torch
+ from langchain.document_loaders import YoutubeLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.chains import RetrievalQA
+ from langchain import HuggingFaceHub
+ from urllib.parse import urlparse, parse_qs
+
+ def extract_video_id(youtube_url):
+     # Pull the 'v' query parameter from a standard YouTube watch URL
+     try:
+         parsed_url = urlparse(youtube_url)
+         query_params = parse_qs(parsed_url.query)
+         video_id = query_params.get('v', [None])[0]
+         return video_id
+     except Exception:
+         # Return None so the caller reports the URL as invalid
+         return None
+
+ def process_video(youtube_url, question):
+     video_id = extract_video_id(youtube_url)
+     if not video_id:
+         return 'Invalid YouTube URL'
+
+     try:
+         # Initialize the YouTube loader and fetch the transcript
+         loader = YoutubeLoader(video_id)
+         documents = loader.load()
+
+         # Split the transcript into overlapping chunks
+         text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+         documents = text_splitter.split_documents(documents)
+
+         # Initialize the vector store
+         model_name = "BAAI/bge-base-en"
+         encode_kwargs = {'normalize_embeddings': True}
+         vectordb = Chroma.from_documents(
+             documents,
+             embedding=HuggingFaceBgeEmbeddings(model_name=model_name,
+                                                model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'},
+                                                encode_kwargs=encode_kwargs)
+         )
+
+         # Set up the QA chain
+         HUGGINGFACE_API_TOKEN = os.environ['HUGGINGFACE_API_TOKEN']
+         repo_id = "tiiuae/falcon-7b-instruct"
+         qa_chain = RetrievalQA.from_chain_type(
+             llm=HuggingFaceHub(huggingfacehub_api_token=HUGGINGFACE_API_TOKEN,
+                                repo_id=repo_id,
+                                model_kwargs={"temperature": 0.1, "max_new_tokens": 1000}),
+             retriever=vectordb.as_retriever(),
+             return_source_documents=False,
+             verbose=False
+         )
+
+         # Answer the question
+         llm_response = qa_chain(question)
+         return llm_response['result']
+     except Exception as e:
+         return f"Error processing video: {e}"
+
+ iface = gr.Interface(
+     fn=process_video,
+     inputs=["text", "text"],
+     outputs="text",
+     title="YouTube Video AI Assistant",
+     description="Enter a YouTube URL and a question to get AI-generated answers based on the video."
+ )
+
+ if __name__ == "__main__":
+     iface.launch()