tushar310 committed
Commit 455e88a · verified · 1 Parent(s): fd74341

Update app.py

Files changed (1): app.py +91 -110
app.py CHANGED
@@ -14,160 +14,141 @@ from langchain.document_loaders import DataFrameLoader
 from langchain.vectorstores import Chroma
 from langchain.chains import RetrievalQAWithSourcesChain
 from langchain.embeddings.openai import OpenAIEmbeddings
-# from langchain.llms import OpenAI
-from langchain.chat_models import ChatOpenAI
 import pandas as pd
 import requests
 
 st.set_page_config(layout="centered", page_title="Youtube QnA")
 
-#header of the application
-# image = Image.open('logo.png')
-
 hide_streamlit_style = """
     <style>
     #MainMenu {visibility: hidden;}
     footer {visibility: hidden;}
     </style>
-
     """
 st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 
-
 def load_lottieurl(url: str):
-    r = requests.get(url)
-    if r.status_code != 200:
         return None
-    return r.json()
 
 url_lottie1 = "https://lottie.host/d860aaf2-a646-42f2-8a51-3efe3be59bf2/tpZB5YYkuT.json"
 url_lottie2 = "https://lottie.host/93dcafc4-8531-4406-891c-89c28e4f76e1/lWpokVrjB9.json"
 lottie_hello1 = load_lottieurl(url_lottie2)
 place1 = st.empty()
 
-
 logo1 = "aai_white.png"
 logo2 = "alphaGPT-2k.png"
 logo3 = "banner.png"
 with place1.container():
-    #App title
     st.header("Youtube Question Answering Bot")
-    anima1 , anima2 = st.columns([1,1])
     with anima1:
-        # st.image("aai_black.png", width = 350, use_column_width=True)
-        st.image("logo.png", width = 300, use_column_width=True)
     with anima2:
         st_lottie(
-            lottie_hello1,
-            speed=1,
-            reverse=False,
-            loop=True,
-            quality="high", # medium ; high
-            height=250,
-            width=250,
-            key=None,
         )
 
 def extract_and_save_audio(video_URL, destination, final_filename):
-    video = YouTube(video_URL) #get video
-    audio = video.streams.filter(only_audio=True).first() #seperate audio
-    output = audio.download(output_path = destination) #download and save for transcription
-    _, ext = os.path.splitext(output)
-    new_file = final_filename + '.mp3'
-    os.rename(output, new_file)
 
 def chunk_clips(transcription, clip_size):
-    texts = []
-    sources = []
-    for i in range(0,len(transcription),clip_size):
-        clip_df = transcription.iloc[i:i+clip_size,:]
-        text = " ".join(clip_df['text'].to_list())
-        source = str(round(clip_df.iloc[0]['start']/60,2))+ " - "+str(round(clip_df.iloc[-1]['end']/60,2)) + " min"
-        print(text)
-        print(source)
-        texts.append(text)
-        sources.append(source)
-
-    return [texts,sources]
 
 openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")
 if not openai_api_key:
     st.info("Please add your OpenAI API key to continue.")
     st.stop()
-
-
-# #App title
-# st.header("Youtube Question Answering Bot")
 state = st.session_state
 site = st.text_input("Enter your URL here")
 if st.button("Build Model"):
-    if site is None:
-        st.info(f"""Enter URL to Build QnA Bot""")
-    elif site:
-        try:
-            my_bar = st.progress(0, text="Fetching the video. Please wait.")
-            # Set the device
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-
-            # Load the model
-            whisper_model = whisper.load_model("base", device=device)
-
-            # Video to audio
-            video_URL = site
-            destination = "."
-            final_filename = "AlphaGPT"
-            extract_and_save_audio(video_URL, destination, final_filename)
-
-            # run the whisper model
-            audio_file = "AlphaGPT.mp3"
-            my_bar.progress(50, text="Transcribing the video.")
-            result = whisper_model.transcribe(audio_file, fp16=False, language='English')
-
-            transcription = pd.DataFrame(result['segments'])
-
-            chunks = chunk_clips(transcription, 50)
-            documents = chunks[0]
-            sources = chunks[1]
-
 
-            my_bar.progress(75, text="Building QnA model.")
-            embeddings = OpenAIEmbeddings(openai_api_key = openai_api_key)
-            #vstore with metadata. Here we will store page numbers.
-            vStore = Chroma.from_texts(documents, embeddings, metadatas=[{"source": s} for s in sources])
-            #deciding model
-            model_name = "gpt-3.5-turbo"
-
-            retriever = vStore.as_retriever()
-            retriever.search_kwargs = {'k':2}
-
-            llm = ChatOpenAI(model_name=model_name, openai_api_key=openai_api_key)
-            # llm = OpenAI(model_name=model_name, openai_api_key = openai_api_key)
-            model = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
-
-            my_bar.progress(100, text="Model is ready.")
-            st.session_state['crawling'] = True
-            st.session_state['model'] = model
-            st.session_state['site'] = site
 
-        except Exception as e:
-            st.error(f"An error occurred: {e}")
-            st.error('Oops, crawling resulted in an error :( Please try again with a different URL.')
-
 if site and ("crawling" in state):
-    st.header("Ask your data")
-    model = st.session_state['model']
-    site = st.session_state['site']
-    st.video(site, format="video/mp4", start_time=0)
-    user_q = st.text_input("Enter your questions here")
-    if st.button("Get Response"):
         try:
-            with st.spinner("Model is working on it..."):
-                # st.write(model)
-                result = model({"question":user_q}, return_only_outputs=True)
-                st.subheader('Your response:')
-                st.write(result["answer"])
-                st.subheader('Sources:')
-                st.write(result["sources"])
         except Exception as e:
-            st.error(f"An error occurred: {e}")
-            st.error('Oops, the GPT response resulted in an error :( Please try again with a different question.')
 
 from langchain.vectorstores import Chroma
 from langchain.chains import RetrievalQAWithSourcesChain
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.llms import OpenAI
 import pandas as pd
 import requests
 
 st.set_page_config(layout="centered", page_title="Youtube QnA")
 
 hide_streamlit_style = """
     <style>
     #MainMenu {visibility: hidden;}
     footer {visibility: hidden;}
     </style>
     """
 st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 
 def load_lottieurl(url: str):
+    try:
+        r = requests.get(url)
+        if r.status_code != 200:
+            return None
+        return r.json()
+    except Exception as e:
+        st.error(f"Failed to load Lottie animation: {e}")
         return None
 
 url_lottie1 = "https://lottie.host/d860aaf2-a646-42f2-8a51-3efe3be59bf2/tpZB5YYkuT.json"
 url_lottie2 = "https://lottie.host/93dcafc4-8531-4406-891c-89c28e4f76e1/lWpokVrjB9.json"
 lottie_hello1 = load_lottieurl(url_lottie2)
 place1 = st.empty()
 
 logo1 = "aai_white.png"
 logo2 = "alphaGPT-2k.png"
 logo3 = "banner.png"
 with place1.container():
     st.header("Youtube Question Answering Bot")
+    anima1, anima2 = st.columns([1,1])
     with anima1:
+        st.image("logo.png", width=300, use_column_width=True)
     with anima2:
         st_lottie(
+            lottie_hello1,
+            speed=1,
+            reverse=False,
+            loop=True,
+            quality="high",
+            height=250,
+            width=250,
+            key=None,
         )
 
 def extract_and_save_audio(video_URL, destination, final_filename):
+    try:
+        video = YouTube(video_URL)
+        audio = video.streams.filter(only_audio=True).first()
+        output = audio.download(output_path=destination)
+        _, ext = os.path.splitext(output)
+        new_file = final_filename + '.mp3'
+        os.rename(output, new_file)
+        return new_file
+    except Exception as e:
+        st.error(f"Failed to extract audio: {e}")
+        return None
 
 def chunk_clips(transcription, clip_size):
+    texts = []
+    sources = []
+    for i in range(0, len(transcription), clip_size):
+        clip_df = transcription.iloc[i:i+clip_size, :]
+        text = " ".join(clip_df['text'].to_list())
+        source = str(round(clip_df.iloc[0]['start']/60, 2)) + " - " + str(round(clip_df.iloc[-1]['end']/60, 2)) + " min"
+        texts.append(text)
+        sources.append(source)
+    return [texts, sources]
 
 openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")
 if not openai_api_key:
     st.info("Please add your OpenAI API key to continue.")
     st.stop()
+
 state = st.session_state
 site = st.text_input("Enter your URL here")
 if st.button("Build Model"):
+    if site is None:
+        st.info("Enter URL to Build QnA Bot")
+    elif site:
+        try:
+            my_bar = st.progress(0, text="Fetching the video. Please wait.")
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            whisper_model = whisper.load_model("base", device=device)
+            video_URL = site
+            destination = "."
+            final_filename = "AlphaGPT"
+            audio_file = extract_and_save_audio(video_URL, destination, final_filename)
+            if audio_file is None:
+                st.error("Failed to extract audio. Please try again with a different URL.")
+                st.stop()
+
+            my_bar.progress(50, text="Transcribing the video.")
+            result = whisper_model.transcribe(audio_file, fp16=False, language='English')
+            transcription = pd.DataFrame(result['segments'])
+            chunks = chunk_clips(transcription, 50)
+            documents = chunks[0]
+            sources = chunks[1]
+
+            my_bar.progress(75, text="Building QnA model.")
+            embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+            vStore = Chroma.from_texts(documents, embeddings, metadatas=[{"source": s} for s in sources])
+            model_name = "gpt-3.5-turbo"
+            retriever = vStore.as_retriever()
+            retriever.search_kwargs = {'k': 2}
+            llm = OpenAI(model_name=model_name, openai_api_key=openai_api_key)
+            model = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
+
+            my_bar.progress(100, text="Model is ready.")
+            st.session_state['crawling'] = True
+            st.session_state['model'] = model
+            st.session_state['site'] = site
 
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
+            st.error('Oops, crawling resulted in an error :( Please try again with a different URL.')
 
 if site and ("crawling" in state):
+    st.header("Ask your data")
+    model = st.session_state['model']
+    site = st.session_state['site']
+    st.video(site, format="video/mp4", start_time=0)
+    user_q = st.text_input("Enter your questions here")
+    if st.button("Get Response"):
         try:
+            with st.spinner("Model is working on it..."):
+                result = model({"question": user_q}, return_only_outputs=True)
+                st.subheader('Your response:')
+                st.write(result["answer"])
+                st.subheader('Sources:')
+                st.write(result["sources"])
         except Exception as e:
+            st.error(f"An error occurred: {e}")
+            st.error('Oops, the GPT response resulted in an error :( Please try again with a different question.')