marcellopoliti committed on
Commit
8e018ae
1 Parent(s): 611f226
app.py CHANGED
@@ -1,27 +1,47 @@
1
- import streamlit as st
2
- from utils import get_chroma_client, get_embedding_function
3
 
4
- # streamlit_app.py
5
-
6
- import hmac
7
  import streamlit as st
 
8
  import os
9
- import streamlit.components.v1 as components
10
- from retrieve_kb import get_current_knowledge_bases, get_knowledge_base_information
11
- import streamlit as st
12
  import requests
13
- import os
14
  from dotenv import load_dotenv
 
 
 
 
15
 
16
  __import__("pysqlite3")
17
  import sys
18
 
 
19
  sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
20
-
21
  st.set_page_config(page_title="Hello", page_icon="👋", layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  def show_sidebar():
 
25
  # Sidebar
26
  st.sidebar.header(("About"))
27
  st.sidebar.markdown(
@@ -73,20 +93,10 @@ if not check_password():
73
 
74
  # Main Streamlit app starts here
75
  client = get_chroma_client()
76
- default_embedding_function = get_embedding_function()
77
-
78
-
79
- # Function to load a page
80
- def load_page(page_name):
81
- with open(f"pages/{page_name}", "r") as file:
82
- exec(file.read(), globals())
83
-
84
-
85
- client = get_chroma_client()
86
- default_embedding_function = get_embedding_function()
87
-
88
  show_sidebar()
89
 
 
90
  col1, col2, col3 = st.columns((1, 4, 1))
91
  with col2:
92
  st.image("https://brianknows.org/brian_logo.png", width=300)
@@ -94,39 +104,21 @@ st.write("# Brian Knowledge Base System! 👋")
94
 
95
 
96
  tab1, tab2 = st.tabs(["AskBrian", "BrianApp"])
 
 
97
  with tab1:
98
  st.markdown("## Ask Brian Anything")
99
  kb_name = "public-knowledge-box"
100
 
101
- load_dotenv()
102
- api_key = os.getenv("BRIAN_API_KEY")
103
-
104
- def send_post_request(prompt, kb):
105
- url = " https://api.brianknows.org/api/v0/agent/knowledge"
106
- data = {"prompt": prompt, "kb": kb}
107
- headers = {
108
- "Content-Type": "application/json",
109
- "X-Brian-Api-Key": api_key, # Include the API key in the headers
110
- }
111
-
112
- response = requests.post(url, json=data, headers=headers)
113
-
114
- if response.status_code == 200:
115
- return response.json() # Returns the JSON response if successful
116
- else:
117
- return (
118
- response.status_code,
119
- response.text,
120
- ) # Returns the status code and error if not successful
121
-
122
  # Example usage:
123
  kbs = get_current_knowledge_bases(client=client)
124
  kbs = (kb.name for kb in kbs)
125
  kb_name = st.selectbox("Select knowledge box", kbs)
126
  query = st.text_input(label="query")
127
  if st.button("askbrian"):
128
- result = send_post_request(query, kb_name)
129
  st.json(result)
130
 
 
131
  with tab2:
132
  components.iframe("https://www.brianknows.org/", height=650, scrolling=True)
 
1
+ """Entry point of the Streamlit app"""
 
2
 
 
 
 
3
  import streamlit as st
4
+ import hmac
5
  import os
 
 
 
6
  import requests
 
7
  from dotenv import load_dotenv
8
+ import streamlit.components.v1 as components
9
+
10
+ from utils import get_chroma_client, get_embedding_function
11
+ from retrieve_kb import get_current_knowledge_bases
12
 
13
  __import__("pysqlite3")
14
  import sys
15
 
16
+ # settings
17
  sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
 
18
  st.set_page_config(page_title="Hello", page_icon="👋", layout="wide")
19
+ load_dotenv()
20
+ brian_api_key = os.getenv("BRIAN_API_KEY")
21
+ openai_key = os.getenv("OPENAI_API_KEY")
22
+
23
+
24
+ def askbrian_request(prompt, kb, api_key):
25
+ url = " https://api.brianknows.org/api/v0/agent/knowledge"
26
+ data = {"prompt": prompt, "kb": kb}
27
+ headers = {
28
+ "Content-Type": "application/json",
29
+ "X-Brian-Api-Key": api_key, # Include the API key in the headers
30
+ }
31
+
32
+ response = requests.post(url, json=data, headers=headers)
33
+
34
+ if response.status_code == 200:
35
+ return response.json() # Returns the JSON response if successful
36
+ else:
37
+ return (
38
+ response.status_code,
39
+ response.text,
40
+ ) # Returns the status code and error if not successful
41
 
42
 
43
  def show_sidebar():
44
+ """Shows sidebar with Brian info"""
45
  # Sidebar
46
  st.sidebar.header(("About"))
47
  st.sidebar.markdown(
 
93
 
94
  # Main Streamlit app starts here
95
  client = get_chroma_client()
96
+ default_embedding_function = get_embedding_function(openai_key=openai_key)
 
 
 
 
 
 
 
 
 
 
 
97
  show_sidebar()
98
 
99
+
100
  col1, col2, col3 = st.columns((1, 4, 1))
101
  with col2:
102
  st.image("https://brianknows.org/brian_logo.png", width=300)
 
104
 
105
 
106
  tab1, tab2 = st.tabs(["AskBrian", "BrianApp"])
107
+
108
+ # Ask Brian Tab
109
  with tab1:
110
  st.markdown("## Ask Brian Anything")
111
  kb_name = "public-knowledge-box"
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  # Example usage:
114
  kbs = get_current_knowledge_bases(client=client)
115
  kbs = (kb.name for kb in kbs)
116
  kb_name = st.selectbox("Select knowledge box", kbs)
117
  query = st.text_input(label="query")
118
  if st.button("askbrian"):
119
+ result = askbrian_request(query, kb_name, brian_api_key)
120
  st.json(result)
121
 
122
+ # Brian App embedded Tab
123
  with tab2:
124
  components.iframe("https://www.brianknows.org/", height=650, scrolling=True)
generate_kb.py CHANGED
@@ -13,7 +13,6 @@ from spellchecker import SpellChecker
13
 
14
  load_dotenv()
15
  openai_key = os.getenv("OPENAI_API_KEY")
16
- openai_key = st.secrets["OPENAI_API_KEY"]
17
 
18
 
19
  def clean_text(text):
 
13
 
14
  load_dotenv()
15
  openai_key = os.getenv("OPENAI_API_KEY")
 
16
 
17
 
18
  def clean_text(text):
pages/create_knowledge_box.py CHANGED
@@ -1,11 +1,12 @@
1
  import streamlit as st
2
- from app import client, default_embedding_function
3
  import pandas as pd
4
  from generate_kb import generate_knowledge_box_from_url
5
  from utils import get_chroma_client
6
 
7
  # Title of the app
8
  st.title("Create a knowledge box from CSV file")
 
9
 
10
  # File uploader widget
11
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
 
1
  import streamlit as st
2
+ from app import client, default_embedding_function, show_sidebar
3
  import pandas as pd
4
  from generate_kb import generate_knowledge_box_from_url
5
  from utils import get_chroma_client
6
 
7
  # Title of the app
8
  st.title("Create a knowledge box from CSV file")
9
+ show_sidebar()
10
 
11
  # File uploader widget
12
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
pages/delete_knowledge_box⚠️.py CHANGED
@@ -1,9 +1,10 @@
1
  import streamlit as st
2
  from retrieve_kb import get_current_knowledge_bases
3
- from app import client
4
 
5
 
6
  st.title("Delete knowledge Base ☠️")
 
7
 
8
  st.title("Get knowledge boxes")
9
  if st.button("Get current knowledge bases"):
 
1
  import streamlit as st
2
  from retrieve_kb import get_current_knowledge_bases
3
+ from app import client, show_sidebar
4
 
5
 
6
  st.title("Delete knowledge Base ☠️")
7
+ show_sidebar()
8
 
9
  st.title("Get knowledge boxes")
10
  if st.button("Get current knowledge bases"):
pages/manage_knowledge_box.py CHANGED
@@ -1,8 +1,7 @@
1
- from __future__ import unicode_literals
 
 
2
  import streamlit as st
3
- from retrieve_kb import get_current_knowledge_bases, get_knowledge_base_information
4
- from generate_kb import add_links_to_knowledge_base
5
- from app import client, default_embedding_function
6
  import pandas as pd
7
  from tempfile import NamedTemporaryFile
8
  import os
@@ -11,136 +10,17 @@ from openai import OpenAI
11
  import wave
12
  from dotenv import load_dotenv
13
 
 
 
 
 
14
 
15
  load_dotenv()
16
  openai_key = os.getenv("OPENAI_API_KEY")
17
-
18
- st.title("Manage collections")
19
- kbs = get_current_knowledge_bases(client=client)
20
- kbs = (kb.name for kb in kbs)
21
- collection_name = st.selectbox("Select knowledge box", kbs)
22
- info = {}
23
- collection = None
24
-
25
- if "df" not in st.session_state:
26
- st.session_state["df"] = pd.DataFrame()
27
-
28
- col1, col2 = st.columns(2)
29
-
30
- if st.button("Get All"):
31
- collection_info, coll, client = get_knowledge_base_information(
32
- client=client,
33
- embedding_function=default_embedding_function,
34
- kb_name=collection_name,
35
- )
36
- st.session_state["collection"] = coll
37
- st.session_state["client"] = client
38
- collection = coll
39
- # st.write(collection_info)
40
- df = pd.DataFrame.from_records(collection_info)
41
- df["source"] = df["metadatas"].apply(lambda x: x.get("source", "unkown"))
42
- df["title"] = df["metadatas"].apply(lambda x: x.get("title", "unkown"))
43
- df = df[["documents", "source", "title", "ids"]]
44
- st.session_state["df"] = df
45
 
46
 
47
- if len(st.session_state["df"]) != 0:
48
- st.dataframe(st.session_state["df"], width=3_000)
49
- unique_df = st.session_state["df"]["source"].unique()
50
- st.text(f"unique urls: {len(unique_df)}")
51
- st.dataframe(unique_df)
52
-
53
- #############################
54
- #### REMOVE A SPLIT #########
55
- #############################
56
- st.header("Remove a split")
57
- id = st.text_input("Insert a split id")
58
- if st.button("Remove Id from collection"):
59
- if id in st.session_state["df"]["ids"].values.tolist():
60
- res = st.session_state["collection"].delete(ids=[f"{id}"])
61
- st.success(f"id {id} deleted")
62
- else:
63
- st.error(f"id {id} not in kb")
64
-
65
-
66
- #############################
67
- #### REMOVE URL ############
68
- #############################
69
- st.header("Remove url from collection")
70
- url = st.text_input("remove url")
71
- if st.button("Remove url from collection"):
72
- try:
73
- ids = st.session_state["collection"].get(where={"source": url})["ids"]
74
- st.session_state["collection"].delete(ids=ids)
75
- st.success("deleted")
76
- except Exception as e:
77
- st.error(str(e))
78
-
79
-
80
- #############################
81
- ########### ADD URL #########
82
- #############################
83
- st.header("Add url to existing collection")
84
- url_text = st.text_input("Insert a url link")
85
- if st.button("add url to collection"):
86
- urls = [url_text] # put in a list even if only one
87
- res = add_links_to_knowledge_base(client=client, kb_name=collection_name, urls=urls)
88
- st.write(res)
89
-
90
- st.header("Add pdf to existing collection")
91
- uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
92
- pdf_optional_link = st.text_input(
93
- "Insert a URL link you want to associate with the pdf"
94
- )
95
- pdf_title = st.text_input("This title will be displayed as a resource in ask brian")
96
- if st.button("add pdf"):
97
- # Create a temporary file
98
- with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
99
- # Write the uploaded PDF to the temporary file
100
- tmp_file.write(uploaded_file.getvalue())
101
- tmp_path = tmp_file.name
102
- print("PATH: ", tmp_path)
103
- urls = [tmp_path]
104
- res = add_links_to_knowledge_base(
105
- client=client,
106
- kb_name=collection_name,
107
- urls=urls,
108
- pdf_optional_link=pdf_optional_link,
109
- pdf_title=pdf_title,
110
- )
111
- st.write(res)
112
- # Clean up: delete the temporary file
113
- os.remove(tmp_path)
114
-
115
- #############################
116
- ########### ADD CSV #########
117
- #############################
118
- st.header("Add csv to existing collection")
119
- uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
120
- df = None
121
-
122
- if uploaded_file is not None:
123
- try:
124
- new_df = pd.read_csv(uploaded_file)
125
- st.write("DataFrame:")
126
- st.write(new_df)
127
- except Exception as e:
128
- st.error(str(e))
129
- if st.button("add csv urls to collection"):
130
- urls = new_df.values.tolist()
131
- st.write(urls)
132
- res = add_links_to_knowledge_base(
133
- client=client, kb_name=collection_name, urls=urls
134
- )
135
- st.write(res)
136
-
137
-
138
- #############################
139
- ########## YOUTUBE ##########
140
- #############################
141
-
142
-
143
- def transcribe_audio(audio_path, chunk_length=10000):
144
  """
145
  Transcribe audio by breaking it into chunks using wave and numpy.
146
  :param audio_path: Path to the audio file (e.g., "video.wav").
@@ -148,7 +28,7 @@ def transcribe_audio(audio_path, chunk_length=10000):
148
  :return: Full transcription of the audio file.
149
  """
150
  # Open the wave file
151
- client = OpenAI(api_key=open_ai_key)
152
 
153
  with wave.open(audio_path, "rb") as audio:
154
  frame_rate = audio.getframerate()
@@ -218,7 +98,7 @@ def download_and_transcribe_youtube(youtube_url):
218
  video_title = info_dict.get("title", None)
219
 
220
  # audio_file = open("video.wav", "rb")
221
- text = transcribe_audio("video.wav")
222
  f_out_path = f"{video_title}.txt"
223
  with open(f"{video_title}.txt", "w") as f_out:
224
  f_out.write(text)
@@ -235,19 +115,184 @@ def download_and_transcribe_youtube(youtube_url):
235
  os.remove("temp_chunk.wav")
236
 
237
 
238
- st.header("Add youtube video to collection")
239
- st.image(
240
- "",
241
- width=200, # Manually Adjust the width of the image as per requirement
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  )
243
 
244
- video_url = st.text_input("Youtube video url")
245
- st.text("Aggiungere il video puo impiegare un bel pò. Avvia e vatti a fare una canna")
246
- if st.button("Add video"):
247
- # Create a temporary file
248
- # Write the uploaded PDF to the temporary file
249
- try:
250
- download_and_transcribe_youtube(video_url)
251
- st.success("Video Added")
252
- except Exception as e:
253
- st.error(f"{str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Page to manage kbs"""
2
+
3
+ from __future__ import unicode_literals # this should always be the first import
4
  import streamlit as st
 
 
 
5
  import pandas as pd
6
  from tempfile import NamedTemporaryFile
7
  import os
 
10
  import wave
11
  from dotenv import load_dotenv
12
 
13
+ from retrieve_kb import get_current_knowledge_bases, get_knowledge_base_information
14
+ from generate_kb import add_links_to_knowledge_base
15
+ from app import client, default_embedding_function, show_sidebar
16
+
17
 
18
  load_dotenv()
19
  openai_key = os.getenv("OPENAI_API_KEY")
20
+ show_sidebar()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
+ def transcribe_audio(audio_path, openai_key, chunk_length=10000):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  """
25
  Transcribe audio by breaking it into chunks using wave and numpy.
26
  :param audio_path: Path to the audio file (e.g., "video.wav").
 
28
  :return: Full transcription of the audio file.
29
  """
30
  # Open the wave file
31
+ client = OpenAI(api_key=openai_key)
32
 
33
  with wave.open(audio_path, "rb") as audio:
34
  frame_rate = audio.getframerate()
 
98
  video_title = info_dict.get("title", None)
99
 
100
  # audio_file = open("video.wav", "rb")
101
+ text = transcribe_audio(audio_path="video.wav", openai_key=openai_key)
102
  f_out_path = f"{video_title}.txt"
103
  with open(f"{video_title}.txt", "w") as f_out:
104
  f_out.write(text)
 
115
  os.remove("temp_chunk.wav")
116
 
117
 
118
+ if "url_list" not in st.session_state:
119
+ st.session_state["url_list"] = []
120
+
121
+
122
+ def list_manager():
123
+ def add_element():
124
+ if len(user_input) > 0:
125
+ st.session_state["url_list"] += [user_input]
126
+ else:
127
+ st.warning("Enter text")
128
+
129
+ st.text("C'è un bug!!! Cliccare su add due volte!")
130
+ with st.expander("Add urls"):
131
+ user_input = st.text_input("Enter a url")
132
+ add_button = st.button("Add", key="add_button")
133
+ col1, col2 = st.columns((2))
134
+ with col1:
135
+ if add_button:
136
+ add_element()
137
+ with col2:
138
+ if st.button("reset"):
139
+ st.session_state["url_list"] = []
140
+ st.write(st.session_state["url_list"])
141
+
142
+
143
+ st.title("Manage collections")
144
+ kbs = get_current_knowledge_bases(client=client)
145
+ kbs = (kb.name for kb in kbs)
146
+ collection_name = st.selectbox("Select knowledge box", kbs)
147
+ info = {}
148
+ collection = None
149
+
150
+
151
+ if "df" not in st.session_state:
152
+ st.session_state["df"] = pd.DataFrame()
153
+
154
+ col1, col2 = st.columns(2)
155
+
156
+ if st.button("Get All"):
157
+ collection_info, coll, client = get_knowledge_base_information(
158
+ client=client,
159
+ embedding_function=default_embedding_function,
160
+ kb_name=collection_name,
161
+ )
162
+ st.session_state["collection"] = coll
163
+ st.session_state["client"] = client
164
+ collection = coll
165
+
166
+ df = pd.DataFrame.from_records(collection_info)
167
+ df["source"] = df["metadatas"].apply(lambda x: x.get("source", "unkown"))
168
+ df["title"] = df["metadatas"].apply(lambda x: x.get("title", "unkown"))
169
+ df = df[["documents", "source", "title", "ids"]]
170
+ st.session_state["df"] = df
171
+
172
+ if len(st.session_state["df"]) != 0:
173
+ st.dataframe(st.session_state["df"], width=3_000)
174
+ unique_df = st.session_state["df"]["source"].unique()
175
+ st.text(f"unique urls: {len(unique_df)}")
176
+ st.dataframe(unique_df)
177
+ else:
178
+ st.warning(f"{collection_name} KB is empty")
179
+
180
+
181
+ tab1, tab2, tab3, tab4, tab5 = st.tabs(
182
+ ["Remove", "Add URL", "Add CSV", "Add PDF", "Add Youtube"]
183
  )
184
 
185
+ # remove stuff tab
186
+ with tab1:
187
+ # remove a split
188
+ st.header("Remove a split")
189
+ id = st.text_input("Insert a split id")
190
+ if st.button("Remove Id from collection"):
191
+ try:
192
+ if id in st.session_state["df"]["ids"].values.tolist():
193
+ res = st.session_state["collection"].delete(ids=[f"{id}"])
194
+ st.success(f"id {id} deleted")
195
+ else:
196
+ st.error(f"id {id} not in kb")
197
+ except Exception as e:
198
+ st.error(f"{str(e)}")
199
+
200
+ # REMOVE URL
201
+ st.header("Remove url from collection")
202
+ url = st.text_input("remove url")
203
+ if st.button("Remove url from collection"):
204
+ try:
205
+ ids = st.session_state["collection"].get(where={"source": url})["ids"]
206
+ st.session_state["collection"].delete(ids=ids)
207
+ st.success("deleted")
208
+ except Exception as e:
209
+ st.error(str(e))
210
+
211
+
212
+ # ADD URL
213
+ with tab2:
214
+ st.header("Add url to existing collection")
215
+ url_text = st.text_input(
216
+ "Insert a url link",
217
+ help="This should be text stored in a webpage like wikipedia. NB notion pages are not supported yet!",
218
+ )
219
+ if st.button("add url to collection"):
220
+ urls = [url_text] # put in a list even if only one
221
+ res = add_links_to_knowledge_base(
222
+ client=client, kb_name=collection_name, urls=urls
223
+ )
224
+ st.write(res)
225
+
226
+
227
+ # ADD CSV
228
+ with tab3:
229
+ list_manager()
230
+
231
+ # st.header("Add csv to existing collection")
232
+ # uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
233
+ # df = None
234
+ # if uploaded_file is not None:
235
+ # try:
236
+ # new_df = pd.read_csv(uploaded_file)
237
+ # st.write("DataFrame:")
238
+ # st.write(new_df)
239
+ # except Exception as e:
240
+ # st.error(str(e))
241
+ # if st.button("add csv urls to collection"):
242
+ # urls = new_df.values.tolist()
243
+ # st.write(urls)
244
+ if st.button("add csv urls to collection"):
245
+ res = add_links_to_knowledge_base(
246
+ client=client, kb_name=collection_name, urls=st.session_state["url_list"]
247
+ )
248
+ st.write(res)
249
+
250
+
251
+ # Add PDF
252
+ with tab4:
253
+ st.header("Add pdf to existing collection")
254
+ uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
255
+ pdf_optional_link = st.text_input(
256
+ "Insert a URL link you want to associate with the pdf"
257
+ )
258
+ pdf_title = st.text_input("This title will be displayed as a resource in ask brian")
259
+ if st.button("add pdf"):
260
+ # Create a temporary file
261
+ with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
262
+ # Write the uploaded PDF to the temporary file
263
+ tmp_file.write(uploaded_file.getvalue())
264
+ tmp_path = tmp_file.name
265
+ print("PATH: ", tmp_path)
266
+ urls = [tmp_path]
267
+ res = add_links_to_knowledge_base(
268
+ client=client,
269
+ kb_name=collection_name,
270
+ urls=urls,
271
+ pdf_optional_link=pdf_optional_link,
272
+ pdf_title=pdf_title,
273
+ )
274
+ st.write(res)
275
+ # Clean up: delete the temporary file
276
+ os.remove(tmp_path)
277
+
278
+
279
+ # Add YOUTUBE
280
+ with tab5:
281
+ st.header("Add youtube video to collection")
282
+ st.image(
283
+ "",
284
+ width=200, # Manually Adjust the width of the image as per requirement
285
+ )
286
+
287
+ video_url = st.text_input("Youtube video url")
288
+ st.text(
289
+ "Aggiungere il video puo impiegare un bel pò. Avvia e vatti a fare una canna"
290
+ )
291
+ if st.button("Add video"):
292
+ # Download and transcribe the YouTube video
293
+ # Any exception is caught below and shown as an error message
294
+ try:
295
+ download_and_transcribe_youtube(video_url)
296
+ st.success("Video Added")
297
+ except Exception as e:
298
+ st.error(f"{str(e)}")
requirements.txt CHANGED
@@ -14,4 +14,5 @@ librosa
14
  future
15
  yt-dlp
16
  pysqlite3>=0.5.2
17
- pyspellchecker>=0.8.1
 
 
14
  future
15
  yt-dlp
16
  pysqlite3>=0.5.2
17
+ pyspellchecker>=0.8.1
18
+ beautifulsoup4>=4.12.2
retrieve_kb.py CHANGED
@@ -1,9 +1,12 @@
1
  from fastapi import APIRouter
2
  from utils import get_chroma_client, get_embedding_function
 
 
3
 
4
-
 
5
  router = APIRouter()
6
- default_embedding_function = get_embedding_function()
7
 
8
 
9
  def get_current_knowledge_bases(client):
 
1
  from fastapi import APIRouter
2
  from utils import get_chroma_client, get_embedding_function
3
+ import os
4
+ from dotenv import load_dotenv
5
 
6
+ load_dotenv()
7
+ openai_key = os.getenv("OPENAI_API_KEY")
8
  router = APIRouter()
9
+ default_embedding_function = get_embedding_function(openai_key=openai_key)
10
 
11
 
12
  def get_current_knowledge_bases(client):
utils.py CHANGED
@@ -3,10 +3,10 @@ from chromadb.config import Settings
3
  import chromadb.utils.embedding_functions as embedding_functions
4
  from dotenv import load_dotenv
5
  import streamlit as st
 
6
 
7
  load_dotenv()
8
- # openai_key = os.getenv("OPENAI_API_KEY")
9
- openai_key = st.secrets["OPENAI_API_KEY"]
10
 
11
 
12
  def get_chroma_client(
@@ -25,7 +25,7 @@ def get_chroma_client(
25
  return chroma_client
26
 
27
 
28
- def get_embedding_function(model_name="text-embedding-ada-002"):
29
  openai_ef = embedding_functions.OpenAIEmbeddingFunction(
30
  api_key=openai_key, model_name=model_name
31
  )
 
3
  import chromadb.utils.embedding_functions as embedding_functions
4
  from dotenv import load_dotenv
5
  import streamlit as st
6
+ import os
7
 
8
  load_dotenv()
9
+ openai_key = os.getenv("OPENAI_API_KEY")
 
10
 
11
 
12
  def get_chroma_client(
 
25
  return chroma_client
26
 
27
 
28
+ def get_embedding_function(openai_key, model_name="text-embedding-ada-002"):
29
  openai_ef = embedding_functions.OpenAIEmbeddingFunction(
30
  api_key=openai_key, model_name=model_name
31
  )