awacke1 committed on
Commit
8be3685
β€’
1 Parent(s): e63c12a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -96
app.py CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
2
  import streamlit.components.v1 as components
3
  import huggingface_hub
4
  import gradio_client as gc
5
-
6
  import os
7
  import json
8
  import random
@@ -17,7 +16,6 @@ import textract
17
  import time
18
  import zipfile
19
  import dotenv
20
-
21
  from gradio_client import Client
22
  from audio_recorder_streamlit import audio_recorder
23
  from bs4 import BeautifulSoup
@@ -33,26 +31,11 @@ from xml.etree import ElementTree as ET
33
  from PIL import Image
34
  from urllib.parse import quote # Ensure this import is included
35
 
36
-
37
- ## Show examples
38
- sample_outputs = {
39
- 'output_placeholder': 'The LLM will provide an answer to your question here...',
40
- 'search_placeholder': '1. What is MoE, Multi Agent Systems, Self Rewarding AI, Semantic and Episodic memory, What is AutoGen, ChatDev, Omniverse, Lumiere, SORA?'
41
- }
42
-
43
- def save_file(content, file_type):
44
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
45
- file_name = f"{file_type}_{timestamp}.md"
46
- with open(file_name, "w") as file:
47
- file.write(content)
48
- return file_name
49
-
50
  def load_file(file_name):
51
  with open(file_name, "r") as file:
52
  content = file.read()
53
  return content
54
 
55
-
56
  # HTML5 based Speech Synthesis (Text to Speech in Browser)
57
  @st.cache_resource
58
  def SpeechSynthesis(result):
@@ -84,33 +67,22 @@ def SpeechSynthesis(result):
84
  components.html(documentHTML5, width=1280, height=300)
85
 
86
  def parse_to_markdown(text):
87
- # Split text into fields by | character
88
  fields = text.split("|")
89
-
90
  markdown = ""
91
  for field in fields:
92
- # Remove leading/trailing quotes and whitespace
93
  field = field.strip(" '")
94
-
95
- # Add field to markdown with whitespace separator
96
  markdown += field + "\n\n"
97
-
98
  return markdown
99
 
100
  def search_arxiv(query):
101
-
102
  # Show ArXiv Scholary Articles! ----------------*************-------------***************----------------------------------------
103
- # st.title("▢️ Semantic and Episodic Memory System")
104
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
105
-
106
  search_query = query
107
  #top_n_results = st.slider(key='topnresults', label="Top n results as context", min_value=4, max_value=100, value=100)
108
  #search_source = st.sidebar.selectbox(key='searchsource', label="Search Source", ["Semantic Search - up to 10 Mar 2024", "Arxiv Search - Latest - (EXPERIMENTAL)"])
109
  search_source = "Arxiv Search - Latest - (EXPERIMENTAL)" # "Semantic Search - up to 10 Mar 2024"
110
  #llm_model = st.sidebar.selectbox(key='llmmodel', label="LLM Model", ["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.2", "google/gemma-7b-it", "None"])
111
  llm_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
112
-
113
-
114
  st.sidebar.markdown('### πŸ”Ž ' + query)
115
  result = client.predict(
116
  search_query,
@@ -127,16 +99,8 @@ def search_arxiv(query):
127
  SpeechSynthesis(result) # Search History Reader / Writer IO Memory - Audio at Same time as Reading.
128
  filename=generate_filename(result, "md")
129
  create_file(filename, query, result, should_save)
130
-
131
-
132
- #file_type = st.radio("Select Which Type of Memory You Prefer:", ("Semantic", "Episodic"))
133
- #if st.button("Save"):
134
- # file_name = save_file(result, file_type)
135
- # st.success(f"File saved: {file_name}")
136
-
137
  saved_files = [f for f in os.listdir(".") if f.endswith(".md")]
138
  selected_file = st.sidebar.selectbox("Saved Files", saved_files)
139
-
140
  if selected_file:
141
  file_content = load_file(selected_file)
142
  st.sidebar.markdown(file_content)
@@ -177,7 +141,6 @@ def display_glossary_grid(roleplaying_glossary):
177
  "πŸ“š": lambda k: f"https://huggingface.co/spaces/awacke1/World-Ship-Design?q={quote(k)}-{quote(PromptPrefix2)}", # this url plus query!
178
  "πŸ”¬": lambda k: f"https://huggingface.co/spaces/awacke1/World-Ship-Design?q={quote(k)}-{quote(PromptPrefix3)}", # this url plus query!
179
  }
180
-
181
  for category, details in roleplaying_glossary.items():
182
  st.write(f"### {category}")
183
  cols = st.columns(len(details)) # Create dynamic columns based on the number of games
@@ -187,7 +150,6 @@ def display_glossary_grid(roleplaying_glossary):
187
  for term in terms:
188
  gameterm = category + ' - ' + game + ' - ' + term
189
  links_md = ' '.join([f"[{emoji}]({url(gameterm)})" for emoji, url in search_urls.items()])
190
- #links_md = ' '.join([f"[{emoji}]({url(term)})" for emoji, url in search_urls.items()])
191
  st.markdown(f"{term} {links_md}", unsafe_allow_html=True)
192
 
193
  def display_glossary_entity(k):
@@ -206,10 +168,6 @@ def display_glossary_entity(k):
206
  links_md = ' '.join([f"[{emoji}]({url(k)})" for emoji, url in search_urls.items()])
207
  st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
208
 
209
-
210
-
211
- #st.markdown('''### πŸ“–βœ¨πŸ” Arxiv-Paper-Search-QA-RAG-Streamlit-Gradio-AP ''')
212
-
213
  roleplaying_glossary = {
214
  "πŸ€– AI Concepts": {
215
  "MoE (Mixture of Experts) 🧠": [
@@ -383,7 +341,6 @@ def get_table_download_link(file_path):
383
  href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
384
  return href
385
 
386
-
387
  @st.cache_resource
388
  def create_zip_of_files(files): # ----------------------------------
389
  zip_name = "Arxiv-Paper-Search-QA-RAG-Streamlit-Gradio-AP.zip"
@@ -402,7 +359,6 @@ def get_zip_download_link(zip_file):
402
 
403
  def FileSidebar():
404
  # ----------------------------------------------------- File Sidebar for Jump Gates ------------------------------------------
405
- # Compose a file sidebar of markdown md files:
406
  all_files = glob.glob("*.md")
407
  all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
408
  all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
@@ -438,19 +394,16 @@ def FileSidebar():
438
  if st.button("πŸ—‘", key="delete_"+file):
439
  os.remove(file)
440
  st.experimental_rerun()
441
-
442
 
443
  if len(file_contents) > 0:
444
  if next_action=='open':
445
  file_content_area = st.text_area("File Contents:", file_contents, height=500)
446
- #try:
447
  if st.button("πŸ”", key="filecontentssearch"):
448
  #search_glossary(file_content_area)
449
  filesearch = PromptPrefix + file_content_area
450
  st.markdown(filesearch)
451
  if st.button(key=rerun, label='πŸ”Re-Spec' ):
452
  search_glossary(filesearch)
453
- #except:
454
  st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
455
 
456
  if next_action=='md':
@@ -458,28 +411,21 @@ def FileSidebar():
458
  buttonlabel = 'πŸ”Run'
459
  if st.button(key='Runmd', label = buttonlabel):
460
  user_prompt = file_contents
461
- #try:
462
  search_glossary(file_contents)
463
- #except:
464
  st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
465
 
466
  if next_action=='search':
467
  file_content_area = st.text_area("File Contents:", file_contents, height=500)
468
  user_prompt = file_contents
469
- #try:
470
- #search_glossary(file_contents)
471
  filesearch = PromptPrefix2 + file_content_area
472
  st.markdown(filesearch)
473
  if st.button(key=rerun, label='πŸ”Re-Code' ):
474
  search_glossary(filesearch)
475
-
476
- #except:
477
  st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
478
  # ----------------------------------------------------- File Sidebar for Jump Gates ------------------------------------------
479
  FileSidebar()
480
 
481
-
482
-
483
  # ---- Art Card Sidebar with Random Selection of image:
484
  def get_image_as_base64(url):
485
  response = requests.get(url)
@@ -794,8 +740,6 @@ def clear_query_params():
794
 
795
  # My Inference API Copy
796
  API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud' # Dr Llama
797
- # Meta's Original - Chat HF Free Version:
798
- #API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"
799
  API_KEY = os.getenv('API_KEY')
800
  MODEL1="meta-llama/Llama-2-7b-chat-hf"
801
  MODEL1URL="https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
@@ -809,8 +753,6 @@ prompt = "...."
809
  should_save = st.sidebar.checkbox("πŸ’Ύ Save", value=True, help="Save your session data.")
810
 
811
 
812
-
813
-
814
  # 3. Stream Llama Response
815
  # @st.cache_resource
816
  def StreamLLMChatResponse(prompt):
@@ -1253,28 +1195,11 @@ if 'action' in st.query_params:
1253
  clear_query_params()
1254
  st.experimental_rerun()
1255
 
1256
- # Handling repeated keys
1257
- #if 'multi' in st.query_params:
1258
- # multi_values = get_all_query_params('multi')
1259
- # st.write("Values for 'multi':", multi_values)
1260
-
1261
- # Manual entry for demonstration
1262
- #st.write("Enter query parameters in the URL like this: ?action=show_message&multi=1&multi=2")
1263
-
1264
  if 'query' in st.query_params:
1265
  query = st.query_params['query'][0] # Get the query parameter
1266
- # Display content or image based on the query
1267
  display_content_or_image(query)
1268
 
1269
- # Add a clear query parameters button for convenience
1270
- #if st.button("Clear Query Parameters", key='ClearQueryParams'):
1271
- # This will clear the browser URL's query parameters
1272
- # st.experimental_set_query_params
1273
- # st.experimental_rerun()
1274
-
1275
-
1276
  st.markdown("### πŸŽ²πŸ—ΊοΈ Arxiv Paper Search QA RAG MAS using Streamlit and Gradio API")
1277
-
1278
  filename = save_and_play_audio(audio_recorder)
1279
  if filename is not None:
1280
  transcription = transcribe_audio(filename)
@@ -1308,8 +1233,6 @@ if filename is not None:
1308
  os.remove(filename)
1309
 
1310
 
1311
-
1312
-
1313
  prompt = '''
1314
  What is MoE?
1315
  What are Multi Agent Systems?
@@ -1355,10 +1278,6 @@ if openai.api_key == None: openai.api_key = st.secrets['OPENAI_API_KEY']
1355
  menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
1356
  choice = st.sidebar.selectbox("Output File Type:", menu)
1357
 
1358
- #model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))
1359
- #user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)
1360
-
1361
-
1362
  collength, colupload = st.columns([2,3]) # adjust the ratio as needed
1363
  with collength:
1364
  max_length = st.slider(key='maxlength', label="File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
@@ -1391,20 +1310,6 @@ if len(document_sections) > 0:
1391
  create_file(filename, user_prompt, response, should_save)
1392
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
1393
 
1394
- #if st.button('πŸ’¬ Chat'):
1395
- # st.write('Reasoning with your inputs...')
1396
- # user_prompt_sections = divide_prompt(user_prompt, max_length)
1397
- # full_response = ''
1398
- # for prompt_section in user_prompt_sections:
1399
- # response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
1400
- # full_response += response + '\n' # Combine the responses
1401
- # response = full_response
1402
- # st.write('Response:')
1403
- # st.write(response)
1404
- # filename = generate_filename(user_prompt, choice)
1405
- # create_file(filename, user_prompt, response, should_save)
1406
-
1407
  display_glossary_grid(roleplaying_glossary) # Word Glossary Jump Grid
1408
  display_videos_and_links() # Video Jump Grid
1409
  display_images_and_wikipedia_summaries() # Image Jump Grid
1410
- #display_buttons_with_scores() # Feedback Jump Grid
 
2
  import streamlit.components.v1 as components
3
  import huggingface_hub
4
  import gradio_client as gc
 
5
  import os
6
  import json
7
  import random
 
16
  import time
17
  import zipfile
18
  import dotenv
 
19
  from gradio_client import Client
20
  from audio_recorder_streamlit import audio_recorder
21
  from bs4 import BeautifulSoup
 
31
  from PIL import Image
32
  from urllib.parse import quote # Ensure this import is included
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def load_file(file_name):
35
  with open(file_name, "r") as file:
36
  content = file.read()
37
  return content
38
 
 
39
  # HTML5 based Speech Synthesis (Text to Speech in Browser)
40
  @st.cache_resource
41
  def SpeechSynthesis(result):
 
67
  components.html(documentHTML5, width=1280, height=300)
68
 
69
  def parse_to_markdown(text):
 
70
  fields = text.split("|")
 
71
  markdown = ""
72
  for field in fields:
 
73
  field = field.strip(" '")
 
 
74
  markdown += field + "\n\n"
 
75
  return markdown
76
 
77
  def search_arxiv(query):
 
78
  # Show ArXiv Scholary Articles! ----------------*************-------------***************----------------------------------------
 
79
  client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
 
80
  search_query = query
81
  #top_n_results = st.slider(key='topnresults', label="Top n results as context", min_value=4, max_value=100, value=100)
82
  #search_source = st.sidebar.selectbox(key='searchsource', label="Search Source", ["Semantic Search - up to 10 Mar 2024", "Arxiv Search - Latest - (EXPERIMENTAL)"])
83
  search_source = "Arxiv Search - Latest - (EXPERIMENTAL)" # "Semantic Search - up to 10 Mar 2024"
84
  #llm_model = st.sidebar.selectbox(key='llmmodel', label="LLM Model", ["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.2", "google/gemma-7b-it", "None"])
85
  llm_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
 
86
  st.sidebar.markdown('### πŸ”Ž ' + query)
87
  result = client.predict(
88
  search_query,
 
99
  SpeechSynthesis(result) # Search History Reader / Writer IO Memory - Audio at Same time as Reading.
100
  filename=generate_filename(result, "md")
101
  create_file(filename, query, result, should_save)
 
 
 
 
 
 
 
102
  saved_files = [f for f in os.listdir(".") if f.endswith(".md")]
103
  selected_file = st.sidebar.selectbox("Saved Files", saved_files)
 
104
  if selected_file:
105
  file_content = load_file(selected_file)
106
  st.sidebar.markdown(file_content)
 
141
  "πŸ“š": lambda k: f"https://huggingface.co/spaces/awacke1/World-Ship-Design?q={quote(k)}-{quote(PromptPrefix2)}", # this url plus query!
142
  "πŸ”¬": lambda k: f"https://huggingface.co/spaces/awacke1/World-Ship-Design?q={quote(k)}-{quote(PromptPrefix3)}", # this url plus query!
143
  }
 
144
  for category, details in roleplaying_glossary.items():
145
  st.write(f"### {category}")
146
  cols = st.columns(len(details)) # Create dynamic columns based on the number of games
 
150
  for term in terms:
151
  gameterm = category + ' - ' + game + ' - ' + term
152
  links_md = ' '.join([f"[{emoji}]({url(gameterm)})" for emoji, url in search_urls.items()])
 
153
  st.markdown(f"{term} {links_md}", unsafe_allow_html=True)
154
 
155
  def display_glossary_entity(k):
 
168
  links_md = ' '.join([f"[{emoji}]({url(k)})" for emoji, url in search_urls.items()])
169
  st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
170
 
 
 
 
 
171
  roleplaying_glossary = {
172
  "πŸ€– AI Concepts": {
173
  "MoE (Mixture of Experts) 🧠": [
 
341
  href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
342
  return href
343
 
 
344
  @st.cache_resource
345
  def create_zip_of_files(files): # ----------------------------------
346
  zip_name = "Arxiv-Paper-Search-QA-RAG-Streamlit-Gradio-AP.zip"
 
359
 
360
  def FileSidebar():
361
  # ----------------------------------------------------- File Sidebar for Jump Gates ------------------------------------------
 
362
  all_files = glob.glob("*.md")
363
  all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
364
  all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
 
394
  if st.button("πŸ—‘", key="delete_"+file):
395
  os.remove(file)
396
  st.experimental_rerun()
 
397
 
398
  if len(file_contents) > 0:
399
  if next_action=='open':
400
  file_content_area = st.text_area("File Contents:", file_contents, height=500)
 
401
  if st.button("πŸ”", key="filecontentssearch"):
402
  #search_glossary(file_content_area)
403
  filesearch = PromptPrefix + file_content_area
404
  st.markdown(filesearch)
405
  if st.button(key=rerun, label='πŸ”Re-Spec' ):
406
  search_glossary(filesearch)
 
407
  st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
408
 
409
  if next_action=='md':
 
411
  buttonlabel = 'πŸ”Run'
412
  if st.button(key='Runmd', label = buttonlabel):
413
  user_prompt = file_contents
 
414
  search_glossary(file_contents)
 
415
  st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
416
 
417
  if next_action=='search':
418
  file_content_area = st.text_area("File Contents:", file_contents, height=500)
419
  user_prompt = file_contents
 
 
420
  filesearch = PromptPrefix2 + file_content_area
421
  st.markdown(filesearch)
422
  if st.button(key=rerun, label='πŸ”Re-Code' ):
423
  search_glossary(filesearch)
424
+
 
425
  st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
426
  # ----------------------------------------------------- File Sidebar for Jump Gates ------------------------------------------
427
  FileSidebar()
428
 
 
 
429
  # ---- Art Card Sidebar with Random Selection of image:
430
  def get_image_as_base64(url):
431
  response = requests.get(url)
 
740
 
741
  # My Inference API Copy
742
  API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud' # Dr Llama
 
 
743
  API_KEY = os.getenv('API_KEY')
744
  MODEL1="meta-llama/Llama-2-7b-chat-hf"
745
  MODEL1URL="https://huggingface.co/meta-llama/Llama-2-7b-chat-hf"
 
753
  should_save = st.sidebar.checkbox("πŸ’Ύ Save", value=True, help="Save your session data.")
754
 
755
 
 
 
756
  # 3. Stream Llama Response
757
  # @st.cache_resource
758
  def StreamLLMChatResponse(prompt):
 
1195
  clear_query_params()
1196
  st.experimental_rerun()
1197
 
 
 
 
 
 
 
 
 
1198
  if 'query' in st.query_params:
1199
  query = st.query_params['query'][0] # Get the query parameter
 
1200
  display_content_or_image(query)
1201
 
 
 
 
 
 
 
 
1202
  st.markdown("### πŸŽ²πŸ—ΊοΈ Arxiv Paper Search QA RAG MAS using Streamlit and Gradio API")
 
1203
  filename = save_and_play_audio(audio_recorder)
1204
  if filename is not None:
1205
  transcription = transcribe_audio(filename)
 
1233
  os.remove(filename)
1234
 
1235
 
 
 
1236
  prompt = '''
1237
  What is MoE?
1238
  What are Multi Agent Systems?
 
1278
  menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
1279
  choice = st.sidebar.selectbox("Output File Type:", menu)
1280
 
 
 
 
 
1281
  collength, colupload = st.columns([2,3]) # adjust the ratio as needed
1282
  with collength:
1283
  max_length = st.slider(key='maxlength', label="File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
 
1310
  create_file(filename, user_prompt, response, should_save)
1311
  st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
1312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1313
  display_glossary_grid(roleplaying_glossary) # Word Glossary Jump Grid
1314
  display_videos_and_links() # Video Jump Grid
1315
  display_images_and_wikipedia_summaries() # Image Jump Grid