awacke1 committed on
Commit 20d41ee • 1 Parent(s): 4c9dbd0

Update app.py

Files changed (1)
  1. app.py +275 -159
app.py CHANGED
@@ -53,7 +53,14 @@ st.set_page_config(
     }
 )
 
-
+client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
+MODEL = "gpt-4o-2024-05-13"
+if "openai_model" not in st.session_state:
+    st.session_state["openai_model"] = MODEL
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if st.button("Clear Session"):
+    st.session_state.messages = []
 
 # HTML5 based Speech Synthesis (Text to Speech in Browser)
 @st.cache_resource
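The block added at the top of app.py creates the OpenAI client once and seeds Streamlit session state for the chat history. A minimal standalone sketch of the same pattern, assuming only that OPENAI_API_KEY (and optionally OPENAI_ORG_ID) are set in the environment; it is not part of the commit:

import os
import streamlit as st
from openai import OpenAI

# One client per script run, configured from environment variables.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), organization=os.getenv("OPENAI_ORG_ID"))
MODEL = "gpt-4o-2024-05-13"

# st.session_state survives Streamlit reruns, so the message list accumulates the conversation.
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = MODEL
if "messages" not in st.session_state:
    st.session_state.messages = []

# Any button click reruns the script; resetting the list here clears the chat.
if st.button("Clear Session"):
    st.session_state.messages = []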
@@ -359,6 +366,19 @@ def display_glossary_grid(roleplaying_glossary):
             st.markdown(f"**{term}** <small>{links_md}</small>", unsafe_allow_html=True)
 
 
+# ChatBot client chat completions ------------------------- !!
+def process_text2(MODEL='gpt-4o-2024-05-13', text_input='What is 2+2 and what is an imaginary number'):
+    if text_input:
+        completion = client.chat.completions.create(
+            model=MODEL,
+            messages=st.session_state.messages
+        )
+        return_text = completion.choices[0].message.content
+        st.write("Assistant: " + return_text)
+        filename = generate_filename(text_input, "md")
+        create_file(filename, text_input, return_text, should_save)
+        return return_text
+
 @st.cache_resource
 def get_table_download_link(file_path):
 
@@ -576,10 +596,26 @@ def FileSidebar():
 
         if next_action=='md':
             st.markdown(file_contents)
+            SpeechSynthesis(file_contents)
+
             buttonlabel = '🔍Run'
             if st.button(key='Runmd', label = buttonlabel):
-                user_prompt = file_contents
-                #try:
+                MODEL = "gpt-4o-2024-05-13"
+                openai.api_key = os.getenv('OPENAI_API_KEY')
+                openai.organization = os.getenv('OPENAI_ORG_ID')
+                client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
+                st.session_state.messages.append({"role": "user", "content": transcript})
+                with st.chat_message("user"):
+                    st.markdown(transcript)
+                with st.chat_message("assistant"):
+                    completion = client.chat.completions.create(
+                        model=MODEL,
+                        messages = st.session_state.messages,
+                        stream=True
+                    )
+                response = process_text2(text_input=prompt)
+                st.session_state.messages.append({"role": "assistant", "content": response})
+                #try:
                 #search_glossary(file_contents)
                 #except:
                 #st.markdown('GPT is sleeping. Restart ETA 30 seconds.')
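Both process_text2 (added earlier in this diff at line 369) and this Runmd handler send the full st.session_state.messages history to the chat completions API, so the user turn has to be appended before the call. A hedged sketch of that flow, including one way to consume a stream=True response (assumes Streamlit 1.31+ for st.write_stream; only client and MODEL are taken from the app, the rest is hypothetical):

prompt = "Summarize this file"
st.session_state.messages.append({"role": "user", "content": prompt})

with st.chat_message("assistant"):
    stream = client.chat.completions.create(
        model=MODEL,
        messages=st.session_state.messages,
        stream=True,                 # yields chunks instead of one response object
    )
    reply = st.write_stream(stream)  # renders incrementally and returns the full text

st.session_state.messages.append({"role": "assistant", "content": reply})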
@@ -752,7 +788,7 @@ def display_videos_and_links(num_columns):
             display_glossary_entity(k)
         col_index += 1 # Increment column index to place the next video in the next column
 
-@st.cache_resource
+#@st.cache_resource
 def display_images_and_wikipedia_summaries(num_columns=4):
     image_files = [f for f in os.listdir('.') if f.endswith('.png')]
     if not image_files:
@@ -1242,35 +1278,6 @@ def get_audio_download_link(file_path):
 
 
 
-
-# 🎡 Wav Audio files - Transcription History in Wav
-all_files = glob.glob("*.wav")
-all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
-all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
-
-filekey = 'delall'
-if st.sidebar.button("🗑 Delete All Audio", key=filekey):
-    for file in all_files:
-        os.remove(file)
-    st.rerun()
-
-for file in all_files:
-    col1, col2 = st.sidebar.columns([6, 1]) # adjust the ratio as needed
-    with col1:
-        st.markdown(file)
-        if st.button("🎡", key="play_" + file): # play emoji button
-            audio_file = open(file, 'rb')
-            audio_bytes = audio_file.read()
-            st.audio(audio_bytes, format='audio/wav')
-            #st.markdown(get_audio_download_link(file), unsafe_allow_html=True)
-            #st.text_input(label="", value=file)
-    with col2:
-        if st.button("🗑", key="delete_" + file):
-            os.remove(file)
-            st.rerun()
-
-
-
 GiveFeedback=False
 if GiveFeedback:
     with st.expander("Give your feedback 👍", expanded=False):
@@ -1336,18 +1343,6 @@ def transcribe_canary(filename):
     st.write(result)
     return result
 
-# ChatBot client chat completions ------------------------- !!
-def process_text2(MODEL='gpt-4o-2024-05-13', text_input='What is 2+2 and what is an imaginary number'):
-    if text_input:
-        completion = client.chat.completions.create(
-            model=MODEL,
-            messages=st.session_state.messages
-        )
-        return_text = completion.choices[0].message.content
-        st.write("Assistant: " + return_text)
-        filename = generate_filename(text_input, "md")
-        create_file(filename, text_input, return_text, should_save)
-        return return_text
 
 # Transcript to arxiv and client chat completion ------------------------- !!
 filename = save_and_play_audio(audio_recorder)
@@ -1397,12 +1392,12 @@ if example_input:
     for example_input in session_state["search_queries"]:
         st.write(example_input)
 
-    if st.button("Run Prompt", help="Click to run."):
-        try:
-            response=StreamLLMChatResponse(example_input)
-            create_file(filename, example_input, response, should_save)
-        except:
-            st.write('model is asleep. Starting now on A10 GPU. Please wait one minute then retry. KEDA triggered.')
+    #if st.button("Run Prompt", help="Click to run."):
+    #    try:
+    #        response=StreamLLMChatResponse(example_input)
+    #        create_file(filename, example_input, response, should_save)
+    #    except:
+    #        st.write('model is asleep. Starting now on A10 GPU. Please wait one minute then retry. KEDA triggered.')
 
 openai.api_key = os.getenv('OPENAI_API_KEY')
 if openai.api_key == None: openai.api_key = st.secrets['OPENAI_API_KEY']
@@ -1446,7 +1441,7 @@ if AddAFileForContext:
         st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
 
 
-# documentation
+# GPT4o documentation
 # 1. Cookbook: https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o
 # 2. Configure your Project and Orgs to limit/allow Models: https://platform.openai.com/settings/organization/general
 # 3. Watch your Billing! https://platform.openai.com/settings/organization/billing/overview
@@ -1487,17 +1482,36 @@ def process_text(text_input):
 
     #st.write("Assistant: " + completion.choices[0].message.content)
 
+def create_file(filename, prompt, response, is_image=False):
+    with open(filename, "w", encoding="utf-8") as f:
+        f.write(prompt + "\n\n" + response)
 
-
-
-
-def save_image(image_input, filename):
-    # Save the uploaded image file
+def save_image_old2(image, filename):
+    with open(filename, "wb") as f:
+        f.write(image.getbuffer())
+
+# Now filename length protected for linux and windows filename lengths
+def save_image(image, filename):
+    max_filename_length = 250
+    filename_stem, extension = os.path.splitext(filename)
+    truncated_stem = filename_stem[:max_filename_length - len(extension)] if len(filename) > max_filename_length else filename_stem
+    filename = f"{truncated_stem}{extension}"
     with open(filename, "wb") as f:
-        f.write(image_input.getvalue())
+        f.write(image.getbuffer())
     return filename
+
+def extract_boldface_terms(text):
+    return re.findall(r'\*\*(.*?)\*\*', text)
+
+def extract_title(text):
+    boldface_terms = re.findall(r'\*\*(.*?)\*\*', text)
+    if boldface_terms:
+        title = ' '.join(boldface_terms)
+    else:
+        title = re.sub(r'[^a-zA-Z0-9_\-]', ' ', text[-200:])
+    return title[-200:]
 
-def process_image(image_input):
+def process_image(image_input, user_prompt):
     if image_input:
         st.markdown('Processing image: ' + image_input.name )
         if image_input:
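The new save_image keeps the stem plus extension within a 250-character budget before writing. A small worked example of that truncation arithmetic, using a hypothetical filename:

import os

max_filename_length = 250
filename = "x" * 300 + ".png"                           # 304 characters overall
filename_stem, extension = os.path.splitext(filename)  # 300-char stem, ".png"
truncated_stem = filename_stem[:max_filename_length - len(extension)]  # keep 246
print(len(truncated_stem + extension))                  # 250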
@@ -1507,7 +1521,7 @@ def process_image(image_input):
             messages=[
                 {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
                 {"role": "user", "content": [
-                    {"type": "text", "text": "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."},
+                    {"type": "text", "text": user_prompt},
                     {"type": "image_url", "image_url": {
                         "url": f"data:image/png;base64,{base64_image}"}
                     }
@@ -1528,79 +1542,65 @@ def process_image(image_input):
     with open(filename_md, "w", encoding="utf-8") as f:
         f.write(image_response)
 
-    # Save copy of image with original filename
-    filename_img = image_input.name
-    save_image(image_input, filename_img)
+    # Extract boldface terms from image_response then autoname save file
+    #boldface_terms = extract_boldface_terms(image_response)
+    boldface_terms = extract_title(image_response).replace(':','')
+    filename_stem, extension = os.path.splitext(image_input.name)
+    filename_img = f"{filename_stem} {''.join(boldface_terms)}{extension}"
+    newfilename = save_image(image_input, filename_img)
+    filename_md = newfilename.replace('.png', '.md')
+    create_file(filename_md, '', image_response, True)
 
     return image_response
 
-def save_imageold(image_input, filename_txt):
-    # Save the uploaded video file
-    with open(filename_txt, "wb") as f:
-        f.write(image_input.getbuffer())
-    return image_input.name
-
-def process_imageold(image_input):
-    if image_input:
-        base64_image = base64.b64encode(image_input.read()).decode("utf-8")
-        response = client.chat.completions.create(
-            model=MODEL,
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant that responds in Markdown."},
-                {"role": "user", "content": [
-                    {"type": "text", "text": "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."},
-                    {"type": "image_url", "image_url": {
-                        "url": f"data:image/png;base64,{base64_image}"}
-                    }
-                ]}
-            ],
-            temperature=0.0,
-        )
-        image_response = response.choices[0].message.content
-        st.markdown(image_response)
-
-        filename_txt = generate_filename(image_response, "md") # Save markdown on image AI output from gpt4o
-        create_file(filename_txt, image_response, '', True) #create_file() # create_file() 3 required positional arguments: 'filename', 'prompt', and 'response'
-
-        filename_txt = generate_filename(image_response, "png")
-        save_image(image_input, filename_txt) # Save copy of image with new filename
-        #st.rerun() # rerun to show new image and new markdown files
-
-        return image_response
-
+def create_audio_file(filename, audio_data, should_save):
+    if should_save:
+        with open(filename, "wb") as file:
+            file.write(audio_data.getvalue())
+        st.success(f"Audio file saved as {filename}")
+    else:
+        st.warning("Audio file not saved.")
 
-def process_audio(audio_input):
+def process_audio(audio_input, text_input):
     if audio_input:
         transcription = client.audio.transcriptions.create(
             model="whisper-1",
            file=audio_input,
         )
-        response = client.chat.completions.create(
-            model=MODEL,
-            messages=[
-                {"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
-                {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription.text}"}],}
-            ],
-            temperature=0,
-        )
-        st.markdown(response.choices[0].message.content)
+        st.session_state.messages.append({"role": "user", "content": transcription.text})
+        with st.chat_message("assistant"):
+            st.markdown(transcription.text)
+
+        SpeechSynthesis(transcription.text)
+        filename = generate_filename(transcription.text, "wav")
+
+        create_audio_file(filename, audio_input, should_save)
+
+        #SpeechSynthesis(transcription.text)
+
+        filename = generate_filename(transcription.text, "md")
+        create_file(filename, transcription.text, transcription.text, should_save)
+        #st.markdown(response.choices[0].message.content)
 
 def process_audio_for_video(video_input):
     if video_input:
-        transcription = client.audio.transcriptions.create(
-            model="whisper-1",
-            file=video_input,
-        )
-        response = client.chat.completions.create(
-            model=MODEL,
-            messages=[
-                {"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
-                {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}],}
-            ],
-            temperature=0,
-        )
-        st.markdown(response.choices[0].message.content)
-        return response.choices[0].message.content
+        try:
+            transcription = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=video_input,
+            )
+            response = client.chat.completions.create(
+                model=MODEL,
+                messages=[
+                    {"role": "system", "content":"""You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."""},
+                    {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}],}
+                ],
+                temperature=0,
+            )
+            st.markdown(response.choices[0].message.content)
+            return response.choices[0].message.content
+        except:
+            st.write('No transcript')
 
 def save_video(video_file):
     # Save the uploaded video file
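process_audio now pushes the raw Whisper transcription into the chat history instead of summarizing it. A minimal sketch of just the transcription step, assuming client is the OpenAI client from app.py and audio_input is a Streamlit UploadedFile (the SDK accepts file-like objects):

# audio_input comes from st.file_uploader; transcription.text is plain text.
transcription = client.audio.transcriptions.create(
    model="whisper-1",
    file=audio_input,
)
st.session_state.messages.append({"role": "user", "content": transcription.text})
st.markdown(transcription.text)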
@@ -1631,10 +1631,16 @@ def process_video(video_path, seconds_per_frame=2):
 
     # Extract audio from video
     audio_path = f"{base_video_path}.mp3"
-    clip = VideoFileClip(video_path)
-    clip.audio.write_audiofile(audio_path, bitrate="32k")
-    clip.audio.close()
-    clip.close()
+    try:
+        clip = VideoFileClip(video_path)
+
+        clip.audio.write_audiofile(audio_path, bitrate="32k")
+        clip.audio.close()
+
+        clip.close()
+    except:
+        st.write('No audio track found, moving on..')
+
 
     print(f"Extracted {len(base64Frames)} frames")
     print(f"Extracted audio to {audio_path}")
@@ -1669,8 +1675,9 @@ def process_audio_and_video(video_input):
     results = response.choices[0].message.content
     st.markdown(results)
 
-    filename = generate_filename(transcript, "md")
-    create_file(filename, transcript, results, should_save)
+    if transcript:
+        filename = generate_filename(transcript, "md")
+        create_file(filename, transcript, results, should_save)
 
 
 
@@ -1683,53 +1690,139 @@ def main():
         if (text_input > ''):
             textResponse = process_text(text_input)
     elif option == "Image":
+        text = "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."
+        text_input = st.text_input(label="Enter text prompt to use with Image context.", value=text)
         image_input = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
-        image_response = process_image(image_input)
+        image_response = process_image(image_input, text_input)
 
-    elif option == "Audio":
-        audio_input = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
-        process_audio(audio_input)
+    elif option == "Audio":
+        text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
+        text_input = st.text_input(label="Enter text prompt to use with Audio context.", value=text)
+        uploaded_files = st.file_uploader("Upload an audio file", type=["mp3", "wav"], accept_multiple_files=True)
+
+        for audio_input in uploaded_files:
+            st.write(audio_input.name)
+            if audio_input is not None:
+                process_audio(audio_input, text_input)
+
+    elif option == "Audio old":
+        #text = "Transcribe and answer questions as a helpful audio music and speech assistant. "
+        text = "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."
+        text_input = st.text_input(label="Enter text prompt to use with Audio context.", value=text)
+
+        uploaded_files = st.file_uploader("Upload an audio file", type=["mp3", "wav"], accept_multiple_files=True)
+        for audio_input in uploaded_files:
+            st.write(audio_input.name)
+
+            if audio_input is not None:
+                # To read file as bytes:
+                bytes_data = uploaded_file.getvalue()
+                #st.write(bytes_data)
+
+                # To convert to a string based IO:
+                #stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+                #st.write(stringio)
+
+                # To read file as string:
+                #string_data = stringio.read()
+                #st.write(string_data)
+
+                process_audio(audio_input, text_input)
+
     elif option == "Video":
         video_input = st.file_uploader("Upload a video file", type=["mp4"])
         process_audio_and_video(video_input)
 
-    # Image and Video Galleries
-    num_columns_images=st.slider(key="num_columns_images", label="Choose Number of Image Columns", min_value=1, max_value=15, value=5)
-    display_images_and_wikipedia_summaries(num_columns_images) # Image Jump Grid
-
-    num_columns_video=st.slider(key="num_columns_video", label="Choose Number of Video Columns", min_value=1, max_value=15, value=5)
-    display_videos_and_links(num_columns_video) # Video Jump Grid
-
-    # Optional UI's
-    showExtendedTextInterface=False
-    if showExtendedTextInterface:
-        display_glossary_grid(roleplaying_glossary) # Word Glossary Jump Grid - Dynamically calculates columns based on details length to keep topic together
-        num_columns_text=st.slider(key="num_columns_text", label="Choose Number of Text Columns", min_value=1, max_value=15, value=4)
-        display_buttons_with_scores(num_columns_text) # Feedback Jump Grid
-        st.markdown(personality_factors)
-
-# st.title("GPT-4o ChatBot")
-
-client = OpenAI(api_key= os.getenv('OPENAI_API_KEY'), organization=os.getenv('OPENAI_ORG_ID'))
-MODEL = "gpt-4o-2024-05-13"
-if "openai_model" not in st.session_state:
-    st.session_state["openai_model"] = MODEL
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-if st.button("Clear Session"):
-    st.session_state.messages = []
-
-current_messages=[]
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        current_messages.append(message)
-        st.markdown(message["content"])
+# Enter the GPT-4o omni model in streamlit chatbot
+current_messages=[]
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        current_messages.append(message)
+        st.markdown(message["content"])
+
+# 🎡 Wav Audio files - Transcription History in Wav
+audio_files = glob.glob("*.wav")
+audio_files = [file for file in audio_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
+audio_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
+
+# 🖼 PNG Image files
+image_files = glob.glob("*.png")
+image_files = [file for file in image_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
+image_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
+
+# 🎥 MP4 Video files
+video_files = glob.glob("*.mp4")
+video_files = [file for file in video_files if len(os.path.splitext(file)[0]) >= 10] # exclude files with short names
+video_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True) # sort by file type and file name in descending order
+
+main()
+
+# Delete All button for each file type
+if st.sidebar.button("🗑 Delete All Audio"):
+    for file in audio_files:
+        os.remove(file)
+    st.rerun()
+
+if st.sidebar.button("🗑 Delete All Images"):
+    for file in image_files:
+        os.remove(file)
+    st.rerun()
+
+if st.sidebar.button("🗑 Delete All Videos"):
+    for file in video_files:
+        os.remove(file)
+    st.rerun()
+
+# Display and handle audio files
+for file in audio_files:
+    col1, col2 = st.sidebar.columns([6, 1]) # adjust the ratio as needed
+    with col1:
+        st.markdown(file)
+        if st.button("🎡", key="play_" + file): # play emoji button
+            audio_file = open(file, 'rb')
+            audio_bytes = audio_file.read()
+            st.audio(audio_bytes, format='audio/wav')
+    with col2:
+        if st.button("🗑", key="delete_" + file):
+            os.remove(file)
+            st.rerun()
+
+# Display and handle image files
+for file in image_files:
+    col1, col2 = st.sidebar.columns([6, 1]) # adjust the ratio as needed
+    with col1:
+        st.markdown(file)
+        if st.button("🖼", key="show_" + file): # show emoji button
+            image = open(file, 'rb').read()
+            st.image(image)
+    with col2:
+        if st.button("🗑", key="delete_" + file):
+            os.remove(file)
+            st.rerun()
+
+# Display and handle video files
+for file in video_files:
+    col1, col2 = st.sidebar.columns([6, 1]) # adjust the ratio as needed
+    with col1:
+        st.markdown(file)
+        if st.button("🎥", key="play_" + file): # play emoji button
+            video_file = open(file, 'rb')
+            video_bytes = video_file.read()
+            st.video(video_bytes)
+    with col2:
+        if st.button("🗑", key="delete_" + file):
+            os.remove(file)
+            st.rerun()
 
 # ChatBot Entry
 if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
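The sidebar galleries added above derive every button key from the filename so Streamlit can tell the widgets apart across reruns; deleting a file then calls st.rerun() so the listing refreshes. A condensed sketch of the audio variant, assuming the audio_files list built by the glob in the hunk above and the app's existing imports:

for file in audio_files:
    col1, col2 = st.sidebar.columns([6, 1])
    with col1:
        st.markdown(file)
        if st.button("🎡", key="play_" + file):       # unique key per file
            st.audio(open(file, "rb").read(), format="audio/wav")
    with col2:
        if st.button("🗑", key="delete_" + file):
            os.remove(file)
            st.rerun()                                 # rerun so the deleted file disappears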
@@ -1745,5 +1838,28 @@ if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
         response = process_text2(text_input=prompt)
         st.session_state.messages.append({"role": "assistant", "content": response})
 
-if __name__ == "__main__":
-    main()
+
+
+
+
+# Image and Video Galleries
+num_columns_images=st.slider(key="num_columns_images", label="Choose Number of Image Columns", min_value=1, max_value=15, value=3)
+display_images_and_wikipedia_summaries(num_columns_images) # Image Jump Grid
+
+num_columns_video=st.slider(key="num_columns_video", label="Choose Number of Video Columns", min_value=1, max_value=15, value=3)
+display_videos_and_links(num_columns_video) # Video Jump Grid
+
+
+# Optional UI's
+showExtendedTextInterface=False
+if showExtendedTextInterface:
+    display_glossary_grid(roleplaying_glossary) # Word Glossary Jump Grid - Dynamically calculates columns based on details length to keep topic together
+    num_columns_text=st.slider(key="num_columns_text", label="Choose Number of Text Columns", min_value=1, max_value=15, value=4)
+    display_buttons_with_scores(num_columns_text) # Feedback Jump Grid
+    st.markdown(personality_factors)
+
+
+
+
+#if __name__ == "__main__":
+