awacke1 committed on
Commit
a51e81e
·
verified ·
1 Parent(s): d4e9ab8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -117
app.py CHANGED
@@ -1,7 +1,48 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- st.set_page_config(layout="wide")
4
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  st.markdown("""
6
  ## Anatomy Head to Toe Table with Body Organs Costly Conditions, Spending, CPT Codes and Frequency
7
 
@@ -31,23 +72,7 @@ st.markdown("""
31
  """)
32
 
33
 
34
- import os
35
- import json
36
- from PIL import Image
37
- from urllib.parse import quote # Ensure this import is included
38
 
39
- # Set page configuration with a title and favicon
40
- st.set_page_config(
41
- page_title="🌌🚀 Mixable AI - Voice Search",
42
- page_icon="🌠",
43
- layout="wide",
44
- initial_sidebar_state="expanded",
45
- menu_items={
46
- 'Get Help': 'https://huggingface.co/awacke1',
47
- 'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload",
48
- 'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558"
49
- }
50
- )
51
 
52
  # Ensure the directory for storing scores exists
53
  score_dir = "scores"
@@ -323,95 +348,6 @@ def display_content_or_image(query):
323
 
324
 
325
 
326
-
327
- # Imports
328
- import base64
329
- import glob
330
- import json
331
- import math
332
- import openai
333
- import os
334
- import pytz
335
- import re
336
- import requests
337
- import streamlit as st
338
- import textract
339
- import time
340
- import zipfile
341
- import huggingface_hub
342
- import dotenv
343
- from audio_recorder_streamlit import audio_recorder
344
- from bs4 import BeautifulSoup
345
- from collections import deque
346
- from datetime import datetime
347
- from dotenv import load_dotenv
348
- from huggingface_hub import InferenceClient
349
- from io import BytesIO
350
- from langchain.chat_models import ChatOpenAI
351
- from langchain.chains import ConversationalRetrievalChain
352
- from langchain.embeddings import OpenAIEmbeddings
353
- from langchain.memory import ConversationBufferMemory
354
- from langchain.text_splitter import CharacterTextSplitter
355
- from langchain.vectorstores import FAISS
356
- from openai import ChatCompletion
357
- from PyPDF2 import PdfReader
358
- from templates import bot_template, css, user_template
359
- from xml.etree import ElementTree as ET
360
- import streamlit.components.v1 as components # Import Streamlit Components for HTML5
361
-
362
-
363
- def add_Med_Licensing_Exam_Dataset():
364
- import streamlit as st
365
- from datasets import load_dataset
366
- dataset = load_dataset("augtoma/usmle_step_1")['test'] # Using 'test' split
367
- st.title("USMLE Step 1 Dataset Viewer")
368
- if len(dataset) == 0:
369
- st.write("😢 The dataset is empty.")
370
- else:
371
- st.write("""
372
- 🔍 Use the search box to filter questions or use the grid to scroll through the dataset.
373
- """)
374
-
375
- # 👩‍🔬 Search Box
376
- search_term = st.text_input("Search for a specific question:", "")
377
-
378
- # 🎛 Pagination
379
- records_per_page = 100
380
- num_records = len(dataset)
381
- num_pages = max(int(num_records / records_per_page), 1)
382
-
383
- # Skip generating the slider if num_pages is 1 (i.e., all records fit in one page)
384
- if num_pages > 1:
385
- page_number = st.select_slider("Select page:", options=list(range(1, num_pages + 1)))
386
- else:
387
- page_number = 1 # Only one page
388
-
389
- # 📊 Display Data
390
- start_idx = (page_number - 1) * records_per_page
391
- end_idx = start_idx + records_per_page
392
-
393
- # 🧪 Apply the Search Filter
394
- filtered_data = []
395
- for record in dataset[start_idx:end_idx]:
396
- if isinstance(record, dict) and 'text' in record and 'id' in record:
397
- if search_term:
398
- if search_term.lower() in record['text'].lower():
399
- st.markdown(record)
400
- filtered_data.append(record)
401
- else:
402
- filtered_data.append(record)
403
-
404
- # 🌐 Render the Grid
405
- for record in filtered_data:
406
- st.write(f"## Question ID: {record['id']}")
407
- st.write(f"### Question:")
408
- st.write(f"{record['text']}")
409
- st.write(f"### Answer:")
410
- st.write(f"{record['answer']}")
411
- st.write("---")
412
-
413
- st.write(f"😊 Total Records: {num_records} | 📄 Displaying {start_idx+1} to {min(end_idx, num_records)}")
414
-
415
  # 1. Constants and Top Level UI Variables
416
 
417
  # My Inference API Copy
@@ -644,7 +580,7 @@ def read_file_content(file,max_length):
644
  else:
645
  return ""
646
 
647
- # 11. Chat with GPT - Caution on quota - now favoring fastest AI pipeline STT Whisper->LLM Llama->TTS
648
  @st.cache_resource
649
  def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
650
  model = model_choice
@@ -676,16 +612,6 @@ def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
676
  st.write(time.time() - start_time)
677
  return full_reply_content
678
 
679
- # 12. Embedding VectorDB for LLM query of documents to text to compress inputs and prompt together as Chat memory using Langchain
680
- @st.cache_resource
681
- def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
682
- conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
683
- conversation.append({'role': 'user', 'content': prompt})
684
- if len(file_content)>0:
685
- conversation.append({'role': 'assistant', 'content': file_content})
686
- response = openai.ChatCompletion.create(model=model_choice, messages=conversation)
687
- return response['choices'][0]['message']['content']
688
-
689
  def extract_mime_type(file):
690
  if isinstance(file, str):
691
  pattern = r"type='(.*?)'"
 
1
  import streamlit as st
2
+ import os
3
+ import json
4
+ from PIL import Image
5
+ from urllib.parse import quote # Ensure this import is included
6
+ import base64
7
+ import glob
8
+ import json
9
+ import math
10
+ import openai
11
+ import os
12
+ import pytz
13
+ import re
14
+ import requests
15
+ import textract
16
+ import time
17
+ import zipfile
18
+ import huggingface_hub
19
+ import dotenv
20
+ from audio_recorder_streamlit import audio_recorder
21
+ from bs4 import BeautifulSoup
22
+ from collections import deque
23
+ from datetime import datetime
24
+ from dotenv import load_dotenv
25
+ from huggingface_hub import InferenceClient
26
+ from io import BytesIO
27
+ from openai import ChatCompletion
28
+ from PyPDF2 import PdfReader
29
+ from templates import bot_template, css, user_template
30
+ from xml.etree import ElementTree as ET
31
+ import streamlit.components.v1 as components # Import Streamlit Components for HTML5
32
 
 
33
 
34
+ # Set page configuration with a title and favicon
35
+ st.set_page_config(
36
+ page_title="🌌🚀 Mixable AI - Voice Search",
37
+ page_icon="🌠",
38
+ layout="wide",
39
+ initial_sidebar_state="expanded",
40
+ menu_items={
41
+ 'Get Help': 'https://huggingface.co/awacke1',
42
+ 'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload",
43
+ 'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558"
44
+ }
45
+ )
46
  st.markdown("""
47
  ## Anatomy Head to Toe Table with Body Organs Costly Conditions, Spending, CPT Codes and Frequency
48
 
 
72
  """)
73
 
74
 
 
 
 
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  # Ensure the directory for storing scores exists
78
  score_dir = "scores"
 
348
 
349
 
350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  # 1. Constants and Top Level UI Variables
352
 
353
  # My Inference API Copy
 
580
  else:
581
  return ""
582
 
583
+ # 11. Chat with GPT - Caution on quota
584
  @st.cache_resource
585
  def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
586
  model = model_choice
 
612
  st.write(time.time() - start_time)
613
  return full_reply_content
614
 
 
 
 
 
 
 
 
 
 
 
615
  def extract_mime_type(file):
616
  if isinstance(file, str):
617
  pattern = r"type='(.*?)'"