Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,48 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
st.set_page_config(layout="wide")
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
st.markdown("""
|
6 |
## Anatomy Head to Toe Table with Body Organs Costly Conditions, Spending, CPT Codes and Frequency
|
7 |
|
@@ -31,23 +72,7 @@ st.markdown("""
|
|
31 |
""")
|
32 |
|
33 |
|
34 |
-
import os
|
35 |
-
import json
|
36 |
-
from PIL import Image
|
37 |
-
from urllib.parse import quote # Ensure this import is included
|
38 |
|
39 |
-
# Set page configuration with a title and favicon
|
40 |
-
st.set_page_config(
|
41 |
-
page_title="🌌🚀 Mixable AI - Voice Search",
|
42 |
-
page_icon="🌠",
|
43 |
-
layout="wide",
|
44 |
-
initial_sidebar_state="expanded",
|
45 |
-
menu_items={
|
46 |
-
'Get Help': 'https://huggingface.co/awacke1',
|
47 |
-
'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload",
|
48 |
-
'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558"
|
49 |
-
}
|
50 |
-
)
|
51 |
|
52 |
# Ensure the directory for storing scores exists
|
53 |
score_dir = "scores"
|
@@ -323,95 +348,6 @@ def display_content_or_image(query):
|
|
323 |
|
324 |
|
325 |
|
326 |
-
|
327 |
-
# Imports
|
328 |
-
import base64
|
329 |
-
import glob
|
330 |
-
import json
|
331 |
-
import math
|
332 |
-
import openai
|
333 |
-
import os
|
334 |
-
import pytz
|
335 |
-
import re
|
336 |
-
import requests
|
337 |
-
import streamlit as st
|
338 |
-
import textract
|
339 |
-
import time
|
340 |
-
import zipfile
|
341 |
-
import huggingface_hub
|
342 |
-
import dotenv
|
343 |
-
from audio_recorder_streamlit import audio_recorder
|
344 |
-
from bs4 import BeautifulSoup
|
345 |
-
from collections import deque
|
346 |
-
from datetime import datetime
|
347 |
-
from dotenv import load_dotenv
|
348 |
-
from huggingface_hub import InferenceClient
|
349 |
-
from io import BytesIO
|
350 |
-
from langchain.chat_models import ChatOpenAI
|
351 |
-
from langchain.chains import ConversationalRetrievalChain
|
352 |
-
from langchain.embeddings import OpenAIEmbeddings
|
353 |
-
from langchain.memory import ConversationBufferMemory
|
354 |
-
from langchain.text_splitter import CharacterTextSplitter
|
355 |
-
from langchain.vectorstores import FAISS
|
356 |
-
from openai import ChatCompletion
|
357 |
-
from PyPDF2 import PdfReader
|
358 |
-
from templates import bot_template, css, user_template
|
359 |
-
from xml.etree import ElementTree as ET
|
360 |
-
import streamlit.components.v1 as components # Import Streamlit Components for HTML5
|
361 |
-
|
362 |
-
|
363 |
-
def add_Med_Licensing_Exam_Dataset():
    """Render a paginated, searchable Streamlit viewer for the USMLE Step 1 dataset.

    Loads the ``test`` split of ``augtoma/usmle_step_1``, shows a search box
    and (when more than one page exists) a page slider, then writes the id,
    question text and answer of every matching record on the selected page.

    The search term is a case-insensitive substring match on the ``text``
    field and is applied only to the records of the currently selected page.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Local imports: `datasets` is only needed when this viewer is used.
    import streamlit as st
    from datasets import load_dataset

    dataset = load_dataset("augtoma/usmle_step_1")['test']  # Using 'test' split
    st.title("USMLE Step 1 Dataset Viewer")

    num_records = len(dataset)
    if num_records == 0:
        st.write("😢 The dataset is empty.")
        return

    st.write("🔍 Use the search box to filter questions or use the grid to scroll through the dataset.")

    # Search box
    search_term = st.text_input("Search for a specific question:", "")
    needle = search_term.lower()

    # Pagination: ceiling division so a final, partially filled page is
    # reachable (floor division silently dropped the trailing records).
    records_per_page = 100
    num_pages = (num_records + records_per_page - 1) // records_per_page

    # Skip the slider when every record fits on a single page.
    if num_pages > 1:
        page_number = st.select_slider("Select page:", options=list(range(1, num_pages + 1)))
    else:
        page_number = 1

    start_idx = (page_number - 1) * records_per_page
    end_idx = min(start_idx + records_per_page, num_records)

    # Index row by row: slicing a `datasets.Dataset` returns a dict of columns
    # (iterating it yields column names), whereas integer indexing returns one
    # row as a dict.
    filtered_data = []
    for idx in range(start_idx, end_idx):
        record = dataset[idx]
        # Assumes rows carry 'id' and 'text' keys; rows without them are skipped.
        if not (isinstance(record, dict) and 'text' in record and 'id' in record):
            continue
        if needle and needle not in record['text'].lower():
            continue
        filtered_data.append(record)

    # Render the matching records
    for record in filtered_data:
        st.write(f"## Question ID: {record['id']}")
        st.write("### Question:")
        st.write(f"{record['text']}")
        st.write("### Answer:")
        # 'answer' is not part of the guard above, so tolerate its absence.
        st.write(f"{record.get('answer', '')}")
        st.write("---")

    st.write(f"😊 Total Records: {num_records} | 📄 Displaying {start_idx+1} to {end_idx}")
|
414 |
-
|
415 |
# 1. Constants and Top Level UI Variables
|
416 |
|
417 |
# My Inference API Copy
|
@@ -644,7 +580,7 @@ def read_file_content(file,max_length):
|
|
644 |
else:
|
645 |
return ""
|
646 |
|
647 |
-
# 11. Chat with GPT - Caution on quota
|
648 |
@st.cache_resource
|
649 |
def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
|
650 |
model = model_choice
|
@@ -676,16 +612,6 @@ def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
|
|
676 |
st.write(time.time() - start_time)
|
677 |
return full_reply_content
|
678 |
|
679 |
-
# 12. Chat with GPT over file contents - prompt plus optional file text in one request
@st.cache_resource
def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
    """Send `prompt`, optionally accompanied by `file_content`, to an OpenAI chat model.

    The request always starts with a fixed system message followed by the
    user's prompt. When `file_content` is non-empty it is appended as an
    additional message with the 'assistant' role.

    Args:
        prompt: Text sent as the user message.
        file_content: Extra text attached to the conversation; skipped when
            its length is zero.
        model_choice: Model name passed to the OpenAI chat completion call.

    Returns:
        The message content of the first choice in the API response.
    """
    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    if len(file_content) > 0:
        messages.append({'role': 'assistant', 'content': file_content})

    completion = openai.ChatCompletion.create(model=model_choice, messages=messages)
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
|
688 |
-
|
689 |
def extract_mime_type(file):
|
690 |
if isinstance(file, str):
|
691 |
pattern = r"type='(.*?)'"
|
|
|
1 |
import streamlit as st
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
from PIL import Image
|
5 |
+
from urllib.parse import quote # Ensure this import is included
|
6 |
+
import base64
|
7 |
+
import glob
|
8 |
+
import json
|
9 |
+
import math
|
10 |
+
import openai
|
11 |
+
import os
|
12 |
+
import pytz
|
13 |
+
import re
|
14 |
+
import requests
|
15 |
+
import textract
|
16 |
+
import time
|
17 |
+
import zipfile
|
18 |
+
import huggingface_hub
|
19 |
+
import dotenv
|
20 |
+
from audio_recorder_streamlit import audio_recorder
|
21 |
+
from bs4 import BeautifulSoup
|
22 |
+
from collections import deque
|
23 |
+
from datetime import datetime
|
24 |
+
from dotenv import load_dotenv
|
25 |
+
from huggingface_hub import InferenceClient
|
26 |
+
from io import BytesIO
|
27 |
+
from openai import ChatCompletion
|
28 |
+
from PyPDF2 import PdfReader
|
29 |
+
from templates import bot_template, css, user_template
|
30 |
+
from xml.etree import ElementTree as ET
|
31 |
+
import streamlit.components.v1 as components # Import Streamlit Components for HTML5
|
32 |
|
|
|
33 |
|
34 |
+
# Page setup: browser-tab title and icon, wide layout, expanded sidebar,
# and the links shown in the app's menu.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="🌌🚀 Mixable AI - Voice Search",
    page_icon="🌠",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': "https://huggingface.co/spaces/awacke1/WebDataDownload",
        'About': "# Midjourney: https://discord.com/channels/@me/997514686608191558",
    },
)
|
46 |
st.markdown("""
|
47 |
## Anatomy Head to Toe Table with Body Organs Costly Conditions, Spending, CPT Codes and Frequency
|
48 |
|
|
|
72 |
""")
|
73 |
|
74 |
|
|
|
|
|
|
|
|
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
# Ensure the directory for storing scores exists
|
78 |
score_dir = "scores"
|
|
|
348 |
|
349 |
|
350 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
# 1. Constants and Top Level UI Variables
|
352 |
|
353 |
# My Inference API Copy
|
|
|
580 |
else:
|
581 |
return ""
|
582 |
|
583 |
+
# 11. Chat with GPT - Caution on quota
|
584 |
@st.cache_resource
|
585 |
def chat_with_model(prompt, document_section='', model_choice='gpt-3.5-turbo'):
|
586 |
model = model_choice
|
|
|
612 |
st.write(time.time() - start_time)
|
613 |
return full_reply_content
|
614 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
615 |
def extract_mime_type(file):
|
616 |
if isinstance(file, str):
|
617 |
pattern = r"type='(.*?)'"
|