File size: 4,093 Bytes
4bce033 af59780 4bce033 3fddc37 599fbab 4bce033 3fddc37 af59780 3fddc37 af59780 3fddc37 af59780 3fddc37 af59780 3fddc37 4bce033 cab1fa3 4bce033 3fddc37 4bce033 725e817 af59780 725e817 af59780 3fddc37 af59780 3fddc37 4bce033 af59780 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import streamlit as st
import json
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit.components.v1 as components
# Index label of the row chosen for read-aloud; None means "no row selected"
# (the "Read Selected Row" handler checks for None before reading).
# NOTE(review): Streamlit re-executes the script on each interaction, so
# module-level values like these are re-initialised on every rerun — confirm
# whether state that must outlive a rerun belongs in st.session_state.
selected_row_index = None
# Result of the most recent keyword search; starts as an empty DataFrame.
filtered_data = pd.DataFrame()
# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame.

    Each non-blank line of the file is parsed as one JSON document and becomes
    one row of the result.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        A ``pandas.DataFrame`` with one row per JSON document (empty when the
        file holds no documents).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification; relying on the platform default
    # encoding breaks non-ASCII content on some systems.
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank / whitespace-only lines (e.g. a trailing newline at EOF) are
        # not JSON documents and would make json.loads raise.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword, *, case=True):
    """Return the rows of ``df`` in which any cell contains ``keyword``.

    Every cell is converted to its string form before matching, so numeric and
    other non-text columns are searched too.

    Args:
        df: DataFrame to search.
        keyword: Text to look for. It is matched literally, so characters such
            as ``(`` or ``.`` typed by a user are ordinary text rather than
            regular-expression syntax.
        case: When True (the default) the match is case-sensitive.

    Returns:
        A DataFrame holding the matching rows, with their original index.
    """
    as_text = df.astype(str)
    # Column-wise vectorised matching; a Python-level call per row is far
    # slower on large frames. na=False keeps the mask strictly boolean.
    hits = as_text.apply(
        lambda column: column.str.contains(keyword, case=case, regex=False, na=False)
    )
    return df[hits.any(axis=1)]
# Function to generate HTML5 code with embedded text
def generate_html(question_text, answer_text):
    """Build a standalone HTML page that can read a question and answer aloud.

    The page shows both texts and, for each, a button that speaks the
    paragraph's text through the browser's ``speechSynthesis`` API.

    Args:
        question_text: Text placed in the ``questionArea`` paragraph.
        answer_text: Text placed in the ``answerArea`` paragraph.

    Returns:
        The HTML document as a string.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    import html

    # The texts come from dataset rows; characters such as "<", ">" and "&"
    # would otherwise be parsed as markup (breaking the page or injecting
    # tags). The browser decodes the entities again for innerText, so the
    # spoken text is unchanged.
    safe_question = html.escape(str(question_text))
    safe_answer = html.escape(str(answer_text))
    return f'''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud(id) {{
const text = document.getElementById(id).innerText;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}}
</script>
</head>
<body>
<h1>🔊 Read It Aloud</h1>
<p id="questionArea">{safe_question}</p>
<button onclick="readAloud('questionArea')">🔊 Read Question Aloud</button>
<p id="answerArea">{safe_answer}</p>
<button onclick="readAloud('answerArea')">🔊 Read Answer Aloud</button>
</body>
</html>
'''
# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

# Dropdown label -> file on disk. The "small" label must point at the 2.08MB
# file and the "large" label at the 16.2MB one (they were previously swapped).
dataset_paths = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}

# Dropdown for file selection
file_option = st.selectbox("Select file:", list(dataset_paths))
st.write(f"You selected: {file_option}")

# Load only the selected file: the script runs again on every interaction, so
# parsing both files each time roughly doubles the work for no benefit.
# NOTE(review): consider wrapping load_jsonl in Streamlit's data cache if the
# installed Streamlit version provides it.
data = load_jsonl(dataset_paths[file_option])
# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Streamlit re-executes this script from the top on every widget interaction,
# so a search result kept only in a plain variable is gone by the time the
# "Read Selected Row" button triggers its own rerun. The last search is
# therefore stored in session_state, which survives reruns.
# Button to trigger search
if st.button("Search"):
    st.session_state["search_result"] = {
        "source": file_option,
        "keyword": search_keyword,
        "rows": filter_by_keyword(data, search_keyword),
    }

search_result = st.session_state.get("search_result")
# A result computed against the other file no longer matches `data`; drop it.
if search_result is not None and search_result["source"] != file_option:
    search_result = None

# Show the last search result and let the user pick one of its rows.
selected_row_index = None
if search_result is not None:
    filtered_data = search_result["rows"]
    st.write(f"Filtered Dataset by '{search_result['keyword']}'")
    selected_data = st.dataframe(filtered_data)
    if not filtered_data.empty:
        # Options are the index labels shown in the grid above, so the pick
        # can be used directly with .loc.
        selected_row_index = st.selectbox(
            "Select a row to read aloud:", filtered_data.index.tolist()
        )

# Button to read selected row aloud
if st.button("Read Selected Row"):
    if selected_row_index is not None:
        selected_row = filtered_data.loc[selected_row_index]
        question_text = selected_row.get("question", "No question field")
        answer_text = selected_row.get("answer", "No answer field")
        documentHTML5 = generate_html(question_text, answer_text)
        components.html(documentHTML5, width=1280, height=1024)
    else:
        st.warning("Please select a row first.")
# Plotly and Seaborn charts for EDA
def _render_seaborn_chart(title, draw):
    """Draw one seaborn chart on a fresh figure and show it in the app.

    Args:
        title: Human-readable chart name, used in the warning when skipped.
        draw: Callable taking the target matplotlib Axes and plotting on it.
    """
    fig, ax = plt.subplots()
    try:
        draw(ax)
    except (TypeError, ValueError) as exc:
        # Some plots cannot be drawn for every column type (e.g. a regression
        # on text columns); report it and carry on with the remaining charts.
        st.warning(f"Skipped {title}: {exc}")
    else:
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until closed; without this each
        # click of the button would accumulate figures.
        plt.close(fig)


if st.button("Generate Charts"):
    if data.shape[1] < 2:
        st.warning("Charts need a dataset with at least two columns.")
    else:
        x_col, y_col = data.columns[0], data.columns[1]
        # Correlation and pair plots are only defined for numeric columns.
        numeric_data = data.select_dtypes(include="number")

        st.subheader("Plotly Charts 📈")
        # 1. Scatter Plot
        st.plotly_chart(px.scatter(data, x=x_col, y=y_col))
        # 2. Line Plot
        st.plotly_chart(px.line(data, x=x_col, y=y_col))
        # 3. Bar Plot
        st.plotly_chart(px.bar(data, x=x_col, y=y_col))
        # 4. Histogram
        st.plotly_chart(px.histogram(data, x=x_col))
        # 5. Box Plot
        st.plotly_chart(px.box(data, x=x_col, y=y_col))

        st.subheader("Seaborn Charts 📊")
        # 6. Violin Plot
        _render_seaborn_chart(
            "violin plot",
            lambda ax: sns.violinplot(x=x_col, y=y_col, data=data, ax=ax),
        )
        # 7. Swarm Plot
        _render_seaborn_chart(
            "swarm plot",
            lambda ax: sns.swarmplot(x=x_col, y=y_col, data=data, ax=ax),
        )
        # 8. Pair Plot (creates its own figure, so it is handled separately)
        if numeric_data.empty:
            st.info("Pair plot skipped: the dataset has no numeric columns.")
        else:
            grid = sns.pairplot(numeric_data)
            # Older seaborn releases expose the figure as `.fig` only.
            pair_fig = grid.figure if hasattr(grid, "figure") else grid.fig
            st.pyplot(pair_fig)
            plt.close(pair_fig)
        # 9. Heatmap
        if numeric_data.empty:
            st.info("Heatmap skipped: the dataset has no numeric columns.")
        else:
            _render_seaborn_chart(
                "heatmap",
                lambda ax: sns.heatmap(numeric_data.corr(), annot=True, ax=ax),
            )
        # 10. Regplot (Regression Plot)
        _render_seaborn_chart(
            "regression plot",
            lambda ax: sns.regplot(x=x_col, y=y_col, data=data, ax=ax),
        )