Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,8 @@
|
|
1 |
import streamlit as st
|
2 |
import json
|
3 |
import pandas as pd
|
4 |
-
import plotly.express as px
|
5 |
-
import seaborn as sns
|
6 |
-
import matplotlib.pyplot as plt
|
7 |
import streamlit.components.v1 as components
|
8 |
|
9 |
-
# Global variable to hold selected row index
|
10 |
-
selected_row_index = None
|
11 |
-
|
12 |
-
# Initialize an empty DataFrame
|
13 |
-
filtered_data = pd.DataFrame()
|
14 |
-
|
15 |
-
|
16 |
# Function to load JSONL file into a DataFrame
|
17 |
def load_jsonl(file_path):
|
18 |
data = []
|
@@ -25,132 +15,74 @@ def load_jsonl(file_path):
|
|
25 |
def filter_by_keyword(df, keyword):
|
26 |
return df[df.apply(lambda row: row.astype(str).str.contains(keyword).any(), axis=1)]
|
27 |
|
28 |
-
#
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Dropdown for file selection
|
32 |
-
file_option = st.selectbox("Select file:", ["
|
33 |
st.write(f"You selected: {file_option}")
|
34 |
|
35 |
-
# Load
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
|
40 |
-
if file_option == "small_file.jsonl":
|
41 |
-
data = small_data
|
42 |
-
else:
|
43 |
-
data = large_data
|
44 |
|
45 |
-
#
|
46 |
-
|
47 |
|
48 |
-
#
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
-
|
56 |
-
def generate_html_with_textarea(text_to_speak):
|
57 |
-
return f'''
|
58 |
-
<!DOCTYPE html>
|
59 |
-
<html>
|
60 |
-
<head>
|
61 |
-
<title>Read It Aloud</title>
|
62 |
-
<script type="text/javascript">
|
63 |
-
function readAloud() {{
|
64 |
-
const text = document.getElementById("textArea").value;
|
65 |
-
const speech = new SpeechSynthesisUtterance(text);
|
66 |
-
window.speechSynthesis.speak(speech);
|
67 |
-
}}
|
68 |
-
</script>
|
69 |
-
</head>
|
70 |
-
<body>
|
71 |
-
<h1>๐ Read It Aloud</h1>
|
72 |
-
<textarea id="textArea" rows="10" cols="80">
|
73 |
-
{text_to_speak}
|
74 |
-
</textarea>
|
75 |
-
<br>
|
76 |
-
<button onclick="readAloud()">๐ Read Aloud</button>
|
77 |
-
</body>
|
78 |
-
</html>
|
79 |
-
'''
|
80 |
-
|
81 |
-
# Define your text passage
|
82 |
-
text_passage = "A 60-year-old man is brought to the emergency department by police officers because he was acting strangely in public. The patient was found talking nonsensically to characters on cereal boxes in the store. Past medical history is significant for multiple hospitalizations for alcohol-related injuries and seizures. The patientโs vital signs are within normal limits. Physical examination shows a disheveled male who is oriented to person, but not time or place. Neurologic examination shows nystagmus and severe gait ataxia. A T1/T2 MRI is performed and demonstrates evidence of damage to the mammillary bodies. The patient is given the appropriate treatment for recovering most of his cognitive functions. However, significant short-term memory deficits persist. The patient remembers events from his past such as the school and college he attended, his current job, and the names of family members quite well. Which of the following is the most likely diagnosis in this patient?"
|
83 |
-
|
84 |
-
# Generate HTML code
|
85 |
-
documentHTML5 = generate_html_with_textarea(text_passage)
|
86 |
-
|
87 |
-
|
88 |
# Button to read all filtered rows
|
89 |
-
if st.button("Read All Rows"):
|
90 |
if not filtered_data.empty:
|
91 |
html_blocks = []
|
92 |
for idx, row in filtered_data.iterrows():
|
93 |
question_text = row.get("question", "No question field")
|
94 |
-
documentHTML5 =
|
95 |
html_blocks.append(documentHTML5)
|
96 |
all_html = ''.join(html_blocks)
|
97 |
components.html(all_html, width=1280, height=1024)
|
98 |
else:
|
99 |
-
st.warning("No rows to read.")
|
100 |
-
|
101 |
-
|
102 |
-
# Insert the HTML into Streamlit
|
103 |
-
# Button to read all filtered rows
|
104 |
-
if st.button("Read Aloud Text"):
|
105 |
-
components.html(documentHTML5, width=1280, height=1024)
|
106 |
-
|
107 |
-
|
108 |
-
# Plotly and Seaborn charts for EDA
|
109 |
-
if st.button("Generate Charts"):
|
110 |
-
st.subheader("Plotly Charts ๐")
|
111 |
-
|
112 |
-
# 1. Scatter Plot
|
113 |
-
fig = px.scatter(data, x=data.columns[0], y=data.columns[1])
|
114 |
-
st.plotly_chart(fig)
|
115 |
-
|
116 |
-
# 2. Line Plot
|
117 |
-
fig = px.line(data, x=data.columns[0], y=data.columns[1])
|
118 |
-
st.plotly_chart(fig)
|
119 |
-
|
120 |
-
# 3. Bar Plot
|
121 |
-
fig = px.bar(data, x=data.columns[0], y=data.columns[1])
|
122 |
-
st.plotly_chart(fig)
|
123 |
-
|
124 |
-
# 4. Histogram
|
125 |
-
fig = px.histogram(data, x=data.columns[0])
|
126 |
-
st.plotly_chart(fig)
|
127 |
-
|
128 |
-
# 5. Box Plot
|
129 |
-
fig = px.box(data, x=data.columns[0], y=data.columns[1])
|
130 |
-
st.plotly_chart(fig)
|
131 |
-
|
132 |
-
st.subheader("Seaborn Charts ๐")
|
133 |
-
|
134 |
-
# 6. Violin Plot
|
135 |
-
fig, ax = plt.subplots()
|
136 |
-
sns.violinplot(x=data.columns[0], y=data.columns[1], data=data)
|
137 |
-
st.pyplot(fig)
|
138 |
-
|
139 |
-
# 7. Swarm Plot
|
140 |
-
fig, ax = plt.subplots()
|
141 |
-
sns.swarmplot(x=data.columns[0], y=data.columns[1], data=data)
|
142 |
-
st.pyplot(fig)
|
143 |
-
|
144 |
-
# 8. Pair Plot
|
145 |
-
fig = sns.pairplot(data)
|
146 |
-
st.pyplot(fig)
|
147 |
-
|
148 |
-
# 9. Heatmap
|
149 |
-
fig, ax = plt.subplots()
|
150 |
-
sns.heatmap(data.corr(), annot=True)
|
151 |
-
st.pyplot(fig)
|
152 |
-
|
153 |
-
# 10. Regplot (Regression Plot)
|
154 |
-
fig, ax = plt.subplots()
|
155 |
-
sns.regplot(x=data.columns[0], y=data.columns[1], data=data)
|
156 |
-
st.pyplot(fig)
|
|
|
1 |
import streamlit as st
|
2 |
import json
|
3 |
import pandas as pd
|
|
|
|
|
|
|
4 |
import streamlit.components.v1 as components
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
# Function to load JSONL file into a DataFrame
|
7 |
def load_jsonl(file_path):
|
8 |
data = []
|
|
|
15 |
def filter_by_keyword(df, keyword):
    """Return the rows of *df* in which any cell contains *keyword*.

    Every cell is converted to ``str`` before matching, so non-string
    columns are searched too. Matching is case-sensitive.

    Args:
        df: DataFrame to search.
        keyword: Literal text to look for. It is NOT interpreted as a
            regular expression, so user input such as ``"("`` or ``"a.c"``
            is matched character-for-character instead of raising
            ``re.error`` or matching unintended text.

    Returns:
        A DataFrame holding only the matching rows, with the original index
        labels preserved.
    """
    def row_matches(row):
        # regex=False: the keyword comes from a free-text input box and must
        # be treated as plain text, not as a pattern.
        return row.astype(str).str.contains(keyword, regex=False).any()

    return df[df.apply(row_matches, axis=1)]
|
17 |
|
18 |
+
# Function to generate HTML with textarea
|
19 |
+
def generate_html_with_textarea(text_to_speak):
    """Build an HTML fragment with a textarea and a "Read Aloud" button.

    Clicking the button passes the textarea's current content to the
    browser's ``speechSynthesis`` API.

    Args:
        text_to_speak: Value placed inside the textarea. It is converted
            with ``str()`` and HTML-escaped, so markup in the data (for
            example a literal ``</textarea>``) cannot break out of the
            textarea.

    Returns:
        The HTML document as a string. Several returned documents may be
        concatenated and rendered together: each button looks up the
        textarea inside its own wrapper ``<div>`` rather than by a shared
        element id, so every button reads its own text.
    """
    import html  # local import keeps this block self-contained

    safe_text = html.escape(str(text_to_speak))
    return f'''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud(button) {{
const text = button.closest(".read-aloud").querySelector("textarea").value;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}}
</script>
</head>
<body>
<div class="read-aloud">
<h1>๐ Read It Aloud</h1>
<textarea rows="10" cols="80">
{safe_text}
</textarea>
<br>
<button onclick="readAloud(this)">๐ Read Aloud</button>
</div>
</body>
</html>
'''
|
43 |
+
|
44 |
+
# Streamlit App ๐
|
45 |
+
st.title("USMLE Medical Questions Explorer with Speech Synthesis ๐")
|
46 |
|
47 |
# Dropdown for file selection
|
48 |
+
file_option = st.selectbox("Select file:", ["usmle_16.2MB.jsonl", "usmle_2.08MB.jsonl"])
|
49 |
st.write(f"You selected: {file_option}")
|
50 |
|
51 |
+
# Load data
|
52 |
+
large_data = load_jsonl("usmle_16.2MB.jsonl")
|
53 |
+
small_data = load_jsonl("usmle_2.08MB.jsonl")
|
54 |
|
55 |
+
# Use the large dataset only when its file is the one picked in the dropdown;
# previously both branches yielded small_data, so the large file was never shown.
data = large_data if file_option == "usmle_16.2MB.jsonl" else small_data
|
|
|
|
|
|
|
|
|
56 |
|
57 |
+
# Top 20 healthcare terms for USMLE
|
58 |
+
top_20_terms = ['Heart', 'Lung', 'Pain', 'Memory', 'Kidney', 'Diabetes', 'Cancer', 'Infection', 'Virus', 'Bacteria', 'Neurology', 'Psychiatry', 'Gastrointestinal', 'Pediatrics', 'Oncology', 'Skin', 'Blood', 'Surgery', 'Epidemiology', 'Genetics']
|
59 |
|
60 |
+
# Create Expander and Columns UI for terms
|
61 |
+
# Streamlit reruns the whole script on every widget interaction, so a search
# result held in a plain variable is gone (or never defined) by the time
# "Read All Rows" is clicked. Keep the latest result in session state instead.
if "filtered_data" not in st.session_state:
    st.session_state["filtered_data"] = pd.DataFrame()

with st.expander("Search by Common Terms ๐"):
    cols = st.columns(4)
    # Lay the term buttons out round-robin across the four columns.
    for position, term in enumerate(top_20_terms):
        with cols[position % 4]:
            if st.button(f"{term}"):
                filtered_data = filter_by_keyword(data, term)
                st.session_state["filtered_data"] = filtered_data
                st.write(f"Filtered Dataset by '{term}' ๐")
                st.dataframe(filtered_data)

# Text input for search keyword
search_keyword = st.text_input("Or, enter a keyword to filter data:")
if st.button("Search ๐ต๏ธโโ๏ธ"):
    filtered_data = filter_by_keyword(data, search_keyword)
    st.session_state["filtered_data"] = filtered_data
    st.write(f"Filtered Dataset by '{search_keyword}' ๐")
    st.dataframe(filtered_data)

# Button to read all filtered rows
if st.button("Read All Rows ๐"):
    # Read back the result of the most recent search from session state.
    filtered_data = st.session_state["filtered_data"]
    if not filtered_data.empty:
        # One read-aloud HTML fragment per row, rendered together in one component.
        html_blocks = [
            generate_html_with_textarea(row.get("question", "No question field"))
            for _, row in filtered_data.iterrows()
        ]
        all_html = ''.join(html_blocks)
        components.html(all_html, width=1280, height=1024)
    else:
        st.warning("No rows to read. ๐จ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|