File size: 4,093 Bytes
4bce033
 
 
 
 
 
af59780
4bce033
3fddc37
 
 
 
 
599fbab
4bce033
 
 
 
 
 
 
 
 
 
 
 
3fddc37
af59780
3fddc37
af59780
 
 
 
 
 
3fddc37
 
af59780
 
 
 
 
 
 
3fddc37
 
 
 
af59780
 
 
 
3fddc37
 
 
 
 
 
 
4bce033
cab1fa3
 
4bce033
3fddc37
 
 
 
 
4bce033
725e817
 
 
 
 
 
 
af59780
725e817
af59780
 
3fddc37
 
 
 
 
 
af59780
3fddc37
 
4bce033
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af59780
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import streamlit as st
import json
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit.components.v1 as components

# Index label of the row chosen for the read-aloud feature; None means that
# no row is currently selected.
# NOTE(review): module-level state is re-initialised every time this script
# is executed from the top -- confirm that this is compatible with how
# Streamlit re-runs the script on user interaction.
selected_row_index = None

# Rows matching the most recent keyword search. It starts as an empty
# DataFrame so the name is always defined, even before any search has run.
filtered_data = pd.DataFrame()

# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a DataFrame.

    Each non-blank line of the file is parsed as one JSON document and
    becomes one row of the result.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A DataFrame with one row per parsed line (empty when the file
        contains no records).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # which would garble non-ASCII text on some systems.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Whitespace-only lines (e.g. a stray empty line at the end of the
        # file) carry no record and would make json.loads raise.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword):
    """Return the rows of ``df`` in which at least one cell contains ``keyword``.

    Every cell is converted to its string form and searched for ``keyword``
    as a literal, case-sensitive substring.

    Args:
        df: DataFrame to search.
        keyword: Text to look for; it is matched literally, not as a regular
            expression. An empty keyword matches every row.

    Returns:
        A DataFrame holding the matching rows with their original index
        labels. An empty input yields an empty copy.
    """
    if df.empty:
        return df.copy()

    needle = str(keyword)
    # regex=False: the keyword is free-text user input, so characters such as
    # "(" or "+" must match themselves instead of being parsed as a pattern
    # (an unbalanced "(" would otherwise raise).
    # Searching column by column uses pandas' vectorised string methods rather
    # than building a temporary Series for every row.
    hits = df.astype(str).apply(lambda col: col.str.contains(needle, regex=False))
    return df[hits.any(axis=1)]


# Function to generate HTML5 code with embedded text
def generate_html(question_text, answer_text):
    """Build a standalone HTML page that can read a question and answer aloud.

    The page shows both texts and a button for each one; a button passes the
    paragraph's text to the browser's speech synthesis API.

    Args:
        question_text: Text for the question paragraph. It is converted with
            ``str()`` and HTML-escaped before being embedded.
        answer_text: Text for the answer paragraph, handled the same way.

    Returns:
        The complete HTML document as a string.
    """
    # Local import so this function does not depend on the file's import block.
    from html import escape

    # The texts come from the dataset and may contain "<", ">" or "&".
    # Unescaped, they would be parsed as markup and could break the page or
    # inject script. The browser decodes the entities again for display and
    # for innerText, so the spoken text is unchanged.
    question_text = escape(str(question_text))
    answer_text = escape(str(answer_text))
    return f'''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud(id) {{
            const text = document.getElementById(id).innerText;
            const speech = new SpeechSynthesisUtterance(text);
            window.speechSynthesis.speak(speech);
        }}
    </script>
</head>
<body>
    <h1>🔊 Read It Aloud</h1>
    <p id="questionArea">{question_text}</p>
    <button onclick="readAloud('questionArea')">🔊 Read Question Aloud</button>
    <p id="answerArea">{answer_text}</p>
    <button onclick="readAloud('answerArea')">🔊 Read Answer Aloud</button>
</body>
</html>
'''

# Streamlit App
st.title("Medical Licensing Exam Explorer with Speech Synthesis, Plotly and Seaborn 📊")

# Dropdown for file selection
file_option = st.selectbox("Select file:", ["small_file.jsonl", "large_file.jsonl"])
st.write(f"You selected: {file_option}")

# Map each menu entry to its file on disk. The sizes are part of the file
# names, so the "small" entry points at the 2.08MB file and the "large" entry
# at the 16.2MB one (the two paths used to be swapped).
dataset_paths = {
    "small_file.jsonl": "usmle_2.08MB.jsonl",
    "large_file.jsonl": "usmle_16.2MB.jsonl",
}

# Load only the selected file; parsing both on every run doubles the work for
# data that is never shown.
data = load_jsonl(dataset_paths[file_option])

# Text input for search keyword
search_keyword = st.text_input("Enter a keyword to filter data (e.g., Heart, Lung, Pain, Memory):")

# Button to trigger search. A button is only True during the run triggered by
# its own click, so the result is kept in session state; otherwise it would be
# gone by the time "Read Selected Row" is pressed.
if st.button("Search"):
    st.session_state["search_result"] = {
        "file": file_option,
        "keyword": search_keyword,
        "rows": filter_by_keyword(data, search_keyword),
    }

# Ignore a stored result that was computed for a different file.
search_result = st.session_state.get("search_result")
if search_result is not None and search_result["file"] != file_option:
    search_result = None

# Show filtered data grid and let the user pick the row to read aloud
selected_row_index = None
filtered_data = pd.DataFrame()
if search_result is not None:
    filtered_data = search_result["rows"]
    shown_keyword = search_result["keyword"]
    st.write(f"Filtered Dataset by '{shown_keyword}'")
    st.dataframe(filtered_data)
    if not filtered_data.empty:
        # The options are the index labels of the filtered rows, so the
        # returned value can be used directly with .loc below.
        selected_row_index = st.selectbox(
            "Select a row to read aloud:", filtered_data.index.tolist()
        )

# Button to read selected row aloud
if st.button("Read Selected Row"):
    if selected_row_index is not None:
        selected_row = filtered_data.loc[selected_row_index]
        question_text = selected_row.get("question", "No question field")
        answer_text = selected_row.get("answer", "No answer field")

        documentHTML5 = generate_html(question_text, answer_text)
        components.html(documentHTML5, width=1280, height=1024)
    else:
        st.warning("Please select a row first.")

# Plotly and Seaborn charts for EDA
if st.button("Generate Charts"):
    if data.shape[1] < 2:
        # Every two-variable chart below indexes the first two columns.
        st.warning("At least two columns are required to generate charts.")
    else:
        # NOTE(review): the charts plot the first two columns whatever they
        # hold; the numeric ones (e.g. the regression plot) presumably need
        # numeric data in those columns -- confirm against the dataset.
        x_col, y_col = data.columns[0], data.columns[1]

        st.subheader("Plotly Charts 📈")

        # 1. Scatter Plot
        st.plotly_chart(px.scatter(data, x=x_col, y=y_col))

        # 2. Line Plot
        st.plotly_chart(px.line(data, x=x_col, y=y_col))

        # 3. Bar Plot
        st.plotly_chart(px.bar(data, x=x_col, y=y_col))

        # 4. Histogram
        st.plotly_chart(px.histogram(data, x=x_col))

        # 5. Box Plot
        st.plotly_chart(px.box(data, x=x_col, y=y_col))

        st.subheader("Seaborn Charts 📊")

        # Each figure is closed once rendered so that repeated clicks do not
        # accumulate open matplotlib figures in the server process.

        # 6. Violin Plot
        fig, ax = plt.subplots()
        sns.violinplot(x=x_col, y=y_col, data=data, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

        # 7. Swarm Plot
        fig, ax = plt.subplots()
        sns.swarmplot(x=x_col, y=y_col, data=data, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

        # The pair plot and the correlation heatmap are only defined for
        # numeric columns; without any, both calls raise.
        numeric_data = data.select_dtypes(include="number")
        if numeric_data.shape[1] == 0:
            st.info("No numeric columns available for the pair plot and heatmap.")
        else:
            # 8. Pair Plot
            grid = sns.pairplot(data)
            st.pyplot(grid.fig)
            plt.close(grid.fig)

            # 9. Heatmap
            fig, ax = plt.subplots()
            sns.heatmap(numeric_data.corr(), annot=True, ax=ax)
            st.pyplot(fig)
            plt.close(fig)

        # 10. Regplot (Regression Plot)
        fig, ax = plt.subplots()
        sns.regplot(x=x_col, y=y_col, data=data, ax=ax)
        st.pyplot(fig)
        plt.close(fig)