awacke1 committed on
Commit
4bce033
·
1 Parent(s): 9382229

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import seaborn as sns
6
+ import matplotlib.pyplot as plt
7
+
8
# Function to load JSONL file into a DataFrame
def load_jsonl(file_path):
    """Load a JSON Lines file into a pandas DataFrame.

    Args:
        file_path: Path to a text file holding one JSON document per line.

    Returns:
        A DataFrame with one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. stray trailing newlines) are skipped rather than
        # handed to json.loads, which would raise on them.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
15
+
16
# Function to filter DataFrame by keyword
def filter_by_keyword(df, keyword, *, regex=False):
    """Return the rows of *df* in which any cell contains *keyword*.

    Every cell is compared through its string representation, and the match
    is case-sensitive.

    Args:
        df: DataFrame to search.
        keyword: Text to look for in each cell.
        regex: When True, treat *keyword* as a regular expression. The
            default is a literal substring match, so keywords containing
            characters such as "(" or "+" do not raise.

    Returns:
        A DataFrame holding only the matching rows (possibly empty).
    """
    # Build the row mask column by column; this stays vectorised and also
    # yields a proper boolean mask when the frame has no rows or no columns.
    mask = pd.Series(False, index=df.index)
    for _, column in df.items():
        mask |= column.astype(str).str.contains(keyword, regex=regex, na=False)
    return df[mask]
19
+
20
# Load the data. The file names carry their sizes: the 2.08 MB file is the
# small dataset and the 16.2 MB file is the large one.
small_data = load_jsonl("usmle_2.08MB.jsonl")
large_data = load_jsonl("usmle_16.2MB.jsonl")
23
+
24
# Streamlit App: page title
st.title("EDA with Plotly and Seaborn 📊")

# Let the user choose which dataset to explore and echo the choice back
file_option = st.selectbox("Select file:", ["small_file.jsonl", "large_file.jsonl"])
st.write(f"You selected: {file_option}")

# Map the selected label to its DataFrame; anything other than the small
# file falls back to the large dataset.
_datasets_by_label = {"small_file.jsonl": small_data}
data = _datasets_by_label.get(file_option, large_data)

# Show only the rows that mention "Heart" in any column
filtered_data = filter_by_keyword(data, "Heart")
st.write("Filtered Dataset by 'Heart'")
st.dataframe(filtered_data)
40
+
41
# Plotly and Seaborn charts for EDA
def _show_axes_plot(plot_func, **plot_kwargs):
    """Draw one seaborn axes-level plot on a fresh figure and display it."""
    fig, ax = plt.subplots()
    try:
        # Pass the axes explicitly instead of relying on pyplot's implicit
        # "current axes" state.
        plot_func(ax=ax, **plot_kwargs)
        st.pyplot(fig)
    finally:
        # Close the figure so figures do not accumulate across reruns.
        plt.close(fig)


if st.button("Generate Charts"):
    if len(data.columns) < 2:
        # Every two-variable chart below reads the first two columns.
        st.warning("At least two columns are required to generate charts.")
    else:
        x_col, y_col = data.columns[0], data.columns[1]

        st.subheader("Plotly Charts 📈")

        # 1. Scatter Plot
        st.plotly_chart(px.scatter(data, x=x_col, y=y_col))

        # 2. Line Plot
        st.plotly_chart(px.line(data, x=x_col, y=y_col))

        # 3. Bar Plot
        st.plotly_chart(px.bar(data, x=x_col, y=y_col))

        # 4. Histogram
        st.plotly_chart(px.histogram(data, x=x_col))

        # 5. Box Plot
        st.plotly_chart(px.box(data, x=x_col, y=y_col))

        st.subheader("Seaborn Charts 📊")

        # 6. Violin Plot
        _show_axes_plot(sns.violinplot, x=x_col, y=y_col, data=data)

        # 7. Swarm Plot
        _show_axes_plot(sns.swarmplot, x=x_col, y=y_col, data=data)

        # The pair plot and the correlation heatmap are only meaningful for
        # numeric columns; DataFrame.corr() raises on text columns in
        # pandas >= 2.0, so restrict both charts to the numeric subset.
        numeric_data = data.select_dtypes(include="number")
        if numeric_data.empty:
            st.info("No numeric columns available for the pair plot and heatmap.")
        else:
            # 8. Pair Plot
            pair_grid = sns.pairplot(numeric_data)
            pair_fig = pair_grid.figure
            try:
                st.pyplot(pair_fig)
            finally:
                plt.close(pair_fig)

            # 9. Heatmap
            _show_axes_plot(sns.heatmap, data=numeric_data.corr(), annot=True)

        # 10. Regplot (Regression Plot)
        # NOTE(review): regplot presumably needs numeric x/y columns - confirm
        # against the dataset schema.
        _show_axes_plot(sns.regplot, x=x_col, y=y_col, data=data)
91
+