Jhsmit committed on
Commit
e05e748
1 Parent(s): 72f067f

initial commit

Browse files
Files changed (3) hide show
  1. Dockerfile +28 -0
  2. app.py +183 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the app directory inside the user's home
WORKDIR $HOME/app

# Try and run pip command after setting the user with `USER user` to avoid permission issues with Python
RUN pip install --no-cache-dir --upgrade pip

# Install dependencies from requirements.txt alone, before the sources are
# copied, so that editing application code does not invalidate this layer.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the current directory contents (including app.py) into the container
# at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+
3
+ import io
4
+ import uuid
5
+ from dataclasses import dataclass
6
+ from typing import Optional
7
+
8
+ import matplotlib.pylab as plt
9
+ import numpy as np
10
+ import pandas as pd
11
+ import solara
12
+ import solara.lab
13
+ from matplotlib.figure import Figure
14
+ from scipy import stats
15
+ from solara.components.file_drop import FileInfo
16
+
17
+
18
def make_cdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Draw the empirical CDFs of two samples on one axes.

    Args:
        values_left: Sample plotted with the legend label "left".
        values_right: Sample plotted with the legend label "right".
        stat_loc: x position at which a grey vertical marker line is drawn.

    Returns:
        A new 5x5 inch matplotlib ``Figure`` holding the plot.
    """
    figure = Figure(figsize=(5, 5))
    axes = figure.subplots()

    # Vertical marker at the requested location.
    axes.axvline(stat_loc, color="grey")

    # One empirical CDF per sample, in left/right order.
    for sample, name in ((values_left, "left"), (values_right, "right")):
        axes.ecdf(sample, label=name)

    axes.legend()
    axes.set_xlabel("Value")
    axes.set_ylabel("CDF")

    return figure
34
+
35
+
36
def make_pdf_figure(
    values_left: np.ndarray, values_right: np.ndarray, stat_loc: float
) -> Figure:
    """Draw density-normalised histograms of two samples on one axes.

    Args:
        values_left: Sample plotted with the legend label "left".
        values_right: Sample plotted with the legend label "right".
        stat_loc: x position at which a grey vertical marker line is drawn.

    Returns:
        A new 5x5 inch matplotlib ``Figure`` holding the plot.
    """
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()

    ax.axvline(stat_loc, color="grey")

    # Step histograms normalised to unit area; bin widths are chosen with the
    # Freedman-Diaconis rule ("fd").
    ax.hist(values_left, bins="fd", density=True, histtype="step", label="left")
    ax.hist(values_right, bins="fd", density=True, histtype="step", label="right")
    ax.legend()
    # The figure is returned to the caller for rendering, so no pyplot
    # ``show`` call is made here.
    ax.set_xlabel("Value")
    ax.set_ylabel("PDF")

    return fig
53
+
54
+
55
+ # %%
56
+
57
+
58
def dropna(values: np.ndarray) -> np.ndarray:
    """Return the entries of *values* that are not NaN."""
    nan_mask = np.isnan(values)
    return values[np.logical_not(nan_mask)]
60
+
61
+
62
@solara.component
def KSTestResult(values_left, values_right):
    """Render the two-sample Kolmogorov-Smirnov test for two samples.

    NaN entries are removed from both samples first. The card shows a CDF
    plot and a histogram plot side by side, followed by the test statistic,
    the p-value and the location of the statistic.

    Args:
        values_left: Array of values for the first sample.
        values_right: Array of values for the second sample.
    """
    values_left = dropna(values_left)
    values_right = dropna(values_right)

    # Nothing can be tested or plotted when a sample has no values left
    # (for instance a column that holds only NaN), so report that instead.
    if len(values_left) == 0 or len(values_right) == 0:
        with solara.Card("Kolmogorov-Smirnov Test"):
            solara.Warning(
                "Both selected columns must contain at least one non-NaN value",
            )
        return

    kstat = stats.ks_2samp(values_left, values_right)

    fig_cdf = make_cdf_figure(values_left, values_right, kstat.statistic_location)
    fig_pdf = make_pdf_figure(values_left, values_right, kstat.statistic_location)

    with solara.Card("Kolmogorov-Smirnov Test"):
        with solara.Columns():
            solara.FigureMatplotlib(fig_cdf)
            solara.FigureMatplotlib(fig_pdf)

        solara.Markdown("# Test Result")

        solara.Info(
            f"statistic: {kstat.statistic:.3g}",
        )

        solara.Info(
            f"p-value: {kstat.pvalue:.3g}",
        )

        solara.Info(
            f"location: {kstat.statistic_location:.3g}",
        )
90
+
91
+
92
@dataclass
class Selection:
    """A (file, column) choice pointing into the module-level ``data_store``."""

    # Key of the chosen entry in ``data_store``; None while nothing is chosen.
    file: Optional[str] = None
    # Column name within the chosen entry; None while nothing is chosen.
    column: Optional[str] = None

    @property
    def is_set(self) -> bool:
        """True only when both a file and a column have been chosen."""
        return not (self.file is None or self.column is None)

    @property
    def columns(self) -> list[str]:
        """Column names of the chosen file, or an empty list without a file."""
        if self.file is None:
            return []
        frame = data_store.value[self.file]
        return list(frame.columns)

    @property
    def array(self) -> np.ndarray:
        """Values of the chosen column as an array; empty if not fully set."""
        if not self.is_set:
            return np.array([])
        frame = data_store.value[self.file]
        return frame[self.column].to_numpy()
112
+
113
+
114
def all_set(selections: list[Selection]) -> bool:
    """Return True when every selection reports ``is_set`` (True if empty)."""
    for candidate in selections:
        if not candidate.is_set:
            return False
    return True
116
+
117
+
118
@solara.component
def Selectors(selection: solara.Reactive[Selection]):
    """Render a file dropdown and a column dropdown bound to *selection*.

    The file dropdown lists the keys of ``data_store``; the column dropdown
    lists the columns of the currently selected file.
    """
    current = selection.value

    def choose_file(name):
        # A newly chosen file resets the column choice.
        selection.update(file=name, column=None)

    def choose_column(name):
        selection.update(column=name)

    solara.Select(
        label="Select file",
        values=list(data_store.value),
        value=current.file,
        on_value=choose_file,
    )

    solara.Select(
        label="Select_column",
        values=current.columns,
        value=current.column,
        on_value=choose_column,
    )
133
+
134
+
135
# Raw uploads as passed to ``file_info.set`` by the file-drop widget.
file_info: solara.Reactive[list[FileInfo]] = solara.reactive([])
# Parsed uploads: one DataFrame per uploaded file, keyed by file name.
data_store: solara.Reactive[dict[str, pd.DataFrame]] = solara.reactive({})

# The two (file, column) choices that are compared against each other.
selection_left = solara.reactive(Selection())
selection_right = solara.reactive(Selection())
140
+
141
+
142
@solara.component
def KSApp():
    """Main page: upload CSV files, pick two columns, show the KS test.

    The left card holds the upload area, the two file/column selectors and a
    "Clear" button. The right side shows a hint until both selections are
    complete, then the test result.
    """

    def load_data():
        # Parse every uploaded file as CSV and publish the frames by name.
        d = {}
        for f in file_info.value:
            b_io = io.BytesIO(f["data"])
            df = pd.read_csv(b_io)
            d[f["name"]] = df
        data_store.set(d)

    # Re-parse whenever the list of uploaded files changes.
    _ = solara.use_memo(load_data, dependencies=[file_info.value])
    # NOTE(review): only written by clear_all; the `.key(...)` call that would
    # read it is commented out below, so it currently has no visible effect.
    upload_key = solara.reactive(uuid.uuid4())

    def clear_all():
        # Reset both selections and drop all uploaded and parsed data.
        upload_key.set(uuid.uuid4())
        selection_left.set(Selection())
        selection_right.set(Selection())
        file_info.set([])
        data_store.set({})

    with solara.ColumnsResponsive([3, 9]):
        with solara.Card("Input"):
            solara.FileDropMultiple(
                label="Upload CSV files",
                on_file=file_info.set,
                lazy=False,
            )  # .key(upload_key.value.hex)
            solara.Text("Select left:")
            Selectors(selection_left)
            # This caption belongs to the right-hand selection.
            solara.Text("Select right:")
            Selectors(selection_right)
            solara.Button(label="Clear", on_click=clear_all)

        if not all_set([selection_left.value, selection_right.value]):
            with solara.Card():
                solara.Text("Please upload data and select both files and columns")

        else:
            KSTestResult(selection_left.value.array, selection_right.value.array)
181
+
182
+
183
# Module-level element built from the KSApp component.
# NOTE(review): presumably the entry point picked up by `solara run app.py` — confirm.
page = KSApp()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
solara
# app.py calls Axes.ecdf, which was added in matplotlib 3.8
matplotlib>=3.8
numpy
# app.py reads the statistic_location attribute of the ks_2samp result,
# which was added in SciPy 1.10
scipy>=1.10
pandas