Vamsi Thiriveedhi committed on
Commit
c6d0240
1 Parent(s): 6c72b9f

Add large files tracked with Git LFS

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ qual-checks-and-quant-values.parquet filter=lfs diff=lfs merge=lfs -text
filter_data_app.py CHANGED
@@ -1,42 +1,49 @@
1
  import streamlit as st
2
  import duckdb
3
- import os
4
  import requests
5
  import pandas as pd
6
  from upsetplot import UpSet
7
  import matplotlib.pyplot as plt
 
8
 
9
  # Set page configuration
10
  st.set_page_config(layout="wide")
11
 
12
  # URL and local path to the Parquet file
13
  PARQUET_URL = 'https://github.com/vkt1414/idc-index-data/releases/download/0.1/qualitative_checks.parquet'
14
- LOCAL_PARQUET_FILE = 'qualitative_checks.parquet'
15
-
16
- # Function to download the Parquet file if it doesn't exist
17
- def download_parquet(url, local_path):
18
- if not os.path.exists(local_path):
19
- response = requests.get(url)
20
- with open(local_path, 'wb') as file:
21
- file.write(response.content)
22
- st.write(f"Downloaded {local_path}")
23
 
24
  @st.cache_data
25
  def load_data():
26
- return duckdb.query(f"SELECT *, concat('https://viewer.imaging.datacommons.cancer.gov/viewer/', StudyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Function to filter data based on user input
29
  def filter_data(df, filters):
30
  for col, value in filters.items():
31
  if value:
32
- df = df[df[col] == value]
33
  return df
34
 
35
  # Function to create an UpSet plot for failed checks
36
  def create_upset_plot_failures(df):
37
- df = df.set_index(df['segmentation_completeness'] != 'pass').set_index(df['laterality_check'] != 'pass', append=True)
38
- df = df.set_index(df['series_with_vertabra_on_every_slice'] != 'pass', append=True)
39
- df = df.set_index(df['connected_volumes'] != 'pass', append=True)
 
 
 
40
  df = df[df.index.to_frame().any(axis=1)] # Ignore the case when all conditions are false
41
 
42
  fig = plt.figure()
@@ -46,6 +53,7 @@ def create_upset_plot_failures(df):
46
 
47
  # Function to create an UpSet plot for passed checks
48
  def create_upset_plot_passes(df):
 
49
  df = df.set_index(df['segmentation_completeness'] == 'pass').set_index(df['laterality_check'] == 'pass', append=True)
50
  df = df.set_index(df['series_with_vertabra_on_every_slice'] == 'pass', append=True)
51
  df = df.set_index(df['connected_volumes'] == 'pass', append=True)
@@ -63,50 +71,99 @@ def main():
63
 
64
  # Sidebar widgets for navigation and filtering
65
  page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
66
-
67
- # Download the Parquet file if it doesn't exist
68
- download_parquet(PARQUET_URL, LOCAL_PARQUET_FILE)
69
 
70
  # Load the data
71
  df = load_data()
72
 
73
- # Preselect one combination for bodyPart based on the first row
74
- initial_body_part = df.iloc[0]['bodyPart']
75
-
76
- with st.sidebar:
77
- st.title("Filters")
78
-
79
- # Body part filter
80
- body_part_options = df['bodyPart'].unique().tolist()
81
- body_part = st.selectbox("Body Part", options=body_part_options, index=body_part_options.index(initial_body_part))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- # Filter the dataframe based on the selected body part
84
- filtered_df = df[df['bodyPart'] == body_part]
 
 
 
 
 
85
 
86
- # Fetch unique values for other filters based on the filtered dataframe
87
- segmentation_completeness_options = [""] + filtered_df['segmentation_completeness'].unique().tolist()
88
- laterality_check_options = [""] + filtered_df['laterality_check'].unique().tolist()
89
- series_with_vertabra_on_every_slice_options = [""] + filtered_df['series_with_vertabra_on_every_slice'].unique().tolist()
90
- connected_volumes_options = [""] + filtered_df['connected_volumes'].unique().tolist()
91
- laterality_options = [""] + filtered_df['laterality'].unique().tolist()
 
92
 
93
- segmentation_completeness = st.selectbox("Segmentation Completeness", options=segmentation_completeness_options)
94
- laterality_check = st.selectbox("Laterality Check", options=laterality_check_options)
95
- series_with_vertabra_on_every_slice = st.selectbox("Series with Vertebra on Every Slice", options=series_with_vertabra_on_every_slice_options)
96
- connected_volumes = st.selectbox("Connected Volumes", options=connected_volumes_options)
97
- laterality = st.selectbox("Laterality", options=laterality_options)
 
 
98
 
99
- # Filtering the data based on user input
100
- filters = {
101
- 'segmentation_completeness': segmentation_completeness if segmentation_completeness else None,
102
- 'laterality_check': laterality_check if laterality_check else None,
103
- 'series_with_vertabra_on_every_slice': series_with_vertabra_on_every_slice if series_with_vertabra_on_every_slice else None,
104
- 'connected_volumes': connected_volumes if connected_volumes else None,
105
- 'bodyPart': body_part,
106
- 'laterality': laterality if laterality else None
107
- }
108
 
109
- filtered_df = filter_data(df, filters)
110
 
111
  # Define the pages
112
  if page == "Summary":
@@ -116,30 +173,34 @@ def main():
116
  WITH Checks AS (
117
  SELECT
118
  bodyPart,
 
119
  COUNT(*) AS total_count,
120
  SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) AS pass_count,
121
  SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) AS laterality_pass_count,
122
  SUM(CASE WHEN series_with_vertabra_on_every_slice = 'pass' THEN 1 ELSE 0 END) AS vertabra_pass_count,
123
  SUM(CASE WHEN connected_volumes = 'pass' THEN 1 ELSE 0 END) AS volumes_pass_count
124
  FROM
125
- 'qualitative_checks.parquet'
126
  GROUP BY
127
- bodyPart
128
  )
129
 
130
  SELECT
131
  bodyPart,
 
132
  ROUND((pass_count * 100.0) / total_count, 2) || '% (' || pass_count || '/' || total_count || ')' AS segmentation_completeness,
133
- ROUND((laterality_pass_count * 100.0) / total_count, 2) || '% (' || laterality_pass_count || '/' || total_count || ')' AS laterality_check,
 
 
134
  ROUND((vertabra_pass_count * 100.0) / total_count, 2) || '% (' || vertabra_pass_count || '/' || total_count || ')' AS vertabra_check,
135
  ROUND((volumes_pass_count * 100.0) / total_count, 2) || '% (' || volumes_pass_count || '/' || total_count || ')' AS volumes_check
136
  FROM
137
  Checks
138
  ORDER BY
139
- bodyPart;
140
- """).to_df()
141
-
142
- st.dataframe(summary_df)
143
 
144
  elif page == "UpSet Plots":
145
  st.header("UpSet Plots of Qualitative Checks")
@@ -161,7 +222,7 @@ def main():
161
 
162
  start_idx = (page_number - 1) * page_size
163
  end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
164
- paginated_df = filtered_df.iloc[start_idx:end_idx]
165
 
166
  # Display the paginated dataframe
167
  st.header("Filtered Data")
@@ -169,9 +230,6 @@ def main():
169
 
170
  st.data_editor(
171
  paginated_df,
172
- column_config={
173
- "viewerUrl": st.column_config.LinkColumn("Viewer Url")
174
- },
175
  hide_index=True,
176
  )
177
 
@@ -184,13 +242,13 @@ def main():
184
  # Create and display the UpSet plot for failed checks
185
  st.header("UpSet Plot for Failed Checks")
186
  st.write("This plot shows the combinations of checks that failed.")
187
- if not filtered_df.empty:
188
  create_upset_plot_failures(filtered_df)
189
 
190
  # Create and display the UpSet plot for passed checks
191
  st.header("UpSet Plot for Passed Checks")
192
  st.write("This plot shows the combinations of checks that passed.")
193
- if not filtered_df.empty:
194
  create_upset_plot_passes(filtered_df)
195
 
196
  if __name__ == "__main__":
 
1
  import streamlit as st
2
  import duckdb
 
3
  import requests
4
  import pandas as pd
5
  from upsetplot import UpSet
6
  import matplotlib.pyplot as plt
7
+ import polars as pl
8
 
9
  # Set page configuration
10
  st.set_page_config(layout="wide")
11
 
12
  # URL and local path to the Parquet file
13
  PARQUET_URL = 'https://github.com/vkt1414/idc-index-data/releases/download/0.1/qualitative_checks.parquet'
14
+ LOCAL_PARQUET_FILE = 'qual-checks-and-quant-values.parquet'
 
 
 
 
 
 
 
 
15
 
16
  @st.cache_data
17
  def load_data():
18
+ cols = [
19
+ 'PatientID',
20
+ 'StudyInstanceUID',
21
+ 'seriesNumber',
22
+ 'bodyPart',
23
+ 'laterality',
24
+ 'segmentation_completeness',
25
+ 'laterality_check',
26
+ 'series_with_vertabra_on_every_slice',
27
+ 'connected_volumes',
28
+ 'Volume from Voxel Summation'
29
+ ]
30
+ return pl.read_parquet(LOCAL_PARQUET_FILE, columns=cols)
31
 
32
  # Function to filter data based on user input
33
  def filter_data(df, filters):
34
  for col, value in filters.items():
35
  if value:
36
+ df = df.filter(pl.col(col) == value)
37
  return df
38
 
39
  # Function to create an UpSet plot for failed checks
40
  def create_upset_plot_failures(df):
41
+ df = df.to_pandas() # Convert to Pandas DataFrame
42
+
43
+ # Treat 'pass' and null values as passing
44
+ df = df.set_index(~((df['segmentation_completeness'] == 'pass') | df['segmentation_completeness'].isnull())).set_index(~((df['laterality_check'] == 'pass') | df['laterality_check'].isnull()), append=True)
45
+ df = df.set_index(~((df['series_with_vertabra_on_every_slice'] == 'pass') | df['series_with_vertabra_on_every_slice'].isnull()), append=True)
46
+ df = df.set_index(~((df['connected_volumes'] == 'pass') | df['connected_volumes'].isnull()), append=True)
47
  df = df[df.index.to_frame().any(axis=1)] # Ignore the case when all conditions are false
48
 
49
  fig = plt.figure()
 
53
 
54
  # Function to create an UpSet plot for passed checks
55
  def create_upset_plot_passes(df):
56
+ df = df.to_pandas() # Convert to Pandas DataFrame
57
  df = df.set_index(df['segmentation_completeness'] == 'pass').set_index(df['laterality_check'] == 'pass', append=True)
58
  df = df.set_index(df['series_with_vertabra_on_every_slice'] == 'pass', append=True)
59
  df = df.set_index(df['connected_volumes'] == 'pass', append=True)
 
71
 
72
  # Sidebar widgets for navigation and filtering
73
  page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
 
 
 
74
 
75
  # Load the data
76
  df = load_data()
77
 
78
+ if page == "UpSet Plots":
79
+ with st.sidebar:
80
+ st.title("Filters")
81
+
82
+ # Initialize filters with None values in session state
83
+ if 'filters' not in st.session_state:
84
+ st.session_state.filters = {
85
+ 'bodyPart': None,
86
+ 'segmentation_completeness': None,
87
+ 'laterality_check': None,
88
+ 'series_with_vertabra_on_every_slice': None,
89
+ 'connected_volumes': None,
90
+ 'laterality': None
91
+ }
92
+
93
+ filters = st.session_state.filters
94
+
95
+ # Define functions to handle filter updates
96
+ def reset_filters():
97
+ filters.update({
98
+ 'segmentation_completeness': None,
99
+ 'laterality_check': None,
100
+ 'series_with_vertabra_on_every_slice': None,
101
+ 'connected_volumes': None,
102
+ 'laterality': None
103
+ })
104
+ st.session_state.filters = filters
105
+
106
+ def apply_filter(filter_name, value):
107
+ filters[filter_name] = value
108
+ st.session_state.filters = filters
109
+
110
+ # Body part filter
111
+ body_part_options = sorted(df['bodyPart'].unique().to_list())
112
+ body_part = st.selectbox("Body Part", options=body_part_options, key='bodyPart', on_change=reset_filters)
113
+ filters['bodyPart'] = body_part
114
+
115
+ # Apply the current filters to update options for other filters
116
+ filtered_df = filter_data(df, filters)
117
+
118
+ # Update options for other filters based on the current selection
119
+ segmentation_completeness_options = [""] + filtered_df['segmentation_completeness'].unique().to_list()
120
+ laterality_check_options = [""] + filtered_df['laterality_check'].unique().to_list()
121
+ series_with_vertabra_on_every_slice_options = [""] + filtered_df['series_with_vertabra_on_every_slice'].unique().to_list()
122
+ connected_volumes_options = [""] + filtered_df['connected_volumes'].unique().to_list()
123
+ laterality_options = [""] + filtered_df['laterality'].unique().to_list()
124
+
125
+ # Add remaining filters with default values from session state
126
+ segmentation_completeness = st.selectbox(
127
+ "Segmentation Completeness",
128
+ options=segmentation_completeness_options,
129
+ index=segmentation_completeness_options.index(filters['segmentation_completeness']) if filters['segmentation_completeness'] else 0,
130
+ key='segmentation_completeness',
131
+ on_change=lambda: apply_filter('segmentation_completeness', st.session_state.segmentation_completeness)
132
+ )
133
 
134
+ laterality_check = st.selectbox(
135
+ "Laterality Check",
136
+ options=laterality_check_options,
137
+ index=laterality_check_options.index(filters['laterality_check']) if filters['laterality_check'] else 0,
138
+ key='laterality_check',
139
+ on_change=lambda: apply_filter('laterality_check', st.session_state.laterality_check)
140
+ )
141
 
142
+ series_with_vertabra_on_every_slice = st.selectbox(
143
+ "Series with Vertebra on Every Slice",
144
+ options=series_with_vertabra_on_every_slice_options,
145
+ index=series_with_vertabra_on_every_slice_options.index(filters['series_with_vertabra_on_every_slice']) if filters['series_with_vertabra_on_every_slice'] else 0,
146
+ key='series_with_vertabra_on_every_slice',
147
+ on_change=lambda: apply_filter('series_with_vertabra_on_every_slice', st.session_state.series_with_vertabra_on_every_slice)
148
+ )
149
 
150
+ connected_volumes = st.selectbox(
151
+ "Connected Volumes",
152
+ options=connected_volumes_options,
153
+ index=connected_volumes_options.index(filters['connected_volumes']) if filters['connected_volumes'] else 0,
154
+ key='connected_volumes',
155
+ on_change=lambda: apply_filter('connected_volumes', st.session_state.connected_volumes)
156
+ )
157
 
158
+ laterality = st.selectbox(
159
+ "Laterality",
160
+ options=laterality_options,
161
+ index=laterality_options.index(filters['laterality']) if filters['laterality'] else 0,
162
+ key='laterality',
163
+ on_change=lambda: apply_filter('laterality', st.session_state.laterality)
164
+ )
 
 
165
 
166
+ st.session_state.filters = filters
167
 
168
  # Define the pages
169
  if page == "Summary":
 
173
  WITH Checks AS (
174
  SELECT
175
  bodyPart,
176
+ laterality,
177
  COUNT(*) AS total_count,
178
  SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) AS pass_count,
179
  SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) AS laterality_pass_count,
180
  SUM(CASE WHEN series_with_vertabra_on_every_slice = 'pass' THEN 1 ELSE 0 END) AS vertabra_pass_count,
181
  SUM(CASE WHEN connected_volumes = 'pass' THEN 1 ELSE 0 END) AS volumes_pass_count
182
  FROM
183
+ 'qual-checks-and-quant-values.parquet'
184
  GROUP BY
185
+ bodyPart, laterality
186
  )
187
 
188
  SELECT
189
  bodyPart,
190
+ laterality,
191
  ROUND((pass_count * 100.0) / total_count, 2) || '% (' || pass_count || '/' || total_count || ')' AS segmentation_completeness,
192
+ CASE WHEN laterality IS NOT NULL
193
+ THEN ROUND((laterality_pass_count * 100.0) / NULLIF(total_count, 0), 2) || '% (' || laterality_pass_count || '/' || total_count || ')'
194
+ ELSE 'N/A' END AS laterality_check,
195
  ROUND((vertabra_pass_count * 100.0) / total_count, 2) || '% (' || vertabra_pass_count || '/' || total_count || ')' AS vertabra_check,
196
  ROUND((volumes_pass_count * 100.0) / total_count, 2) || '% (' || volumes_pass_count || '/' || total_count || ')' AS volumes_check
197
  FROM
198
  Checks
199
  ORDER BY
200
+ bodyPart, laterality;
201
+ """).pl()
202
+ summary_df = summary_df.to_pandas()
203
+ st.data_editor(summary_df, hide_index=True,use_container_width=True,height=1500)
204
 
205
  elif page == "UpSet Plots":
206
  st.header("UpSet Plots of Qualitative Checks")
 
222
 
223
  start_idx = (page_number - 1) * page_size
224
  end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
225
+ paginated_df = filtered_df[start_idx:end_idx].to_pandas() # Convert to Pandas DataFrame
226
 
227
  # Display the paginated dataframe
228
  st.header("Filtered Data")
 
230
 
231
  st.data_editor(
232
  paginated_df,
 
 
 
233
  hide_index=True,
234
  )
235
 
 
242
  # Create and display the UpSet plot for failed checks
243
  st.header("UpSet Plot for Failed Checks")
244
  st.write("This plot shows the combinations of checks that failed.")
245
+ if not filtered_df.is_empty():
246
  create_upset_plot_failures(filtered_df)
247
 
248
  # Create and display the UpSet plot for passed checks
249
  st.header("UpSet Plot for Passed Checks")
250
  st.write("This plot shows the combinations of checks that passed.")
251
+ if not filtered_df.is_empty():
252
  create_upset_plot_passes(filtered_df)
253
 
254
  if __name__ == "__main__":
qual-checks-and-quant-values.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527bf1b978eec82de57e9b4f22d1470da418c47a45ee79c47a3af6857ee850e1
3
+ size 1127681711
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  duckdb
2
  matplotlib
3
  pandas
 
4
  pyarrow
5
  streamlit
6
  streamlit_extras
 
1
  duckdb
2
  matplotlib
3
  pandas
4
+ polars
5
  pyarrow
6
  streamlit
7
  streamlit_extras