vkt1414 committed (verified)
Commit 6c72b9f · Parent(s): ca3ef3a

add a summary page

Files changed (1):
  filter_data_app.py  +89 -67
filter_data_app.py CHANGED
@@ -23,7 +23,7 @@ def download_parquet(url, local_path):
 
 @st.cache_data
 def load_data():
-    return duckdb.query(f"SELECT * exclude(CT_SeriesInstanceUID, StudyInstanceUID), concat('https://viewer.imaging.datacommons.cancer.gov/viewer/',studyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()
+    return duckdb.query(f"SELECT *, concat('https://viewer.imaging.datacommons.cancer.gov/viewer/', StudyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()
 
 # Function to filter data based on user input
 def filter_data(df, filters):
@@ -58,25 +58,12 @@ def create_upset_plot_passes(df):
 
 # Main function to run the Streamlit app
 def main():
-    st.title("Qualitative checks of TotalSegmentator Segmentations on NLST")
+    st.title("Qualitative Checks of TotalSegmentator Segmentations on NLST")
     st.write("Filter the data based on qualitative checks, bodyPart, and laterality.")
 
-    # Information about the checks
-    with st.expander("About the Checks"):
-        st.write("""
-        **Check for Segmentation Completeness**:
-        Depending on the inferior to superior extent of the patient scanned, certain anatomical structures may be cropped or incomplete. We evaluated the completeness of the segmentation by ensuring that there was at least one transverse slice superior and one inferior to each anatomical region.
-
-        **Check for a Single Connected Component**:
-        Each anatomical region that is segmented should be continuous and consist of a single connected component. We detect the presence of unconnected components using the VoxelNum field. This field informs the user not only of incomplete or missing segmentations but also the ability to identify segmentations with extraneous or noisy voxels.
-
-        **Check for Correct Left vs Right Laterality**:
-        One of the major issues with identifying the accuracy of a segmentation is to determine if the laterality of the anatomical region is correct. We evaluated the laterality by using metadata extracted from the segmentations using the CenterOfMass field.
-
-        **Check for Presence of Vertebrae on Each Slice**:
-        Specifically for the vertebrae, we hypothesized that the vertebrae must be present in every transverse slice of the scan. This heuristic is based on the superior to inferior direction of most chest scans.
-        """)
-
+    # Sidebar widgets for navigation and filtering
+    page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
+
     # Download the Parquet file if it doesn't exist
     download_parquet(PARQUET_URL, LOCAL_PARQUET_FILE)
 
@@ -86,7 +73,6 @@ def main():
     # Preselect one combination for bodyPart based on the first row
     initial_body_part = df.iloc[0]['bodyPart']
 
-    # Sidebar widgets for filtering
     with st.sidebar:
         st.title("Filters")
 
@@ -122,54 +108,90 @@ def main():
 
     filtered_df = filter_data(df, filters)
 
-    # Pagination for the filtered dataframe
-    page_size = 10
-    total_pages = len(filtered_df) // page_size
-    if len(filtered_df) % page_size > 0:  # Check if there's a remainder
-        total_pages += 1
-
-    # Ensure there is at least one page
-    total_pages = max(total_pages, 1)
-
-    # Conditionally display the slider
-    if total_pages > 1:
-        page_number = st.sidebar.slider("Page Number", min_value=1, max_value=total_pages, value=1)
-    else:
-        page_number = 1  # If there's only one page, set page_number to 1
-
-    start_idx = (page_number - 1) * page_size
-    end_idx = min(start_idx + page_size, len(filtered_df))  # Ensure end_idx does not go beyond the dataframe length
-    paginated_df = filtered_df.iloc[start_idx:end_idx]
-
-    # Display the paginated dataframe
-    st.header("Filtered Data")
-    st.write("Number of Rows:", len(filtered_df))
-
-    st.data_editor(
-        paginated_df,
-        column_config={
-            "viewerUrl": st.column_config.LinkColumn("Viewer Url")
-        },
-        hide_index=True,
-    )
-
-    # Explanation about the UpSet plot
-    with st.expander("About the UpSet Plot"):
-        st.write("""
-        The UpSet plot is a way to visualize intersections of multiple sets. Each row in the plot represents a different set, and the dots indicate the presence or absence of intersections among these sets. The vertical bars show the size of each intersection, making it easy to see which intersections are most common.
-        """)
-
-    # Create and display the UpSet plot for failed checks
-    st.header("UpSet Plot for Failed Checks")
-    st.write("This plot shows the combinations of checks that failed.")
-    if not filtered_df.empty:
-        create_upset_plot_failures(filtered_df)
-
-    # Create and display the UpSet plot for passed checks
-    st.header("UpSet Plot for Passed Checks")
-    st.write("This plot shows the combinations of checks that passed.")
-    if not filtered_df.empty:
-        create_upset_plot_passes(filtered_df)
+    # Define the pages
+    if page == "Summary":
+        st.header("Summary of Qualitative Checks")
+        # Execute the SQL to get summary statistics
+        summary_df = duckdb.query("""
+            WITH Checks AS (
+                SELECT
+                    bodyPart,
+                    COUNT(*) AS total_count,
+                    SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) AS pass_count,
+                    SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) AS laterality_pass_count,
+                    SUM(CASE WHEN series_with_vertabra_on_every_slice = 'pass' THEN 1 ELSE 0 END) AS vertabra_pass_count,
+                    SUM(CASE WHEN connected_volumes = 'pass' THEN 1 ELSE 0 END) AS volumes_pass_count
+                FROM
+                    'qualitative_checks.parquet'
+                GROUP BY
+                    bodyPart
+            )
+
+            SELECT
+                bodyPart,
+                ROUND((pass_count * 100.0) / total_count, 2) || '% (' || pass_count || '/' || total_count || ')' AS segmentation_completeness,
+                ROUND((laterality_pass_count * 100.0) / total_count, 2) || '% (' || laterality_pass_count || '/' || total_count || ')' AS laterality_check,
+                ROUND((vertabra_pass_count * 100.0) / total_count, 2) || '% (' || vertabra_pass_count || '/' || total_count || ')' AS vertabra_check,
+                ROUND((volumes_pass_count * 100.0) / total_count, 2) || '% (' || volumes_pass_count || '/' || total_count || ')' AS volumes_check
+            FROM
+                Checks
+            ORDER BY
+                bodyPart;
+        """).to_df()
+
+        st.dataframe(summary_df)
+
+    elif page == "UpSet Plots":
+        st.header("UpSet Plots of Qualitative Checks")
+
+        # Pagination for the filtered dataframe
+        page_size = 10
+        total_pages = len(filtered_df) // page_size
+        if len(filtered_df) % page_size > 0:  # Check if there's a remainder
+            total_pages += 1
+
+        # Ensure there is at least one page
+        total_pages = max(total_pages, 1)
+
+        # Conditionally display the slider
+        if total_pages > 1:
+            page_number = st.sidebar.slider("Page Number", min_value=1, max_value=total_pages, value=1)
+        else:
+            page_number = 1  # If there's only one page, set page_number to 1
+
+        start_idx = (page_number - 1) * page_size
+        end_idx = min(start_idx + page_size, len(filtered_df))  # Ensure end_idx does not go beyond the dataframe length
+        paginated_df = filtered_df.iloc[start_idx:end_idx]
+
+        # Display the paginated dataframe
+        st.header("Filtered Data")
+        st.write("Number of Rows:", len(filtered_df))
+
+        st.data_editor(
+            paginated_df,
+            column_config={
+                "viewerUrl": st.column_config.LinkColumn("Viewer Url")
+            },
+            hide_index=True,
+        )
+
+        # Explanation about the UpSet plot
+        with st.expander("About the UpSet Plot"):
+            st.write("""
+            The UpSet plot is a way to visualize intersections of multiple sets. Each row in the plot represents a different set, and the dots indicate the presence or absence of intersections among these sets. The vertical bars show the size of each intersection, making it easy to see which intersections are most common.
+            """)
+
+        # Create and display the UpSet plot for failed checks
+        st.header("UpSet Plot for Failed Checks")
+        st.write("This plot shows the combinations of checks that failed.")
+        if not filtered_df.empty:
+            create_upset_plot_failures(filtered_df)
+
+        # Create and display the UpSet plot for passed checks
+        st.header("UpSet Plot for Passed Checks")
+        st.write("This plot shows the combinations of checks that passed.")
+        if not filtered_df.empty:
+            create_upset_plot_passes(filtered_df)
 
 if __name__ == "__main__":
     main()
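
Reviewer note: the new Summary page reduces to a single DuckDB aggregation over the per-series check columns. Below is a minimal sketch for sanity-checking that aggregation outside Streamlit; it assumes a local qualitative_checks.parquet with the bodyPart and check columns used in the committed query (where 'pass' marks a passed check), and trims the output to two of the four checks for brevity. The file path and column names are taken from the diff, not verified against the actual schema.

    # Sketch only: run the summary aggregation directly with DuckDB.
    # Assumes qualitative_checks.parquet sits in the working directory and
    # uses the same column names as the committed query.
    import duckdb

    summary_df = duckdb.query("""
        SELECT
            bodyPart,
            COUNT(*) AS total_count,
            ROUND(100.0 * SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) / COUNT(*), 2) AS completeness_pass_pct,
            ROUND(100.0 * SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) / COUNT(*), 2) AS laterality_pass_pct
        FROM 'qualitative_checks.parquet'
        GROUP BY bodyPart
        ORDER BY bodyPart
    """).to_df()

    print(summary_df)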