add a summary page
filter_data_app.py  CHANGED  (+89 -67)
@@ -23,7 +23,7 @@ def download_parquet(url, local_path):

@st.cache_data
def load_data():
-    return duckdb.query(f"SELECT
+    return duckdb.query(f"SELECT *, concat('https://viewer.imaging.datacommons.cancer.gov/viewer/', StudyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()

# Function to filter data based on user input
def filter_data(df, filters):
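Note on the load_data() change above: the added concat(...) simply prepends the IDC viewer base URL to each StudyInstanceUID. A minimal pandas sketch of the same transformation (the sample UID below is invented) would be:

import pandas as pd

# Sketch of what the added concat(...) column contains; the sample UID is made up.
df = pd.DataFrame({"StudyInstanceUID": ["1.2.840.113654.2.55.117165863"]})
df["viewerUrl"] = "https://viewer.imaging.datacommons.cancer.gov/viewer/" + df["StudyInstanceUID"]
print(df["viewerUrl"].iloc[0])
# -> https://viewer.imaging.datacommons.cancer.gov/viewer/1.2.840.113654.2.55.117165863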
@@ -58,25 +58,12 @@ def create_upset_plot_passes(df):

# Main function to run the Streamlit app
def main():
-    st.title("Qualitative
+    st.title("Qualitative Checks of TotalSegmentator Segmentations on NLST")
    st.write("Filter the data based on qualitative checks, bodyPart, and laterality.")

-    #
-
-
-    **Check for Segmentation Completeness**:
-    Depending on the inferior to superior extent of the patient scanned, certain anatomical structures may be cropped or incomplete. We evaluated the completeness of the segmentation by ensuring that there was at least one transverse slice superior and one inferior to each anatomical region.
-
-    **Check for a Single Connected Component**:
-    Each anatomical region that is segmented should be continuous and consist of a single connected component. We detect the presence of unconnected components using the VoxelNum field. This field informs the user not only of incomplete or missing segmentations but also the ability to identify segmentations with extraneous or noisy voxels.
-
-    **Check for Correct Left vs Right Laterality**:
-    One of the major issues with identifying the accuracy of a segmentation is to determine if the laterality of the anatomical region is correct. We evaluated the laterality by using metadata extracted from the segmentations using the CenterOfMass field.
-
-    **Check for Presence of Vertebrae on Each Slice**:
-    Specifically for the vertebrae, we hypothesized that the vertebrae must be present in every transverse slice of the scan. This heuristic is based on the superior to inferior direction of most chest scans.
-    """)
-
+    # Sidebar widgets for navigation and filtering
+    page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
+
    # Download the Parquet file if it doesn't exist
    download_parquet(PARQUET_URL, LOCAL_PARQUET_FILE)

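The intro text removed in this hunk is the only place this file describes the four qualitative checks. As a rough illustration of those heuristics, the sketch below shows how such checks could be computed from per-structure metadata. VoxelNum and CenterOfMass are the field names mentioned in the text; every other name, signature, and value here is a hypothetical stand-in, not the app's actual implementation.

def check_completeness(region_min_slice, region_max_slice, scan_min_slice, scan_max_slice):
    """Pass if at least one transverse slice lies superior and one inferior to the region."""
    return scan_min_slice < region_min_slice and region_max_slice < scan_max_slice

def check_single_connected_component(voxel_num_per_component):
    """Pass if the VoxelNum metadata reports exactly one non-empty connected component."""
    return len(voxel_num_per_component) == 1 and voxel_num_per_component[0] > 0

def check_laterality(center_of_mass_x, midline_x, expected_side):
    """Pass if the CenterOfMass falls on the expected side of the patient midline
    (assumes an LPS-like convention where x increases toward the patient's left)."""
    observed_side = "left" if center_of_mass_x > midline_x else "right"
    return observed_side == expected_side

def check_vertebra_on_every_slice(slices_with_vertebra, all_slices):
    """Pass if at least one vertebra label appears on every transverse slice of the scan."""
    return set(all_slices).issubset(set(slices_with_vertebra))

# Toy values only, to show the intended pass/fail semantics.
print(check_completeness(10, 40, 0, 50))                          # True
print(check_single_connected_component([125000]))                 # True
print(check_laterality(112.3, 100.0, "left"))                     # True
print(check_vertebra_on_every_slice(range(0, 50), range(0, 50)))  # True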
@@ -86,7 +73,6 @@ def main():
    # Preselect one combination for bodyPart based on the first row
    initial_body_part = df.iloc[0]['bodyPart']

-    # Sidebar widgets for filtering
    with st.sidebar:
        st.title("Filters")

@@ -122,54 +108,90 @@ def main():

    filtered_df = filter_data(df, filters)

-    #
[… the remaining 47 removed lines were not captured in this view …]
+    # Define the pages
+    if page == "Summary":
+        st.header("Summary of Qualitative Checks")
+        # Execute the SQL to get summary statistics
+        summary_df = duckdb.query("""
+            WITH Checks AS (
+                SELECT
+                    bodyPart,
+                    COUNT(*) AS total_count,
+                    SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) AS pass_count,
+                    SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) AS laterality_pass_count,
+                    SUM(CASE WHEN series_with_vertabra_on_every_slice = 'pass' THEN 1 ELSE 0 END) AS vertabra_pass_count,
+                    SUM(CASE WHEN connected_volumes = 'pass' THEN 1 ELSE 0 END) AS volumes_pass_count
+                FROM
+                    'qualitative_checks.parquet'
+                GROUP BY
+                    bodyPart
+            )
+
+            SELECT
+                bodyPart,
+                ROUND((pass_count * 100.0) / total_count, 2) || '% (' || pass_count || '/' || total_count || ')' AS segmentation_completeness,
+                ROUND((laterality_pass_count * 100.0) / total_count, 2) || '% (' || laterality_pass_count || '/' || total_count || ')' AS laterality_check,
+                ROUND((vertabra_pass_count * 100.0) / total_count, 2) || '% (' || vertabra_pass_count || '/' || total_count || ')' AS vertabra_check,
+                ROUND((volumes_pass_count * 100.0) / total_count, 2) || '% (' || volumes_pass_count || '/' || total_count || ')' AS volumes_check
+            FROM
+                Checks
+            ORDER BY
+                bodyPart;
+        """).to_df()
+
+        st.dataframe(summary_df)
+
+    elif page == "UpSet Plots":
+        st.header("UpSet Plots of Qualitative Checks")
+
+        # Pagination for the filtered dataframe
+        page_size = 10
+        total_pages = len(filtered_df) // page_size
+        if len(filtered_df) % page_size > 0:  # Check if there's a remainder
+            total_pages += 1
+
+        # Ensure there is at least one page
+        total_pages = max(total_pages, 1)
+
+        # Conditionally display the slider
+        if total_pages > 1:
+            page_number = st.sidebar.slider("Page Number", min_value=1, max_value=total_pages, value=1)
+        else:
+            page_number = 1  # If there's only one page, set page_number to 1
+
+        start_idx = (page_number - 1) * page_size
+        end_idx = min(start_idx + page_size, len(filtered_df))  # Ensure end_idx does not go beyond the dataframe length
+        paginated_df = filtered_df.iloc[start_idx:end_idx]
+
+        # Display the paginated dataframe
+        st.header("Filtered Data")
+        st.write("Number of Rows:", len(filtered_df))
+
+        st.data_editor(
+            paginated_df,
+            column_config={
+                "viewerUrl": st.column_config.LinkColumn("Viewer Url")
+            },
+            hide_index=True,
+        )
+
+        # Explanation about the UpSet plot
+        with st.expander("About the UpSet Plot"):
+            st.write("""
+            The UpSet plot is a way to visualize intersections of multiple sets. Each row in the plot represents a different set, and the dots indicate the presence or absence of intersections among these sets. The vertical bars show the size of each intersection, making it easy to see which intersections are most common.
+            """)
+
+        # Create and display the UpSet plot for failed checks
+        st.header("UpSet Plot for Failed Checks")
+        st.write("This plot shows the combinations of checks that failed.")
+        if not filtered_df.empty:
+            create_upset_plot_failures(filtered_df)
+
+        # Create and display the UpSet plot for passed checks
+        st.header("UpSet Plot for Passed Checks")
+        st.write("This plot shows the combinations of checks that passed.")
+        if not filtered_df.empty:
+            create_upset_plot_passes(filtered_df)

if __name__ == "__main__":
    main()
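create_upset_plot_failures and create_upset_plot_passes are defined outside the hunks shown above. For readers unfamiliar with the plot type described in the expander text, here is a minimal sketch of drawing an UpSet plot of failed-check combinations with the upsetplot package; the column names and data are invented, and the real helpers may be implemented differently:

import pandas as pd
from matplotlib import pyplot as plt
from upsetplot import UpSet, from_memberships

# Invented pass/fail results; the app's real column names may differ.
checks = pd.DataFrame({
    "segmentation_completeness": ["pass", "fail", "fail", "pass"],
    "laterality_check":          ["pass", "fail", "pass", "fail"],
    "connected_volumes":         ["fail", "fail", "pass", "pass"],
})

# For each row, record which checks failed, then count how often each
# combination of failures occurs.
failed_combinations = checks.apply(
    lambda row: tuple(name for name, value in row.items() if value == "fail"), axis=1
)
combination_counts = failed_combinations.value_counts()

# from_memberships builds the boolean intersection index that UpSet expects.
plot_data = from_memberships(combination_counts.index.tolist(), data=combination_counts.values)
UpSet(plot_data, show_counts=True).plot()
plt.show()

Inside the Streamlit app, the resulting figure would presumably be handed to st.pyplot() rather than shown with plt.show().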