vkt1414 committed (verified)
Commit 6c72b9f · Parent(s): ca3ef3a

add a summary page

Files changed (1):
  filter_data_app.py  +89 -67
filter_data_app.py CHANGED
@@ -23,7 +23,7 @@ def download_parquet(url, local_path):
 
 @st.cache_data
 def load_data():
-    return duckdb.query(f"SELECT * exclude(CT_SeriesInstanceUID, StudyInstanceUID), concat('https://viewer.imaging.datacommons.cancer.gov/viewer/',studyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()
+    return duckdb.query(f"SELECT *, concat('https://viewer.imaging.datacommons.cancer.gov/viewer/', StudyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()
 
 # Function to filter data based on user input
 def filter_data(df, filters):
@@ -58,25 +58,12 @@ def create_upset_plot_passes(df):
 
 # Main function to run the Streamlit app
 def main():
-    st.title("Qualitative checks of TotalSegmentator Segmentations on NLST")
+    st.title("Qualitative Checks of TotalSegmentator Segmentations on NLST")
     st.write("Filter the data based on qualitative checks, bodyPart, and laterality.")
 
-    # Information about the checks
-    with st.expander("About the Checks"):
-        st.write("""
-        **Check for Segmentation Completeness**:
-        Depending on the inferior to superior extent of the patient scanned, certain anatomical structures may be cropped or incomplete. We evaluated the completeness of the segmentation by ensuring that there was at least one transverse slice superior and one inferior to each anatomical region.
-
-        **Check for a Single Connected Component**:
-        Each anatomical region that is segmented should be continuous and consist of a single connected component. We detect the presence of unconnected components using the VoxelNum field. This field informs the user not only of incomplete or missing segmentations but also the ability to identify segmentations with extraneous or noisy voxels.
-
-        **Check for Correct Left vs Right Laterality**:
-        One of the major issues with identifying the accuracy of a segmentation is to determine if the laterality of the anatomical region is correct. We evaluated the laterality by using metadata extracted from the segmentations using the CenterOfMass field.
-
-        **Check for Presence of Vertebrae on Each Slice**:
-        Specifically for the vertebrae, we hypothesized that the vertebrae must be present in every transverse slice of the scan. This heuristic is based on the superior to inferior direction of most chest scans.
-        """)
-
+    # Sidebar widgets for navigation and filtering
+    page = st.sidebar.selectbox("Choose a page", ["Summary", "UpSet Plots"])
+
     # Download the Parquet file if it doesn't exist
     download_parquet(PARQUET_URL, LOCAL_PARQUET_FILE)
 
@@ -86,7 +73,6 @@ def main():
     # Preselect one combination for bodyPart based on the first row
     initial_body_part = df.iloc[0]['bodyPart']
 
-    # Sidebar widgets for filtering
     with st.sidebar:
         st.title("Filters")
 
@@ -122,54 +108,90 @@ def main():
 
     filtered_df = filter_data(df, filters)
 
-    # Pagination for the filtered dataframe
-    page_size = 10
-    total_pages = len(filtered_df) // page_size
-    if len(filtered_df) % page_size > 0:  # Check if there's a remainder
-        total_pages += 1
-
-    # Ensure there is at least one page
-    total_pages = max(total_pages, 1)
-
-    # Conditionally display the slider
-    if total_pages > 1:
-        page_number = st.sidebar.slider("Page Number", min_value=1, max_value=total_pages, value=1)
-    else:
-        page_number = 1  # If there's only one page, set page_number to 1
-
-    start_idx = (page_number - 1) * page_size
-    end_idx = min(start_idx + page_size, len(filtered_df))  # Ensure end_idx does not go beyond the dataframe length
-    paginated_df = filtered_df.iloc[start_idx:end_idx]
-
-    # Display the paginated dataframe
-    st.header("Filtered Data")
-    st.write("Number of Rows:", len(filtered_df))
-
-    st.data_editor(
-        paginated_df,
-        column_config={
-            "viewerUrl": st.column_config.LinkColumn("Viewer Url")
-        },
-        hide_index=True,
-    )
-
-    # Explanation about the UpSet plot
-    with st.expander("About the UpSet Plot"):
-        st.write("""
-        The UpSet plot is a way to visualize intersections of multiple sets. Each row in the plot represents a different set, and the dots indicate the presence or absence of intersections among these sets. The vertical bars show the size of each intersection, making it easy to see which intersections are most common.
-        """)
-
-    # Create and display the UpSet plot for failed checks
-    st.header("UpSet Plot for Failed Checks")
-    st.write("This plot shows the combinations of checks that failed.")
-    if not filtered_df.empty:
-        create_upset_plot_failures(filtered_df)
-
-    # Create and display the UpSet plot for passed checks
-    st.header("UpSet Plot for Passed Checks")
-    st.write("This plot shows the combinations of checks that passed.")
-    if not filtered_df.empty:
-        create_upset_plot_passes(filtered_df)
+    # Define the pages
+    if page == "Summary":
+        st.header("Summary of Qualitative Checks")
+        # Execute the SQL to get summary statistics
+        summary_df = duckdb.query("""
+            WITH Checks AS (
+                SELECT
+                    bodyPart,
+                    COUNT(*) AS total_count,
+                    SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) AS pass_count,
+                    SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) AS laterality_pass_count,
+                    SUM(CASE WHEN series_with_vertabra_on_every_slice = 'pass' THEN 1 ELSE 0 END) AS vertabra_pass_count,
+                    SUM(CASE WHEN connected_volumes = 'pass' THEN 1 ELSE 0 END) AS volumes_pass_count
+                FROM
+                    'qualitative_checks.parquet'
+                GROUP BY
+                    bodyPart
+            )
+
+            SELECT
+                bodyPart,
+                ROUND((pass_count * 100.0) / total_count, 2) || '% (' || pass_count || '/' || total_count || ')' AS segmentation_completeness,
+                ROUND((laterality_pass_count * 100.0) / total_count, 2) || '% (' || laterality_pass_count || '/' || total_count || ')' AS laterality_check,
+                ROUND((vertabra_pass_count * 100.0) / total_count, 2) || '% (' || vertabra_pass_count || '/' || total_count || ')' AS vertabra_check,
+                ROUND((volumes_pass_count * 100.0) / total_count, 2) || '% (' || volumes_pass_count || '/' || total_count || ')' AS volumes_check
+            FROM
+                Checks
+            ORDER BY
+                bodyPart;
+        """).to_df()
+
+        st.dataframe(summary_df)
+
+    elif page == "UpSet Plots":
+        st.header("UpSet Plots of Qualitative Checks")
+
+        # Pagination for the filtered dataframe
+        page_size = 10
+        total_pages = len(filtered_df) // page_size
+        if len(filtered_df) % page_size > 0:  # Check if there's a remainder
+            total_pages += 1
+
+        # Ensure there is at least one page
+        total_pages = max(total_pages, 1)
+
+        # Conditionally display the slider
+        if total_pages > 1:
+            page_number = st.sidebar.slider("Page Number", min_value=1, max_value=total_pages, value=1)
+        else:
+            page_number = 1  # If there's only one page, set page_number to 1
+
+        start_idx = (page_number - 1) * page_size
+        end_idx = min(start_idx + page_size, len(filtered_df))  # Ensure end_idx does not go beyond the dataframe length
+        paginated_df = filtered_df.iloc[start_idx:end_idx]
+
+        # Display the paginated dataframe
+        st.header("Filtered Data")
+        st.write("Number of Rows:", len(filtered_df))
+
+        st.data_editor(
+            paginated_df,
+            column_config={
+                "viewerUrl": st.column_config.LinkColumn("Viewer Url")
+            },
+            hide_index=True,
+        )
+
+        # Explanation about the UpSet plot
+        with st.expander("About the UpSet Plot"):
+            st.write("""
+            The UpSet plot is a way to visualize intersections of multiple sets. Each row in the plot represents a different set, and the dots indicate the presence or absence of intersections among these sets. The vertical bars show the size of each intersection, making it easy to see which intersections are most common.
+            """)
+
+        # Create and display the UpSet plot for failed checks
+        st.header("UpSet Plot for Failed Checks")
+        st.write("This plot shows the combinations of checks that failed.")
+        if not filtered_df.empty:
+            create_upset_plot_failures(filtered_df)
+
+        # Create and display the UpSet plot for passed checks
+        st.header("UpSet Plot for Passed Checks")
+        st.write("This plot shows the combinations of checks that passed.")
+        if not filtered_df.empty:
+            create_upset_plot_passes(filtered_df)
 
 if __name__ == "__main__":
     main()
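
Reviewer note: the new Summary page reduces to a single DuckDB aggregation over the per-series check columns. Below is a minimal sketch for sanity-checking that aggregation outside Streamlit; it assumes a local qualitative_checks.parquet with the bodyPart and check columns used in the committed query (where 'pass' marks a passed check), and trims the output to two of the four checks for brevity. The file path and column names are taken from the diff, not verified against the actual schema.

    # Sketch only: run the summary aggregation directly with DuckDB.
    # Assumes qualitative_checks.parquet sits in the working directory and
    # uses the same column names as the committed query.
    import duckdb

    summary_df = duckdb.query("""
        SELECT
            bodyPart,
            COUNT(*) AS total_count,
            ROUND(100.0 * SUM(CASE WHEN segmentation_completeness = 'pass' THEN 1 ELSE 0 END) / COUNT(*), 2) AS completeness_pass_pct,
            ROUND(100.0 * SUM(CASE WHEN laterality_check = 'pass' THEN 1 ELSE 0 END) / COUNT(*), 2) AS laterality_pass_pct
        FROM 'qualitative_checks.parquet'
        GROUP BY bodyPart
        ORDER BY bodyPart
    """).to_df()

    print(summary_df)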