vkt1414 committed on
Commit
a9a4b32
·
1 Parent(s): 34b8a25

Create a streamlit app to display qualitative checks on totalsegmentator

Browse files
Files changed (4) hide show
  1. Dockerfile +39 -0
  2. README.md +1 -0
  3. filter_data_app.py +164 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.12.3

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Try and run pip command after setting the user with `USER user` to avoid permission issues with Python
RUN pip install --no-cache-dir --upgrade pip

# Copy only the dependency manifest first so the (slow) install layer below
# stays cached when just the application source changes
COPY --chown=user requirements.txt $HOME/app/requirements.txt

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# Create the .streamlit directory
RUN mkdir -p .streamlit

# Create the config.toml file and set the maxMessageSize
# (printf interprets \n the same way in every shell, unlike echo)
RUN printf '[server]\nmaxMessageSize = 2000\n' > .streamlit/config.toml

# Make port 8501 available to the world outside this container
EXPOSE 8501

# Run filter_data_app.py when the container launches.
# Exec form makes streamlit PID 1 so it receives SIGTERM on container stop.
CMD ["streamlit", "run", "filter_data_app.py"]
README.md CHANGED
@@ -4,6 +4,7 @@ emoji: 🌍
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: docker
 
7
  pinned: false
8
  license: mit
9
  ---
 
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: docker
7
+ app_port: 8501
8
  pinned: false
9
  license: mit
10
  ---
filter_data_app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import duckdb
3
+ import os
4
+ import requests
5
+ import pandas as pd
6
+ from upsetplot import UpSet
7
+ import matplotlib.pyplot as plt
8
+
9
+ # Set page configuration
10
+ st.set_page_config(layout="wide")
11
+
12
+ # URL and local path to the Parquet file
13
+ PARQUET_URL = 'https://github.com/vkt1414/idc-index-data/releases/download/0.1/qualitative_checks.parquet'
14
+ LOCAL_PARQUET_FILE = 'qualitative_checks.parquet'
15
+
16
+ # Function to download the Parquet file if it doesn't exist
17
def download_parquet(url, local_path):
    """Download the Parquet file at ``url`` to ``local_path`` if it is missing.

    Does nothing when ``local_path`` already exists. Otherwise the response is
    streamed to a temporary ``<local_path>.part`` file and moved into place
    only after the transfer finished, so a failed or interrupted download can
    never be mistaken for a valid cached file on the next run.

    Args:
        url: HTTP(S) location of the Parquet file.
        local_path: Destination path on the local filesystem.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the file cannot be written.
    """
    if os.path.exists(local_path):
        return

    partial_path = f"{local_path}.part"
    # (connect timeout, read timeout) in seconds; without one a stalled
    # connection would hang the app forever.
    with requests.get(url, stream=True, timeout=(10, 120)) as response:
        # Fail loudly instead of saving an HTML error page as "parquet".
        response.raise_for_status()
        with open(partial_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                file.write(chunk)

    # Atomic rename: the final path only ever holds a complete download.
    os.replace(partial_path, local_path)
    st.write(f"Downloaded {local_path}")
23
+
24
@st.cache_data
def load_data():
    """Load the qualitative-checks table from the local Parquet file.

    The DuckDB query drops the ``CT_SeriesInstanceUID`` and
    ``StudyInstanceUID`` columns and adds a ``viewerUrl`` column built by
    appending the study UID to the IDC viewer base URL. The resulting pandas
    DataFrame is cached through ``st.cache_data``.
    """
    query = f"SELECT * exclude(CT_SeriesInstanceUID, StudyInstanceUID), concat('https://viewer.imaging.datacommons.cancer.gov/viewer/',studyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')"
    relation = duckdb.query(query)
    return relation.to_df()
27
+
28
+ # Function to filter data based on user input
29
def filter_data(df, filters):
    """Return the rows of ``df`` that satisfy every active filter.

    Args:
        df: DataFrame to filter.
        filters: Mapping of column name to the value that column must equal.
            Entries with a falsy value (``None``, ``""``, ...) are ignored.

    Returns:
        The filtered DataFrame; ``df`` itself when no filter is active.
    """
    result = df
    for column, wanted in filters.items():
        if not wanted:
            continue  # filter not set by the user
        result = result[result[column] == wanted]
    return result
34
+
35
+ # Function to create an UpSet plot for failed checks
36
def create_upset_plot_failures(df):
    """Render an UpSet plot of the checks that failed together.

    Each of the four check columns becomes a boolean index level that is
    ``True`` where the check value is anything other than ``'pass'``. Rows
    where no check failed are dropped before plotting. The figure is sent to
    Streamlit and then closed.

    Args:
        df: DataFrame holding the four qualitative-check columns.
    """
    check_columns = (
        'segmentation_completeness',
        'laterality_check',
        'series_with_vertabra_on_every_slice',
        'connected_volumes',
    )
    # First level replaces the index, the remaining ones are appended to it.
    for position, column in enumerate(check_columns):
        df = df.set_index(df[column] != 'pass', append=position > 0)
    df = df[df.index.to_frame().any(axis=1)]  # Ignore the case when all conditions are false

    if df.empty:
        # Nothing failed for this selection: there is no intersection to draw,
        # so report that instead of handing UpSet an empty frame.
        st.info("No rows with a failed check for the current selection.")
        return

    fig = plt.figure()
    try:
        upset = UpSet(df, sort_by='cardinality', intersection_plot_elements=5, totals_plot_elements=4, element_size=40, show_counts=True)
        upset.plot(fig=fig)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until closed; Streamlit reruns this
        # script on each interaction, so unclosed figures would pile up.
        plt.close(fig)
46
+
47
+ # Function to create an UpSet plot for passed checks
48
def create_upset_plot_passes(df):
    """Render an UpSet plot of the checks that passed together.

    Each of the four check columns becomes a boolean index level that is
    ``True`` where the check value equals ``'pass'``. Rows where no check
    passed are dropped before plotting. The figure is sent to Streamlit and
    then closed.

    Args:
        df: DataFrame holding the four qualitative-check columns.
    """
    check_columns = (
        'segmentation_completeness',
        'laterality_check',
        'series_with_vertabra_on_every_slice',
        'connected_volumes',
    )
    # First level replaces the index, the remaining ones are appended to it.
    for position, column in enumerate(check_columns):
        df = df.set_index(df[column] == 'pass', append=position > 0)
    df = df[df.index.to_frame().any(axis=1)]  # Ignore the case when all conditions are false

    if df.empty:
        # Nothing passed for this selection: there is no intersection to draw,
        # so report that instead of handing UpSet an empty frame.
        st.info("No rows with a passed check for the current selection.")
        return

    fig = plt.figure()
    try:
        upset = UpSet(df, sort_by='cardinality', intersection_plot_elements=5, totals_plot_elements=4, element_size=40, show_counts=True)
        upset.plot(fig=fig)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until closed; Streamlit reruns this
        # script on each interaction, so unclosed figures would pile up.
        plt.close(fig)
58
+
59
+ # Main function to run the Streamlit app
60
def _page_count(total_rows, page_size):
    """Return the number of pages needed for ``total_rows`` rows (at least 1)."""
    # Ceiling division without importing math; an empty table still has one page.
    return max(1, -(-total_rows // page_size))


def main():
    """Run the Streamlit app.

    Downloads the Parquet file if needed, loads it, builds the sidebar
    filters (body part first, then the checks and laterality restricted to
    the values present for that body part), shows the filtered rows ten per
    page, and renders the UpSet plots for failed and passed checks.
    """
    st.title("Qualitative checks of TotalSegmentator Segmentations on NLST")
    st.write("Filter the data based on qualitative checks, bodyPart, and laterality.")

    # Information about the checks
    with st.expander("About the Checks"):
        st.write("""
        **Check for Segmentation Completeness**:
        Depending on the inferior to superior extent of the patient scanned, certain anatomical structures may be cropped or incomplete. We evaluated the completeness of the segmentation by ensuring that there was at least one transverse slice superior and one inferior to each anatomical region.

        **Check for a Single Connected Component**:
        Each anatomical region that is segmented should be continuous and consist of a single connected component. We detect the presence of unconnected components using the VoxelNum field. This field informs the user not only of incomplete or missing segmentations but also the ability to identify segmentations with extraneous or noisy voxels.

        **Check for Correct Left vs Right Laterality**:
        One of the major issues with identifying the accuracy of a segmentation is to determine if the laterality of the anatomical region is correct. We evaluated the laterality by using metadata extracted from the segmentations using the CenterOfMass field.

        **Check for Presence of Vertebrae on Each Slice**:
        Specifically for the vertebrae, we hypothesized that the vertebrae must be present in every transverse slice of the scan. This heuristic is based on the superior to inferior direction of most chest scans.
        """)

    # Download the Parquet file if it doesn't exist
    download_parquet(PARQUET_URL, LOCAL_PARQUET_FILE)

    # Load the data
    df = load_data()

    if df.empty:
        # Without rows there is no body part to preselect and nothing to plot.
        st.warning("The dataset contains no rows; nothing to display.")
        return

    # Sidebar widgets for filtering
    with st.sidebar:
        st.title("Filters")

        # Body part filter. unique() keeps first-appearance order, so index 0
        # is the body part of the first row.
        body_part_options = df['bodyPart'].unique().tolist()
        body_part = st.selectbox("Body Part", options=body_part_options, index=0)

        # Restrict the other filters to values present for the selected body part
        body_part_df = df[df['bodyPart'] == body_part]

        # The leading "" entry means "no filter" for that column
        segmentation_completeness_options = [""] + body_part_df['segmentation_completeness'].unique().tolist()
        laterality_check_options = [""] + body_part_df['laterality_check'].unique().tolist()
        series_with_vertabra_on_every_slice_options = [""] + body_part_df['series_with_vertabra_on_every_slice'].unique().tolist()
        connected_volumes_options = [""] + body_part_df['connected_volumes'].unique().tolist()
        laterality_options = [""] + body_part_df['laterality'].unique().tolist()

        segmentation_completeness = st.selectbox("Segmentation Completeness", options=segmentation_completeness_options)
        laterality_check = st.selectbox("Laterality Check", options=laterality_check_options)
        series_with_vertabra_on_every_slice = st.selectbox("Series with Vertebra on Every Slice", options=series_with_vertabra_on_every_slice_options)
        connected_volumes = st.selectbox("Connected Volumes", options=connected_volumes_options)
        laterality = st.selectbox("Laterality", options=laterality_options)

    # Filtering the data based on user input
    filters = {
        'segmentation_completeness': segmentation_completeness if segmentation_completeness else None,
        'laterality_check': laterality_check if laterality_check else None,
        'series_with_vertabra_on_every_slice': series_with_vertabra_on_every_slice if series_with_vertabra_on_every_slice else None,
        'connected_volumes': connected_volumes if connected_volumes else None,
        'bodyPart': body_part,
        'laterality': laterality if laterality else None
    }

    filtered_df = filter_data(df, filters)

    # Pagination for the filtered dataframe
    page_size = 10
    total_pages = _page_count(len(filtered_df), page_size)
    if total_pages > 1:
        page_number = st.sidebar.slider("Page Number", min_value=1, max_value=total_pages, value=1)
    else:
        # A slider needs min_value < max_value, so a single page gets no slider.
        page_number = 1
    start_idx = (page_number - 1) * page_size
    end_idx = start_idx + page_size
    paginated_df = filtered_df.iloc[start_idx:end_idx]

    # Display the paginated dataframe
    st.header("Filtered Data")
    st.write("Number of Rows:", len(filtered_df))

    st.data_editor(
        paginated_df,
        column_config={
            "viewerUrl": st.column_config.LinkColumn("Viewer Url")
        },
        hide_index=True,
    )

    # Explanation about the UpSet plot
    with st.expander("About the UpSet Plot"):
        st.write("""
        The UpSet plot is a way to visualize intersections of multiple sets. Each row in the plot represents a different set, and the dots indicate the presence or absence of intersections among these sets. The vertical bars show the size of each intersection, making it easy to see which intersections are most common.
        """)

    # Create and display the UpSet plot for failed checks
    st.header("UpSet Plot for Failed Checks")
    st.write("This plot shows the combinations of checks that failed.")
    if not filtered_df.empty:
        create_upset_plot_failures(filtered_df)

    # Create and display the UpSet plot for passed checks
    st.header("UpSet Plot for Passed Checks")
    st.write("This plot shows the combinations of checks that passed.")
    if not filtered_df.empty:
        create_upset_plot_passes(filtered_df)
162
+
163
+ if __name__ == "__main__":
164
+ main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
duckdb
matplotlib
pandas
pyarrow
# imported directly by filter_data_app.py; do not rely on it arriving transitively
requests
streamlit
streamlit_extras
upsetplot