Create a Streamlit app to display qualitative checks on TotalSegmentator
- Dockerfile +39 -0
- README.md +1 -0
- filter_data_app.py +164 -0
- requirements.txt +7 -0
Dockerfile
ADDED
@@ -0,0 +1,39 @@
+# Use an official Python runtime as a parent image
+FROM python:3.12.3
+
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+
+# Switch to the "user" user
+USER user
+
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+
+# Run pip after setting the user with `USER user` to avoid permission issues with Python
+RUN pip install --no-cache-dir --upgrade pip
+
+# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+COPY --chown=user . $HOME/app
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Create the .streamlit directory
+RUN mkdir -p .streamlit
+
+# Create the config.toml file and set the maxMessageSize
+RUN echo "\
+[server]\n\
+maxMessageSize = 2000\n\
+" > .streamlit/config.toml
+
+# Make port 8501 available to the world outside this container
+EXPOSE 8501
+
+# Run filter_data_app.py when the container launches
+CMD streamlit run filter_data_app.py
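Note on the config.toml step: the echo relies on the shell interpreting the \n escapes (the python base image's /bin/sh is dash, whose echo builtin does this), so the generated .streamlit/config.toml should contain roughly:

[server]
maxMessageSize = 2000

server.maxMessageSize is Streamlit's per-WebSocket-message limit in megabytes (default 200); raising it to 2000 presumably accommodates the large dataframes this app sends to the browser.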
README.md
CHANGED
@@ -4,6 +4,7 @@ emoji: 🌍
 colorFrom: purple
 colorTo: green
 sdk: docker
+app_port: 8501
 pinned: false
 license: mit
 ---
filter_data_app.py
ADDED
@@ -0,0 +1,164 @@
+import streamlit as st
+import duckdb
+import os
+import requests
+import pandas as pd
+from upsetplot import UpSet
+import matplotlib.pyplot as plt
+
+# Set page configuration
+st.set_page_config(layout="wide")
+
+# URL and local path to the Parquet file
+PARQUET_URL = 'https://github.com/vkt1414/idc-index-data/releases/download/0.1/qualitative_checks.parquet'
+LOCAL_PARQUET_FILE = 'qualitative_checks.parquet'
+
+# Function to download the Parquet file if it doesn't exist
+def download_parquet(url, local_path):
+    if not os.path.exists(local_path):
+        response = requests.get(url)
+        with open(local_path, 'wb') as file:
+            file.write(response.content)
+        st.write(f"Downloaded {local_path}")
+
+@st.cache_data
+def load_data():
+    return duckdb.query(f"SELECT * exclude(CT_SeriesInstanceUID, StudyInstanceUID), concat('https://viewer.imaging.datacommons.cancer.gov/viewer/', studyInstanceUID) as viewerUrl FROM read_parquet('{LOCAL_PARQUET_FILE}')").to_df()
+
+# Function to filter data based on user input
+def filter_data(df, filters):
+    for col, value in filters.items():
+        if value:
+            df = df[df[col] == value]
+    return df
+
+# Function to create an UpSet plot for failed checks
+def create_upset_plot_failures(df):
+    df = df.set_index(df['segmentation_completeness'] != 'pass').set_index(df['laterality_check'] != 'pass', append=True)
+    df = df.set_index(df['series_with_vertabra_on_every_slice'] != 'pass', append=True)
+    df = df.set_index(df['connected_volumes'] != 'pass', append=True)
+    df = df[df.index.to_frame().any(axis=1)]  # Ignore the case when all conditions are false
+
+    fig = plt.figure()
+    upset = UpSet(df, sort_by='cardinality', intersection_plot_elements=5, totals_plot_elements=4, element_size=40, show_counts=True)
+    upset.plot(fig=fig)
+    st.pyplot(fig)
+
+# Function to create an UpSet plot for passed checks
+def create_upset_plot_passes(df):
+    df = df.set_index(df['segmentation_completeness'] == 'pass').set_index(df['laterality_check'] == 'pass', append=True)
+    df = df.set_index(df['series_with_vertabra_on_every_slice'] == 'pass', append=True)
+    df = df.set_index(df['connected_volumes'] == 'pass', append=True)
+    df = df[df.index.to_frame().any(axis=1)]  # Ignore the case when all conditions are false
+
+    fig = plt.figure()
+    upset = UpSet(df, sort_by='cardinality', intersection_plot_elements=5, totals_plot_elements=4, element_size=40, show_counts=True)
+    upset.plot(fig=fig)
+    st.pyplot(fig)
+
+# Main function to run the Streamlit app
+def main():
+    st.title("Qualitative checks of TotalSegmentator Segmentations on NLST")
+    st.write("Filter the data based on qualitative checks, bodyPart, and laterality.")
+
+    # Information about the checks
+    with st.expander("About the Checks"):
+        st.write("""
+**Check for Segmentation Completeness**:
+Depending on the inferior-to-superior extent of the patient scanned, certain anatomical structures may be cropped or incomplete. We evaluated the completeness of each segmentation by ensuring that there was at least one transverse slice superior and one inferior to each anatomical region.
+
+**Check for a Single Connected Component**:
+Each segmented anatomical region should be continuous and consist of a single connected component. We detect the presence of unconnected components using the VoxelNum field. This field flags not only incomplete or missing segmentations but also segmentations with extraneous or noisy voxels.
+
+**Check for Correct Left vs Right Laterality**:
+A major challenge in assessing the accuracy of a segmentation is determining whether the laterality of the anatomical region is correct. We evaluated laterality using the CenterOfMass field extracted from each segmentation.
+
+**Check for Presence of Vertebrae on Each Slice**:
+Specifically for the vertebrae, we hypothesized that a vertebra must be present in every transverse slice of the scan. This heuristic is based on the superior-to-inferior extent of most chest scans.
+""")
+
+    # Download the Parquet file if it doesn't exist
+    download_parquet(PARQUET_URL, LOCAL_PARQUET_FILE)
+
+    # Load the data
+    df = load_data()
+
+    # Preselect one combination for bodyPart based on the first row
+    initial_body_part = df.iloc[0]['bodyPart']
+
+    # Sidebar widgets for filtering
+    with st.sidebar:
+        st.title("Filters")
+
+        # Body part filter
+        body_part_options = df['bodyPart'].unique().tolist()
+        body_part = st.selectbox("Body Part", options=body_part_options, index=body_part_options.index(initial_body_part))
+
+        # Filter the dataframe based on the selected body part
+        filtered_df = df[df['bodyPart'] == body_part]
+
+        # Fetch unique values for other filters based on the filtered dataframe
+        segmentation_completeness_options = [""] + filtered_df['segmentation_completeness'].unique().tolist()
+        laterality_check_options = [""] + filtered_df['laterality_check'].unique().tolist()
+        series_with_vertabra_on_every_slice_options = [""] + filtered_df['series_with_vertabra_on_every_slice'].unique().tolist()
+        connected_volumes_options = [""] + filtered_df['connected_volumes'].unique().tolist()
+        laterality_options = [""] + filtered_df['laterality'].unique().tolist()
+
+        segmentation_completeness = st.selectbox("Segmentation Completeness", options=segmentation_completeness_options)
+        laterality_check = st.selectbox("Laterality Check", options=laterality_check_options)
+        series_with_vertabra_on_every_slice = st.selectbox("Series with Vertebra on Every Slice", options=series_with_vertabra_on_every_slice_options)
+        connected_volumes = st.selectbox("Connected Volumes", options=connected_volumes_options)
+        laterality = st.selectbox("Laterality", options=laterality_options)
+
+    # Filtering the data based on user input
+    filters = {
+        'segmentation_completeness': segmentation_completeness if segmentation_completeness else None,
+        'laterality_check': laterality_check if laterality_check else None,
+        'series_with_vertabra_on_every_slice': series_with_vertabra_on_every_slice if series_with_vertabra_on_every_slice else None,
+        'connected_volumes': connected_volumes if connected_volumes else None,
+        'bodyPart': body_part,
+        'laterality': laterality if laterality else None
+    }
+
+    filtered_df = filter_data(df, filters)
+
+    # Pagination for the filtered dataframe
+    page_size = 10
+    total_pages = max(1, (len(filtered_df) + page_size - 1) // page_size)  # ceiling division, always at least one page
+    page_number = st.sidebar.slider("Page Number", min_value=1, max_value=total_pages, value=1)
+    start_idx = (page_number - 1) * page_size
+    end_idx = start_idx + page_size
+    paginated_df = filtered_df.iloc[start_idx:end_idx]
+
+    # Display the paginated dataframe
+    st.header("Filtered Data")
+    st.write("Number of Rows:", len(filtered_df))
+
+    st.data_editor(
+        paginated_df,
+        column_config={
+            "viewerUrl": st.column_config.LinkColumn("Viewer Url")
+        },
+        hide_index=True,
+    )
+
+    # Explanation about the UpSet plot
+    with st.expander("About the UpSet Plot"):
+        st.write("""
+The UpSet plot is a way to visualize intersections of multiple sets. Each row in the plot represents a different set, and the dots indicate the presence or absence of intersections among these sets. The vertical bars show the size of each intersection, making it easy to see which intersections are most common.
+""")
+
+    # Create and display the UpSet plot for failed checks
+    st.header("UpSet Plot for Failed Checks")
+    st.write("This plot shows the combinations of checks that failed.")
+    if not filtered_df.empty:
+        create_upset_plot_failures(filtered_df)
+
+    # Create and display the UpSet plot for passed checks
+    st.header("UpSet Plot for Passed Checks")
+    st.write("This plot shows the combinations of checks that passed.")
+    if not filtered_df.empty:
+        create_upset_plot_passes(filtered_df)
+
+if __name__ == "__main__":
+    main()
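The four checks described in the app's "About the Checks" expander were computed upstream of this commit; the Parquet file only carries their pass/fail results. As a minimal sketch of the kind of heuristic described for the laterality check — the function, axis conventions, and midline parameter below are hypothetical, not taken from the actual pipeline — a center-of-mass test might look like:

import numpy as np

def laterality_check(mask: np.ndarray, expected: str, midline_x: float) -> str:
    """Hypothetical sketch of a left/right laterality check.

    Assumes `mask` is a 3D binary segmentation whose first axis is the
    patient left-right axis, with the index increasing toward the patient's
    left, and that `midline_x` is the voxel index of the patient midline.
    None of these conventions come from the actual pipeline.
    """
    coords = np.argwhere(mask)              # voxel coordinates of the segment
    if coords.size == 0:
        return "fail"                       # an empty segmentation cannot pass
    center_of_mass_x = coords[:, 0].mean()  # x component of the center of mass
    observed = "left" if center_of_mass_x > midline_x else "right"
    return "pass" if observed == expected else "fail"

# Example: a small synthetic mask entirely on the patient's left of the midline
mask = np.zeros((10, 10, 10), dtype=bool)
mask[7:9, 4:6, 4:6] = True
print(laterality_check(mask, expected="left", midline_x=5.0))   # pass
print(laterality_check(mask, expected="right", midline_x=5.0))  # fail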
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+duckdb
+matplotlib
+pandas
+pyarrow
+streamlit
+streamlit_extras
+upsetplot
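For readers unfamiliar with the upsetplot package listed above, here is a minimal standalone example of the kind of plot the app draws; the check names and counts are made up for illustration:

import matplotlib.pyplot as plt
from upsetplot import UpSet, from_memberships

# Each inner list names the checks a series failed; `data` gives how many
# series failed exactly that combination (all values are fabricated).
failures = from_memberships(
    [
        ["segmentation_completeness"],
        ["laterality_check"],
        ["segmentation_completeness", "laterality_check"],
        ["segmentation_completeness", "connected_volumes"],
    ],
    data=[120, 45, 12, 7],
)
UpSet(failures, sort_by="cardinality", show_counts=True).plot()
plt.show()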