Vamsi Thiriveedhi
committed on
Commit
•
cfdbb51
1
Parent(s):
caced37
enh: provide ohif links with series uids, download with seg uuids
Browse files
- Dockerfile +3 -0
- filter_data_app.py +44 -5
Dockerfile
CHANGED
@@ -23,6 +23,9 @@ COPY --chown=user . $HOME/app
|
|
23 |
# Download the parquet file from github
|
24 |
RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/qual_checks_and_quantitative_measurements.parquet
|
25 |
|
|
|
|
|
|
|
26 |
# Install any needed packages specified in requirements.txt
|
27 |
RUN pip install --no-cache-dir -r requirements.txt
|
28 |
|
|
|
23 |
# Download the parquet file from github
|
24 |
RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/qual_checks_and_quantitative_measurements.parquet
|
25 |
|
26 |
+
# Download the mapping parquet file from github
|
27 |
+
RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/ct-seg-sr-map-with-series-uids-gcs-aws-ulrs.parquet
|
28 |
+
|
29 |
# Install any needed packages specified in requirements.txt
|
30 |
RUN pip install --no-cache-dir -r requirements.txt
|
31 |
|
filter_data_app.py
CHANGED
@@ -15,11 +15,15 @@ st.set_page_config(layout="wide")
|
|
15 |
# Local path to the Parquet file
|
16 |
LOCAL_PARQUET_FILE = 'qual_checks_and_quantitative_measurements.parquet'
|
17 |
|
|
|
|
|
|
|
18 |
@st.cache_data
|
19 |
def load_data(radiomics_feature='Volume from Voxel Summation'):
|
20 |
cols = [
|
21 |
'PatientID',
|
22 |
'StudyInstanceUID',
|
|
|
23 |
'SeriesNumber',
|
24 |
'bodyPart',
|
25 |
'laterality',
|
@@ -36,8 +40,13 @@ def load_data(radiomics_feature='Volume from Voxel Summation'):
|
|
36 |
# pl.col('connected_volumes').cast(pl.Int32, strict=False)
|
37 |
# ).alias('connected_volumes')
|
38 |
# ])
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
return df
|
41 |
|
42 |
# Function to filter data based on user input
|
43 |
def filter_data(df, filters,radiomics_feature):
|
@@ -190,7 +199,7 @@ def main():
|
|
190 |
on_change=lambda: apply_filter('radiomics_feature', st.session_state.radiomics_feature)
|
191 |
)
|
192 |
filters['radiomics_feature'] = radiomics_feature
|
193 |
-
df = load_data(radiomics_feature=radiomics_feature)
|
194 |
|
195 |
|
196 |
# Body part filter
|
@@ -303,8 +312,15 @@ def main():
|
|
303 |
end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
|
304 |
paginated_df = filtered_df[start_idx:end_idx].to_pandas() # Convert to Pandas DataFrame
|
305 |
paginated_df = paginated_df.rename(columns={"connected_volumes": "connected_components"})
|
306 |
-
|
307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
# Display the paginated dataframe
|
309 |
st.header("Filtered Data")
|
310 |
st.write("Number of Rows:", len(filtered_df))
|
@@ -313,7 +329,7 @@ def main():
|
|
313 |
paginated_df,
|
314 |
column_config={
|
315 |
"Viewer Url":st.column_config.LinkColumn("StudyInstanceUID",
|
316 |
-
display_text=r"https:\/\/viewer\.imaging\.datacommons\.cancer\.gov\/v3\/viewer
|
317 |
),
|
318 |
|
319 |
},
|
@@ -367,6 +383,29 @@ def main():
|
|
367 |
create_upset_plot_passes(filtered_df)
|
368 |
|
369 |
def convert_df(df):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
return df.write_csv()
|
371 |
|
372 |
csv= convert_df(filtered_df)
|
|
|
15 |
# Local path to the Parquet file
|
16 |
LOCAL_PARQUET_FILE = 'qual_checks_and_quantitative_measurements.parquet'
|
17 |
|
18 |
+
|
19 |
+
CT_SEG_SR_MAPPING_FILE = 'ct-seg-sr-map-with-series-uids-gcs-aws-ulrs.parquet'
|
20 |
+
|
21 |
@st.cache_data
|
22 |
def load_data(radiomics_feature='Volume from Voxel Summation'):
|
23 |
cols = [
|
24 |
'PatientID',
|
25 |
'StudyInstanceUID',
|
26 |
+
'CT_SeriesInstanceUID',
|
27 |
'SeriesNumber',
|
28 |
'bodyPart',
|
29 |
'laterality',
|
|
|
40 |
# pl.col('connected_volumes').cast(pl.Int32, strict=False)
|
41 |
# ).alias('connected_volumes')
|
42 |
# ])
|
43 |
+
map_cols = [
|
44 |
+
'seg_seriesInstanceUID',
|
45 |
+
'Referenced_CT_SeriesInstanceUID',
|
46 |
+
]
|
47 |
+
map_df = pl.read_parquet(CT_SEG_SR_MAPPING_FILE, columns=map_cols)
|
48 |
|
49 |
+
return df, map_df
|
50 |
|
51 |
# Function to filter data based on user input
|
52 |
def filter_data(df, filters,radiomics_feature):
|
|
|
199 |
on_change=lambda: apply_filter('radiomics_feature', st.session_state.radiomics_feature)
|
200 |
)
|
201 |
filters['radiomics_feature'] = radiomics_feature
|
202 |
+
df,map_df = load_data(radiomics_feature=radiomics_feature)
|
203 |
|
204 |
|
205 |
# Body part filter
|
|
|
312 |
end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
|
313 |
paginated_df = filtered_df[start_idx:end_idx].to_pandas() # Convert to Pandas DataFrame
|
314 |
paginated_df = paginated_df.rename(columns={"connected_volumes": "connected_components"})
|
315 |
+
sql='''
|
316 |
+
SELECT
|
317 |
+
DISTINCT *
|
318 |
+
FROM
|
319 |
+
paginated_df pdf
|
320 |
+
JOIN map_df mdf on pdf.CT_SeriesInstanceUID = mdf.Referenced_CT_SeriesInstanceUID
|
321 |
+
'''
|
322 |
+
paginated_df = duckdb.sql(sql).df()
|
323 |
+
paginated_df['Viewer Url'] = 'https://viewer.imaging.datacommons.cancer.gov/v3/viewer/?StudyInstanceUIDs='+paginated_df['StudyInstanceUID']+'&SeriesInstanceUIDs='+paginated_df['CT_SeriesInstanceUID']+','+paginated_df['seg_seriesInstanceUID']
|
324 |
# Display the paginated dataframe
|
325 |
st.header("Filtered Data")
|
326 |
st.write("Number of Rows:", len(filtered_df))
|
|
|
329 |
paginated_df,
|
330 |
column_config={
|
331 |
"Viewer Url":st.column_config.LinkColumn("StudyInstanceUID",
|
332 |
+
display_text=r"https:\/\/viewer\.imaging\.datacommons\.cancer\.gov\/v3\/viewer\/\?\StudyInstanceUIDs=(.*)&"
|
333 |
),
|
334 |
|
335 |
},
|
|
|
383 |
create_upset_plot_passes(filtered_df)
|
384 |
|
385 |
def convert_df(df):
|
386 |
+
sql=f'''
|
387 |
+
SELECT
|
388 |
+
DISTINCT
|
389 |
+
PatientID,
|
390 |
+
StudyInstanceUID,
|
391 |
+
CT_SeriesInstanceUID,
|
392 |
+
SEG_seriesInstanceUID,
|
393 |
+
SeriesNumber,
|
394 |
+
bodyPart,
|
395 |
+
laterality,
|
396 |
+
segmentation_completeness,
|
397 |
+
laterality_check,
|
398 |
+
volume_from_voxel_summation_check,
|
399 |
+
connected_volumes,
|
400 |
+
voxel_num,
|
401 |
+
"{radiomics_feature}"
|
402 |
+
FROM
|
403 |
+
filtered_df
|
404 |
+
JOIN
|
405 |
+
map_df on filtered_df.CT_SeriesInstanceUID= map_df.Referenced_CT_SeriesInstanceUId
|
406 |
+
'''
|
407 |
+
df= duckdb.sql(sql).pl()
|
408 |
+
|
409 |
return df.write_csv()
|
410 |
|
411 |
csv= convert_df(filtered_df)
|