Vamsi Thiriveedhi committed on
Commit
cfdbb51
1 Parent(s): caced37

enh: provide ohif links with series uids, download with seg uuids

Browse files
Files changed (2) hide show
  1. Dockerfile +3 -0
  2. filter_data_app.py +44 -5
Dockerfile CHANGED
@@ -23,6 +23,9 @@ COPY --chown=user . $HOME/app
23
  # Download the parquet file from github
24
  RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/qual_checks_and_quantitative_measurements.parquet
25
 
 
 
 
26
  # Install any needed packages specified in requirements.txt
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
 
23
  # Download the parquet file from github
24
  RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/qual_checks_and_quantitative_measurements.parquet
25
 
26
+ # Download the mapping parquet file from github
27
+ RUN wget https://github.com/ImagingDataCommons/CloudSegmentatorResults/releases/download/0.0.1/ct-seg-sr-map-with-series-uids-gcs-aws-ulrs.parquet
28
+
29
  # Install any needed packages specified in requirements.txt
30
  RUN pip install --no-cache-dir -r requirements.txt
31
 
filter_data_app.py CHANGED
@@ -15,11 +15,15 @@ st.set_page_config(layout="wide")
15
  # Local path to the Parquet file
16
  LOCAL_PARQUET_FILE = 'qual_checks_and_quantitative_measurements.parquet'
17
 
 
 
 
18
  @st.cache_data
19
  def load_data(radiomics_feature='Volume from Voxel Summation'):
20
  cols = [
21
  'PatientID',
22
  'StudyInstanceUID',
 
23
  'SeriesNumber',
24
  'bodyPart',
25
  'laterality',
@@ -36,8 +40,13 @@ def load_data(radiomics_feature='Volume from Voxel Summation'):
36
  # pl.col('connected_volumes').cast(pl.Int32, strict=False)
37
  # ).alias('connected_volumes')
38
  # ])
 
 
 
 
 
39
 
40
- return df
41
 
42
  # Function to filter data based on user input
43
  def filter_data(df, filters,radiomics_feature):
@@ -190,7 +199,7 @@ def main():
190
  on_change=lambda: apply_filter('radiomics_feature', st.session_state.radiomics_feature)
191
  )
192
  filters['radiomics_feature'] = radiomics_feature
193
- df = load_data(radiomics_feature=radiomics_feature)
194
 
195
 
196
  # Body part filter
@@ -303,8 +312,15 @@ def main():
303
  end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
304
  paginated_df = filtered_df[start_idx:end_idx].to_pandas() # Convert to Pandas DataFrame
305
  paginated_df = paginated_df.rename(columns={"connected_volumes": "connected_components"})
306
- paginated_df['Viewer Url'] = 'https://viewer.imaging.datacommons.cancer.gov/v3/viewer/?StudyInstanceUIDs='+paginated_df['StudyInstanceUID']
307
-
 
 
 
 
 
 
 
308
  # Display the paginated dataframe
309
  st.header("Filtered Data")
310
  st.write("Number of Rows:", len(filtered_df))
@@ -313,7 +329,7 @@ def main():
313
  paginated_df,
314
  column_config={
315
  "Viewer Url":st.column_config.LinkColumn("StudyInstanceUID",
316
- display_text=r"https:\/\/viewer\.imaging\.datacommons\.cancer\.gov\/v3\/viewer\/\?StudyInstanceUIDs=(.*)"
317
  ),
318
 
319
  },
@@ -367,6 +383,29 @@ def main():
367
  create_upset_plot_passes(filtered_df)
368
 
369
  def convert_df(df):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  return df.write_csv()
371
 
372
  csv= convert_df(filtered_df)
 
15
  # Local path to the Parquet file
16
  LOCAL_PARQUET_FILE = 'qual_checks_and_quantitative_measurements.parquet'
17
 
18
+
19
+ CT_SEG_SR_MAPPING_FILE = 'ct-seg-sr-map-with-series-uids-gcs-aws-ulrs.parquet'
20
+
21
  @st.cache_data
22
  def load_data(radiomics_feature='Volume from Voxel Summation'):
23
  cols = [
24
  'PatientID',
25
  'StudyInstanceUID',
26
+ 'CT_SeriesInstanceUID',
27
  'SeriesNumber',
28
  'bodyPart',
29
  'laterality',
 
40
  # pl.col('connected_volumes').cast(pl.Int32, strict=False)
41
  # ).alias('connected_volumes')
42
  # ])
43
+ map_cols = [
44
+ 'seg_seriesInstanceUID',
45
+ 'Referenced_CT_SeriesInstanceUID',
46
+ ]
47
+ map_df = pl.read_parquet(CT_SEG_SR_MAPPING_FILE, columns=map_cols)
48
 
49
+ return df, map_df
50
 
51
  # Function to filter data based on user input
52
  def filter_data(df, filters,radiomics_feature):
 
199
  on_change=lambda: apply_filter('radiomics_feature', st.session_state.radiomics_feature)
200
  )
201
  filters['radiomics_feature'] = radiomics_feature
202
+ df,map_df = load_data(radiomics_feature=radiomics_feature)
203
 
204
 
205
  # Body part filter
 
312
  end_idx = min(start_idx + page_size, len(filtered_df)) # Ensure end_idx does not go beyond the dataframe length
313
  paginated_df = filtered_df[start_idx:end_idx].to_pandas() # Convert to Pandas DataFrame
314
  paginated_df = paginated_df.rename(columns={"connected_volumes": "connected_components"})
315
+ sql='''
316
+ SELECT
317
+ DISTINCT *
318
+ FROM
319
+ paginated_df pdf
320
+ JOIN map_df mdf on pdf.CT_SeriesInstanceUID = mdf.Referenced_CT_SeriesInstanceUID
321
+ '''
322
+ paginated_df = duckdb.sql(sql).df()
323
+ paginated_df['Viewer Url'] = 'https://viewer.imaging.datacommons.cancer.gov/v3/viewer/?StudyInstanceUIDs='+paginated_df['StudyInstanceUID']+'&SeriesInstanceUIDs='+paginated_df['CT_SeriesInstanceUID']+','+paginated_df['seg_seriesInstanceUID']
324
  # Display the paginated dataframe
325
  st.header("Filtered Data")
326
  st.write("Number of Rows:", len(filtered_df))
 
329
  paginated_df,
330
  column_config={
331
  "Viewer Url":st.column_config.LinkColumn("StudyInstanceUID",
332
+ display_text=r"https:\/\/viewer\.imaging\.datacommons\.cancer\.gov\/v3\/viewer\/\?\StudyInstanceUIDs=(.*)&"
333
  ),
334
 
335
  },
 
383
  create_upset_plot_passes(filtered_df)
384
 
385
  def convert_df(df):
386
+ sql=f'''
387
+ SELECT
388
+ DISTINCT
389
+ PatientID,
390
+ StudyInstanceUID,
391
+ CT_SeriesInstanceUID,
392
+ SEG_seriesInstanceUID,
393
+ SeriesNumber,
394
+ bodyPart,
395
+ laterality,
396
+ segmentation_completeness,
397
+ laterality_check,
398
+ volume_from_voxel_summation_check,
399
+ connected_volumes,
400
+ voxel_num,
401
+ "{radiomics_feature}"
402
+ FROM
403
+ filtered_df
404
+ JOIN
405
+ map_df on filtered_df.CT_SeriesInstanceUID= map_df.Referenced_CT_SeriesInstanceUId
406
+ '''
407
+ df= duckdb.sql(sql).pl()
408
+
409
  return df.write_csv()
410
 
411
  csv= convert_df(filtered_df)