diff --git "a/app.py" "b/app.py"
new file mode 100644--- /dev/null
+++ "b/app.py"
@@ -0,0 +1,4221 @@
+
+!pip install gradio
+#transformers
+
+
+
+########### AGENT: Clincialtrial.gov ###################################################################################################
+##Gradio App: TRIAL CONNECT
+#Author: Tamer Chowdhury' Nov 2024
+#tamer.chowdhury@gmail.com
+##################################################################################################################################
+
+import gradio as gr
+from gradio import Interface
+from gradio import Dropdown
+import io
+import re
+import pandas as pd
+import textwrap
+from IPython.display import display
+import requests
+#from _plotly_utils.utils import NotEncodable
+from IPython.core.display import display_markdown
+
+
+######################### from CLAUDE###########################################
+import aiohttp
+import asyncio
+import pandas as pd
+import io
+import json
+
async def fetch(session, url, params):
    """GET ``url`` with ``params`` and return the decoded JSON body, or ``None``.

    Every failure -- a non-200 status, a body that is not valid JSON, or any
    exception raised while talking to the server -- is reported with
    ``print`` and converted into a ``None`` return value.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async
            context manager yielding a response with ``status`` and an
            awaitable ``text()``.
        url: Address to request.
        params: Query parameters forwarded to ``session.get``.

    Returns:
        The parsed JSON document, or ``None`` on any failure.
    """
    try:
        async with session.get(url, params=params) as response:
            if response.status != 200:
                print(f"HTTP Error: {response.status}")
                print(f"Response text: {await response.text()}")
                return None

            body = await response.text()
            try:
                return json.loads(body)
            except json.JSONDecodeError:
                print(f"Failed to decode JSON: {body[:200]}...")
                # Re-raised on purpose: the outer handler logs it and
                # turns it into the None result.
                raise
    except Exception as exc:
        print(f"Error in fetch: {str(exc)}")
        return None
+
async def get_nct_ids(lead_sponsor_name=None, disease_area=None, overall_status=None,
                      location_country=None, NCTId=None, max_records=None, blocks=30):
    """Search ClinicalTrials.gov (API v2) and return one flat dict per study.

    Args:
        lead_sponsor_name: Lead sponsor search text (``query.lead``).
        disease_area: Condition search text (``query.cond``).
        overall_status: Status filter; upper-cased and sent as
            ``filter.overallStatus``.
        location_country: Location search text (``query.locn``).
        NCTId: When given, only ``query.id`` is sent and every other
            search argument (including ``overall_status``) is ignored.
        max_records: Optional cap on the number of studies returned.
        blocks: Accepted for backward compatibility; not used.

    Returns:
        A list of dicts with the keys ``NCTId``, ``Phase``, ``OrgStudyId``,
        ``Status``, ``Condition``, ``CompletionDate``, ``EnrollmentCount``,
        ``StudyType``, ``Arm``, ``Drug``, ``Country``, ``City``, ``Site``,
        ``StudyPopulation``, ``Sponsor``, ``Collaborator``, ``StartDate``,
        ``PrimaryMeasure``, ``Purpose`` and ``BriefTitle``. The list is empty
        when nothing was found or the first request failed.
    """
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    # Only the listed fields come back from the API, so everything read
    # below has to be requested here.
    fields = [
        "protocolSection.identificationModule.nctId",
        "protocolSection.identificationModule.orgStudyIdInfo",
        "protocolSection.identificationModule.briefTitle",
        "protocolSection.conditionsModule.conditions",
        "protocolSection.designModule.phases",
        "protocolSection.statusModule.overallStatus",
        "protocolSection.statusModule.primaryCompletionDateStruct",
        "protocolSection.designModule.enrollmentInfo",
        "protocolSection.designModule.studyType",
        "protocolSection.eligibilityModule.studyPopulation",
        "protocolSection.contactsLocationsModule.locations",
        "protocolSection.designModule.designInfo",
        "protocolSection.armsInterventionsModule.armGroups",
        "protocolSection.sponsorCollaboratorsModule.leadSponsor",
        # Needed by _get_collaborators; without it Collaborator is always empty.
        "protocolSection.sponsorCollaboratorsModule.collaborators",
        "protocolSection.armsInterventionsModule.interventions",
        "protocolSection.outcomesModule.primaryOutcomes",
        "protocolSection.statusModule.startDateStruct",
    ]

    params = {
        "format": "json",
        "fields": ",".join(fields),
        "pageSize": "1000",
        "countTotal": "true",
    }

    print("Constructing query...")

    # Values are passed through untouched: the HTTP client encodes the query
    # string itself, so pre-encoding spaces as "+" would send a literal plus.
    if NCTId:
        params["query.id"] = NCTId
    else:
        if disease_area:
            params["query.cond"] = disease_area
        if lead_sponsor_name:
            params["query.lead"] = lead_sponsor_name
        if location_country:
            params["query.locn"] = location_country
        if overall_status:
            params["filter.overallStatus"] = overall_status.upper()

    print(f"Full parameters: {params}")

    all_studies = []
    next_page_token = None

    async with aiohttp.ClientSession() as session:
        while True:
            try:
                if next_page_token:
                    params["pageToken"] = next_page_token

                response_data = await fetch(session, base_url, params)

                if not response_data or not isinstance(response_data, dict):
                    print("Invalid response data")
                    break

                studies = response_data.get('studies', [])
                if not studies:
                    print("No more studies found")
                    break

                all_studies.extend(studies)
                print(f"Retrieved {len(studies)} studies. Total so far: {len(all_studies)}")

                next_page_token = response_data.get('nextPageToken')
                if not next_page_token or (max_records and len(all_studies) >= max_records):
                    break

            except Exception as e:
                print(f"Error processing page: {str(e)}")
                break

    # A full page may overshoot the requested cap; trim to honour it.
    if max_records:
        all_studies = all_studies[:max_records]

    recruiting_trials_list = []

    for study in all_studies:
        try:
            # Status filtering is done by the API, so nothing is filtered here.
            trial_info = {
                'NCTId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'nctId']),
                'Phase': _get_first_item(study, ['protocolSection', 'designModule', 'phases']),
                'OrgStudyId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'orgStudyIdInfo', 'id']),
                'Status': _get_nested_value(study, ['protocolSection', 'statusModule', 'overallStatus']),
                'Condition': '|'.join(_get_nested_value(study, ['protocolSection', 'conditionsModule', 'conditions'], [])),
                'CompletionDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'primaryCompletionDateStruct', 'date']),
                'EnrollmentCount': _get_nested_value(study, ['protocolSection', 'designModule', 'enrollmentInfo', 'count']),
                'StudyType': _get_nested_value(study, ['protocolSection', 'designModule', 'studyType']),
                'Arm': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'armGroups'], 'label'),
                'Drug': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'interventions'], 'name'),
                'Country': _get_location_info(study, 'country'),
                'City': _get_location_info(study, 'city'),
                'Site': _get_location_info(study, 'facility'),
                'StudyPopulation': _get_nested_value(study, ['protocolSection', 'eligibilityModule', 'studyPopulation']),
                'Sponsor': _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'leadSponsor', 'name']),
                'Collaborator': _get_collaborators(study),
                'StartDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'startDateStruct', 'date']),
                'PrimaryMeasure': _get_first_item(study, ['protocolSection', 'outcomesModule', 'primaryOutcomes'], 'measure'),
                'Purpose': _get_nested_value(study, ['protocolSection', 'designModule', 'designInfo', 'primaryPurpose']),
                'BriefTitle': _get_nested_value(study, ['protocolSection', 'identificationModule', 'briefTitle']),
            }
        except Exception as e:
            # One malformed study must not discard the rest of the result.
            print(f"Error processing study: {str(e)}")
            continue
        recruiting_trials_list.append(trial_info)

    print(f"Total studies processed: {len(recruiting_trials_list)}")
    return recruiting_trials_list
+
+# Helper functions remain the same
def _get_nested_value(obj, path, default=None):
    """Follow ``path`` through nested mappings and return the value found.

    ``default`` is returned when any step yields ``None``, when a level does
    not support ``.get``, or when the final value is ``None``.
    """
    node = obj
    try:
        for key in path:
            if node is None:
                break
            node = node.get(key)
    except (KeyError, TypeError, AttributeError):
        return default
    return default if node is None else node
+
def _get_first_item(obj, path, field=None):
    """Return the first element of the list found at ``path`` in ``obj``.

    When ``field`` is given, that element's ``field`` entry is returned
    instead of the element itself. ``None`` is returned when the value at
    ``path`` is missing, empty or not a list, or when the lookup fails.
    """
    try:
        items = _get_nested_value(obj, path, [])
        if not (items and isinstance(items, list)):
            return None
        head = items[0]
        return head.get(field) if field else head
    except (IndexError, AttributeError):
        return None
+
def _get_location_info(study, info_type):
    """Join the ``info_type`` entry of every study location with ``|``.

    Locations whose entry is missing or empty are left out. ``None`` is
    returned if anything goes wrong while collecting the values.
    """
    try:
        locations = _get_nested_value(
            study, ['protocolSection', 'contactsLocationsModule', 'locations'], [])
        found = []
        for location in locations:
            value = location.get(info_type)
            if value:
                found.append(value)
        return '|'.join(found)
    except Exception:
        return None
+
def _get_collaborators(study):
    """Join the names of the study's collaborators with ``|``.

    Collaborators without a name are left out. ``None`` is returned if
    anything goes wrong while collecting the names.
    """
    try:
        collaborators = _get_nested_value(
            study, ['protocolSection', 'sponsorCollaboratorsModule', 'collaborators'], [])
        names = [entry['name'] for entry in collaborators if entry.get('name')]
        return '|'.join(names)
    except Exception:
        return None
+########### ClinicalTrials.gov API for study fields with Recruiting Trials Only ###################################
+
+################# FROM CLAUDE API FOR ELIGIBILITY###############################
+import requests
+import re
+
def get_formatted_inclusion_criteria(nct_id):
    """
    Get and format inclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    The eligibility text is cut at the "Exclusion Criteria" heading; a heading
    that stands on its own line is preferred, so a passing mention of
    "exclusion criteria" inside an inclusion item does not cut the list short.

    Args:
        nct_id (str): The NCT ID of the trial

    Returns:
        str: Formatted inclusion criteria as a numbered list (possibly an
        empty string), or None if the trial was not found or the request or
        its processing failed
    """
    # V2 API endpoint
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    params = {
        "format": "json",
        "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
        "query.id": nct_id
    }

    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        studies = data.get('studies')
        if not studies:
            print(f"No data found for Trial NCT ID: {nct_id}")
            return None

        eligibility_criteria = studies[0]['protocolSection']['eligibilityModule']['eligibilityCriteria']

        # Prefer a heading on its own line; fall back to the first mention.
        heading_line = re.compile(
            r'^[ \t]*exclusion\s+criteria[ \t]*[:\-]?[ \t\r]*$',
            flags=re.IGNORECASE | re.MULTILINE)
        parts = heading_line.split(eligibility_criteria, maxsplit=1)
        if len(parts) == 1:
            parts = re.split(r'\bexclusion\s+criteria\b', eligibility_criteria,
                             maxsplit=1, flags=re.IGNORECASE)
        inclusion_criteria = parts[0].strip()

        own_heading = re.compile(r'^\s*inclusion\s+criteria:?\s*$', flags=re.IGNORECASE)
        bullet_only = re.compile(r'^\s*[-•*]\s*$')
        leading_bullet = re.compile(r'^\s*[-•*]\s*')

        cleaned_inclusions = []
        for line in re.split(r'\r?\n+', inclusion_criteria):
            line = line.strip()
            # Skip blanks, the section heading itself, and bare bullets.
            if not line or own_heading.search(line) or bullet_only.search(line):
                continue
            line = leading_bullet.sub('', line)
            if line:
                cleaned_inclusions.append(line)

        # Number the items and make sure each one ends with a period.
        formatted_inclusions = [
            f"{number}. {item if item.endswith('.') else item + '.'}"
            for number, item in enumerate(cleaned_inclusions, 1)
        ]
        return "\n".join(formatted_inclusions)

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except (IndexError, KeyError) as e:
        print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
        return None
+
+
+
+#########################################################################################################
+
+
+## ############################API For Exclusions###################################################################################################################################################
def get_formatted_exclusion_criteria(nct_id):
    """
    Get and format exclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    Everything after the "Exclusion Criteria" heading is returned. The
    heading's own colon or dash is consumed with it, and the text is split
    only once, so a later mention of "exclusion criteria" inside an item does
    not truncate the list.

    Args:
        nct_id (str): The NCT ID of the trial

    Returns:
        str: Formatted exclusion criteria as a numbered list, or None if the
        trial or its exclusion section was not found, or the request or its
        processing failed
    """
    # V2 API endpoint
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    params = {
        "format": "json",
        "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
        "query.id": nct_id
    }

    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        studies = data.get('studies')
        if not studies:
            print(f"No data found for Trial NCT ID: {nct_id}")
            return None

        eligibility_criteria = studies[0]['protocolSection']['eligibilityModule']['eligibilityCriteria']

        # Prefer a heading on its own line; fall back to the first mention.
        heading_line = re.compile(
            r'^[ \t]*exclusion\s+criteria[ \t]*[:\-]?[ \t\r]*$',
            flags=re.IGNORECASE | re.MULTILINE)
        parts = heading_line.split(eligibility_criteria, maxsplit=1)
        if len(parts) == 1:
            # The optional colon/dash is matched here so that it does not
            # survive as a bogus first list item.
            parts = re.split(r'\bexclusion\s+criteria\b[ \t]*[:\-]?', eligibility_criteria,
                             maxsplit=1, flags=re.IGNORECASE)
        if len(parts) == 1:
            print(f"Could not find exclusion criteria section for Trial NCT ID: {nct_id}")
            return None
        exclusion_criteria = parts[1].strip()

        bullet_only = re.compile(r'^\s*[-•*]\s*$')
        leading_bullet = re.compile(r'^\s*[-•*]\s*')

        cleaned_exclusions = []
        for line in re.split(r'\r?\n+', exclusion_criteria):
            line = line.strip()
            # Skip blanks and bare bullets.
            if not line or bullet_only.search(line):
                continue
            line = leading_bullet.sub('', line)
            if line:
                cleaned_exclusions.append(line)

        # Number the items and make sure each one ends with a period.
        formatted_exclusions = [
            f"{number}. {item if item.endswith('.') else item + '.'}"
            for number, item in enumerate(cleaned_exclusions, 1)
        ]
        return "\n".join(formatted_exclusions)

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except (IndexError, KeyError) as e:
        print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
        return None
+
+
+
+
+
+################################# Apply CSS Style to HTML Table ##############################################################################################################
+
def dataframe_to_html_table(df):
    """Render ``df`` as an HTML table with the ``table table-striped`` classes.

    Args:
        df: DataFrame to render; its index is not included in the output.

    Returns:
        str: ``custom_css`` followed by the table markup from
        ``DataFrame.to_html``, each on its own line.
    """
    # NOTE(review): the stylesheet is blank and the strings around the table
    # hold only a line break in this copy of the file -- presumably the HTML
    # that lived here was stripped. Restore it from the deployed app if so.
    custom_css = """

    """
    table_html = df.to_html(classes="table table-striped", index=False, border=0)
    return custom_css + '\n' + table_html + '\n'
+
+##################################################################################################################################
def format_summary_stats(summary):
    """Return ``summary`` embedded in the summary block's template text."""
    return f"""

    {summary}
    """
+############################ End of Style #############################################################################################
+
+
+############### Functions to Process the Dataframes of Disease, Conditions, Trial Details####################################
+
+
+
+# parse the conditions
+import re
+import pandas as pd
+#######################################################################################################
def split_conditions(df, column_to_split):
    """Give every comma- or pipe-separated entry of a column its own row.

    Args:
        df: Source DataFrame; it is not modified.
        column_to_split: Name of the column whose values are split on
            ``,`` and ``|``. Each piece is stripped of surrounding whitespace.

    Returns:
        A new DataFrame with the same columns and a fresh 0..n-1 index. All
        other columns are repeated for each piece. A missing value
        (``None``/NaN) is kept as a single missing value rather than being
        turned into the text "None", so a later ``fillna`` still sees it.
        An empty input yields an empty frame that keeps the columns.
    """
    separator = re.compile(r',|\|')

    def _pieces(value):
        # Missing values pass through untouched; everything else is split as text.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return [value]
        return [piece.strip() for piece in separator.split(str(value))]

    new_rows = []
    for _, row in df.iterrows():
        for piece in _pieces(row[column_to_split]):
            new_row = row.to_dict()
            new_row[column_to_split] = piece
            new_rows.append(new_row)

    # Passing the columns keeps their order and survives an empty input.
    return pd.DataFrame(new_rows, columns=df.columns)
+
+#########################################################################################################################
def split_drug(df, column_to_split):
    """Give every comma- or pipe-separated entry of a column its own row.

    Args:
        df: Source DataFrame; it is not modified.
        column_to_split: Name of the column whose values are split on
            ``,`` and ``|``. Each piece is stripped of surrounding whitespace.

    Returns:
        A new DataFrame with the same columns and a fresh 0..n-1 index. All
        other columns are repeated for each piece. A missing value
        (``None``/NaN) is kept as a single missing value rather than being
        turned into the text "None". An empty input yields an empty frame
        that keeps the columns.
    """
    separator = re.compile(r',|\|')
    expanded = []

    for _, record in df.iterrows():
        cell = record[column_to_split]
        if pd.api.types.is_scalar(cell) and pd.isna(cell):
            # Keep missing values missing so a later fillna can label them.
            names = [cell]
        else:
            names = [name.strip() for name in separator.split(str(cell))]

        for name in names:
            entry = record.to_dict()
            entry[column_to_split] = name
            expanded.append(entry)

    # Passing the columns keeps their order and survives an empty input.
    return pd.DataFrame(expanded, columns=df.columns)
+
+############################################################################################################################################
+#############################################################################################
+
def split_columns(df, columns_to_split):
    """Expand pipe-separated columns in parallel, one output row per position.

    For each input row the listed columns are split on ``|``. The row is
    repeated once per position of the longest list; a column that has no
    value at a position gets ``None`` there. All other columns are copied
    unchanged onto every output row.

    Args:
        df: Source DataFrame; it is not modified.
        columns_to_split: Names of the columns to split together.

    Returns:
        A new DataFrame with the same column order as ``df`` and a fresh
        0..n-1 index. A missing value in a split column stays a single
        missing value. An empty ``df`` yields an empty frame with the same
        columns, and an empty ``columns_to_split`` leaves one row per input row.
    """
    columns_to_split = list(columns_to_split)

    def _pieces(value):
        # Missing values pass through untouched; everything else is split as text.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return [value]
        return str(value).split('|')

    records = []
    for _, row in df.iterrows():
        # Split each column once per row instead of once per position.
        pieces = {col: _pieces(row[col]) for col in columns_to_split}
        depth = max((len(values) for values in pieces.values()), default=1)
        shared = {col: row[col] for col in df.columns if col not in pieces}

        for position in range(depth):
            record = dict(shared)
            for col, values in pieces.items():
                record[col] = values[position] if position < len(values) else None
            records.append(record)

    # Passing the columns restores their order and survives an empty input.
    return pd.DataFrame(records, columns=df.columns)
+
+
+################## INTERVENTIONAL, OBSERVATIONAL Trials Lead Sponsor Counts##################################################
def calculate_summary_stats(df, sponsor):
    """Summarise trials, conditions and planned patients per study type.

    Args:
        df: DataFrame with the columns ``StudyType``, ``NCTId``,
            ``Condition`` and ``EnrollmentCount``. A trial may span several
            rows; its enrollment is counted once.
        sponsor: Name shown in the heading; "All Lead Sponsors" when falsy.

    Returns:
        str: A heading followed by one line each for INTERVENTIONAL and
        OBSERVATIONAL trials. Conditions named "Healthy" or "Adult" are not
        counted, and enrollment values that are not numeric count as zero.
    """
    # NOTE(review): the separator literals are broken across lines in this
    # copy of the file, as if an HTML tag had been stripped. "<br>" is
    # assumed because the result is embedded in HTML -- confirm against the
    # deployed app.
    line_break = "<br>"
    sponsor_name = sponsor if sponsor else "All Lead Sponsors"
    ignored_conditions = ('Healthy', 'Adult')

    summary_stats = []
    for study_type in ("INTERVENTIONAL", "OBSERVATIONAL"):
        subset = df[df['StudyType'] == study_type].copy()
        subset['EnrollmentCount'] = pd.to_numeric(subset['EnrollmentCount'], errors='coerce')

        num_trials = len(subset['NCTId'].unique())
        num_conditions = sum(
            1 for condition in subset['Condition'].unique()
            if condition not in ignored_conditions
        )
        # One enrollment figure per trial, however many rows it occupies.
        total_patients = int(subset.groupby('NCTId')['EnrollmentCount'].first().sum())

        summary_stats.append(
            f"{num_trials} {study_type} Trials, "
            f"{num_conditions} Conditions, "
            f"{total_patients:,} Planned Patients."
        )

    return f"{sponsor_name} - As Lead Sponsor:{line_break}" + line_break.join(summary_stats)
+
+############################################################################################################################################
+
def calculate_summary_stats_collb(df, sponsor):
    """Summarise collaborative trials, conditions and planned patients per study type.

    Args:
        df: DataFrame with the columns ``StudyType``, ``NCTId``,
            ``Condition`` and ``EnrollmentCount``. A trial may span several
            rows; its enrollment is counted once.
        sponsor: Name shown in the heading; "All Collaborators" when falsy.

    Returns:
        str: A heading followed by one line each for INTERVENTIONAL and
        OBSERVATIONAL trials. Conditions named "Healthy" or "Adult" are not
        counted, and enrollment values that are not numeric count as zero.
    """
    # NOTE(review): the separator literals are broken across lines in this
    # copy of the file, as if an HTML tag had been stripped. "<br>" is
    # assumed because the result is embedded in HTML -- confirm against the
    # deployed app.
    line_break = "<br>"
    sponsor_name = sponsor if sponsor else "All Collaborators"
    ignored_conditions = ('Healthy', 'Adult')

    summary_stats = []
    for study_type in ("INTERVENTIONAL", "OBSERVATIONAL"):
        of_type = df[df['StudyType'] == study_type].copy()
        of_type['EnrollmentCount'] = pd.to_numeric(of_type['EnrollmentCount'], errors='coerce')

        num_trials = len(of_type['NCTId'].unique())
        num_conditions = sum(
            1 for condition in of_type['Condition'].unique()
            if condition not in ignored_conditions
        )
        # One enrollment figure per trial, however many rows it occupies.
        total_patients = int(of_type.groupby('NCTId')['EnrollmentCount'].first().sum())

        summary_stats.append(
            f"{num_trials} {study_type} Trials, "
            f"{num_conditions} Conditions, "
            f"{total_patients:,} Planned Patients."
        )

    heading = f"{sponsor_name} - With Collaborators Recruiting For:{line_break}"
    return heading + line_break.join(summary_stats)
+
+
+##################################################################################################################
+
def calculate_summary_stats_sites(df, sponsor, country):
    """Summarise trials, countries and sites per study type.

    Args:
        df: DataFrame with the columns ``NCTId``, ``StudyType``, ``Country``,
            ``City``, ``Site`` and ``EnrollmentCount``, one row per trial site.
        sponsor: Name shown in the heading; "All Sponsors" when falsy.
        country: When truthy, only rows for this country are considered.

    Returns:
        str: The heading, a line for INTERVENTIONAL trials (with planned
        patients) and a line for OBSERVATIONAL trials (without).
    """
    # NOTE(review): the line separators are broken across lines in this copy
    # of the file, as if an HTML tag had been stripped. "<br>" is assumed
    # because the result is embedded in HTML -- confirm against the deployed app.
    line_break = "<br>"

    if country:
        df = df[df['Country'] == country]

    # One row per trial, so each trial's enrollment is counted once.
    per_trial = df.groupby(['NCTId', 'StudyType']).first().reset_index()
    per_trial['EnrollmentCount'] = pd.to_numeric(per_trial['EnrollmentCount'], errors='coerce')

    def _counts(study_type):
        """Return (trials, countries, sites, patients) for one study type."""
        site_rows = df[df['StudyType'] == study_type]
        trial_rows = per_trial[per_trial['StudyType'] == study_type]
        # A site is a distinct Country/City/Site combination with no part missing.
        sites = site_rows[['Country', 'City', 'Site']].dropna().drop_duplicates().shape[0]
        return (
            trial_rows['NCTId'].nunique(),
            site_rows['Country'].nunique(),
            sites,
            int(trial_rows['EnrollmentCount'].sum()),
        )

    int_trials, int_countries, int_sites, int_patients = _counts('INTERVENTIONAL')
    obs_trials, obs_countries, obs_sites, _ = _counts('OBSERVATIONAL')

    sponsor_name = sponsor if sponsor else "All Sponsors"

    return (
        f"{sponsor_name}{line_break}"
        f"{int_trials} INTERVENTIONAL Trials, in {int_countries} Country, at {int_sites} Sites, "
        f"Recruiting: {int_patients:,} Planned Patients.{line_break}"
        f"{obs_trials} OBSERVATIONAL Trials, in {obs_countries} Country, at {obs_sites} Sites"
    )
+
+
+################################################ GRADIO STARTS HERE #########################################################
+
+# Wrapper function called from the Gradio Interface: takes the form inputs and returns the outputs
def _split_sponsor_roles(frame, sponsor):
    """Split ``frame`` into trials led by ``sponsor`` and trials it shares.

    Returns ``(led, partnered)``. ``led`` holds the rows whose ``Sponsor`` is
    ``sponsor``. ``partnered`` stacks three groups: led rows that name a
    collaborator, rows where ``sponsor`` is the only collaborator (the lead
    sponsor is shown as the collaborator instead), and rows where ``sponsor``
    is one of several collaborators (the lead sponsor is prepended to them).
    """
    led = frame[frame['Sponsor'] == sponsor]

    # "With collaborators" means a collaborator is actually named: blank
    # and placeholder values do not count.
    partner_text = led['Collaborator'].astype(str).str.strip()
    led_with_partners = led[led['Collaborator'].notna()
                            & ~partner_text.isin(['', 'Not Available'])]

    # .copy() so the assignments below change these rows only.
    sole_partner = frame[frame['Collaborator'] == sponsor].copy()
    sole_partner['Collaborator'] = sole_partner['Sponsor']

    # The name is escaped so punctuation in it is matched literally; the
    # groups are non-capturing because only a yes/no match is needed.
    listed = rf'(?:^|\|){re.escape(sponsor)}(?:\||$)'
    among_partners = frame[frame['Collaborator'].str.contains(
        listed, na=False, flags=re.IGNORECASE, regex=True)]
    among_partners = among_partners[among_partners['Collaborator'] != sponsor].copy()
    among_partners['Collaborator'] = among_partners['Sponsor'] + '|' + among_partners['Collaborator']

    partnered = pd.concat([led_with_partners, sole_partner, among_partners], ignore_index=True)
    return led, partnered


async def gradio_wrapper_nct(sponsor=None, condition=None, NCTId=None, country=None, status=None):
    """Fetch trials and build the summaries and HTML tables shown in the app.

    The search uses ``condition`` and/or ``sponsor`` when either is given,
    otherwise ``NCTId``.

    Args:
        sponsor: Lead sponsor to search for. When given, the condition and
            drug tables are narrowed to trials led by exactly this sponsor
            and the collaborator outputs are filled in.
        condition: Condition to search for.
        NCTId: Trial identifier, used only when neither ``sponsor`` nor
            ``condition`` is given.
        country: Accepted for interface compatibility; not used here.
        status: Overall-status filter passed to the search.

    Returns:
        A 6-tuple ``(summary_stats, summary_stats_collb,
        html_table_conditions, html_table_conditions_collb, html_table,
        html_table_drugs)``. When no trials are found every element is
        ``None``. When no search input is given the first element carries the
        explanatory message and the rest are ``None``.
    """
    if condition or sponsor:
        recruiting_trials = await get_nct_ids(
            disease_area=condition, lead_sponsor_name=sponsor, overall_status=status)
    elif NCTId:
        recruiting_trials = await get_nct_ids(NCTId=NCTId, overall_status=status)
    else:
        # Same arity as every other return so the caller can unpack it.
        # NOTE(review): assumes the first output is the one shown to the user.
        return "No condition, sponsor, or trial NCT Id provided", None, None, None, None, None

    if not recruiting_trials:
        return None, None, None, None, None, None

    missing = "Not Available"

    # Overview table: one row per trial. It is sorted before the placeholder
    # is filled in, because a column mixing numbers and text cannot be ordered.
    clinical_trials_gov = pd.DataFrame(
        recruiting_trials,
        columns=['NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Condition', 'Drug', 'Phase',
                 'StudyType', 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Arm', 'Purpose',
                 'PrimaryMeasure', 'Sponsor', 'Collaborator'])
    clinical_trials_gov = clinical_trials_gov.sort_values(
        by=['StudyType', 'Phase', 'CompletionDate', 'EnrollmentCount'],
        ascending=[True, False, True, False])
    clinical_trials_gov = clinical_trials_gov.fillna(missing)
    html_table = dataframe_to_html_table(clinical_trials_gov)

    # Conditions table: one row per trial and condition.
    clinical_trials_gov_conditions = pd.DataFrame(
        recruiting_trials,
        columns=['NCTId', 'BriefTitle', 'OrgStudyId', 'Phase', 'Status', 'StudyType', 'Condition',
                 'Drug', 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Sponsor', 'Collaborator'])
    clinical_trials_gov_conditions = split_conditions(
        clinical_trials_gov_conditions, 'Condition').fillna(missing)

    # Drugs table: one row per trial and drug. 'Status' is listed once; a
    # duplicated column label breaks the row-wise split.
    clinical_trials_gov_drugs = pd.DataFrame(
        recruiting_trials,
        columns=['Status', 'NCTId', 'BriefTitle', 'OrgStudyId', 'Phase', 'StudyType', 'Condition',
                 'Drug', 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Sponsor', 'Collaborator'])
    clinical_trials_gov_drugs = split_conditions(
        clinical_trials_gov_drugs, 'Drug').fillna(missing)

    if sponsor:
        clinical_trials_gov_conditions, clinical_trials_gov_conditions_collb = _split_sponsor_roles(
            clinical_trials_gov_conditions, sponsor)
        clinical_trials_gov_drugs = clinical_trials_gov_drugs[
            clinical_trials_gov_drugs['Sponsor'] == sponsor]

        html_table_conditions_collb = dataframe_to_html_table(clinical_trials_gov_conditions_collb)
        summary_stats_collb = format_summary_stats(
            calculate_summary_stats_collb(clinical_trials_gov_conditions_collb, sponsor))
    else:
        html_table_conditions_collb = pd.DataFrame().to_html(
            index=False, header=True, border=0, table_id="empty_table")
        summary_stats_collb = ''

    html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions)
    html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs)

    summary_stats = format_summary_stats(
        calculate_summary_stats(clinical_trials_gov_conditions, sponsor))

    return (summary_stats, summary_stats_collb, html_table_conditions,
            html_table_conditions_collb, html_table, html_table_drugs)
+
+
+
+#Wrapper Function called from Interfce to get input , output
async def gradio_wrapper_nct_spn(sponsor=None, condition=None, NCTId=None, country=None, status=None):
    """Search ClinicalTrials.gov and build the summary text and HTML tables.

    The search filters are chosen by the first combination that is set, in
    this order: condition + sponsor, condition only, sponsor only, NCT id only.

    Args:
        sponsor: Lead sponsor name forwarded to the search. When set, the
            condition, drug and site tables keep only rows whose ``Sponsor``
            column equals this value.
        condition: Value forwarded to the search as ``disease_area``.
        NCTId: Trial identifier; only used when neither ``condition`` nor
            ``sponsor`` is given.
        country: When set, the site table keeps only rows for this country.
        status: Value forwarded to the search as ``overall_status``.

    Returns:
        ``(summary_stats, html_table_conditions, html_table,
        summary_stats_sites, html_table_add, html_table_drugs)``.
        Six ``None`` values are returned when the search yields no trials.
        When no search criterion is supplied, a two-item tuple (empty
        DataFrame, explanatory message) is returned instead.
    """
    if condition or sponsor:
        search_filters = {}
        if condition:
            search_filters['disease_area'] = condition
        if sponsor:
            search_filters['lead_sponsor_name'] = sponsor
    elif NCTId:
        search_filters = {'NCTId': NCTId}
    else:
        # ``pd`` is the module-level pandas import; a function-level
        # ``import pandas as pd`` further down would turn ``pd`` into an
        # unbound local at this point.
        # NOTE(review): every other exit returns six values; confirm that the
        # caller can handle this two-item result.
        return pd.DataFrame(), "No condition, sponsor, or trial NCT Id provided"

    recruiting_trials = await get_nct_ids(overall_status=status, **search_filters)

    trial_fields = (
        'Sponsor', 'Collaborator', 'Drug', 'StudyType', 'Phase', 'Status',
        'Site', 'Country', 'City', 'NCTId', 'OrgStudyId', 'Condition',
        'StartDate', 'CompletionDate', 'EnrollmentCount', 'PrimaryMeasure',
        'Purpose', 'Arm', 'BriefTitle',
    )
    # Tolerate a ``None`` search result by treating it as "no trials".
    trial_info_list = [
        {field: trial[field] for field in trial_fields}
        for trial in (recruiting_trials or [])
    ]

    if not trial_info_list:
        return None, None, None, None, None, None

    # ---- Trial overview table -------------------------------------------
    overview_columns = [
        'NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Condition', 'Drug',
        'Phase', 'StudyType', 'StartDate', 'CompletionDate', 'EnrollmentCount',
        'Arm', 'Purpose', 'PrimaryMeasure', 'Sponsor', 'Collaborator',
    ]
    clinical_trials_gov = pd.DataFrame(trial_info_list, columns=overview_columns)
    clinical_trials_gov = clinical_trials_gov.fillna("Not Available")
    clinical_trials_gov = clinical_trials_gov.sort_values(
        by=['StudyType', 'Phase', 'CompletionDate', 'EnrollmentCount'],
        ascending=[True, False, True, False],
    )
    html_table = dataframe_to_html_table(clinical_trials_gov)

    # ---- Site / country / city table ------------------------------------
    site_columns = [
        'StudyType', 'Phase', 'NCTId', 'OrgStudyId', 'Status', 'BriefTitle',
        'Site', 'Country', 'City', 'Condition', 'Sponsor', 'Collaborator',
        'Drug', 'StartDate', 'CompletionDate', 'EnrollmentCount',
    ]
    clinical_trials_gov_add = pd.DataFrame(trial_info_list, columns=site_columns)
    clinical_trials_gov_add = clinical_trials_gov_add.fillna("Not Available")
    clinical_trials_gov_add = clinical_trials_gov_add.sort_values(
        by=['StudyType', 'Phase', 'EnrollmentCount', 'CompletionDate', 'Country'],
        ascending=[True, False, False, True, True],
    )

    # ---- One row per condition / one row per drug -----------------------
    detail_columns = [
        'NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Phase', 'StudyType',
        'Condition', 'Drug', 'EnrollmentCount', 'Sponsor', 'Collaborator',
    ]
    clinical_trials_gov_conditions = split_conditions(
        pd.DataFrame(trial_info_list, columns=detail_columns), 'Condition')
    clinical_trials_gov_conditions = clinical_trials_gov_conditions.fillna("Not Available")

    clinical_trials_gov_drugs = split_conditions(
        pd.DataFrame(trial_info_list, columns=detail_columns), 'Drug')
    clinical_trials_gov_drugs = clinical_trials_gov_drugs.fillna("Not Available")

    # ---- One row per site -----------------------------------------------
    country_site_city_df = split_columns(clinical_trials_gov_add, ['Site', 'Country', 'City'])
    # The split can introduce fresh NaN values, so fill again.
    country_site_city_df = country_site_city_df.fillna("Not Available")

    if country:
        country_site_city_df = country_site_city_df[country_site_city_df['Country'] == country]

    # Only trials led by the requested sponsor feed the outputs of this
    # wrapper, so the tables are narrowed to exact ``Sponsor`` matches.
    if sponsor:
        clinical_trials_gov_conditions = clinical_trials_gov_conditions[
            clinical_trials_gov_conditions['Sponsor'] == sponsor]
        clinical_trials_gov_drugs = clinical_trials_gov_drugs[
            clinical_trials_gov_drugs['Sponsor'] == sponsor]
        country_site_city_df = country_site_city_df[
            country_site_city_df['Sponsor'] == sponsor]

    # ---- Render ---------------------------------------------------------
    html_table_add = dataframe_to_html_table(country_site_city_df)
    html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions)
    html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs)

    summary_stats = format_summary_stats(
        calculate_summary_stats(clinical_trials_gov_conditions, sponsor))
    summary_stats_sites = format_summary_stats(
        calculate_summary_stats_sites(country_site_city_df, sponsor, country))

    return summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add, html_table_drugs
+
+
+###############################################################################################################################################################
+##### ################## Start Gradio Interface #########################################################################
+
+################################## Condition Icicle and Sponsor Map ########################
+
+###################################################################
+
+import plotly.graph_objects as go
+import pandas as pd
+import numpy as np
+
+################################################ TOP 20 Conditions######################################################
+
+
+##########################################################################################################
+
+
def plot_condition_sunburst(df, top_n=20):
    """Build an icicle chart of the most frequent conditions.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are considered.
    Condition names are upper-cased, a few generic labels are dropped, and
    conditions are ranked by their number of distinct ``NCTId`` values.

    Args:
        df: DataFrame with ``StudyType``, ``Condition`` and ``NCTId`` columns.
        top_n: Number of top-ranked conditions to show (default 20).

    Returns:
        The Plotly icicle figure.
    """
    # Copy the filtered rows so the column assignment below works on an
    # independent frame instead of a slice of the caller's data.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Condition'] = df['Condition'].str.upper()

    # Generic labels that carry no information as a condition.
    df = df[~df['Condition'].isin(["OTHER", "OTHERS", "HEALTHY", "ADULT", "CHRONIC"])]

    # Rank conditions by number of distinct trials, most frequent first.
    df_count = df.groupby('Condition')['NCTId'].nunique().reset_index()
    df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_conditions = df_count.head(top_n).rename(columns={'NCTId': 'Number of Trials'})

    icicle_fig = px.icicle(top_conditions, path=['Condition'], values='Number of Trials',
                           color='Condition', color_continuous_scale='RdBu',
                           custom_data=['Condition', 'Number of Trials'])

    # "<br>" is the line break understood by Plotly hover text.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title=f'Top {top_n} Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )

    return icicle_fig
+
+
+############################################################ Conditions OTHERS ########### ############################################
def plot_condition_others(df, top_n=20):
    """Build an icicle chart of the conditions ranked below the top ones.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are considered.
    Condition names are upper-cased, a few generic labels are dropped, and
    conditions are ranked by their number of distinct ``NCTId`` values; the
    ``top_n`` highest-ranked conditions are then left out of the chart.

    Args:
        df: DataFrame with ``StudyType``, ``Condition`` and ``NCTId`` columns.
        top_n: Number of top-ranked conditions to leave out (default 20).

    Returns:
        The Plotly icicle figure.
    """
    # Copy the filtered rows so the column assignment below works on an
    # independent frame instead of a slice of the caller's data.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Condition'] = df['Condition'].str.upper()

    # Generic labels that carry no information as a condition.
    df = df[~df['Condition'].isin(["OTHER", "OTHERS", "HEALTHY", "ADULT"])]

    # Rank conditions by number of distinct trials, most frequent first.
    df_count = df.groupby('Condition')['NCTId'].nunique().reset_index()
    df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True)

    # Each condition occurs once in the ranking, so everything after the
    # first ``top_n`` rows is exactly the set of non-top conditions.
    other_conditions = df_count.iloc[top_n:].rename(columns={'NCTId': 'Number of Trials'})

    icicle_fig = px.icicle(other_conditions, path=['Condition'], values='Number of Trials',
                           color='Condition', color_continuous_scale='RdBu',
                           custom_data=['Condition', 'Number of Trials'])

    # "<br>" is the line break understood by Plotly hover text.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title='Other Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )

    return icicle_fig
+###################################################################################################################################################
+
def wrap_text(text, max_chars_per_line):
    """Greedily wrap ``text`` on whitespace and join the lines with ``<br>``.

    Words are never split: a word longer than ``max_chars_per_line`` is placed
    on a line of its own.

    NOTE: a later definition of ``wrap_text`` in this module replaces this one
    at import time.

    Args:
        text: The string to wrap.
        max_chars_per_line: Maximum line length, in characters.

    Returns:
        The wrapped text with ``<br>`` between lines; ``''`` for blank input.
    """
    lines = []
    current_words = []
    current_length = 0

    for word in text.split():
        # Length of the current line if ``word`` were appended to it.
        if current_words:
            candidate_length = current_length + 1 + len(word)
        else:
            candidate_length = len(word)

        if candidate_length <= max_chars_per_line:
            current_words.append(word)
            current_length = candidate_length
        else:
            # Flush only a non-empty line, so an over-long first word does not
            # produce a leading blank line.
            if current_words:
                lines.append(' '.join(current_words))
            current_words = [word]
            current_length = len(word)

    if current_words:
        lines.append(' '.join(current_words))
    return '<br>'.join(lines)
+
+
+##################################################### Sponsor Counts ###########################################
+
+
+################################################### ############################################################
+
def wrap_text(text, max_chars_per_line):
    """Wrap ``text`` with ``textwrap.wrap`` and join the lines with ``<br>``.

    Args:
        text: The string to wrap.
        max_chars_per_line: Maximum line width passed to ``textwrap.wrap``.

    Returns:
        The wrapped text with ``<br>`` between lines; ``''`` for blank input.
    """
    return '<br>'.join(textwrap.wrap(text, max_chars_per_line))
+
def plot_sponsor_collaborator_tree_map(df, top_n=30):
    """Build a treemap of the most frequent sponsor/collaborator pairs.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are considered. Pairs are
    ranked by their number of distinct ``NCTId`` values.

    Args:
        df: DataFrame with ``StudyType``, ``Sponsor``, ``Collaborator`` and
            ``NCTId`` columns.
        top_n: Number of top-ranked pairs to show (default 30).

    Returns:
        The Plotly treemap figure.
    """
    df = df[df['StudyType'] == "INTERVENTIONAL"]

    # Rank sponsor/collaborator pairs by number of distinct trials.
    df_count = df.groupby(['Sponsor', 'Collaborator'])['NCTId'].nunique().reset_index()
    df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_pairs = df_count.head(top_n).rename(columns={'NCTId': 'Number of Trials'})

    # Wrapped copies of the names are used as the treemap path labels.
    max_chars_per_line = 10
    top_pairs = top_pairs.assign(**{
        'Wrapped Sponsor': top_pairs['Sponsor'].apply(lambda name: wrap_text(name, max_chars_per_line)),
        'Wrapped Collaborator': top_pairs['Collaborator'].apply(lambda name: wrap_text(name, max_chars_per_line)),
    })

    tree_map_fig = px.treemap(top_pairs, path=['Wrapped Sponsor', 'Wrapped Collaborator'], values='Number of Trials',
                              color='Sponsor', color_continuous_scale='RdBu',
                              custom_data=['Wrapped Sponsor', 'Wrapped Collaborator', 'Number of Trials'])

    # "<br>" is the line break understood by Plotly hover text.
    tree_map_fig.update_traces(
        hovertemplate='%{customdata[0]}<br>%{customdata[1]}<br>Number of Trials: %{customdata[2]}')

    tree_map_fig.update_layout(
        title='Lead Sponsors and Collaborators',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000
    )

    # Show only the number of trials as the text inside each tile.
    tree_map_fig.update_traces(textinfo='value')

    return tree_map_fig
+
+
+#########################################################################################################
+
def plot_sponsor_tree(df, top_n=30):
    """Build an icicle chart of the sponsors with the most trials.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are considered. Distinct
    ``NCTId`` values are counted per (phase, sponsor) pair and those counts
    are then summed per sponsor.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Sponsor`` and ``NCTId``
            columns.
        top_n: Number of top-ranked sponsors to show (default 30).

    Returns:
        The Plotly icicle figure.
    """
    df = df[df['StudyType'] == "INTERVENTIONAL"]

    # Give missing phases a placeholder so those rows take part in the
    # group-by below; ``assign`` returns a new frame rather than writing into
    # a slice of the caller's data.
    df = df.assign(Phase=df['Phase'].fillna('UNKNOWN'))

    # Distinct trials per (phase, sponsor), then the total per sponsor.
    per_phase = df.groupby(['Phase', 'Sponsor'])['NCTId'].nunique().reset_index()
    df_count_tot = per_phase.groupby('Sponsor')['NCTId'].sum().reset_index()
    df_count_tot = df_count_tot.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_sponsors = df_count_tot.head(top_n).rename(columns={'NCTId': 'Number of Trials'})

    # Wrapped copies of the names are used as the chart labels.
    max_chars_per_line = 10
    top_sponsors = top_sponsors.assign(**{
        'Wrapped Sponsor': top_sponsors['Sponsor'].apply(lambda name: wrap_text(name, max_chars_per_line)),
    })

    icicle_fig = px.icicle(top_sponsors, path=['Wrapped Sponsor'], values='Number of Trials',
                           color='Sponsor', color_continuous_scale='RdBu',
                           custom_data=['Wrapped Sponsor', 'Number of Trials'])

    # "<br>" is the line break understood by Plotly hover text.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title='Sponsor',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000
    )

    return icicle_fig
+
+######################################################################################################################################
+
+
def plot_collaborator_icicle(df, top_n=30):
    """Build an icicle chart of the collaborators with the most trials.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are considered. Distinct
    ``NCTId`` values are counted per (phase, collaborator) pair and those
    counts are then summed per collaborator.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Collaborator`` and
            ``NCTId`` columns.
        top_n: Number of top-ranked collaborators to show (default 30).

    Returns:
        The Plotly icicle figure.
    """
    df = df[df['StudyType'] == "INTERVENTIONAL"]

    # Give missing phases a placeholder so those rows take part in the
    # group-by below; ``assign`` returns a new frame rather than writing into
    # a slice of the caller's data.
    df = df.assign(Phase=df['Phase'].fillna('UNKNOWN'))

    # Distinct trials per (phase, collaborator), then the total per collaborator.
    per_phase = df.groupby(['Phase', 'Collaborator'])['NCTId'].nunique().reset_index()
    df_count_tot = per_phase.groupby('Collaborator')['NCTId'].sum().reset_index()
    df_count_tot = df_count_tot.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_collaborators = df_count_tot.head(top_n).rename(columns={'NCTId': 'Number of Trials'})

    # Wrapped copies of the names are used as the chart labels.
    max_chars_per_line = 10
    top_collaborators = top_collaborators.assign(
        Collaborators=top_collaborators['Collaborator'].apply(
            lambda name: wrap_text(name, max_chars_per_line)))

    icicle_fig = px.icicle(top_collaborators, path=['Collaborators'], values='Number of Trials',
                           color='Collaborator', color_continuous_scale='RdBu',
                           custom_data=['Collaborators', 'Number of Trials'])

    # "<br>" is the line break understood by Plotly hover text.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title='Collaborators',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000
    )

    return icicle_fig
+
+#################################### DRUGS ########################################################################
+
+#################### Sankey Diagram for Conditions to Drugs to Phase /NCTId#############################################
+
+
+
+
+import pandas as pd
+import plotly.graph_objects as go
+import random
+
def random_color():
    """Return a random CSS colour string of the form ``rgb(r, g, b)``.

    Each channel is drawn independently from 0 to 255 inclusive.
    """
    red = random.randint(0, 255)
    green = random.randint(0, 255)
    blue = random.randint(0, 255)
    return 'rgb(' + ', '.join(map(str, (red, green, blue))) + ')'
+##############################################################################################################
def plot_drug_sankey(df):
    """Build a Sankey diagram: condition -> drug -> NCT id -> study id -> phase.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are used. Every distinct
    value of each level becomes a node with a random colour, and each pair of
    values that occurs together in adjacent levels becomes a link whose value
    is the number of rows containing that pair.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``, ``Drug``,
            ``NCTId`` and ``OrgStudyId`` columns.

    Returns:
        The Plotly Sankey figure.
    """
    # Copy the filtered rows so the column assignment below works on an
    # independent frame instead of a slice of the caller's data.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    # Node labels are laid out level after level; ``node_index`` maps each
    # level's values to their position in the combined label list.
    level_columns = ['Condition', 'Drug', 'NCTId', 'OrgStudyId', 'Phase']
    labels = []
    node_index = {}
    for column in level_columns:
        offset = len(labels)
        level_values = df[column].unique().tolist()
        node_index[column] = {item: offset + pos for pos, item in enumerate(level_values)}
        labels.extend(level_values)

    colors = [random_color() for _ in labels]

    source = []
    target = []
    value = []
    for src_col, tgt_col in zip(level_columns, level_columns[1:]):
        # One group-by per level pair replaces a DataFrame scan per node pair.
        pair_counts = df.groupby([src_col, tgt_col], sort=False).size()
        # Sorting by (source, target) node index yields the links in the same
        # order as iterating the nodes of both levels in nested loops.
        links = sorted(
            (node_index[src_col][src], node_index[tgt_col][tgt], int(count))
            for (src, tgt), count in pair_counts.items()
        )
        for src_node, tgt_node, count in links:
            source.append(src_node)
            target.append(tgt_node)
            value.append(count)

    # Figure height grows with the number of conditions and is capped at 1000.
    num_conditions = len(node_index['Condition'])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])

    fig.update_layout(title_text="Conditions, Drugs, Trial IDs, Phases for Sponsor",
                      font_size=10, height=height, autosize=True)
    return fig
+
+###########################################################################################
+
+
+###########################################################################################################################
+
+#################################################################### TRIALS ##############################
+
+
+
+########################################
+
def random_color():
    """Return a random colour as an ``rgb(r, g, b)`` string (channels 0-255)."""
    channels = [random.randint(0, 255) for _ in range(3)]
    return "rgb({}, {}, {})".format(*channels)
+
+
+
def plot_condition_treemap_nct_old(df):
    """Build a Sankey diagram: condition -> NCT id -> study id -> phase.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are used. Every distinct
    value of each level becomes a node with a random colour, and each pair of
    values that occurs together in adjacent levels becomes a link whose value
    is the number of rows containing that pair.

    NOTE: a later definition of ``plot_condition_treemap_nct_old`` in this
    module replaces this one at import time.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``, ``NCTId``
            and ``OrgStudyId`` columns.

    Returns:
        The Plotly Sankey figure.
    """
    # Copy the filtered rows so the column assignment below works on an
    # independent frame instead of a slice of the caller's data.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    # Node labels are laid out level after level; ``node_index`` maps each
    # level's values to their position in the combined label list.
    level_columns = ['Condition', 'NCTId', 'OrgStudyId', 'Phase']
    labels = []
    node_index = {}
    for column in level_columns:
        offset = len(labels)
        level_values = df[column].unique().tolist()
        node_index[column] = {item: offset + pos for pos, item in enumerate(level_values)}
        labels.extend(level_values)

    colors = [random_color() for _ in labels]

    source = []
    target = []
    value = []
    for src_col, tgt_col in zip(level_columns, level_columns[1:]):
        # One group-by per level pair replaces a DataFrame scan per node pair.
        pair_counts = df.groupby([src_col, tgt_col], sort=False).size()
        # Sorting by (source, target) node index yields the links in the same
        # order as iterating the nodes of both levels in nested loops.
        links = sorted(
            (node_index[src_col][src], node_index[tgt_col][tgt], int(count))
            for (src, tgt), count in pair_counts.items()
        )
        for src_node, tgt_node, count in links:
            source.append(src_node)
            target.append(tgt_node)
            value.append(count)

    # Figure height grows with the number of conditions and is capped at 1000.
    num_conditions = len(node_index['Condition'])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])

    fig.update_layout(title_text="Conditions, Trial IDs, Study IDs, Phases for Sponsor",
                      font_size=10, height=height, autosize=True)
    return fig
+######################################### Conditions###############################
+
+#####################################################################################
+
+import plotly.graph_objects as go
+
def plot_condition_treemap_nct_old(df):
    """Render interventional trials as a Plotly table sorted by condition.

    The table has the columns Condition, NCTId, OrgStudyId, BriefTitle and
    Phase; missing phases are shown as ``UNKNOWN``.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``, ``NCTId``,
            ``OrgStudyId`` and ``BriefTitle`` columns.

    Returns:
        The Plotly figure containing the table.
    """
    # Copy the filtered rows so the column assignment below works on an
    # independent frame instead of a slice of the caller's data.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')

    # Lookups from trial id to its title and organisation study id.
    nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict()
    nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict()

    table_df = df[['Condition', 'NCTId', 'Phase']].copy()
    table_df['BriefTitle'] = table_df['NCTId'].map(nctid_to_brieftitle)
    table_df['OrgStudyId'] = table_df['NCTId'].map(nctid_to_orgstudyid)
    table_df = table_df.sort_values('Condition')

    fig = go.Figure(data=[go.Table(
        header=dict(
            values=['Condition', 'NCTId', 'OrgStudyId', 'BriefTitle', 'Phase'],
            fill_color='paleturquoise',
            align='left',
            font=dict(size=16, color='black')
        ),
        cells=dict(
            values=[table_df.Condition, table_df.NCTId, table_df.OrgStudyId, table_df.BriefTitle, table_df.Phase],
            align='left',
            font=dict(size=14, color='black')
        )
    )])

    fig.update_layout(
        autosize=True,
        height=1000,
        title_text="Conditions with NCTIds and Phases",
        title_x=0.5,
        font=dict(size=18)
    )

    return fig
+
+
+
+
+
+###################### Claude modified Jan 14/2025#############################
def plot_condition_sunburst_nct(df):
    """Build a sunburst chart: root -> condition -> trial -> phase.

    Only rows with ``StudyType == "INTERVENTIONAL"`` are used. Conditions are
    ordered case-insensitively; trial nodes are labelled
    ``"<NCTId> (<OrgStudyId>)"`` and carry the brief title as hover text.

    Args:
        df: DataFrame with ``StudyType``, ``Phase``, ``Condition``, ``NCTId``,
            ``OrgStudyId`` and ``BriefTitle`` columns.

    Returns:
        The Plotly sunburst figure.
    """
    # Copy the filtered rows so the column assignment below works on an
    # independent frame instead of a slice of the caller's data.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')

    # Lookups from trial id to its title and organisation study id.
    nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict()
    nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict()

    conditions = sorted(df['Condition'].unique(), key=str.lower)

    # Nodes are collected as (id, label, parent, hover text) tuples in plain
    # lists; growing a DataFrame with ``pd.concat`` inside the loops would
    # copy the whole frame on every iteration.
    nodes = [("Conditions", "Conditions", "", None)]

    # Level 1: one node per condition.
    nodes.extend((condition, condition, "Conditions", None) for condition in conditions)

    # Level 2: one node per (condition, trial).
    for condition in conditions:
        condition_rows = df[df['Condition'] == condition]
        for nctid in sorted(condition_rows['NCTId'].unique()):
            nodes.append((
                f"{condition}-{nctid}",
                f"{nctid} ({nctid_to_orgstudyid[nctid]})",
                condition,
                nctid_to_brieftitle[nctid],
            ))

    # Level 3: one node per (condition, trial, phase). Repeated rows would
    # otherwise produce duplicate node ids, so each id is emitted only once.
    seen_phase_ids = set()
    for condition in conditions:
        condition_rows = df[df['Condition'] == condition].sort_values('NCTId')
        for nctid, phase in zip(condition_rows['NCTId'], condition_rows['Phase']):
            phase_id = f"{condition}-{nctid}-{phase}"
            if phase_id in seen_phase_ids:
                continue
            seen_phase_ids.add(phase_id)
            nodes.append((phase_id, phase, f"{condition}-{nctid}", None))

    ids, labels, parents, hover_text = (list(column) for column in zip(*nodes))

    fig = go.Figure(go.Sunburst(
        ids=ids,
        labels=labels,
        parents=parents,
        maxdepth=3,  # Limit the depth to 3 levels
        branchvalues="total",
        hovertext=hover_text,
        hoverinfo='text',
        hoverlabel=dict(namelength=-1),
        textfont=dict(size=14, family="Arial")
    ))

    fig.update_layout(
        width=1200,
        height=1200,
        title={
            'text': "Clinical Trials by Condition, NCTId, and Phase",
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=20)
        }
    )

    return fig
+
+####################################################################################
def plot_condition_treemap_nct(df):
    """Build an icicle chart of interventional trials: Condition -> NCTId -> Phase.

    Args:
        df: DataFrame with 'StudyType', 'Phase', 'Condition', 'NCTId',
            'BriefTitle' and 'OrgStudyId' columns.

    Returns:
        A plotly ``go.Figure`` holding a single ``go.Icicle`` trace rooted at
        "Conditions". NCTId nodes are labelled "<NCTId> (<OrgStudyId>)" and
        show the trial's BriefTitle on hover.
    """
    # Work on a copy of the filtered slice so the Phase fill below neither
    # warns (SettingWithCopy) nor leaks into the caller's frame.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')

    nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict()
    nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict()

    # Nodes are collected as plain dicts and turned into a frame once;
    # growing a DataFrame with pd.concat inside a loop is quadratic.
    root_nodes = [{
        'ids': "Conditions",
        'labels': "Conditions",
        'parents': "",
        'brieftitle': None,
        'level': 0,
    }]

    # Missing conditions are skipped: str.lower cannot sort NaN.
    conditions = sorted(df['Condition'].dropna().unique(), key=str.lower)
    condition_nodes = [
        {
            'ids': condition,
            'labels': condition,
            'parents': "Conditions",
            'brieftitle': None,
            'level': 1,
        }
        for condition in conditions
    ]

    nctid_nodes = []
    phase_nodes = []
    for condition in conditions:
        condition_rows = df[df['Condition'] == condition]

        for nctid in sorted(condition_rows['NCTId'].unique()):
            nctid_nodes.append({
                'ids': f"{condition}-{nctid}",
                'labels': f"{nctid} ({nctid_to_orgstudyid[nctid]})",
                'parents': condition,
                'brieftitle': nctid_to_brieftitle[nctid],
                'level': 2,
            })

        # One Phase node per (NCTId, Phase): repeated rows for the same trial
        # would otherwise yield duplicate ids, and Plotly requires ids to be
        # unique within a hierarchy trace.
        trial_phases = (
            condition_rows[['NCTId', 'Phase']]
            .drop_duplicates()
            .sort_values('NCTId')
        )
        for nctid, phase in zip(trial_phases['NCTId'], trial_phases['Phase']):
            phase_nodes.append({
                'ids': f"{condition}-{nctid}-{phase}",
                'labels': phase,
                'parents': f"{condition}-{nctid}",
                'brieftitle': None,
                'level': 3,
            })

    icicle_df = pd.DataFrame(
        root_nodes + condition_nodes + nctid_nodes + phase_nodes,
        columns=['ids', 'labels', 'parents', 'brieftitle', 'level'],
    )

    # Sort by level first to keep the hierarchy grouped, then by label
    # (case-insensitively) so conditions appear in alphabetical order.
    icicle_df = icicle_df.sort_values(
        ['level', 'labels'],
        key=lambda x: x.str.lower() if x.name == 'labels' else x)

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial"),
        hovertext=icicle_df['brieftitle'],
        hoverinfo='text',
        hoverlabel=dict(namelength=-1)
    ))

    fig.update_layout(autosize=True, height=1000)

    return fig
+
+############################################################
+
+
+
+###########################################################################################################################
+import re
+
def insert_line_break(text, max_length=30):
    """Wrap *text* for Plotly by inserting ``<br>`` roughly every *max_length* characters.

    Each segment is cut at the last space found within the first *max_length*
    characters; when there is no space the text is cut hard at *max_length*.
    The remainder is stripped of surrounding whitespace before it is wrapped
    again. Plotly renders ``<br>`` as a line break in labels and hover text.

    Args:
        text: String to wrap. ``None`` and float NaN (pandas' missing-value
            marker) give an empty string; other non-strings are passed
            through ``str()``.
        max_length: Maximum segment length; must be at least 1.

    Returns:
        The wrapped string. Text no longer than *max_length* is returned
        unchanged.

    Raises:
        ValueError: If *max_length* is smaller than 1 (no progress would be
            possible).
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    if not isinstance(text, str):
        # `text != text` is only true for NaN.
        if text is None or text != text:
            return ''
        text = str(text)

    # Iterative form of the original recursion, so very long titles cannot
    # hit the interpreter's recursion limit.
    segments = []
    while len(text) > max_length:
        cut = text.rfind(' ', 0, max_length)
        if cut == -1:
            cut = max_length
        segments.append(text[:cut])
        text = text[cut:].strip()
    segments.append(text)

    return '<br>'.join(segments)
+
+
+
+########################################################### #######################################################################
def plot_nct2org_icicle(df):
    """Build an icicle chart: Trials -> NCTId -> OrgStudyId -> Condition.

    Args:
        df: DataFrame with 'NCTId', 'BriefTitle', 'OrgStudyId' and
            'Condition' columns.

    Returns:
        A plotly ``go.Figure`` holding a single ``go.Icicle`` trace. NCTId
        nodes carry the trial's BriefTitle as hover text; every other node
        has empty hover text.
    """
    nctids = df['NCTId'].unique()  # computed once instead of per use

    nctid_brieftitle = (
        df[['NCTId', 'BriefTitle']]
        .drop_duplicates()
        .set_index('NCTId')
        .to_dict()['BriefTitle']
    )

    # Nodes are gathered in a list and converted once; concatenating a
    # DataFrame per node is quadratic in the number of rows.
    nodes = [{'ids': "Trials", 'labels': "Trials", 'parents': "", 'hovertext': ""}]

    # NCTId level.
    nodes.extend(
        {
            'ids': nctid,
            'labels': nctid,
            'parents': "Trials",
            'hovertext': nctid_brieftitle[nctid],
        }
        for nctid in nctids
    )

    # OrgStudyId level.
    for nctid in nctids:
        for orgstudyid in df.loc[df['NCTId'] == nctid, 'OrgStudyId'].unique():
            nodes.append({
                'ids': f"{nctid}-{orgstudyid}",
                'labels': orgstudyid,
                'parents': nctid,
                'hovertext': "",
            })

    # Condition level: one node per input row; the row index is part of the
    # id so repeated conditions under the same trial stay distinct.
    for index, nctid, orgstudyid, condition in zip(
            df.index, df['NCTId'], df['OrgStudyId'], df['Condition']):
        nodes.append({
            'ids': f"{nctid}-{orgstudyid}-{condition}-{index}",
            'labels': condition,
            'parents': f"{nctid}-{orgstudyid}",
            'hovertext': "",
        })

    icicle_df = pd.DataFrame(nodes, columns=['ids', 'labels', 'parents', 'hovertext'])

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        hovertext=icicle_df.hovertext,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial")
    ))

    fig.update_layout(autosize=True, height=1000)

    return fig
+
+
+
+######################################################################################################################################
+
+
+#################################################################################################################
+
+
+############################## Scatter Plot for Country Timelines ######################################
+
+import pandas as pd
+
+import numpy as np
+import plotly.express as px
+
+
+import plotly.graph_objs as go
+from plotly.subplots import make_subplots
+
+
def split_condition(text):
    """Return the leading condition of *text*.

    The text is cut at the first comma, the result is cut again at the first
    '|', and surrounding whitespace is removed.
    """
    before_comma, _, _ = text.partition(',')
    before_pipe, _, _ = before_comma.partition('|')
    return before_pipe.strip()
+
+#################################################################################################################################
+import plotly.graph_objs as go
+
+
+
+import plotly.graph_objs as go
+import plotly.subplots as sp
+import pandas as pd
+import numpy as np
+
+
+
+################################################################### COUNTRY PLOTS ################################################################
def plot_trial_country_map(df):
    """Build an icicle chart: Country -> Condition -> trial (NCTId).

    Only interventional studies are shown. Country and condition nodes show
    the number of distinct trials on hover; trial nodes show the wrapped
    BriefTitle.

    Args:
        df: DataFrame with 'StudyType', 'Phase', 'Condition', 'Country',
            'NCTId' and 'BriefTitle' columns.

    Returns:
        A plotly ``go.Figure`` holding a single ``go.Icicle`` trace.
    """
    # Copy the filtered slice so the Phase fill does not write into (or warn
    # about) the caller's frame.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Split the conditions (helper defined elsewhere in this module).
    df = split_conditions(df, 'Condition')

    root_nodes = [{
        'ids': "Country",
        'labels': "Country",
        'parents': "",
        'hover_text': "Country",
    }]
    country_nodes = []
    condition_nodes = []
    trial_nodes = []

    # The hierarchy is walked directly. Rebuilding (country, condition) by
    # splitting the node id on '__' would break for names containing '__'.
    for country in df['Country'].unique():
        country_df = df[df['Country'] == country]
        country_nodes.append({
            'ids': country,
            'labels': country,
            'parents': "Country",
            'hover_text': f"({len(country_df['NCTId'].unique())} Trials)",
        })

        for condition in country_df['Condition'].unique():
            condition_df = country_df[country_df['Condition'] == condition]
            country_condition = f"{country}__{condition}"
            condition_nodes.append({
                'ids': country_condition,
                'labels': condition,
                'parents': country,
                'hover_text': f"({len(condition_df['NCTId'].unique())} Trials)",
            })

            # First row per trial supplies the BriefTitle for the hover text.
            first_rows = condition_df.drop_duplicates('NCTId')
            for trial, title in zip(first_rows['NCTId'], first_rows['BriefTitle']):
                trial_nodes.append({
                    'ids': f"{country_condition}__{trial}",
                    'labels': trial,
                    'parents': country_condition,
                    'hover_text': f"<br>{insert_line_break(title)}",
                })

    icicle_df = pd.DataFrame(
        root_nodes + country_nodes + condition_nodes + trial_nodes,
        columns=['ids', 'labels', 'parents', 'hover_text'],
    )

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        textinfo='label',
        hovertext=icicle_df.hover_text,
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial")
    ))

    fig.update_layout(autosize=True, height=800)

    return fig
+
+
+####################
+
+################################################################ SITES #####################################################
+
+
+
+
+
+################################################################ TRIAL SITES ###########################################
def plot_trial_sites(df):
    """Build an icicle chart: Sites -> City -> Site -> trial.

    Only interventional studies are shown. City and site nodes show the
    number of distinct trials on hover; trial nodes are labelled with the
    NCTId and, on a second line, the OrgStudyId.

    Args:
        df: DataFrame with 'StudyType', 'Phase', 'City', 'Site', 'NCTId'
            and 'OrgStudyId' columns.

    Returns:
        A plotly ``go.Figure`` holding a single ``go.Icicle`` trace.
    """
    # Copy the filtered slice so the Phase fill does not write into (or warn
    # about) the caller's frame.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    root_nodes = [{
        'ids': "Sites",
        'labels': "Sites",
        'parents': "",
        'hover_text': "Sites",
    }]
    city_nodes = []
    site_nodes = []
    trial_nodes = []

    # The hierarchy is walked directly. Recovering (city, site) by splitting
    # the node id on '__' would break for names containing '__'.
    for city in df['City'].unique():
        city_df = df[df['City'] == city]
        city_nodes.append({
            'ids': city,
            'labels': city,
            'parents': "Sites",
            'hover_text': f"({len(city_df['NCTId'].unique())} Trials)",
        })

        for site in city_df['Site'].unique():
            site_df = city_df[city_df['Site'] == site]
            city_site = f"{city}__{site}"
            site_nodes.append({
                'ids': city_site,
                'labels': site,
                'parents': city,
                'hover_text': f"({len(site_df['NCTId'].unique())} Trials)",
            })

            # Distinct "NCTId<br>OrgStudyId" labels in first-seen order,
            # each mapped to the NCTId used in the node id.
            trial_labels = {}
            for nctid, orgstudyid in zip(site_df['NCTId'], site_df['OrgStudyId']):
                trial_labels.setdefault(f"{nctid}<br>{orgstudyid}", nctid)

            for label, nctid in trial_labels.items():
                trial_nodes.append({
                    'ids': f"{city_site}__{nctid}",
                    'labels': label,
                    'parents': city_site,
                    'hover_text': "",
                })

    icicle_df = pd.DataFrame(
        root_nodes + city_nodes + site_nodes + trial_nodes,
        columns=['ids', 'labels', 'parents', 'hover_text'],
    )

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        textinfo='label',
        hovertext=icicle_df.hover_text,
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial")
    ))

    fig.update_layout(autosize=True, height=800)

    return fig
+
+
+
+#############################################################################################################################################
def plot_trial_site_map(df):
    """Build a treemap of interventional trials: Site -> City -> NCTId -> Condition.

    Tile area is the number of input rows in each
    (Site, City, NCTId, Condition) group; tiles are coloured by Site.

    Args:
        df: DataFrame with 'StudyType', 'Phase', 'Condition', 'Site', 'City'
            and 'NCTId' columns.

    Returns:
        The plotly figure produced by ``px.treemap``.
    """
    # Copy the filtered slice so the Phase fill does not write into (or warn
    # about) the caller's frame.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Split the conditions (helper defined elsewhere in this module).
    df = split_conditions(df, 'Condition')

    df_count = (
        df.groupby(['Site', 'City', 'NCTId', 'Condition'])
        .size()
        .reset_index(name='Count')
    )

    fig = px.treemap(
        df_count,
        path=['Site', 'City', 'NCTId', 'Condition'],
        values='Count',
        color='Site',
    )

    # One font for every tile. The earlier per-depth calls passed
    # selector=dict(depth=N); a treemap trace has no `depth` property, so
    # that selector matches no trace and the font was never applied.
    fig.update_traces(textfont=dict(family="Arial", size=30, color='black'))

    fig.update_layout(autosize=True, height=800)

    return fig
+
+
+############################################################
+
+###############################################################################################################################################################
+
+
+########################################################### Timelines ###########################################################################################################
+
+
+import numpy as np
+import plotly.graph_objs as go
+import matplotlib.pyplot as plt
+
+
def generate_colors(n):
    """Return *n* hex colour strings sampled evenly from matplotlib's rainbow colormap."""
    sample_points = np.linspace(0, 1, n)
    rgba_rows = plt.cm.rainbow(sample_points)

    hex_colors = []
    for red, green, blue, _alpha in rgba_rows:
        # Scale the 0-1 channels to 0-255; the alpha channel is ignored.
        channels = (int(red * 255), int(green * 255), int(blue * 255))
        hex_colors.append('#%02x%02x%02x' % channels)
    return hex_colors
+
def get_marker_size(enrollment_count):
    """Map an enrollment count to a marker diameter.

    Below 100 -> 20, below 300 -> 40, below 500 -> 60, below 1000 -> 70,
    anything else -> 100.
    """
    # (exclusive upper bound, marker size), checked in ascending order.
    size_steps = ((100, 20), (300, 40), (500, 60), (1000, 70))
    for upper_bound, marker_size in size_steps:
        if enrollment_count < upper_bound:
            return marker_size
    return 100
+
def plot_trial_bubblemap(df):
    """Plot start/completion timelines of interventional trials, coloured by condition.

    Each trial (NCTId) occupies one row on the y axis: a square marks the
    start date, a circle sized by enrollment marks the completion date, and
    a thin black line joins the two. Colour is derived from the id of the
    first condition listed for the trial; the legend lists every condition
    with its id. The x axis is fixed to 2020-01-01 .. 2028-12-31.

    Args:
        df: DataFrame with 'StudyType', 'Phase', 'StartDate',
            'CompletionDate', 'Condition', 'NCTId', 'BriefTitle',
            'OrgStudyId' and 'EnrollmentCount' columns.

    Returns:
        A plotly ``go.Figure``.
    """
    # Copy the filtered slice so the column assignments below do not write
    # into (or warn about) the caller's frame.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Unparseable dates become NaT instead of raising.
    df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce')
    df['CompletionDate'] = pd.to_datetime(df['CompletionDate'], errors='coerce')

    # Split the conditions (helper defined elsewhere in this module).
    df = split_conditions(df, 'Condition')

    # Condition ids start at 1; they drive both the legend and the colours.
    condition_ids = {condition: i for i, condition in enumerate(df['Condition'].unique(), start=1)}

    # NCTId -> list of condition ids, in row order.
    nctid_condition_map = df.groupby('NCTId')['Condition'].apply(
        lambda x: [condition_ids[cond] for cond in x]).to_dict()

    df['MarkerSize'] = df['EnrollmentCount'].apply(get_marker_size)

    def condition_color(condition_id):
        # Deterministic colour per condition id.
        return f'rgb({condition_id * 10 % 256}, {(condition_id * 20) % 256}, {(condition_id * 30) % 256})'

    # NOTE(review): customdata[0] is the 'Condition' column although the
    # hover label reads "Type" — confirm which one is intended.
    hover_columns = ['Condition', 'BriefTitle', 'OrgStudyId', 'Phase', 'EnrollmentCount']
    hover_common = (
        'NCTId: %{y}<br>Conditions: %{text}<br>Type: %{customdata[0]}'
        '<br>BriefTitle: %{customdata[1]}<br>OrgStudyId: %{customdata[2]}'
        '<br>Phase: %{customdata[3]}'
    )
    hovertemplate_start = hover_common + '<br>Start Date: %{x}<br>Enrollment Count: %{customdata[4]}'
    hovertemplate_end = hover_common + '<br>Completion Date: %{x}<br>Enrollment Count: %{customdata[4]}'

    nctids = df['NCTId'].unique()

    scatter_plot_start_traces = []
    scatter_plot_end_traces = []
    scatter_plot_lines = []

    for nctid in nctids:
        df_filtered = df[df['NCTId'] == nctid]

        # Every point of the trial shows the full list of its conditions.
        all_conditions = ', '.join(df_filtered['Condition'].unique())
        text = [all_conditions] * len(df_filtered)

        color = condition_color(nctid_condition_map[nctid][0])

        # Start markers (square).
        scatter_plot_start_traces.append(go.Scatter(
            x=df_filtered['StartDate'],
            y=df_filtered['NCTId'],
            mode='markers',
            marker=dict(size=10, symbol='square', color=color),
            text=text,
            customdata=df_filtered[hover_columns],
            hovertemplate=hovertemplate_start,
            showlegend=False))

        # Completion markers (circle, sized by enrollment).
        scatter_plot_end_traces.append(go.Scatter(
            x=df_filtered['CompletionDate'],
            y=df_filtered['NCTId'],
            mode='markers',
            marker=dict(size=df_filtered['MarkerSize'], symbol='circle', color=color, sizemode='diameter'),
            text=text,
            customdata=df_filtered[hover_columns],
            hovertemplate=hovertemplate_end,
            showlegend=False))

        # Line joining start and completion of the trial's first row.
        scatter_plot_lines.append(go.Scatter(
            x=[df_filtered['StartDate'].iloc[0], df_filtered['CompletionDate'].iloc[0]],
            y=[nctid, nctid],
            mode='lines',
            line=dict(color='black', width=1),
            showlegend=False))

    # Legend-only traces: one per condition, no data points.
    legend_traces = [
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(size=10, symbol='circle', color=condition_color(condition_id)),
            name=f'{condition_id}: {condition}',
            showlegend=True)
        for condition, condition_id in condition_ids.items()
    ]

    data = scatter_plot_start_traces + scatter_plot_end_traces + scatter_plot_lines + legend_traces

    layout = go.Layout(
        yaxis=dict(title='NCTId',
                   showgrid=False,
                   tickvals=nctids,
                   ticktext=nctids,
                   tickangle=0),
        xaxis=dict(title='Start-End Dates',
                   showgrid=False,
                   range=[pd.to_datetime('2020-01-01'), pd.to_datetime('2028-12-31')],
                   tickvals=[pd.to_datetime(f'{year}-01-01') for year in range(2020, 2029)]),
        showlegend=True,
        legend=dict(title='Conditions', x=1.05, y=1, traceorder='normal',
                    bgcolor='rgba(255,255,255,0.5)', font=dict(color='#000000')),
        margin=dict(l=150),
        plot_bgcolor='#ffffff',
        paper_bgcolor='#ffffff',
        font=dict(family='Segoe UI', color='#000000'))

    fig = go.Figure(data=data, layout=layout)

    # Height grows with the number of trials. The previous ladder tested
    # ">= 10" before ">= 20", so the 1000 step was unreachable and 6-9
    # trials fell through to the maximum height.
    num_trial = len(nctids)
    if num_trial <= 5:
        height = 600
    elif num_trial < 10:
        height = 800
    elif num_trial < 20:
        height = 1000
    else:
        height = 1400

    fig.update_layout(
        title='Trial Start and End Dates by Conditions',
        autosize=True,
        height=height
    )

    return fig
+
+
+
+
+
+
+
+########################################################################################################################################################
def plot_trial_bubblemap_comp(df):
    """Plot start/completion timelines of interventional trials on a 2010-2023 axis.

    Each trial (NCTId) occupies one row on the y axis: a square marks the
    start date, a circle sized by enrollment marks the completion date, and
    a thin black line joins the two. Colour is derived from the id of the
    first condition listed for the trial; the legend lists every condition
    with its id. The x axis is fixed to 2010-01-01 .. 2023-12-31.

    Args:
        df: DataFrame with 'StudyType', 'Phase', 'StartDate',
            'CompletionDate', 'Condition', 'NCTId', 'BriefTitle',
            'OrgStudyId' and 'EnrollmentCount' columns.

    Returns:
        A plotly ``go.Figure``.
    """
    # Copy the filtered slice so the column assignments below do not write
    # into (or warn about) the caller's frame.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Unparseable dates become NaT instead of raising.
    df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce')
    df['CompletionDate'] = pd.to_datetime(df['CompletionDate'], errors='coerce')

    # Split the conditions (helper defined elsewhere in this module).
    df = split_conditions(df, 'Condition')

    # Condition ids start at 1; they drive both the legend and the colours.
    condition_ids = {condition: i for i, condition in enumerate(df['Condition'].unique(), start=1)}

    # NCTId -> list of condition ids, in row order.
    nctid_condition_map = df.groupby('NCTId')['Condition'].apply(
        lambda x: [condition_ids[cond] for cond in x]).to_dict()

    df['MarkerSize'] = df['EnrollmentCount'].apply(get_marker_size)

    def condition_color(condition_id):
        # Deterministic colour per condition id.
        return f'rgb({condition_id * 10 % 256}, {(condition_id * 20) % 256}, {(condition_id * 30) % 256})'

    # NOTE(review): customdata[0] is the 'Condition' column although the
    # hover label reads "Type" — confirm which one is intended.
    hover_columns = ['Condition', 'BriefTitle', 'OrgStudyId', 'Phase', 'EnrollmentCount']
    hover_common = (
        'NCTId: %{y}<br>Conditions: %{text}<br>Type: %{customdata[0]}'
        '<br>BriefTitle: %{customdata[1]}<br>OrgStudyId: %{customdata[2]}'
        '<br>Phase: %{customdata[3]}'
    )
    hovertemplate_start = hover_common + '<br>Start Date: %{x}<br>Enrollment Count: %{customdata[4]}'
    hovertemplate_end = hover_common + '<br>Completion Date: %{x}<br>Enrollment Count: %{customdata[4]}'

    nctids = df['NCTId'].unique()

    scatter_plot_start_traces = []
    scatter_plot_end_traces = []
    scatter_plot_lines = []

    for nctid in nctids:
        df_filtered = df[df['NCTId'] == nctid]

        # Every point of the trial shows the full list of its conditions.
        all_conditions = ', '.join(df_filtered['Condition'].unique())
        text = [all_conditions] * len(df_filtered)

        color = condition_color(nctid_condition_map[nctid][0])

        # Start markers (square).
        scatter_plot_start_traces.append(go.Scatter(
            x=df_filtered['StartDate'],
            y=df_filtered['NCTId'],
            mode='markers',
            marker=dict(size=10, symbol='square', color=color),
            text=text,
            customdata=df_filtered[hover_columns],
            hovertemplate=hovertemplate_start,
            showlegend=False))

        # Completion markers (circle, sized by enrollment).
        scatter_plot_end_traces.append(go.Scatter(
            x=df_filtered['CompletionDate'],
            y=df_filtered['NCTId'],
            mode='markers',
            marker=dict(size=df_filtered['MarkerSize'], symbol='circle', color=color, sizemode='diameter'),
            text=text,
            customdata=df_filtered[hover_columns],
            hovertemplate=hovertemplate_end,
            showlegend=False))

        # Line joining start and completion of the trial's first row.
        scatter_plot_lines.append(go.Scatter(
            x=[df_filtered['StartDate'].iloc[0], df_filtered['CompletionDate'].iloc[0]],
            y=[nctid, nctid],
            mode='lines',
            line=dict(color='black', width=1),
            showlegend=False))

    # Legend-only traces: one per condition, no data points.
    legend_traces = [
        go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(size=10, symbol='circle', color=condition_color(condition_id)),
            name=f'{condition_id}: {condition}',
            showlegend=True)
        for condition, condition_id in condition_ids.items()
    ]

    data = scatter_plot_start_traces + scatter_plot_end_traces + scatter_plot_lines + legend_traces

    layout = go.Layout(
        yaxis=dict(title='NCTId',
                   showgrid=False,
                   tickvals=nctids,
                   ticktext=nctids,
                   tickangle=0),
        xaxis=dict(title='Start-End Dates',
                   showgrid=False,
                   range=[pd.to_datetime('2010-01-01'), pd.to_datetime('2023-12-31')],
                   # range() excludes its stop value: 2024 is needed so the
                   # 2023 tick is drawn on an axis that runs through 2023.
                   tickvals=[pd.to_datetime(f'{year}-01-01') for year in range(2010, 2024)]),
        showlegend=True,
        legend=dict(title='Conditions', x=1.05, y=1, traceorder='normal',
                    bgcolor='rgba(255,255,255,0.5)', font=dict(color='#000000')),
        margin=dict(l=150),
        plot_bgcolor='#ffffff',
        paper_bgcolor='#ffffff',
        font=dict(family='Segoe UI', color='#000000'))

    fig = go.Figure(data=data, layout=layout)

    # Height grows with the number of trials. The previous ladder tested
    # ">= 10" before ">= 20", so the 1000 step was unreachable and 6-9
    # trials fell through to the maximum height.
    num_trial = len(nctids)
    if num_trial <= 5:
        height = 600
    elif num_trial < 10:
        height = 800
    elif num_trial < 20:
        height = 1000
    else:
        height = 1400

    fig.update_layout(
        title='Trial Start and End Dates by Conditions',
        autosize=True,
        height=height
    )

    return fig
+
+
+
+#######################################################################################
+
+
+#######################################################################################
+
+
+############################################ Trial Site Map without Zip code now ##############
+
+import geopandas as gpd
+
def plot_trial_site_world_map(df, country_filter=None):
    """Plot trial sites on a world map, one bubble per site/trial/condition group.

    Every distinct (City, Country) pair is geocoded through
    ``gpd.tools.geocode`` (a network lookup). Bubble size is the number of
    input rows in the group; colour and hover name are "<Site>, <Country>".

    Args:
        df: DataFrame with 'City', 'Country', 'Site', 'Condition', 'NCTId'
            and 'BriefTitle' columns. It is not modified.
        country_filter: Optional country name; when given, only rows of that
            country are geocoded and plotted.

    Returns:
        The plotly figure produced by ``px.scatter_geo``.
    """
    # Work on a copy: the 'Berlin' substitution below must not leak into the
    # caller's frame.
    df = df.copy()
    df.loc[(df['City'] == 'Multiple Locations') & (df['Country'] == 'Germany'), 'City'] = 'Berlin'

    # Filter before geocoding so no lookups are spent on cities that will
    # not be drawn; the plotted rows are the same as filtering afterwards.
    if country_filter:
        df = df[df['Country'] == country_filter]

    unique_cities = df[['City', 'Country']].drop_duplicates().copy()

    def geocode_city(city, country):
        """Return (latitude, longitude), or NaNs when no usable point comes back."""
        point = gpd.tools.geocode(f"{city}, {country}").geometry[0]
        if point is None or point.is_empty:
            return float('nan'), float('nan')
        return point.y, point.x

    coordinates = [
        geocode_city(city, country)
        for city, country in zip(unique_cities['City'], unique_cities['Country'])
    ]
    unique_cities['Latitude'] = [latitude for latitude, _ in coordinates]
    unique_cities['Longitude'] = [longitude for _, longitude in coordinates]

    # Cities without coordinates cannot be placed on the map.
    unique_cities = unique_cities.dropna(subset=['Latitude', 'Longitude'])

    df = df.merge(unique_cities, on=['City', 'Country'])

    # Combined label used for hover name and colour.
    df['SiteCountry'] = df['Site'] + ', ' + df['Country']

    df_count = df.groupby(
        ['Country', 'City', 'SiteCountry', 'Condition', 'NCTId', 'BriefTitle', 'Latitude', 'Longitude']
    ).size().reset_index(name='Count')

    fig = px.scatter_geo(df_count,
                         lat='Latitude',
                         lon='Longitude',
                         hover_name='SiteCountry',
                         hover_data={'Latitude': False, 'Longitude': False, 'NCTId': False,
                                     'BriefTitle': False, 'Condition': False,
                                     'City': True, 'Country': True},
                         size='Count',
                         color='SiteCountry',
                         projection='mercator')

    fig.update_layout(title='Trial Sites Map',
                      geo=dict(showframe=False, showcoastlines=False, showcountries=True),
                      width=1200,
                      height=800)

    return fig
+
+
+
+#############################################################################################################
+
+############################################################# Gradio Function as Views ####################################
+
+
+### ######################### Find Sponsors
+ #############################################################################################################################################
+
+
+
+
def select_sponsor(sponsor_input, academia_input):
    """Return *sponsor_input* when it is truthy, otherwise *academia_input*."""
    return sponsor_input or academia_input
+
def select_disease(disease_input, disease_input_text):
    """Pick the disease to search for, preferring the typed text.

    Args:
        disease_input: Fallback value.
        disease_input_text: Free-text value; surrounding whitespace is
            removed when it is a string.

    Returns:
        The stripped free text when it is non-empty, otherwise
        *disease_input*. Whitespace-only text counts as empty, so it falls
        back to *disease_input* instead of yielding ''.
    """
    typed = disease_input_text
    if isinstance(typed, str):
        typed = typed.strip()
    return typed if typed else disease_input
+
+#summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drug
+
+#async def disease_view (condition, condition_text, sponsor_input, academia_input):
async def disease_view(condition, sponsor_input):
    """Fetch recruiting trials for a condition and/or sponsor and build the sponsor views.

    Args:
        condition: Condition name, or a list of words that is joined with
            spaces.
        sponsor_input: Sponsor name, or a list of words that is joined with
            spaces.

    Returns:
        A 6-tuple ``(summary_stats, summary_stats_collb,
        html_table_conditions, html_table_conditions_collb, sponsor_tree,
        collaborator_tree)``. On invalid input or when nothing matches, the
        first element is an error message and the other five are ``None``.
    """
    import io
    import re

    # Error returns must have the same arity as the success return below
    # (six values); they previously carried eight.
    output_count = 6

    def failure(message):
        return (message,) + (None,) * (output_count - 1)

    invalid_input_message = (
        "Error: Invalid input. Please limit your input within 50 characters and use only "
        "English letters with allowed special characters [. , & / -]."
    )
    no_match_message = (
        "The Sponsor Name typed did not match with ClinicalTrials.Gov, "
        "Please try with new selection again!"
    )

    # List values are flattened to a single string for the pattern check.
    sponsor = ' '.join(sponsor_input) if isinstance(sponsor_input, list) else sponsor_input
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # fullmatch: with re.match a trailing newline would slip past '$'.
    allowed_chars = re.compile(r'[A-Za-z .,&/()-]*')
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not allowed_chars.fullmatch(value)):
            return failure(invalid_input_message)

    # Without this guard the result variables below would be unbound.
    if not condition and not sponsor:
        return failure("Error: Please provide a condition or a sponsor.")

    query = {'status': "Recruiting"}
    if condition:
        query['condition'] = condition
    if sponsor:
        query['sponsor'] = sponsor

    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, _html_table, _html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_conditions is None or html_table_conditions_collb is None:
        return failure(no_match_message)

    def first_table(html):
        """Parse the first table of *html*; an empty frame when none is found."""
        # Literal HTML is wrapped in StringIO: passing a raw string to
        # read_html is deprecated in recent pandas releases.
        source = io.StringIO(html) if isinstance(html, str) else html
        try:
            return pd.read_html(source)[0]
        except (ValueError, IndexError):
            return pd.DataFrame()

    df = first_table(html_table_conditions)
    df2 = first_table(html_table_conditions_collb)

    if df.empty and df2.empty:
        return failure(no_match_message)

    sponsor_tree = plot_sponsor_tree(df) if not df.empty else None
    collaborator_tree = plot_collaborator_icicle(df2) if not df2.empty else None

    return (summary_stats, summary_stats_collb, html_table_conditions,
            html_table_conditions_collb, sponsor_tree, collaborator_tree)
+
+ # return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb, condition_other, condition_sunburst ,sponsor_tree, collaborator_tree
+
+
+##################### Assets ###################################################################################
+
+
def select_sponsor(s_sponsor_input, s_academia_input):
    """Return the sponsor selection, falling back to the academia selection.

    The first argument wins whenever it is truthy; otherwise the second
    argument is returned unchanged, even if it is itself empty or None.
    """
    return s_sponsor_input or s_academia_input
+
def select_condition(s_disease_input, s_disease_input_type):
    """Pick the condition to search for, preferring the free-typed text.

    Args:
        s_disease_input: Fallback value, returned unchanged when nothing
            usable was typed.
        s_disease_input_type: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``s_disease_input``.
    """
    typed = (s_disease_input_type or "").strip()
    return typed if typed else s_disease_input
+
+
async def drug_view(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the drug view for trials with status "Recruiting".

    The sponsor and condition are resolved with ``select_sponsor`` and
    ``select_condition``, validated, and forwarded to ``gradio_wrapper_nct``.
    The drug table it returns is parsed and handed to ``plot_drug_sankey``.

    Args:
        condition: Fallback condition, used when ``condition_type`` is blank.
        condition_type: Free-typed condition text.
        s_sponsor_input: Preferred sponsor selection.
        s_academia_input: Sponsor selection used when ``s_sponsor_input``
            is empty.

    Returns:
        ``(summary_stats, html_table_drugs, sankey_map_drug)`` on success,
        otherwise ``(message, None, None)`` where ``message`` explains why
        nothing can be shown.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = select_sponsor(s_sponsor_input, s_academia_input)
    condition = select_condition(condition, condition_type)

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None

    # Forward only the filters that were actually supplied.
    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter given: previously no branch ran and the result
        # variables below were unbound.
        return no_data, None, None

    (summary_stats, _summary_stats_collb, _html_table_conditions,
     _html_table_conditions_collb, _html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_drugs is None:
        return no_data, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_drugs))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    sankey_map_drug = plot_drug_sankey(df)
    return summary_stats, html_table_drugs, sankey_map_drug
+
+
+########################### Condition###################
+################## ########################################################################################
+
+
+
+
def select_sponsor_phc(s_sponsor_input_phc, s_academia_input_phc):
    """Return the sponsor selection when truthy, else the academia selection."""
    if not s_sponsor_input_phc:
        return s_academia_input_phc
    return s_sponsor_input_phc
+
def select_condition_phc(s_disease_input_phc, s_disease_input_type_phc):
    """Pick the condition to search for, preferring the free-typed text.

    Args:
        s_disease_input_phc: Fallback value, returned unchanged when nothing
            usable was typed.
        s_disease_input_type_phc: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``s_disease_input_phc``.
    """
    typed = (s_disease_input_type_phc or "").strip()
    if typed:
        return typed
    return s_disease_input_phc
+
+
+#async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input):
async def disease_view_phc(condition, s_sponsor_input):
    """Build the condition view for trials with status "Recruiting".

    The inputs are validated and forwarded to ``gradio_wrapper_nct``. The
    condition table it returns is parsed and handed to
    ``plot_condition_treemap_nct`` and ``plot_condition_sunburst_nct``.

    Args:
        condition: Condition to search for; a list is joined with spaces.
        s_sponsor_input: Sponsor to search for; a list is joined with spaces.

    Returns:
        ``(summary_stats, html_table_conditions, tree_map_cond_nct,
        sunburst_map_cond_nct)`` on success, otherwise
        ``(message, None, None, None)``. Every path returns four values so
        the result always has the same length as the success result.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = s_sponsor_input

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None, None

    # Forward only the filters that were actually supplied.
    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter given: previously no branch ran and the result
        # variables below were unbound.
        return no_data, None, None, None

    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, _html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_conditions is None:
        return no_data, None, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_conditions))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None, None

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None

    tree_map_cond_nct = plot_condition_treemap_nct(df)
    sunburst_map_cond_nct = plot_condition_sunburst_nct(df)
    return summary_stats, html_table_conditions, tree_map_cond_nct, sunburst_map_cond_nct
+
+ # return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map
+
+
+################## Trial ########################################################################################
+
+
+
+
def select_sponsor_phs(s_sponsor_input_phs, s_academia_input_phs):
    """Return the sponsor selection when truthy, else the academia selection."""
    return s_sponsor_input_phs if s_sponsor_input_phs else s_academia_input_phs
+
def select_condition_phs(s_disease_input_phs, s_disease_input_type_phs):
    """Pick the condition to search for, preferring the free-typed text.

    Args:
        s_disease_input_phs: Fallback value, returned unchanged when nothing
            usable was typed.
        s_disease_input_type_phs: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``s_disease_input_phs``.
    """
    typed = (s_disease_input_type_phs or "").strip()
    return typed or s_disease_input_phs
+
+
+
async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the trial view for trials with status "Recruiting".

    The sponsor and condition are resolved with ``select_sponsor_phs`` and
    ``select_condition_phs``, validated, and forwarded to
    ``gradio_wrapper_nct``. The condition table it returns is parsed and
    handed to ``plot_nct2org_icicle``.

    Args:
        condition: Fallback condition, used when ``condition_type`` is blank.
        condition_type: Free-typed condition text.
        s_sponsor_input: Preferred sponsor selection.
        s_academia_input: Sponsor selection used when ``s_sponsor_input``
            is empty.

    Returns:
        ``(summary_stats, html_table_conditions, nct_org_map)`` on success,
        otherwise ``(message, None, None)``.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = select_sponsor_phs(s_sponsor_input, s_academia_input)
    condition = select_condition_phs(condition, condition_type)

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None

    # Forward only the filters that were actually supplied.
    query = {"status": "Recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter given: previously no branch ran and the result
        # variables below were unbound.
        return no_data, None, None

    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, _html_table_drugs) = await gradio_wrapper_nct(**query)

    if html_table_conditions is None:
        return no_data, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_conditions))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None

    if df.empty:
        return "The Sponsor Name did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    nct_org_map = plot_nct2org_icicle(df)
    return summary_stats, html_table_conditions, nct_org_map
+
+ # return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map
+
+
+
+##################################################### New Trials ######################################
+
def select_sponsor_phs_n(s_sponsor_input_phs, s_academia_input_phs):
    """Return the sponsor selection, falling back to the academia selection.

    The fallback is used whenever the sponsor selection is falsy.
    """
    return s_sponsor_input_phs or s_academia_input_phs
+
def select_condition_phs_n(s_disease_input_phs, s_disease_input_type_phs):
    """Pick the condition to search for, preferring the free-typed text.

    Args:
        s_disease_input_phs: Fallback value, returned unchanged when nothing
            usable was typed.
        s_disease_input_type_phs: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``s_disease_input_phs``.
    """
    typed = (s_disease_input_type_phs or "").strip()
    return typed if typed else s_disease_input_phs
+
+
+####################################################################################
async def disease_view_phs_n(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the new-trials view for trials with status "Not yet recruiting".

    The sponsor and condition are resolved with ``select_sponsor_phs_n`` and
    ``select_condition_phs_n``, validated, and forwarded to
    ``gradio_wrapper_nct``. The drug table is handed to ``plot_drug_sankey``
    and the condition table to ``plot_trial_bubblemap``.

    Args:
        condition: Fallback condition, used when ``condition_type`` is blank.
        condition_type: Free-typed condition text.
        s_sponsor_input: Preferred sponsor selection.
        s_academia_input: Sponsor selection used when ``s_sponsor_input``
            is empty.

    Returns:
        ``(summary_stats, html_table_conditions, tree_map_cond_nct,
        bubble_map_trials)`` on success, where ``tree_map_cond_nct`` is the
        value returned by ``plot_drug_sankey``; otherwise
        ``(message, None, None, None)``.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = select_sponsor_phs_n(s_sponsor_input, s_academia_input)
    condition = select_condition_phs_n(condition, condition_type)

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None, None

    # Forward only the filters that were actually supplied.
    query = {"status": "Not yet recruiting"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter given: previously no branch ran and the result
        # variables below were unbound.
        return no_data, None, None, None

    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    # Both tables are parsed below, so both must be present.
    if html_table_conditions is None or html_table_drugs is None:
        return no_data, None, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df_drugs = pd.read_html(io.StringIO(html_table_drugs))[0]
        df_conditions = pd.read_html(io.StringIO(html_table_conditions))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None, None

    if df_drugs.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None

    tree_map_cond_nct = plot_drug_sankey(df_drugs)
    bubble_map_trials = plot_trial_bubblemap(df_conditions)
    return summary_stats, html_table_conditions, tree_map_cond_nct, bubble_map_trials
+
+
+############################################### Completed Trials ####################################################
def select_sponsor_phs_c(s_sponsor_input_phs, s_academia_input_phs):
    """Return the sponsor selection when truthy, else the academia selection."""
    if not s_sponsor_input_phs:
        return s_academia_input_phs
    return s_sponsor_input_phs
+
def select_condition_phs_c(s_disease_input_phs, s_disease_input_type_phs):
    """Pick the condition to search for, preferring the free-typed text.

    Args:
        s_disease_input_phs: Fallback value, returned unchanged when nothing
            usable was typed.
        s_disease_input_type_phs: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``s_disease_input_phs``.
    """
    typed = (s_disease_input_type_phs or "").strip()
    if typed:
        return typed
    return s_disease_input_phs
+
async def disease_view_phs_c(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the completed-trials view for trials with status "Completed".

    The sponsor and condition are resolved with ``select_sponsor_phs_c`` and
    ``select_condition_phs_c``, validated, and forwarded to
    ``gradio_wrapper_nct``. The condition table feeds
    ``plot_condition_treemap_nct``, ``plot_nct2org_icicle`` and
    ``plot_trial_bubblemap_comp``; the drug table feeds ``plot_drug_sankey``.

    Args:
        condition: Fallback condition, used when ``condition_type`` is blank.
        condition_type: Free-typed condition text.
        s_sponsor_input: Preferred sponsor selection.
        s_academia_input: Sponsor selection used when ``s_sponsor_input``
            is empty.

    Returns:
        ``(summary_stats, html_table_conditions, tree_map_cond_nct,
        nct_org_map, sankey_map_drug, bubble_map_trials)`` on success,
        otherwise ``(message, None, None, None, None, None)``.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = select_sponsor_phs_c(s_sponsor_input, s_academia_input)
    condition = select_condition_phs_c(condition, condition_type)

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None, None, None, None

    # Forward only the filters that were actually supplied.
    query = {"status": "Completed"}
    if condition:
        query["condition"] = condition
    if sponsor:
        query["sponsor"] = sponsor
    if len(query) == 1:
        # Neither filter given: previously no branch ran and the result
        # variables below were unbound.
        return no_data, None, None, None, None, None

    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    # Both tables are parsed below, so both must be present.
    if html_table_conditions is None or html_table_drugs is None:
        return no_data, None, None, None, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df_conditions = pd.read_html(io.StringIO(html_table_conditions))[0]
        df_drugs = pd.read_html(io.StringIO(html_table_drugs))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None, None, None, None

    # Checked before plotting so no plot helper runs when both tables are empty.
    if df_conditions.empty and df_drugs.empty:
        return "The selection did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None, None, None

    tree_map_cond_nct = plot_condition_treemap_nct(df_conditions)
    nct_org_map = plot_nct2org_icicle(df_conditions)
    # The bubble map used to get its own freshly parsed frame; a copy keeps
    # it isolated from anything the two helpers above may have done to theirs.
    bubble_map_trials = plot_trial_bubblemap_comp(df_conditions.copy())
    sankey_map_drug = plot_drug_sankey(df_drugs)

    return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map, sankey_map_drug, bubble_map_trials
+
+
+
+
+### ############### Country #########################################################
+
def select_sponsor_con(sponsor_input_con, academia_input_con):
    """Return the sponsor selection when truthy, else the academia selection."""
    return sponsor_input_con if sponsor_input_con else academia_input_con
+
def select_condition_con(condition_input, condition_input_type):
    """Pick the condition to search for, preferring the free-typed text.

    Args:
        condition_input: Fallback value, returned unchanged when nothing
            usable was typed.
        condition_input_type: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``condition_input``.
    """
    typed = (condition_input_type or "").strip()
    return typed or condition_input
+
async def condition_view(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the country view for trials with status "Recruiting".

    The condition and sponsor are resolved with ``select_condition_con`` and
    ``select_sponsor_con``, validated, and forwarded with ``country`` to
    ``gradio_wrapper_nct_spn``. The ``html_table_add`` table it returns is
    parsed and handed to ``plot_trial_country_map``.

    Args:
        condition: Fallback condition, used when ``condition_type`` is blank.
        country: Passed through unchanged to ``gradio_wrapper_nct_spn``.
        condition_type: Free-typed condition text.
        sponsor_input_con: Preferred sponsor selection.
        academia_input_con: Sponsor selection used when
            ``sponsor_input_con`` is empty.

    Returns:
        ``(summary_stats_sites, html_table_add, trial_country)`` on success,
        otherwise ``(message, None, None)``.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    condition = select_condition_con(condition, condition_type)
    sponsor = select_sponsor_con(sponsor_input_con, academia_input_con)

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None

    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting")

    if html_table_add is None:
        return no_data, None, None

    try:
        # Parsed once (it used to be parsed twice into identical frames).
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_add))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    trial_country = plot_trial_country_map(df)
    return summary_stats_sites, html_table_add, trial_country
+
+
+
+
+
+############### Site #########################################################################################################
+
+
def select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s):
    """Return the sponsor selection, falling back to the academia selection.

    The fallback is used whenever the sponsor selection is falsy.
    """
    return sponsor_input_con_s or academia_input_con_s
+
+
def select_condition_con(condition_input, condition_input_type):
    """Pick the condition to search for, preferring the free-typed text.

    A function with this same name is also defined earlier in this file;
    this later definition is the one bound to the name after import.

    Args:
        condition_input: Fallback value, returned unchanged when nothing
            usable was typed.
        condition_input_type: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``condition_input``.
    """
    typed = (condition_input_type or "").strip()
    return typed if typed else condition_input
+
async def condition_view_s(condition, country, condition_type, sponsor_input_con_s, academia_input_con_s):
    """Build the site view for trials with status "Recruiting".

    The condition and sponsor are resolved with ``select_condition_con`` and
    ``select_sponsor_con_s``, validated, and forwarded with ``country`` to
    ``gradio_wrapper_nct_spn``. The ``html_table_add`` table it returns is
    parsed and handed to ``plot_trial_sites`` and ``plot_trial_site_map``.

    Args:
        condition: Fallback condition, used when ``condition_type`` is blank.
        country: Passed through unchanged to ``gradio_wrapper_nct_spn``.
        condition_type: Free-typed condition text.
        sponsor_input_con_s: Preferred sponsor selection.
        academia_input_con_s: Sponsor selection used when
            ``sponsor_input_con_s`` is empty.

    Returns:
        ``(summary_stats_sites, html_table_add, site_cond, country_site)``
        on success, otherwise ``(message, None, None, None)``.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    condition = select_condition_con(condition, condition_type)
    sponsor = select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s)

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None, None

    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting")

    # A single check replaces three identical back-to-back copies.
    if html_table_add is None:
        return no_data, None, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_add))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None, None

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None

    site_cond = plot_trial_sites(df)
    country_site = plot_trial_site_map(df)
    return summary_stats_sites, html_table_add, site_cond, country_site
+
+
+
+
+
+
+###################################### Timelines ###################################################################
+
+
+
def select_sponsor_cont(sponsor_input_con, academia_input_con):
    """Return the sponsor selection when truthy, else the academia selection."""
    if not sponsor_input_con:
        return academia_input_con
    return sponsor_input_con
+
def select_condition_cont(condition_input, condition_input_type):
    """Pick the condition to search for, preferring the free-typed text.

    Args:
        condition_input: Fallback value, returned unchanged when nothing
            usable was typed.
        condition_input_type: Free-typed text; None is treated like an
            empty string instead of raising on ``.strip()``.

    Returns:
        The typed text without surrounding whitespace when it is non-blank,
        otherwise ``condition_input``.
    """
    typed = (condition_input_type or "").strip()
    if typed:
        return typed
    return condition_input
+
async def condition_viewt(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the timelines view for trials with status "Recruiting".

    The condition and sponsor are resolved with ``select_condition_cont``
    and ``select_sponsor_cont``, validated, and forwarded with ``country``
    to ``gradio_wrapper_nct_spn``. The ``html_table_add`` table it returns
    is parsed and handed to ``plot_trial_bubblemap``.

    Args:
        condition: Fallback condition, used when ``condition_type`` is blank.
        country: Passed through unchanged to ``gradio_wrapper_nct_spn``.
        condition_type: Free-typed condition text.
        sponsor_input_con: Preferred sponsor selection.
        academia_input_con: Sponsor selection used when
            ``sponsor_input_con`` is empty.

    Returns:
        ``(summary_stats_sites, html_table, bubble_map_trials)`` on success,
        otherwise ``(message, None, None)``.
    """
    invalid_input = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    condition = select_condition_cont(condition, condition_type)
    sponsor = select_sponsor_cont(sponsor_input_con, academia_input_con)

    # List selections are flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)
    if isinstance(condition, list):
        condition = ' '.join(condition)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input, None, None

    (_summary_stats, _html_table_conditions, html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting")

    if html_table_add is None:
        return no_data, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_add))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    bubble_map_trials = plot_trial_bubblemap(df)

    # NOTE(review): the table returned here is html_table, although the plot
    # is built from html_table_add - confirm this is intended.
    return summary_stats_sites, html_table, bubble_map_trials
+
+
+
+
+
+############### Find Site Map #########################################################################################################
+
+
def select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map):
    """Return the sponsor selection when truthy, else the academia selection."""
    return sponsor_input_con_map if sponsor_input_con_map else academia_input_con_map
+
async def condition_view_map(condition, country, sponsor_input_con_map, academia_input_con_map):
    """Build the site world-map view for trials with status "Recruiting".

    The sponsor is resolved with ``select_sponsor_con_map`` and validated;
    ``condition`` and ``country`` are forwarded unchanged to
    ``gradio_wrapper_nct_spn``. The ``html_table_add`` table it returns is
    parsed and handed to ``plot_trial_site_world_map``.

    Args:
        condition: Passed through unchanged (it is not validated here).
        country: Passed through unchanged.
        sponsor_input_con_map: Preferred sponsor selection.
        academia_input_con_map: Sponsor selection used when
            ``sponsor_input_con_map`` is empty.

    Returns:
        ``(summary_stats_sites, html_table_add, world_map)`` on success,
        otherwise ``(message, None, None)``.
    """
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map)

    # A list selection is flattened to one string so the regex check applies.
    if isinstance(sponsor, list):
        sponsor = ' '.join(sponsor)

    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    if isinstance(sponsor, str) and (len(sponsor) > 50 or not re.match(allowed_chars, sponsor)):
        return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None

    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status="Recruiting")

    if html_table_add is None:
        return no_data, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_add))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None

    if df.empty:
        return "The Trial Id did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    world_map = plot_trial_site_world_map(df)
    if world_map is None:
        # Spelling corrected from "slecting".
        return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None

    return summary_stats_sites, html_table_add, world_map
+
+
+
+### ########################################Find Trial Eligibility###########################################################################
+
+
+############################################################################ END VIEWS########################
+
+#### To remove the inclusion exclusion numbers duplicating in text
+
+import re
+
def format_html_list(html_string):
    """Renumber the numbered items found in a text and join them with ``<br>``.

    The text is split at every "<digits>." followed by whitespace. Text in
    front of the first such marker is discarded. In each item the sequences
    ":." and "General." are removed. A first item with a blank body is
    dropped, the remaining items are renumbered from 1, and runs of two or
    more periods are collapsed to one.

    Args:
        html_string: Text containing numbered items; None or an empty string
            yields an empty result.

    Returns:
        The renumbered items joined by ``<br>``, or "" when the text holds
        no numbered items.
    """
    if not html_string:
        return ""

    # The capturing group keeps the "N. " markers in the split result, so the
    # list alternates: leading text, marker, body, marker, body, ...
    pieces = re.split(r'(\d+\.\s)', html_string)

    entries = []
    for number, text in zip(pieces[1::2], pieces[2::2]):
        entry = re.sub(r':\.', '', number + text)
        entry = re.sub(r'General\.', '', entry)
        if entry.strip():
            entries.append(entry)

    # Drop a leading item that has a marker but no body. The length checks
    # avoid the IndexError raised before when there were no items at all, or
    # when the first marker was not followed by a plain space.
    if entries:
        head = entries[0].split('. ', 1)
        if len(head) > 1 and head[1].strip() == '':
            entries = entries[1:]

    renumbered = []
    for position, entry in enumerate(entries, start=1):
        parts = entry.split('. ', 1)
        if len(parts) > 1:
            entry = f"{position}. {parts[1]}"
        renumbered.append(re.sub(r'\.{2,}', '.', entry))

    # NOTE(review): the separator literal was split across two lines in the
    # source; "<br>" is assumed from the "line breaks ... HTML string"
    # intent - confirm.
    return "<br>".join(renumbered)
+########################################################################################
+
+
+
+
+
+#####################################################################################
+
+#############################################################################################################################################
async def trial_view_map(nctID):
    """Build the site world-map view for a single trial id.

    The id is accepted only when, after stripping whitespace, it starts with
    "NCT" and is 10 to 12 characters long. It is then forwarded to
    ``gradio_wrapper_nct_spn`` with status "Recruiting", and the
    ``html_table_add`` table it returns is parsed and handed to
    ``plot_trial_site_world_map``.

    Args:
        nctID: Trial identifier as entered; None is rejected like any other
            invalid id instead of raising on ``.strip()``.

    Returns:
        ``(summary_stats_sites, world_map, html_table_add)`` on success,
        otherwise ``(message, None, None)``.
    """
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    nctID = (nctID or "").strip()
    if not (nctID.startswith('NCT') and 10 <= len(nctID) <= 12):
        return "Not a Valid NCT ID has been entered", None, None

    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        NCTId=nctID, status="Recruiting")

    if html_table_add is None:
        return no_data, None, None

    try:
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        df = pd.read_html(io.StringIO(html_table_add))[0]
    except (ValueError, IndexError):
        # read_html raises ValueError when the markup contains no table.
        return no_data, None, None

    world_map = plot_trial_site_world_map(df)
    if world_map is None:
        return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None

    return summary_stats_sites, world_map, html_table_add
+ #return html_table, formatted_html_inclusions,formatted_html_exclusions,world_map
+
+
+####################################################################################################################################################
+import plotly.graph_objects as go
+
def split_numbered_criteria(text):
    """Group the lines of *text* into one string per numbered criterion.

    A stripped line opens a new criterion when its first character is a
    digit and ". " occurs within its first four characters.  Any other
    non-blank line is appended, space-separated, to the criterion currently
    being built (lines ahead of the first numbered line form a criterion of
    their own).  Blank lines are ignored.

    Returns:
        An empty list for falsy input, ``["No criteria available"]`` when
        the text holds only blank lines, otherwise the list of criteria.
    """
    if not text:
        return []

    groups = []
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue

        opens_item = stripped[0].isdigit() and '. ' in stripped[:4]
        if opens_item or not groups:
            # Start a fresh criterion (also covers the very first line).
            groups.append([stripped])
        else:
            # Continuation of the criterion in progress.
            groups[-1].append(stripped)

    joined = [' '.join(parts) for parts in groups]
    return joined or ["No criteria available"]
+
def display_criteria_table(inclusion_text, exclusion_text):
    """Render inclusion and exclusion criteria side by side in a Plotly table.

    Both texts are split with ``split_numbered_criteria``; the shorter list
    is padded with empty strings so the two columns have the same number of
    rows.

    Returns:
        The Plotly figure, or None when anything raises while building it
        (the error is printed).
    """
    try:
        included = split_numbered_criteria(inclusion_text)
        excluded = split_numbered_criteria(exclusion_text)

        # Pad the shorter column so both have row_count entries.
        row_count = max(len(included), len(excluded))
        included += [''] * (row_count - len(included))
        excluded += [''] * (row_count - len(excluded))

        header_spec = dict(
            values=['Inclusion Criteria', 'Exclusion Criteria'],
            fill_color='#e6f3ff',
            align=['left', 'left'],
            font=dict(size=14, color='black'),
            height=40,
        )
        cell_spec = dict(
            values=[included, excluded],
            # Repeating white / light-grey pattern for the rows.
            fill_color=[['white', '#f9f9f9'] * row_count],
            align=['left', 'left'],
            font=dict(size=12),
            height=None,
            line=dict(color='lightgrey', width=1),  # light cell borders
        )
        table = go.Table(
            columnwidth=[500, 500],  # equal-width columns
            header=header_spec,
            cells=cell_spec,
        )

        fig = go.Figure(data=[table])
        fig.update_layout(
            title="Trial Eligibility Criteria",
            width=1200,
            # Figure height grows with the number of rows, never below 400.
            height=max(400, row_count * 30 + 100),
            margin=dict(l=20, r=20, t=40, b=20),
        )
        return fig

    except Exception as err:
        print(f"Error in display_criteria_table: {err}")
        return None
+
async def trial_view(nctID):
    """Return the trial table and the eligibility-criteria table for one trial.

    Args:
        nctID: NCT identifier entered by the user.  Surrounding whitespace
            is ignored; None is treated like an empty (invalid) identifier.

    Returns:
        On success ``(html_table, criteria_table)``.  When the identifier is
        rejected or neither inclusion nor exclusion criteria are found,
        ``(message, None, None)``.
    """
    # A missing value would otherwise fail on .strip().
    nctID = (nctID or "").strip()

    # Accept only values that start with "NCT" and are 10-12 characters long.
    if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12):
        return "Not a Valid NCT ID has been entered", None, None

    status = "Recruiting"
    # Only the trial table (fifth of the six results) is used here.
    _, _, _, _, html_table, _ = await gradio_wrapper_nct(NCTId=nctID, status=status)

    formatted_inclusions = get_formatted_inclusion_criteria(nctID)
    formatted_exclusions = get_formatted_exclusion_criteria(nctID)

    if not formatted_inclusions and not formatted_exclusions:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Single table holding both criteria lists.
    criteria_table = display_criteria_table(formatted_inclusions, formatted_exclusions)

    # NOTE(review): the early exits above return three values while this
    # path returns two -- confirm which count the event's outputs expect.
    return html_table, criteria_table
+
+
+
+############################### Design the interface####################################################################################
+
+## Added after the Sept 27 failure
+from gradio.components import Dropdown
+
+
+###############################################################################################################################################################################
+
+trial_app = gr.Blocks()
+with trial_app:
+ gr.Markdown("Trial Connect")
+ gr.Markdown("Data Source: ClinicalTrials.gov ")
+ #gr.Markdown("Now Recruiting Trials:
")
+ with gr.Tabs():
+
+
+
+ ##############################################################################################################################################
+ ################################################################ Conditions ###############################################################################################
+ with gr.TabItem("Trials"):
+
+ # 1st Row
+####################################################################################################################################################
+#####################################################################################################################################################
+
+
+
+ with gr.Row():
+ gr.HTML('''
+ 'Now Recruiting' Trials for Conditions:
+ 1. Select a Condition, for example, 'Pancreatic Cancer', 'Chronic Kidney Disease', 'MASH' etc.
+ 2. Select a Sponsor'.
+ 3. Click 'Show Trials'.
+ ''')
+
+
+
+#####################################################################################################################################################
+
+
+
+ with gr.Row():
+
+###################################################################
+ with gr.Column():
+ s_disease_input_phc = gr.Dropdown(
+ choices=["Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
+ "Cancer","Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
+ "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
+ "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
+ "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
+ "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
+ "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\
+ "Urothelial Carcinoma",\
+ "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
+ "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\
+ " Major","Metabolic", "Generalized Pustular Psoriasis",\
+ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
+ "Liver Cirrhosis", \
+ "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \
+ "Psychological Trauma","Renal", "Respiratory",\
+ "Schizophrenia", "PTSD", \
+ "Venous Thromboembolism", "Wet"],
+ label="Select Condition"
+ )
+###################################################################
+ with gr.Column():
+
+#### #########################################################################################################################################################################################################
+
+ s_sponsor_input_phc = gr.Dropdown(
+
+ choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
+ "CSL Behring", "Daiichi Sankyo, Inc.",\
+ "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
+ "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
+ "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
+ label="Select Sponsor"
+ )
+######################################################################################################################################################################
+ # 3rd Row
+ with gr.Row(): #academia_input = gr.inputs.Dropdown(
+ s_button_phc = gr.Button("Show Trials")
+
+ # Then, create the clear button and add the dropdown input to it
+ clear_btn_phc = gr.ClearButton()
+ clear_btn_phc.add(s_sponsor_input_phc)
+
+ clear_btn_phc.add(s_disease_input_phc)
+
+ # with gr.Column():
+################# # 3rd row#################################################################
+ # with gr.Row():
+###################################################################
+ # with gr.Column():
+ # s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:")
+ # clear_btn_phc.add(s_academia_input_phc)
+
+###################################################################
+ # with gr.Column():
+
+
+ # s_disease_input_type_phc = gr.Textbox(lines=1, label="Filter by typing a Condition:")
+ # clear_btn_phc.add(s_disease_input_type_phc)
+############################################################################################################################################
+######################################################################################################################################################################
+
+#########################################################################################################################################################################
+ with gr.Row():
+
+ summary_block_phc = gr.HTML(label="Clinical Trials Now Recruiting for Conditions :" )
+#############################################################################################################################################################
+ # with gr.Row():
+ # nct_org_map = gr.Plot()
+##########################################################################################################################################################
+####################################################################################################################################################
+ # with gr.Row():
+ # gr.HTML('Conditions by Trials and Phase
')
+ with gr.Row():
+ # with gr.Column():
+ tree_map_cond_nct = gr.Plot()
+####################################################################################################################################################
+ # with gr.Row():
+ # gr.HTML('Conditions by Trials and Phase
')
+ with gr.Row():
+ # with gr.Column():
+
+ sunburst_map_cond_nct = gr.Plot()
+
+
+
+
+ with gr.Row():
+ output_block_conditions_phc = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")
+
+ clear_btn_phc.add(summary_block_phc)
+ clear_btn_phc.add(output_block_conditions_phc)
+ clear_btn_phc.add(tree_map_cond_nct)
+ clear_btn_phc.add(sunburst_map_cond_nct)
+
+ #clear_btn_phs.add(nct_org_map)
+
+#########################################################################
+ ################################################################ Trials ###############################################################################################
+ # with gr.TabItem("Trials"):
+
+ # 1st Row
+####################################################################################################################################################
+ # with gr.Row():
+ # gr.HTML('''
+ # Trials 'Now Recruiting':
+ # 1. Select a Sponsor and click 'Show Trials'.
+ # 2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.
+ # ''')
+
+
+#####################################################################################################################################################
+
+ # with gr.Row():
+ # with gr.Column():
+
+#### #########################################################################################################################################################################################################
+
+ # s_sponsor_input_phs = gr.Dropdown(
+
+############################################################################
+ # choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
+ # "CSL Behring", "Daiichi Sankyo, Inc.",\
+ # "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
+ # "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
+ # "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
+ # label="Select a Sponsor"
+ # )
+ ##############################################################################################################
+ # with gr.Column():
+
+
+######################################################################################################################################################################
+ # 3rd Row
+ # with gr.Row(): #academia_input = gr.inputs.Dropdown(
+ # s_button_phs = gr.Button("Show Trials")
+
+ # Then, create the clear button and add the dropdown input to it
+ # clear_btn_phs = gr.ClearButton()
+ # clear_btn_phs.add(s_sponsor_input_phs)
+
+ # clear_btn_phs.add(s_disease_input_phs)
+
+######################################################################################################################################################################
+################# # 3rd row#################################################################
+ # with gr.Row():
+ #################################################################################################################################################################
+ # with gr.Column():
+ # s_academia_input_phs = gr.Textbox(lines=1, label="Type a Sponsor Name:")
+ # clear_btn_phs.add(s_academia_input_phs)
+#################################################################################################################################################################
+ # with gr.Column():
+
+
+ # s_disease_input_type_phs = gr.Textbox(lines=1, label="Filter by typing a Condition:")
+ # clear_btn_phs.add(s_disease_input_type_phs)
+############################################################################################################################################
+
+#########################################################################################################################################################################
+ # with gr.Row():
+
+ # summary_block_phs = gr.HTML(label="Conditions and Sponsors Now Recruiting for Clinical Trials:" )
+#############################################################################################################################################################
+ #with gr.Row():
+ # nct_org_map = gr.Plot()
+##########################################################################################################################################################
+####################################################################################################################################################
+
+ # with gr.Row():
+ # output_block_conditions_phs = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")
+
+ # clear_btn_phs.add(summary_block_phs)
+ # clear_btn_phs.add(output_block_conditions_phs)
+ # clear_btn_phs.add(nct_org_map)
+
+#########################################################################
+############################################################ ASSETS ###############################################################
+ with gr.TabItem("Drugs"):
+############################################################################################
+ # 1st Row
+####################################################################################################################################################
+ with gr.Row():
+####################################################################################################################################################
+
+ gr.HTML('''
+ Drugs for 'Now Recruiting' Trials:
+
+ 1. Select a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.
+ 2. Select a Sponsor and click 'Show Drugs'.
+ ''')
+
+#####################################################################################################################################################
+ with gr.Row():
+
+#####################################################################################################################################################################################
+ with gr.Column():
+ s_disease_input = gr.Dropdown(
+ choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
+ "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
+ "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
+ "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
+ "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
+ "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
+ "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\
+ "Urothelial Carcinoma",\
+ "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
+ "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\
+ " Major","Metabolic", "Generalized Pustular Psoriasis",\
+ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
+ "Liver Cirrhosis", \
+ "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \
+ "Psychological Trauma","Renal", "Respiratory",\
+ "Schizophrenia", "PTSD", \
+ "Venous Thromboembolism", "Wet"],
+ label= "Filter by a Condition"
+ )
+########################################################################################################################################
+ with gr.Column():
+
+#############################################################################################################################################
+ s_sponsor_input = gr.Dropdown(
+############################################################################
+ choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
+ "CSL Behring", "Daiichi Sankyo, Inc.",\
+ "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
+ "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
+ "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
+ label="Select a Sponsor"
+ )
+############################################################################################
+############################################################################################
+ with gr.Row():
+ s_drug_button = gr.Button("Show Drugs")
+ # Then, create the clear button and add the dropdown input to it
+ clear_btn = gr.ClearButton()
+ clear_btn.add(s_sponsor_input)
+
+ clear_btn.add(s_disease_input)
+
+
+
+ ## with gr.Row():
+#####################################################################################################################################################################################
+################# # 3rd row#################################################################
+ with gr.Row():
+#####################################################################################################################################################################
+ with gr.Column():
+ s_academia_input = gr.Textbox(lines=1, label="Type a Sponsor Name:")
+ clear_btn.add(s_academia_input)
+#################################################################################################################################################################
+ with gr.Column():
+ s_disease_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:")
+ clear_btn.add(s_disease_input_type)
+#####################################################################################################################################################################################
+
+
+
+
+ with gr.Row():
+ drug_summary_block = gr.HTML(label="Conditions and Drug Assets, Sponsors Now Recruiting for Clinical Trials:" )
+ with gr.Row():
+ sankey_map_drug = gr.Plot()
+ with gr.Row():
+ drug_output_block_conditions = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")
+
+ clear_btn.add(drug_summary_block)
+ clear_btn.add(drug_output_block_conditions)
+ clear_btn.add(sankey_map_drug)
+ ############################################################################################################################################################################################
+
+##################################################################### Country#####################################################
+
+
+ # with gr.TabItem("Countries"):
+##########################################################################
+ # 1st Row
+####################################################################################################################################################
+ # with gr.Row():
+ # gr.HTML('''
+ # Countries 'Now Recruiting':
+ # 1. Select a Sponsor, a Condition Name and click 'Show Countries'.
+ # 2. Filter by a Country, for example, 'United States','Germany' etc.
+ # ''')
+
+
+#####################################################################################################################################################
+ # with gr.Row():
+###############################################################################
+ # with gr.Column():
+ # sponsor_input_con = gr.Dropdown(
+############################################################################
+ # choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
+ # "CSL Behring", "Daiichi Sankyo, Inc.",\
+ # "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
+ # "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
+ # "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
+ # label="Select a Sponsor"
+ # )
+
+###############################################################################################################################################################################################
+ # with gr.Column():
+
+ # condition_input_con = gr.Dropdown(
+ # choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
+ # "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
+ # "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
+ # "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
+ # "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
+ # "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
+ ## "Urothelial Carcinoma",\
+ # "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
+ ## " Major","Metabolic", "Generalized Pustular Psoriasis",\
+ # "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
+ # "Liver Cirrhosis", \
+ # "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \
+ # "Psychological Trauma","Renal", "Respiratory",\
+ # "Schizophrenia", "PTSD", \
+ # "Venous Thromboembolism", "Wet"],
+ # label= "Select a Condition")
+
+###############################################################################
+ # with gr.Column():
+ # country_input_tr = gr.Dropdown(
+ # choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\
+ # "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\
+ # "Malaysia","Mexico","Netherlands", \
+ # "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\
+ # "United Kingdom"\
+ # ],
+ # label="Filter by a Country")
+###########################################################################################################################################################################################
+###########################################################################################################################################################
+ # with gr.Row():
+
+
+ # condition_button = gr.Button("Show Countries")
+ # Then, create the clear button and add the dropdown input to it
+ # clear_cn_btn = gr.ClearButton()
+ # clear_cn_btn.add(condition_input_con)
+ #clear_cn_btn.add(sponsor_input_con)
+
+
+ # clear_cn_btn.add(country_input_tr)
+################# # 3rd row#################################################################
+ # with gr.Row():
+ ################################################################ ############## ###############################################################################
+ # with gr.Column():
+ # academia_input_con = gr.Textbox(lines=1, label="Type a Sponsor Name:")
+ # clear_cn_btn.add(academia_input_con)
+ ################################################################ ############## ###############################################################################
+ # with gr.Column():
+ # condition_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:")
+ # clear_cn_btn.add(condition_input_type)
+###############################################################################
+
+
+
+##############################################################################################################################################################################
+ # with gr.Row():
+ # summary_block_cond = gr.HTML(label="Countries with Recruiting Clinical Trials:" )
+ # with gr.Row():
+ #bubble_map_trial = gr.Plot()
+ # with gr.Row():
+ # trial_countries = gr.Plot()
+ # with gr.Row():
+ # condition_output = gr.HTML(label="List of Recruiting Trials")
+ # condition_output = gr.Textbox(label="List of Recruiting Trials")
+ ## clear output ?
+ # clear_cn_btn.add(summary_block_cond)
+ # clear_cn_btn.add(trial_countries)
+ #clear_cn_btn.add(bubble_map_trial)
+ # clear_cn_btn.add(condition_output)
+
+ ############################################################ Site ############################# #####################################################################
+ with gr.TabItem("Locations"):
+###############################################################
+####################################################################################################################################################
+ with gr.Row():
+ gr.HTML('''
+ Sites 'Now Recruiting':
+ 1. Select a Condition, Sponsor and Country and click 'Show Sites'.
+ 2. Review each Site and Cities with the Trial Ids and the Conditions.
+ ''')
+
+ with gr.Row():
+
+################################################################
+ with gr.Column():
+ condition_input_s = gr.Dropdown(
+ choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
+ "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
+ "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
+ "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
+ "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
+ "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
+ "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\
+ "Urothelial Carcinoma",\
+ "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
+ "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\
+ " Major","Metabolic", "Generalized Pustular Psoriasis",\
+ "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
+ "Liver Cirrhosis", \
+ "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \
+ "Psychological Trauma","Renal", "Respiratory",\
+ "Schizophrenia", "PTSD", \
+ "Venous Thromboembolism", "Wet"],
+ label="Select a Condition")
+
+################################################################
+ with gr.Column():
+
+##########################################################################################################################
+ sponsor_input_con_s = gr.Dropdown(
+############################################################################
+############################################################################
+ choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
+ "CSL Behring", "Daiichi Sankyo, Inc.",\
+ "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
+ "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
+ "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
+ label="Select a Sponsor"
+ )
+
+###################################################################################################################
+#################################################################################################################################################
+################################################################
+
+################################################################
+ with gr.Column():
+ country_input_s = gr.Dropdown(
+ choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\
+ "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\
+ "Malaysia","Mexico","Netherlands", \
+ "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\
+ "United Kingdom"\
+ ],
+ label="Select a Country")
+
+###############################################################
+ with gr.Row():
+ # with gr.Column():
+ condition_button_s = gr.Button("Show Sites")
+ # Clear button for this tab: every component passed to .add() is reset when it is clicked
+ clear_cn_btn = gr.ClearButton()
+ clear_cn_btn.add(condition_input_s)
+
+ clear_cn_btn.add(sponsor_input_con_s)
+
+ clear_cn_btn.add(country_input_s)
+ # 3rd row: two text boxes ("Type a Sponsor Name:" and "Filter by typing a Condition:"), both also cleared by clear_cn_btn
+################# # 3rd row#################################################################
+ with gr.Row():
+ ################################################################
+ with gr.Column():
+ academia_input_con_s = gr.Textbox(lines=1, label="Type a Sponsor Name:")
+ clear_cn_btn.add(academia_input_con_s)
+
+################################################################
+ with gr.Column():
+ condition_input_site = gr.Textbox(lines=1, label="Filter by typing a Condition:")
+ clear_cn_btn.add(condition_input_site)
+#############################################################################################################################################
+
+
+##################################################################################################################################################
+
+#################################################################################################################################
+
+ with gr.Row():
+ #summary_block = gr.outputs.Textbox(label="Conditions and Sponsors Now Recruiting for Clinical Trials:")
+ summary_block_cond_s = gr.HTML(label="Sites where Sponsors Now Recruiting for Clinical Trials:" )
+ #with gr.Row():
+
+ #world_map = gr.Plot()
+
+ with gr.Row():
+ site_cond = gr.Plot()
+####################################################################################################################################################
+ with gr.Row():
+ gr.HTML('Recruiting Sites with Trial Ids and Conditions
')
+ with gr.Row():
+
+ country_site =gr.Plot()
+
+
+ with gr.Row():
+
+ condition_output_s = gr.HTML(label="List of Recruiting Trials for Country, Sites")
+
+ ## clear output ?
+ clear_cn_btn.add(summary_block_cond_s)
+ clear_cn_btn.add(condition_output_s)
+ clear_cn_btn.add(country_site)
+ clear_cn_btn.add(site_cond)
+
+
+
+
+############################################################################# TIMELINES #############################################################################
+
+ # with gr.TabItem("Timeline"):
+##############################################################
+ # with gr.Row():
+####################################################################################################################################################
+
+ # gr.HTML('''
+ # Timelines for 'Now Recruiting' Trials:
+ # 1. Select a Sponsor and click 'Show Timelines'.
+ # 2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.
+ # ''')
+
+
+
+###########################################################################################
+ # with gr.Row():
+
+ # with gr.Column():
+ ##########################################################################################################################################
+ # sponsor_input_cont = gr.Dropdown(
+############################################################################
+ # choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
+ # "CSL Behring", "Daiichi Sankyo, Inc.",\
+ # "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
+ # "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
+ # "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
+ # label="Select a Sponsor"
+ # )
+###############################################################################################################################################################
+ # with gr.Column():
+ # condition_input_cont= gr.Dropdown(
+
+ # choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
+ # "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
+ # "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
+ # "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
+ # "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
+ # "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
+ ## "Urothelial Carcinoma",\
+ # "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
+ # "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\
+ # " Major","Metabolic", "Generalized Pustular Psoriasis",\
+ # "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
+ # "Liver Cirrhosis", \
+ # "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \
+ # "Psychological Trauma","Renal", "Respiratory",\
+ # "Schizophrenia", "PTSD", \
+ # "Venous Thromboembolism", "Wet"],
+ # label="Filter by a Condition")
+
+###############################################################################################################################################################
+ # with gr.Column():
+ # country_input_trt = gr.Dropdown(
+ # choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\
+ # "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\
+ # "Malaysia","Mexico","Netherlands", \
+ # "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\
+ # "United Kingdom"\
+ # ],
+ # label="Filter by a Country")
+
+###########################################################################################
+ # with gr.Row():
+ # condition_button_t = gr.Button("Show Timelines")
+ # Then, create the clear button and add the dropdown input to it
+ # clear_cn_btn = gr.ClearButton()
+ # clear_cn_btn.add(condition_input_cont)
+ # clear_cn_btn.add(sponsor_input_cont)
+
+ # clear_cn_btn.add(country_input_trt)
+###########################################################################################
+ # with gr.Row():
+ ###############################################################################################################################################################
+ # with gr.Column():
+ # academia_input_cont = gr.Textbox(lines=1, label="Type a Sponsor Name:")
+ # clear_cn_btn.add(academia_input_cont)
+ ###############################################################################################################################################################
+ #with gr.Column():
+ # condition_input_typet = gr.Textbox(lines=1, label="Filter by typing a Condition:")
+ # clear_cn_btn.add(condition_input_typet)
+
+
+
+
+ ##################################################################################################################################
+###########################################################################################
+ # with gr.Row():
+ # summary_block_condt = gr.HTML(label="Countries with Recruiting Clinical Trials:" )
+ ###########################################################################################
+ # with gr.Row():
+ # bubble_map_trial = gr.Plot()
+###########################################################################################
+ # with gr.Row():
+ # condition_outputt = gr.HTML(label="List of Recruiting Trials")
+ # condition_output = gr.Textbox(label="List of Recruiting Trials")
+ ## clear output ?
+ # clear_cn_btn.add(summary_block_condt)
+ # clear_cn_btn.add(bubble_map_trial)
+ # clear_cn_btn.add(condition_outputt)
+
+ ############################################################ Eligibility ##############
+
+
+
+ with gr.TabItem("Eligibility"):
+
+####################################################################################################################################################
+ with gr.Row():
+ gr.HTML('''
+ Eligibility Crietria for a Trial:
+ 1. Type a single Trial's NCT Id,For Example: NCT05512377 or NCT04924075 or NCT04419506 etc. and click 'Show Eligibility'.
+ 2. Inclusion and Exclusion Criteria for that single Trial are displayed with the Diseases, Diagnostic Procedures and Medications highlighted.
+ 3. Wait time approximately 30 seconds for the model to run and highlight eligibility text.
+ ''')
+
+
+
+ with gr.Row():
+
+ #nctID_input = gr.inputs.Textbox(lines=1, label="Type Trial NctId:")
+ nctID_input = gr.Textbox(lines=1, label="Type a Trial NCT Id: ")
+ trial_button = gr.Button("Show Eligibility")
+ #Then, create the clear button and add the dropdown input to it
+ clear_tn_btn = gr.ClearButton()
+ clear_tn_btn.add(nctID_input )
+
+ # with gr.Row():
+ # with gr.Column():
+ # formatted_inclusions_output = gr.HTML(label="Inclusions")
+ # with gr.Column():
+ # formatted_exclusions_output = gr.HTML(label="Exclusions")
+
+ ################################################################################################################################
+ ###############################################################################################
+ with gr.Row():
+ trial_output = gr.HTML(label="Detail of Recruiting Trials")
+################################################
+ with gr.Row():
+ # with gr.Column():
+ eligibilities_plot = gr.Plot()
+ # with gr.Column():
+ # concept_exclusion= gr.HighlightedText(label="Display of Exclusion Concepts")
+
+ clear_tn_btn.add(trial_output)
+ # clear_tn_btn.add(formatted_inclusions_output)
+ # clear_tn_btn.add(formatted_exclusions_output)
+ clear_tn_btn.add(eligibilities_plot)
+ # clear_tn_btn.add(concept_exclusion)
+
+
+ ##############################################################################################################################################
+
+
+
+
+
+ ################################ EVENT BUTTONS at GRADIO ################################################################################################################################
+ # Each active .click() below binds a button to a view function: the `inputs` components supply its arguments and its return values fill the `outputs` components in order.
+ ## Sponsors (disabled)
+ #sponsor_button.click(disease_view, inputs=[disease_input,disease_input_text, sponsor_input, academia_input], outputs=[summary_block,summary_block_collbs,\
+ # sponsor_button.click(disease_view, inputs=[disease_input, sponsor_input], outputs=[summary_block,summary_block_collbs,\
+ # output_block_conditions,output_block_conditions_collbs,\
+ #condition_others,\
+ #condition_sunbursts,
+ # sponsor_trees\
+ # ,collaborator_trees\
+ # ])
+
+ ## Conditions
+ # s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc,s_disease_input_type_phc, s_sponsor_input_phc,s_academia_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\
+ s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc, s_sponsor_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\
+ tree_map_cond_nct,sunburst_map_cond_nct])
+
+
+
+ ## Trials (all variants disabled)
+
+ # s_button_phs.click(disease_view_phs, inputs=[s_disease_input_phs,s_disease_input_type_phs, s_sponsor_input_phs,s_academia_input_phs], outputs=[summary_block_phs, output_block_conditions_phs,\
+ # nct_org_map])
+
+
+ #s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\
+ # tree_map_cond_nct_n, nct_org_map_n,trial_plot])
+
+ # s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\
+ # tree_map_cond_nct_n, trial_plot])
+
+
+
+
+
+ # s_button_phs_c.click(disease_view_phs_c, inputs=[s_disease_input_phs_c,s_disease_input_type_phs_c, s_sponsor_input_phs_c,s_academia_input_phs_c], outputs=[summary_block_phs_c, output_block_conditions_phs_c,\
+ # tree_map_cond_nct_c, nct_org_map_c,trial_plot_c, time_plot_c])
+
+ ### Drugs
+
+ s_drug_button.click(drug_view, inputs=[s_disease_input, s_disease_input_type, s_sponsor_input, s_academia_input], outputs=[drug_summary_block,drug_output_block_conditions, sankey_map_drug ])
+
+
+ ## Country (disabled)
+ # condition_button.click(condition_view, inputs=[condition_input_con, country_input_tr,condition_input_type, sponsor_input_con, academia_input_con], outputs=[summary_block_cond,condition_output,trial_countries])
+
+ ## Site
+
+ condition_button_s.click(condition_view_s, inputs=[condition_input_s, country_input_s, condition_input_site,sponsor_input_con_s, academia_input_con_s], \
+ outputs=[summary_block_cond_s,condition_output_s, site_cond,country_site])
+
+ ## Timelines -- NOTE(review): the only visible definitions of condition_button_t and the other widgets used here sit in the commented-out "TIMELINES" tab; confirm they are defined elsewhere, otherwise this call raises NameError
+
+ condition_button_t.click(condition_viewt, inputs=[condition_input_cont, country_input_trt,condition_input_typet, sponsor_input_cont, academia_input_cont], outputs=[summary_block_condt,condition_outputt,bubble_map_trial])
+
+ ## Eligibility (the trial_view_map wiring at the end of this section is disabled)
+
+ # Test this way NCT04419506
+ # trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output, formatted_inclusions_output,formatted_exclusions_output,concept_inclusion,concept_exclusion])
+ # Test this way NCT04419506
+ # trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output,concept_inclusion,concept_exclusion])
+ trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output,eligibilities_plot])
+
+
+
+ # trial_buttons.click(trial_view_map, inputs=[nctID_inputs], outputs=[summary_block_trial_map, world_map,trial_output_map])
+
+# Start the Gradio app; share=True asks Gradio to also create a temporary public link.
+trial_app.launch(share=True)
+
+#trial_app.launch(share=True, debug = "TRUE")
+
+import requests
+import json
+
+# Define the base URL for the new API
+base_url = "https://clinicaltrials.gov/api/v2/studies"
+
+# Define the parameters for the API call
+params = {
+ "query.lead": "Boehringer Ingelheim", # Query the lead sponsor field
+ "filter.overallStatus": "RECRUITING", # Filter by the overall status
+ "fields": "protocolSection.identificationModule.nctId" # Use the correct field name for the NCTId
+}
+
+# Send the API request
+response = requests.get(base_url, params=params)
+# Check the API response status
+#print("API response status code:", response.status_code)
+# Check the API response content
+#print("API response content:", response.text)
+
+# If the status code is 200, parse the JSON response
+if response.status_code == 200:
+ data = response.json()
+ # Extract the NCTIDs from the response
+ nct_ids = [study['protocolSection']['identificationModule']['nctId'] for study in data['studies']]
+ # Create a DataFrame from the list
+ df = pd.DataFrame(nct_ids, columns=['NCTId'])
+
+ # Print the DataFrame
+ print(df)
\ No newline at end of file