diff --git "a/app.py" "b/app.py" new file mode 100644--- /dev/null +++ "b/app.py" @@ -0,0 +1,4221 @@ + +!pip install gradio +#transformers + + + +########### AGENT: Clincialtrial.gov ################################################################################################### +##Gradio App: TRIAL CONNECT +#Author: Tamer Chowdhury' Nov 2024 +#tamer.chowdhury@gmail.com +################################################################################################################################## + +import gradio as gr +from gradio import Interface +from gradio import Dropdown +import io +import re +import pandas as pd +import textwrap +from IPython.display import display +import requests +#from _plotly_utils.utils import NotEncodable +from IPython.core.display import display_markdown + + +######################### from CLAUDE########################################### +import aiohttp +import asyncio +import pandas as pd +import io +import json + +async def fetch(session, url, params): + try: + async with session.get(url, params=params) as response: + if response.status == 200: + text = await response.text() + try: + return json.loads(text) + except json.JSONDecodeError as e: + print(f"Failed to decode JSON: {text[:200]}...") + raise + else: + print(f"HTTP Error: {response.status}") + print(f"Response text: {await response.text()}") + return None + except Exception as e: + print(f"Error in fetch: {str(e)}") + return None + +async def get_nct_ids(lead_sponsor_name=None, disease_area=None, overall_status=None, + location_country=None, NCTId=None, max_records=None, blocks=30): + base_url = "https://clinicaltrials.gov/api/v2/studies" + + # Define the fields we want to retrieve (mapped to v2 API structure) + fields = [ + "protocolSection.identificationModule.nctId", + "protocolSection.identificationModule.orgStudyIdInfo", + "protocolSection.identificationModule.briefTitle", + "protocolSection.conditionsModule.conditions", + "protocolSection.designModule.phases", + "protocolSection.statusModule.overallStatus", + "protocolSection.statusModule.primaryCompletionDateStruct", + "protocolSection.designModule.enrollmentInfo", + "protocolSection.designModule.studyType", + "protocolSection.eligibilityModule.studyPopulation", + "protocolSection.contactsLocationsModule.locations", + "protocolSection.designModule.designInfo", + "protocolSection.armsInterventionsModule.armGroups", + "protocolSection.sponsorCollaboratorsModule.leadSponsor", + "protocolSection.armsInterventionsModule.interventions", + "protocolSection.outcomesModule.primaryOutcomes", + "protocolSection.statusModule.startDateStruct" + ] + + # Build base parameters + params = { + "format": "json", + "fields": ",".join(fields), + "pageSize": "1000", + "countTotal": "true" + } + + print("Constructing query...") + + # Build query parameters + if NCTId: + params["query.id"] = NCTId + else: + if disease_area: + params["query.cond"] = disease_area.replace(" ", "+") + if lead_sponsor_name: + params["query.lead"] = lead_sponsor_name.replace(" ", "+") + if location_country: + params["query.locn"] = location_country.replace(" ", "+") + if overall_status: + params["filter.overallStatus"] = overall_status.upper() + + print(f"Full parameters: {params}") + + all_studies = [] + next_page_token = None + + async with aiohttp.ClientSession() as session: + while True: + try: + if next_page_token: + params["pageToken"] = next_page_token + + response_data = await fetch(session, base_url, params) + + if not response_data or not isinstance(response_data, dict): + print(f"Invalid response data") + break + + studies = response_data.get('studies', []) + if not studies: + print("No more studies found") + break + + all_studies.extend(studies) + print(f"Retrieved {len(studies)} studies. Total so far: {len(all_studies)}") + + # Print first study details for debugging + if len(all_studies) > 0: + first_study = all_studies[0] + print("\nFirst study example:") + print(f"NCT ID: {_get_nested_value(first_study, ['protocolSection', 'identificationModule', 'nctId'])}") + print(f"Status: {_get_nested_value(first_study, ['protocolSection', 'statusModule', 'overallStatus'])}") + + next_page_token = response_data.get('nextPageToken') + if not next_page_token or (max_records and len(all_studies) >= max_records): + break + + except Exception as e: + print(f"Error processing page: {str(e)}") + break + + # Convert all studies to the required format + recruiting_trials_list = [] + + for study in all_studies: + try: + # Status filtering is now handled by the API, so we don't need to filter here + trial_info = { + 'NCTId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'nctId']), + 'Phase': _get_first_item(study, ['protocolSection', 'designModule', 'phases']), + 'OrgStudyId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'orgStudyIdInfo', 'id']), + 'Status': _get_nested_value(study, ['protocolSection', 'statusModule', 'overallStatus']), + 'Condition': '|'.join(_get_nested_value(study, ['protocolSection', 'conditionsModule', 'conditions'], [])), + 'CompletionDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'primaryCompletionDateStruct', 'date']), + 'EnrollmentCount': _get_nested_value(study, ['protocolSection', 'designModule', 'enrollmentInfo', 'count']), + 'StudyType': _get_nested_value(study, ['protocolSection', 'designModule', 'studyType']), + 'Arm': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'armGroups'], 'label'), + 'Drug': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'interventions'], 'name'), + 'Country': _get_location_info(study, 'country'), + 'City': _get_location_info(study, 'city'), + 'Site': _get_location_info(study, 'facility'), + 'StudyPopulation': _get_nested_value(study, ['protocolSection', 'eligibilityModule', 'studyPopulation']), + 'Sponsor': _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'leadSponsor', 'name']), + 'Collaborator': _get_collaborators(study), + 'StartDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'startDateStruct', 'date']), + 'PrimaryMeasure': _get_first_item(study, ['protocolSection', 'outcomesModule', 'primaryOutcomes'], 'measure'), + 'Purpose': _get_nested_value(study, ['protocolSection', 'designModule', 'designInfo', 'primaryPurpose']), + 'BriefTitle': _get_nested_value(study, ['protocolSection', 'identificationModule', 'briefTitle']) + } + recruiting_trials_list.append(trial_info) + + except Exception as e: + print(f"Error processing study: {str(e)}") + continue + + print(f"Total studies processed: {len(recruiting_trials_list)}") + return recruiting_trials_list + +# Helper functions remain the same +def _get_nested_value(obj, path, default=None): + try: + current = obj + for key in path: + if current is None: + return default + current = current.get(key) + return current if current is not None else default + except (KeyError, TypeError, AttributeError): + return default + +def _get_first_item(obj, path, field=None): + try: + items = _get_nested_value(obj, path, []) + if items and isinstance(items, list): + if field: + return items[0].get(field) + return items[0] + return None + except (IndexError, AttributeError): + return None + +def _get_location_info(study, info_type): + try: + locations = _get_nested_value(study, ['protocolSection', 'contactsLocationsModule', 'locations'], []) + if info_type == 'facility': + values = [loc.get('facility', '') for loc in locations if loc.get('facility')] + else: + values = [loc.get(info_type, '') for loc in locations if loc.get(info_type)] + return '|'.join(filter(None, values)) + except Exception: + return None + +def _get_collaborators(study): + try: + collaborators = _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'collaborators'], []) + return '|'.join(collab.get('name', '') for collab in collaborators if collab.get('name')) + except Exception: + return None +########### Clinical Trials. gov API for study fileds with Recruiting Trials Only ################################### + +################# FROM CLAUDE API FOR ELIGIBILITY############################### +import requests +import re + +def get_formatted_inclusion_criteria(nct_id): + """ + Get and format inclusion criteria for a clinical trial using ClinicalTrials.gov API v2 + + Args: + nct_id (str): The NCT ID of the trial + + Returns: + str: Formatted inclusion criteria as a numbered list, or None if not found + """ + # V2 API endpoint + base_url = "https://clinicaltrials.gov/api/v2/studies" + + # Parameters for the API request + params = { + "format": "json", + "fields": "protocolSection.eligibilityModule.eligibilityCriteria", + "query.id": nct_id + } + + try: + # Send the API request + response = requests.get(base_url, params=params) + response.raise_for_status() # Raise an exception for bad status codes + data = response.json() + + # Extract the eligibility criteria text from the v2 API response + if not data.get('studies') or len(data['studies']) == 0: + print(f"No data found for Trial NCT ID: {nct_id}") + return None + + eligibility_criteria = data['studies'][0]['protocolSection']['eligibilityModule']['eligibilityCriteria'] + + # Split at "Exclusion Criteria" to get only inclusion criteria + # Using a more robust splitting approach + inclusion_criteria = re.split(r'\b(?:Exclusion\s+Criteria:?)\b', eligibility_criteria, flags=re.IGNORECASE)[0].strip() + + # Split the inclusion criteria into a list by line breaks + # Handle different types of line breaks + inclusions = re.split(r'\r?\n+', inclusion_criteria) + + # Clean up the inclusions: + # 1. Remove "Inclusion criteria" header + # 2. Remove empty lines + # 3. Remove lines that are just whitespace or punctuation + cleaned_inclusions = [] + for inclusion in inclusions: + inclusion = inclusion.strip() + if (inclusion and + not re.search(r'^\s*inclusion\s+criteria:?\s*$', inclusion, flags=re.IGNORECASE) and + not re.search(r'^\s*[-•*]\s*$', inclusion)): + + # Remove bullet points and dashes at the start of lines + inclusion = re.sub(r'^\s*[-•*]\s*', '', inclusion) + + # Add to cleaned list if not empty after cleanup + if inclusion: + cleaned_inclusions.append(inclusion) + + # Format the list as a numbered list with periods + formatted_inclusions = [] + for i, inclusion in enumerate(cleaned_inclusions, 1): + # Ensure the line ends with a period + if not inclusion.endswith('.'): + inclusion = inclusion + '.' + formatted_inclusions.append(f"{i}. {inclusion}") + + # Join the list into a single string + return "\n".join(formatted_inclusions) + + except requests.exceptions.RequestException as e: + print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}") + return None + except (IndexError, KeyError) as e: + print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}") + return None + except Exception as e: + print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}") + return None + + + +######################################################################################################### + + +## ############################API For Exclusions################################################################################################################################################### +def get_formatted_exclusion_criteria(nct_id): + """ + Get and format exclusion criteria for a clinical trial using ClinicalTrials.gov API v2 + + Args: + nct_id (str): The NCT ID of the trial + + Returns: + str: Formatted exclusion criteria as a numbered list, or None if not found + """ + # V2 API endpoint + base_url = "https://clinicaltrials.gov/api/v2/studies" + + # Parameters for the API request + params = { + "format": "json", + "fields": "protocolSection.eligibilityModule.eligibilityCriteria", + "query.id": nct_id + } + + try: + # Send the API request + response = requests.get(base_url, params=params) + response.raise_for_status() # Raise an exception for bad status codes + data = response.json() + + # Extract the eligibility criteria text from the v2 API response + if not data.get('studies') or len(data['studies']) == 0: + print(f"No data found for Trial NCT ID: {nct_id}") + return None + + eligibility_criteria = data['studies'][0]['protocolSection']['eligibilityModule']['eligibilityCriteria'] + + # Split at "Exclusion Criteria" to get only exclusion criteria + try: + exclusion_criteria = re.split(r'\b(?:Exclusion\s+Criteria:?)\b', eligibility_criteria, flags=re.IGNORECASE)[1].strip() + except IndexError: + # Try alternative patterns if the first one doesn't work + try: + exclusion_criteria = re.split(r'(?i)(?:^|\n)\s*exclusion criteria\s*[:|-]?', eligibility_criteria)[1].strip() + except IndexError: + print(f"Could not find exclusion criteria section for Trial NCT ID: {nct_id}") + return None + + # Split the exclusion criteria into a list by line breaks + # Handle different types of line breaks + exclusions = re.split(r'\r?\n+', exclusion_criteria) + + # Clean up the exclusions: + # 1. Remove empty lines + # 2. Remove lines that are just whitespace or punctuation + # 3. Clean up formatting + cleaned_exclusions = [] + for exclusion in exclusions: + exclusion = exclusion.strip() + if (exclusion and + not re.search(r'^\s*$', exclusion) and # Skip empty lines + not re.search(r'^\s*[-•*]\s*$', exclusion)): # Skip lines with just bullets + + # Remove bullet points and dashes at the start of lines + exclusion = re.sub(r'^\s*[-•*]\s*', '', exclusion) + + # Add to cleaned list if not empty after cleanup + if exclusion: + cleaned_exclusions.append(exclusion) + + # Format the list as a numbered list with periods + formatted_exclusions = [] + for i, exclusion in enumerate(cleaned_exclusions, 1): + # Ensure the line ends with a period + if not exclusion.endswith('.'): + exclusion = exclusion + '.' + formatted_exclusions.append(f"{i}. {exclusion}") + + # Join the list into a single string + return "\n".join(formatted_exclusions) + + except requests.exceptions.RequestException as e: + print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}") + return None + except (IndexError, KeyError) as e: + print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}") + return None + except Exception as e: + print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}") + return None + + + + + +################################# Apply CSS Style to HTML Table ############################################################################################################## + +def dataframe_to_html_table(df): + custom_css = """ + + """ + return custom_css + '
' + df.to_html(classes="table table-striped", index=False, border=0) + '
' + +################################################################################################################################## +def format_summary_stats(summary): + formatted_html = f""" + +
{summary}
+ """ + return formatted_html +############################ End of Style ############################################################################################# + + +############### Functions to Process the Dataframes of Disease, Conditions, Trial Details#################################### + + + +# parse the conditions +import re +import pandas as pd +####################################################################################################### +def split_conditions(df, column_to_split): + # Create a list to store the new rows + new_rows = [] + + # Iterate through each row in the original dataframe + for index, row in df.iterrows(): + # Split the column value by comma or pipe and create a new row for each unique condition + for condition in re.split(',|\|', str(row[column_to_split])): + new_row = {col: row[col] if col != column_to_split else condition.strip() for col in df.columns} + new_rows.append(new_row) + + # Create a new dataframe from the list of new rows + temp_df = pd.DataFrame(new_rows) + + return temp_df + +######################################################################################################################### +def split_drug(df, column_to_split): + # Create a list to store the new rows + new_rows = [] + + # Iterate through each row in the original dataframe + for index, row in df.iterrows(): + # Split the column value by comma or pipe and create a new row for each unique condition + for drug in re.split(',|\|', str(row[column_to_split])): + new_row = {col: row[col] if col != column_to_split else drug.strip() for col in df.columns} + new_rows.append(new_row) + + # Create a new dataframe from the list of new rows + temp_df = pd.DataFrame(new_rows) + + return temp_df + +############################################################################################################################################ +############################################################################################# + +def split_columns(df, columns_to_split): + # Create a list to store the new dataframes + new_dfs = [] + + # Iterate through each row in the original dataframe + for index, row in df.iterrows(): + # Create a list of dictionaries to store the split values + split_rows = [] + # Find the maximum number of pipe-separated values in the columns to split + max_splits = max([len(str(row[col]).split('|')) for col in columns_to_split]) + # Iterate through the number of splits + for i in range(max_splits): + # Create a dictionary to store the split values for each column + split_row = {} + # Iterate through the columns to split + for col in columns_to_split: + # Split the column value and store the ith value if it exists, otherwise store None + split_row[col] = str(row[col]).split('|')[i] if i < len(str(row[col]).split('|')) else None + # Add the non-split columns to the dictionary + for col in df.columns: + if col not in columns_to_split: + split_row[col] = row[col] + # Append the dictionary to the list of dictionaries + split_rows.append(split_row) + # Convert the list of dictionaries to a dataframe and append it to the list of new dataframes + new_dfs.append(pd.DataFrame(split_rows)) + + # Concatenate all the new dataframes + temp_df = pd.concat(new_dfs, ignore_index=True) + + # Reorder the columns in the temporary dataframe + temp_df = temp_df[df.columns] + + return temp_df + + +################## INTERVENTIONAL, OBSERVATIONAL Trials Lead Sponsor Counts################################################## +def calculate_summary_stats(df, sponsor): + study_types = ["INTERVENTIONAL", "OBSERVATIONAL"] + summary_stats = [] + + sponsor_name = sponsor if sponsor else "All Lead Sponsors" + + for study_type in study_types: + df_study_type = df[df['StudyType'] == study_type].copy() + + + # Convert the 'EnrollmentCount' column to numeric + + df_study_type['EnrollmentCount'] = pd.to_numeric(df_study_type['EnrollmentCount'], errors='coerce') + + + num_trials = len(df_study_type['NCTId'].unique()) + + unique_conditions = df_study_type['Condition'].unique() + num_conditions = len([condition for condition in unique_conditions if condition != 'Healthy' and condition != 'Adult']) + + total_patients = df_study_type.groupby('NCTId')['EnrollmentCount'].first().sum() + formatted_total_patients = format(int(total_patients), ',') + + + summary_stats.append(f"{num_trials} {study_type} Trials, \ + {num_conditions} Conditions, \ + {formatted_total_patients} Planned Patients.") + #return f"{sponsor_name} - As Lead Sponsor Recruiting For:
" + "
".join(summary_stats) + return f"{sponsor_name} - As Lead Sponsor:
" + "
".join(summary_stats) + +############################################################################################################################################ + +def calculate_summary_stats_collb(df, sponsor): + study_types = ["INTERVENTIONAL", "OBSERVATIONAL"] + summary_stats = [] + + sponsor_name = sponsor if sponsor else "All Collaborators" + + for study_type in study_types: + df_study_type = df[df['StudyType'] == study_type].copy() + + + # Convert the 'EnrollmentCount' column to numeric + + df_study_type['EnrollmentCount'] = pd.to_numeric(df_study_type['EnrollmentCount'], errors='coerce') + + + num_trials = len(df_study_type['NCTId'].unique()) + + unique_conditions = df_study_type['Condition'].unique() + num_conditions = len([condition for condition in unique_conditions if condition != 'Healthy' and condition != 'Adult']) + + total_patients = df_study_type.groupby('NCTId')['EnrollmentCount'].first().sum() + formatted_total_patients = format(int(total_patients), ',') + + + summary_stats.append(f"{num_trials} {study_type} Trials, \ + {num_conditions} Conditions, \ + {formatted_total_patients} Planned Patients.") + return f"{sponsor_name} - With Collaborators Recruiting For:
" + "
".join(summary_stats) + + +################################################################################################################## + +def calculate_summary_stats_sites(df, sponsor, country): + + + #Filter the data frame by the country if a country is provided + if country: + df = df[df['Country'] == country] + + num_trials = len(df['NCTId'].unique()) + + # Group the data frame by NCTId and StudyType in the desired order + grouped_df = df.groupby(['NCTId', 'StudyType']).first().reset_index() + + # Convert the 'EnrollmentCount' column to numeric + grouped_df['EnrollmentCount'] = pd.to_numeric(grouped_df['EnrollmentCount'], errors='coerce') + + # Count the number of unique NCTIds for each StudyType + INTERVENTIONAL_count = len(grouped_df[grouped_df['StudyType'] == 'INTERVENTIONAL']['NCTId'].unique()) + OBSERVATIONAL_count = len(grouped_df[grouped_df['StudyType'] == 'OBSERVATIONAL']['NCTId'].unique()) + + # Count the number of unique countries for each StudyType + INTERVENTIONAL_countries = df[df['StudyType'] == 'INTERVENTIONAL']['Country'].nunique() + OBSERVATIONAL_countries = df[df['StudyType'] == 'OBSERVATIONAL']['Country'].nunique() + + # Count the number of unique sites for each StudyType, grouped by Country, City, and Site + INTERVENTIONAL_grouped = df[df['StudyType'] == 'INTERVENTIONAL'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0] + OBSERVATIONAL_grouped = df[df['StudyType'] == 'OBSERVATIONAL'].groupby(['Country', 'City', 'Site'])['NCTId'].nunique().reset_index().shape[0] + + # Calculate the sum of enrollment counts for each StudyType + INTERVENTIONAL_patients = int(grouped_df[grouped_df['StudyType'] == 'INTERVENTIONAL']['EnrollmentCount'].sum()) + OBSERVATIONAL_patients = int(grouped_df[grouped_df['StudyType'] == 'OBSERVATIONAL']['EnrollmentCount'].sum()) + + formatted_INTERVENTIONAL_patients = format(INTERVENTIONAL_patients, ',') + formatted_OBSERVATIONAL_patients = format(OBSERVATIONAL_patients, ',') + + sponsor_name = sponsor if sponsor else "All Sponsors" + country_name = country if country else "All Countries" + + return f"{sponsor_name}
{INTERVENTIONAL_count} INTERVENTIONAL Trials, in {INTERVENTIONAL_countries} Country, at {INTERVENTIONAL_grouped} Sites, \ + Recruiting: {formatted_INTERVENTIONAL_patients} Planned Patients.
\ + {OBSERVATIONAL_count} OBSERVATIONAL Trials, in {OBSERVATIONAL_countries} Country, at {OBSERVATIONAL_grouped} Sites" + + #{OBSERVATIONAL_count} OBSERVATIONAL Trials, in {OBSERVATIONAL_countries} Country, at {OBSERVATIONAL_grouped} Sites, Recruiting: {formatted_OBSERVATIONAL_patients} Planned Patients." + + +################################################ GRADIO STARTS HERE ######################################################### + +#Wrapper Function called from Interfce to get input , output +async def gradio_wrapper_nct(sponsor=None, condition=None, NCTId=None, country=None, status = None): + # Check if disease, condition, sponsor, and NCTId are provided + + if condition and sponsor: + recruiting_trials = await get_nct_ids(disease_area=condition, lead_sponsor_name=sponsor, overall_status = status) + elif condition: + recruiting_trials = await get_nct_ids(disease_area=condition, overall_status = status) + elif sponsor: + recruiting_trials = await get_nct_ids(lead_sponsor_name=sponsor, overall_status = status) + elif NCTId: + recruiting_trials = await get_nct_ids(NCTId=NCTId, overall_status = status) + # print("Recruiting trials for NCTId:", recruiting_trials) # Add this line to debug + else: + return pd.DataFrame(), "No condition, sponsor, or trial NCT Id provided" + + trial_info_list = [] + for trial in recruiting_trials: + trial_info = {'Sponsor': trial['Sponsor'], + 'Collaborator': trial['Collaborator'], + 'Status': trial['Status'], + 'Drug': trial['Drug'], + 'StudyType': trial['StudyType'], + 'Phase': trial['Phase'], + 'Site': trial['Site'], + 'Country': trial['Country'], + 'City': trial['City'], + 'NCTId': trial['NCTId'], + 'OrgStudyId': trial['OrgStudyId'], + 'Condition': trial['Condition'], + 'StartDate': trial['StartDate'], + 'CompletionDate': trial['CompletionDate'], + 'EnrollmentCount': trial['EnrollmentCount'], + 'PrimaryMeasure': trial['PrimaryMeasure'], + 'Purpose': trial['Purpose'], + 'Arm': trial['Arm'], + 'BriefTitle': trial['BriefTitle']} + trial_info_list.append(trial_info) + + + # Check if trial_info_list is empty + if not trial_info_list: + return None, None, None, None, None, None + + + + import pandas as pd + + clinical_trials_gov = pd.DataFrame(trial_info_list, columns=[ 'NCTId','OrgStudyId','Status','BriefTitle','Condition', 'Drug','Phase','StudyType','StartDate', 'CompletionDate','EnrollmentCount', 'Arm','Purpose', 'PrimaryMeasure', \ + 'Sponsor','Collaborator']) + + ## Take care of NaN + clinical_trials_gov.fillna("Not Available", inplace=True) + + + clinical_trials_gov = clinical_trials_gov.sort_values(by=[ 'StudyType', 'Phase' , 'CompletionDate','EnrollmentCount'], ascending=[ True, False,True,False]) + + # Convert the DataFrame to an HTML table + html_table = dataframe_to_html_table(clinical_trials_gov) + + + # now move to include country + + #clinical_trials_gov_add = pd.DataFrame(trial_info_list, columns=[ 'StudyType','Phase','NCTId', 'Site','Country','City','Zip','Condition','Sponsor','Collaborator','Drug','StartDate', 'CompletionDate','EnrollmentCount']) + clinical_trials_gov_add = pd.DataFrame(trial_info_list, columns=[ 'StudyType','Phase','NCTId','OrgStudyId','Status', 'BriefTitle', 'Site','Country','City','Condition','Sponsor','Collaborator','Drug','StartDate', 'CompletionDate','EnrollmentCount']) + + ## Address NaN + clinical_trials_gov_add.fillna("Not Available", inplace=True) + + clinical_trials_gov_add = clinical_trials_gov_add.sort_values(by=[ 'StudyType', 'Phase' , 'EnrollmentCount','CompletionDate', 'Country'], ascending=[ True, False,False,True,True]) + + # print("Preparing dataframe before split") + + # Create a DataFrame for conditions + clinical_trials_gov_conditions = pd.DataFrame(trial_info_list, columns=['NCTId', 'BriefTitle','OrgStudyId','Phase','Status','StudyType','Condition', 'Drug','StartDate', 'CompletionDate','EnrollmentCount','Sponsor', 'Collaborator']) + # Split the 'Condition' column in clinical_trials_gov_conditions + clinical_trials_gov_conditions = split_conditions(clinical_trials_gov_conditions, 'Condition') + # print("Dataframe after condition split") + #address NaN + clinical_trials_gov_conditions.fillna("Not Available", inplace=True) + + # Create a DataFrame for drugs + clinical_trials_gov_drugs = pd.DataFrame(trial_info_list, columns=['Status','NCTId', 'BriefTitle','OrgStudyId','Status','Phase','StudyType','Condition', 'Drug','StartDate', 'CompletionDate','EnrollmentCount','Sponsor', 'Collaborator']) + # Split the 'Drug' column in clinical_trials_gov_conditions + clinical_trials_gov_drugs = split_conditions(clinical_trials_gov_drugs, 'Drug') + + clinical_trials_gov_drugs.fillna("Not Available", inplace=True) + + +############################################## ########################################################################################## + # Filter and sort the conditions DataFrame + # Add the filtering condition for Sponsor and Collaborator + # Add the filtering condition for Sponsor and Collaborator + if sponsor: + df1 = clinical_trials_gov_conditions[clinical_trials_gov_conditions['Sponsor'] == sponsor] + + df1_1 = df1[(df1['Collaborator'] != 'Not Available') & + (~df1['Collaborator'].isnull())] + + df2 = clinical_trials_gov_conditions[clinical_trials_gov_conditions['Collaborator'] == sponsor] + ### Now move primary sponsors to collaborators: ( that is non BI sponsor to replace BI as now collaborator) + df2['Collaborator'] = df2['Sponsor'] + + + + df3 = clinical_trials_gov_conditions[clinical_trials_gov_conditions['Collaborator'].str.contains(f'(^|\|){sponsor}(\||$)', na=False, flags=re.IGNORECASE, regex=True)] + df3 = df3[df3['Collaborator'] != sponsor] + #print(df3) + ## Now add sponsors to collaborators + df3['Collaborator'] = df3['Sponsor'] + '|' + df3['Collaborator'] + #df3['Collaborator'] = df3['Sponsor'] + + clinical_trials_gov_conditions = df1 + clinical_trials_gov_conditions_collb = pd.concat([df1_1,df2, df3], ignore_index=True) + + + + if sponsor: + + df1 = clinical_trials_gov_drugs[clinical_trials_gov_drugs['Sponsor'] == sponsor] + df1_1 = df1[(df1['Collaborator'] != 'Not Available') & + (~df1['Collaborator'].isnull())] + ########################################################################################################## + df2 = clinical_trials_gov_drugs[clinical_trials_gov_drugs['Collaborator'] == sponsor] + ### Now copy sponsors to collaborators: + df2['Collaborator'] = df2['Sponsor'] + ########################################################################################################## + + df3 = clinical_trials_gov_drugs[clinical_trials_gov_drugs['Collaborator'].str.contains(f'(^|\|){sponsor}(\||$)', na=False, flags=re.IGNORECASE, regex=True)] + df3 = df3[df3['Collaborator'] != sponsor] + + + ## Now add sponsors to collaborators + df3['Collaborator'] = df3['Sponsor'] + '|' + df3['Collaborator'] + #df3['Collaborator'] = df3['Sponsor'] + + clinical_trials_gov_drugs = df1 + clinical_trials_gov_drugs_collb = pd.concat([df1_1,df2, df3], ignore_index=True) + + + + ############################################################################################################################################### + + + # Convert the filtered DataFrame to an HTML table + #html_table_add = dataframe_to_html_table(country_site_city_df) + + # Convert the DataFrame to an HTML table + html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions) + # Convert the DataFrame to an HTML table + html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs) + + if sponsor: + # Convert the DataFrame to an HTML table + html_table_conditions_collb = dataframe_to_html_table(clinical_trials_gov_conditions_collb) + # Convert the DataFrame to an HTML table + html_table_drugs_collb = dataframe_to_html_table(clinical_trials_gov_drugs_collb) + else: + empty_df = pd.DataFrame() + html_table_conditions_collb = empty_df.to_html(index=False, header=True, border=0, table_id="empty_table") + html_table_drugs_collb = empty_df.to_html(index=False, header=True, border=0, table_id="empty_table") + +######################################################################################################################################## + # Calculate the summary statistics + + # print("Calcualting Stats") + + summary_stats_pre = calculate_summary_stats(clinical_trials_gov_conditions, sponsor) + summary_stats = format_summary_stats(summary_stats_pre) + + if sponsor: + + summary_stats_pre_collb = calculate_summary_stats_collb(clinical_trials_gov_conditions_collb, sponsor) + summary_stats_collb = format_summary_stats(summary_stats_pre_collb) + + else: + summary_stats_collb ='' + # Calculate Site Summary + #summary_stats_sites_pre = calculate_summary_stats_sites(country_site_city_df, sponsor, country) + #summary_stats_sites = format_summary_stats(summary_stats_sites_pre) + + # print("Done Stats") + + return summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs + + + +#Wrapper Function called from Interfce to get input , output +async def gradio_wrapper_nct_spn(sponsor=None, condition=None, NCTId=None, country=None, status = None): + # Check if disease, condition, sponsor, and NCTId are provided + + if condition and sponsor: + recruiting_trials = await get_nct_ids(disease_area=condition, lead_sponsor_name=sponsor, overall_status= status) + elif condition: + recruiting_trials = await get_nct_ids(disease_area=condition, overall_status= status) + elif sponsor: + recruiting_trials = await get_nct_ids(lead_sponsor_name=sponsor, overall_status= status) + elif NCTId: + recruiting_trials = await get_nct_ids(NCTId=NCTId, overall_status = status) + # print("Recruiting trials for NCTId:", recruiting_trials) # Add this line to debug + else: + return pd.DataFrame(), "No condition, sponsor, or trial NCT Id provided" + + trial_info_list = [] + for trial in recruiting_trials: + trial_info = {'Sponsor': trial['Sponsor'], + 'Collaborator': trial['Collaborator'], + 'Drug': trial['Drug'], + 'StudyType': trial['StudyType'], + 'Phase': trial['Phase'], + 'Status': trial['Status'], + 'Site': trial['Site'], + 'Country': trial['Country'], + 'City': trial['City'], + # 'Zip': trial['Zip'], + 'NCTId': trial['NCTId'], + 'OrgStudyId': trial['OrgStudyId'], + 'Condition': trial['Condition'], + 'StartDate': trial['StartDate'], + 'CompletionDate': trial['CompletionDate'], + 'EnrollmentCount': trial['EnrollmentCount'], + 'PrimaryMeasure': trial['PrimaryMeasure'], + 'Purpose': trial['Purpose'], + 'Arm': trial['Arm'], + 'BriefTitle': trial['BriefTitle']} + trial_info_list.append(trial_info) + + + # Check if trial_info_list is empty + if not trial_info_list: + return None, None, None, None, None, None + + + import pandas as pd + + clinical_trials_gov = pd.DataFrame(trial_info_list, columns=[ 'NCTId','OrgStudyId','Status','BriefTitle','Condition', 'Drug','Phase','StudyType','StartDate', 'CompletionDate','EnrollmentCount', 'Arm','Purpose', 'PrimaryMeasure', \ + 'Sponsor','Collaborator']) + + ## Take care of NaN + clinical_trials_gov.fillna("Not Available", inplace=True) + + + clinical_trials_gov = clinical_trials_gov.sort_values(by=[ 'StudyType', 'Phase' , 'CompletionDate','EnrollmentCount'], ascending=[ True, False,True,False]) + + # Convert the DataFrame to an HTML table + html_table = dataframe_to_html_table(clinical_trials_gov) + + + # now move to include country + + #clinical_trials_gov_add = pd.DataFrame(trial_info_list, columns=[ 'StudyType','Phase','NCTId', 'Site','Country','City','Zip','Condition','Sponsor','Collaborator','Drug','StartDate', 'CompletionDate','EnrollmentCount']) + clinical_trials_gov_add = pd.DataFrame(trial_info_list, columns=[ 'StudyType','Phase','NCTId','OrgStudyId','Status', 'BriefTitle','Site','Country','City','Condition','Sponsor','Collaborator','Drug','StartDate', 'CompletionDate','EnrollmentCount']) + + ## Address NaN + clinical_trials_gov_add.fillna("Not Available", inplace=True) + + clinical_trials_gov_add = clinical_trials_gov_add.sort_values(by=[ 'StudyType', 'Phase' , 'EnrollmentCount','CompletionDate', 'Country'], ascending=[ True, False,False,True,True]) + + # print("Preparing dataframe before split") + + + # Create a DataFrame for conditions + clinical_trials_gov_conditions = pd.DataFrame(trial_info_list, columns=['NCTId', 'OrgStudyId','Status','BriefTitle','Phase','StudyType','Condition', 'Drug','EnrollmentCount','Sponsor', 'Collaborator']) + # Split the 'Condition' column in clinical_trials_gov_conditions + clinical_trials_gov_conditions = split_conditions(clinical_trials_gov_conditions, 'Condition') + # print("Dataframe after condition split") + #address NaN + clinical_trials_gov_conditions.fillna("Not Available", inplace=True) + + # Create a DataFrame for drugs + clinical_trials_gov_drugs = pd.DataFrame(trial_info_list, columns=['NCTId','OrgStudyId', 'Status','BriefTitle','Phase','StudyType','Condition', 'Drug','EnrollmentCount','Sponsor', 'Collaborator']) + # Split the 'Drug' column in clinical_trials_gov_conditions + clinical_trials_gov_drugs = split_conditions(clinical_trials_gov_drugs, 'Drug') + # print("Dataframe after drug split") + + # Split the 'Condition' column in clinical_trials_gov_conditions + #clinical_trials_gov_drugs = split_conditions(clinical_trials_gov_drugs, 'Condition') + + #print("Prepared dataframe after condition split on drug ? why ?") + + #address NaN + clinical_trials_gov_drugs.fillna("Not Available", inplace=True) + + # print("Preparing Country City Site split") + + columns_to_split = ['Site', 'Country', 'City'] + + #if not clinical_trials_gov_add.empty: + + country_site_city_df = split_columns(clinical_trials_gov_add, columns_to_split) + + ## Ensure no NaN after Split + country_site_city_df.fillna("Not Available", inplace=True) + + # print("Done Country City Site split") + + # Filter the modified DataFrame by country if provided + if country: + # modified_df = modified_df[modified_df['Country'] == country] + country_site_city_df = country_site_city_df[country_site_city_df['Country'] == country] + + + + + +############################################## ########################################################################################## + # Filter and sort the conditions DataFrame + # Add the filtering condition for Sponsor and Collaborator + # Add the filtering condition for Sponsor and Collaborator + if sponsor: + df1 = clinical_trials_gov_conditions[clinical_trials_gov_conditions['Sponsor'] == sponsor] + + df1_1 = df1[(df1['Collaborator'] != 'Not Available') & + (~df1['Collaborator'].isnull())] + + df2 = clinical_trials_gov_conditions[clinical_trials_gov_conditions['Collaborator'] == sponsor] + ### Now move primary sponsors to collaborators: ( that is non BI sponsor to replace BI as now collaborator) + df2['Collaborator'] = df2['Sponsor'] + + + + df3 = clinical_trials_gov_conditions[clinical_trials_gov_conditions['Collaborator'].str.contains(f'(^|\|){sponsor}(\||$)', na=False, flags=re.IGNORECASE, regex=True)] + df3 = df3[df3['Collaborator'] != sponsor] + #print(df3) + ## Now add sponsors to collaborators + df3['Collaborator'] = df3['Sponsor'] + '|' + df3['Collaborator'] + #df3['Collaborator'] = df3['Sponsor'] + + clinical_trials_gov_conditions = df1 + clinical_trials_gov_conditions_collb = pd.concat([df1_1,df2, df3], ignore_index=True) + + + + if sponsor: + + df1 = clinical_trials_gov_drugs[clinical_trials_gov_drugs['Sponsor'] == sponsor] + df1_1 = df1[(df1['Collaborator'] != 'Not Available') & + (~df1['Collaborator'].isnull())] + ########################################################################################################## + df2 = clinical_trials_gov_drugs[clinical_trials_gov_drugs['Collaborator'] == sponsor] + ### Now copy sponsors to collaborators: + df2['Collaborator'] = df2['Sponsor'] + ########################################################################################################## + + df3 = clinical_trials_gov_drugs[clinical_trials_gov_drugs['Collaborator'].str.contains(f'(^|\|){sponsor}(\||$)', na=False, flags=re.IGNORECASE, regex=True)] + df3 = df3[df3['Collaborator'] != sponsor] + + + ## Now add sponsors to collaborators + df3['Collaborator'] = df3['Sponsor'] + '|' + df3['Collaborator'] + #df3['Collaborator'] = df3['Sponsor'] + + clinical_trials_gov_drugs = df1 + clinical_trials_gov_drugs_collb = pd.concat([df1_1,df2, df3], ignore_index=True) + + + #country_site_city_df + if sponsor: + df1 = country_site_city_df[country_site_city_df['Sponsor'] == sponsor] + df1_1 = df1[(df1['Collaborator'] != 'Not Available') & (~df1['Collaborator'].isnull())] + + df2 = country_site_city_df[country_site_city_df['Collaborator'] == sponsor] + ### Now copy sponsors to collaborators: + df2['Collaborator'] = df2['Sponsor'] + + + #df3 = country_site_city_df[country_site_city_df['Collaborator'].str.contains(f'(\|)?{sponsor}(\|)?', na=False, flags=re.IGNORECASE, regex=True)] + df3 = country_site_city_df[country_site_city_df['Collaborator'].str.contains(f'(^|\|){sponsor}(\||$)', na=False, flags=re.IGNORECASE, regex=True)] + df3 = df3[df3['Collaborator'] != sponsor] + ## Now add sponsors to collaborators + #df3['Collaborator'] = df3['Sponsor'] + '|' + df3['Collaborator'] + + country_site_city_df = df1 + country_site_city_df_collb = pd.concat([ df1_1,df2, df3], ignore_index=True) + + + ##################################################################################################################################################################### + ## This only includes data for a specific sponsor and for the collaborators + + # Convert the filtered DataFrame to an HTML table + html_table_add = dataframe_to_html_table(country_site_city_df) + # Convert the DataFrame to an HTML table + html_table_conditions = dataframe_to_html_table(clinical_trials_gov_conditions) + # Convert the DataFrame to an HTML table + html_table_drugs = dataframe_to_html_table(clinical_trials_gov_drugs) + +###################################################################################################################################### + +######################################################################################################################################## + # Calculate the summary statistics + summary_stats_pre = calculate_summary_stats(clinical_trials_gov_conditions, sponsor) + summary_stats = format_summary_stats(summary_stats_pre) + + + # Calculate Site Summary + summary_stats_sites_pre = calculate_summary_stats_sites(country_site_city_df, sponsor, country) + summary_stats_sites = format_summary_stats(summary_stats_sites_pre) + + + return summary_stats, html_table_conditions, html_table, summary_stats_sites,html_table_add,html_table_drugs + + +############################################################################################################################################################### +##### ################## Start Gradio Interface ######################################################################### + +################################## Condition Icicle and Sponsor Map ######################## + +################################################################### + +import plotly.graph_objects as go +import pandas as pd +import numpy as np + +################################################ TOP 20 Conditions###################################################### + + +########################################################################################################## + + +def plot_condition_sunburst (df): + + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Convert 'Condition' names to uppercase + df['Condition'] = df['Condition'].str.upper() + + # Filter out rows where 'Condition' is "OTHER", "OTHERS", "HEALTHY", or "ADULT" + df = df[~df['Condition'].isin(["OTHER", "OTHERS", "HEALTHY", "ADULT", "CHRONIC'"])] + + # Group the data by 'Condition' and count the number of NCTId + df_count = df.groupby('Condition')['NCTId'].nunique().reset_index() + + # Sort the DataFrame by Value in descending order and reset the index + df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True) + + # Create a DataFrame for the top 30 conditions + top_30_conditions = df_count.head(20) + top_30_conditions = top_30_conditions.rename(columns={'NCTId': 'Number of Trials'}) + + # Add 'Display' column to top_30_conditions and set its value to 'TOP 30' + top_30_conditions['Display'] = 'TOP 20' + + # Create the icicle plot + icicle_fig = px.icicle(top_30_conditions, path=['Condition'], values='Number of Trials', + color='Condition', color_continuous_scale='RdBu', + custom_data=['Condition', 'Number of Trials']) + + # Customize the hovertemplate + icicle_fig.update_traces(hovertemplate='%{customdata[0]}
Number of Trials: %{customdata[1]}') + + + # Customize the icicle plot + icicle_fig.update_layout( + title='Top 20 Conditions', + font=dict(family="Arial", size=14, color='black'), + width= 400, + height= 1000, + #autosize=True, + margin=dict(t=50, l=25, r=25, b=25) + ) + + return icicle_fig + + +############################################################ Conditions OTHERS ########### ############################################ +def plot_condition_others (df): + + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Convert 'Condition' names to uppercase + df['Condition'] = df['Condition'].str.upper() + + # Filter out rows where 'Condition' is "OTHER", "OTHERS", "HEALTHY", or "ADULT" + df = df[~df['Condition'].isin(["OTHER", "OTHERS", "HEALTHY", "ADULT"])] + + # Group the data by 'Condition' and count the number of NCTId + df_count = df.groupby('Condition')['NCTId'].nunique().reset_index() + + # Sort the DataFrame by Value in descending order and reset the index + df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True) + + # Create a DataFrame for the top 30 conditions + top_30_conditions = df_count.head(20) + top_30_conditions = top_30_conditions.rename(columns={'NCTId': 'Number of Trials'}) + + # Add 'Display' column to top_30_conditions and set its value to 'TOP 30' + top_30_conditions['Display'] = 'TOP 30' + + # Create a DataFrame for the other conditions by filtering out the rows that are part of the top 30 conditions + other_conditions = df_count[~df_count['Condition'].isin(top_30_conditions['Condition'])] + + # Add 'Display' column to other_conditions and set its value to 'OTHERS' + other_conditions['Display'] = 'OTHERS' + other_conditions = other_conditions.rename(columns={'NCTId': 'Number of Trials'}) + #print( other_conditions) + + # Create the icicle plot + #icicle_fig = px.icicle( other_conditions, path=['Condition'], values='Number of Trials', + # color='Condition', color_continuous_scale='RdBu', + # hover_data=['Condition']) + + + # Create the icicle plot + icicle_fig = px.icicle(other_conditions, path=['Condition'], values='Number of Trials', + color='Condition', color_continuous_scale='RdBu', + custom_data=['Condition', 'Number of Trials']) + + # Customize the hovertemplate + icicle_fig.update_traces(hovertemplate='%{customdata[0]}
Number of Trials: %{customdata[1]}') + + # Customize the icicle plot + icicle_fig.update_layout( + title='Other Conditions', + font=dict(family="Arial", size=14, color='black'), + width= 400, + height=1000, + # autosize=True, + margin=dict(t=50, l=25, r=25, b=25) + ) + + return icicle_fig +################################################################################################################################################### + +def wrap_text(text, max_chars_per_line): + words = text.split() + lines = [] + current_line = [] + + for word in words: + if len(' '.join(current_line + [word])) <= max_chars_per_line: + current_line.append(word) + else: + lines.append(' '.join(current_line)) + current_line = [word] + + lines.append(' '.join(current_line)) + return '
'.join(lines) + + +##################################################### Sponsor Counts ########################################### + + +################################################### ############################################################ + +def wrap_text(text, max_chars_per_line): + return '
'.join(textwrap.wrap(text, max_chars_per_line)) + +def plot_sponsor_collaborator_tree_map(df): + + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Group the data by 'Sponsor' and 'Collaborator' and count the number of unique NCTId + df_count = df.groupby(['Sponsor', 'Collaborator'])['NCTId'].nunique().reset_index() + + # Sort the DataFrame by Value in descending order and reset the index + df_count = df_count.sort_values('NCTId', ascending=False).reset_index(drop=True) + + # Create a DataFrame for the top 30 sponsors and collaborators + top_30 = df_count.head(30) + top_30 = top_30.rename(columns={'NCTId': 'Number of Trials'}) + + max_chars_per_line = 10 # Adjust this value according to your needs + top_30['Wrapped Sponsor'] = top_30['Sponsor'].apply(lambda x: wrap_text(x, max_chars_per_line)) + top_30['Wrapped Collaborator'] = top_30['Collaborator'].apply(lambda x: wrap_text(x, max_chars_per_line)) + + # Create the tree map + tree_map_fig = px.treemap(top_30, path=['Wrapped Sponsor', 'Wrapped Collaborator'], values='Number of Trials', + color='Sponsor', color_continuous_scale='RdBu', + custom_data=['Wrapped Sponsor', 'Wrapped Collaborator', 'Number of Trials']) + + # Customize the hovertemplate + tree_map_fig.update_traces(hovertemplate='%{customdata[0]}
%{customdata[1]}
Number of Trials: %{customdata[2]}') + + # Customize the tree map + tree_map_fig.update_layout( + title='Lead Sponsors and Collaborators', + font=dict(family="Arial", size=14, color='black'), + width=600, + height=1000 + ) + + # Update the labels to show only the number of trials + tree_map_fig.update_traces(textinfo='value') + + return tree_map_fig + + +######################################################################################################### + +def plot_sponsor_tree(df): + + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + #Drop rows where 'Phase' is "Not Applicable" or "UNKNOWN" + #df = df[(df['Phase'] != "Not Applicable") & (df['Phase'] != "UNKNOWN")] + + # Group the data by 'Phase' and 'Sponsor' and count the number of unique NCTId + df_count = df.groupby(['Phase', 'Sponsor'])['NCTId'].nunique().reset_index() + + #print(df_count) + + # Then, sum the counts for each combination of Phase and Sponsor + df_count = df_count.groupby(['Phase', 'Sponsor'])['NCTId'].sum().reset_index() + + # Finally, sum the counts for each Sponsor + df_count_tot = df_count.groupby('Sponsor')['NCTId'].sum().reset_index() + + # Sort the DataFrame by Value in descending order and reset the index + df_count_tot = df_count_tot.sort_values('NCTId', ascending=False).reset_index(drop=True) + + # Create a DataFrame for the top 30 sponsors + top_30_sponsors = df_count_tot.head(30) + top_30_sponsors = top_30_sponsors.rename(columns={'NCTId': 'Number of Trials'}) + + + max_chars_per_line = 10 # Adjust this value according to your needs + top_30_sponsors['Wrapped Sponsor'] = top_30_sponsors['Sponsor'].apply(lambda x: wrap_text(x, max_chars_per_line)) + + # Create the icicle plot + icicle_fig = px.icicle(top_30_sponsors, path=['Wrapped Sponsor'], values='Number of Trials', + color='Sponsor', color_continuous_scale='RdBu', + custom_data=['Wrapped Sponsor', 'Number of Trials']) + + # Customize the hovertemplate + icicle_fig.update_traces(hovertemplate='%{customdata[0]}
Number of Trials: %{customdata[1]}') + + + + # Customize the icicle plot + icicle_fig.update_layout( + title='Sponsor', + font=dict(family="Arial", size=14, color='black'), + width= 600, + height=1000 + # autosize=True + # margin=dict(t=50, l=25, r=25, b=25) + ) + + + return icicle_fig + +###################################################################################################################################### + + +def plot_collaborator_icicle(df): + + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Group the data by 'Phase' and 'Sponsor' and count the number of unique NCTId + df_count = df.groupby(['Phase', 'Collaborator'])['NCTId'].nunique().reset_index() + + + # Then, sum the counts for each combination of Phase and Sponsor + df_count = df_count.groupby(['Phase', 'Collaborator'])['NCTId'].sum().reset_index() + + # Finally, sum the counts for each 'Collaborator' + df_count_tot = df_count.groupby('Collaborator')['NCTId'].sum().reset_index() + + # Sort the DataFrame by Value in descending order and reset the index + df_count_tot = df_count_tot.sort_values('NCTId', ascending=False).reset_index(drop=True) + + # Create a DataFrame for the top 30 sponsors + top_30_sponsors = df_count_tot.head(30) + top_30_sponsors = top_30_sponsors.rename(columns={'NCTId': 'Number of Trials'}) + + + max_chars_per_line = 10 # Adjust this value according to your needs + top_30_sponsors['Collaborators'] = top_30_sponsors['Collaborator'].apply(lambda x: wrap_text(x, max_chars_per_line)) + + + # Create the icicle plot + icicle_fig = px.icicle(top_30_sponsors, path=['Collaborators'], values='Number of Trials', + color='Collaborator', color_continuous_scale='RdBu', + custom_data=['Collaborators', 'Number of Trials']) + + # Customize the hovertemplate + icicle_fig.update_traces(hovertemplate='%{customdata[0]}
Number of Trials: %{customdata[1]}') + + # Customize the icicle plot + icicle_fig.update_layout( + title='Collaborators', + font=dict(family="Arial", size=14, color='black'), + width= 600, + height=1000 + #autosize=True + + ) + + + return icicle_fig + +#################################### DRUGS ######################################################################## + +#################### Sankey Diagram for Conditions to Drugs to Phase /NCTId############################################# + + + + +import pandas as pd +import plotly.graph_objects as go +import random + +def random_color(): + return f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})' +############################################################################################################## +def plot_drug_sankey(df): + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Fill missing values in the 'Phase' column with a placeholder string + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Sort by Phase + df = df.sort_values(by='Phase') + #print(df) + # Split the conditions + df = split_conditions(df, 'Condition') + + conditions = df['Condition'].unique().tolist() + drugs = df['Drug'].unique().tolist() + nct_ids = df['NCTId'].unique().tolist() + study_ids= df['OrgStudyId'].unique().tolist() + phases = df['Phase'].unique().tolist() + + # labels = conditions + drugs + nct_ids + phases + labels = conditions + drugs + nct_ids + study_ids+phases + + # Assign random colors to each node + colors = [random_color() for _ in range(len(labels))] + + source = [] + target = [] + value = [] + + for i, condition in enumerate(conditions): + for j, drug in enumerate(drugs, start=len(conditions)): + count = df[(df['Condition'] == condition) & (df['Drug'] == drug)].shape[0] + if count > 0: + source.append(i) + target.append(j) + value.append(count) + + for i, drug in enumerate(drugs, start=len(conditions)): + for j, nct_id in enumerate(nct_ids, start=len(conditions) + len(drugs)): + count = df[(df['Drug'] == drug) & (df['NCTId'] == nct_id)].shape[0] + if count > 0: + source.append(i) + target.append(j) + value.append(count) + + + # Add connections between nct_ids and study_ids + for i, nct_id in enumerate(nct_ids, start=len(conditions) + len(drugs)): + for j, study_id in enumerate(study_ids, start=len(conditions) + len(drugs) + len(nct_ids)): + count = df[(df['NCTId'] == nct_id) & (df['OrgStudyId'] == study_id)].shape[0] + if count > 0: + source.append(i) + target.append(j) + value.append(count) + + # Add connections between study_ids and phases + for i, study_id in enumerate(study_ids, start=len(conditions) + len(drugs) + len(nct_ids)): + for j, phase in enumerate(phases, start=len(conditions) + len(drugs) + len(nct_ids) + len(study_ids)): + count = df[(df['OrgStudyId'] == study_id) & (df['Phase'] == phase)].shape[0] + if count > 0: + source.append(i) + target.append(j) + value.append(count) + + + num_conditions = len(conditions) + if num_conditions <= 2: + height = 400 + elif num_conditions <= 10: + # height = 300 + (num_conditions - 2) * (200 / 8) # Linearly scale between 300 and 500 + height = 800 + elif num_conditions <= 30: + height = 1000 + else: + height = 1200 # Linearly scale between 700 and 1000, assuming a max of 100 conditions + height = min(height, 1000) # Cap the height at 1000 + + fig = go.Figure(data=[go.Sankey( + node=dict( + pad=15, + thickness=15, + line=dict(color="black", width=0.5), + label=labels, + color=colors + ), + link=dict( + source=source, + target=target, + value=value + ) + )]) + + fig.update_layout(title_text="Conditions, Drugs, Trial IDs, Phases for Sponsor", + font_size=10, height=height, autosize=True) + return fig + +########################################################################################### + + +########################################################################################################################### + +#################################################################### TRIALS ############################## + + + +######################################## + +def random_color(): + return f"rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})" + + + +def plot_condition_treemap_nct_old(df): + df = df[df['StudyType'] == "INTERVENTIONAL"] + df['Phase'] = df['Phase'].fillna('UNKNOWN') + df = df.sort_values(by='Phase') + df = split_conditions(df, 'Condition') + + conditions = df['Condition'].unique().tolist() + nct_ids = df['NCTId'].unique().tolist() + study_ids = df['OrgStudyId'].unique().tolist() + phases = df['Phase'].unique().tolist() + + labels = conditions + nct_ids + study_ids + phases + colors = [random_color() for _ in range(len(labels))] + + source = [] + target = [] + value = [] + + for i, condition in enumerate(conditions): + for j, nct_id in enumerate(nct_ids, start=len(conditions)): + count = df[(df['Condition'] == condition) & (df['NCTId'] == nct_id)].shape[0] + if count > 0: + source.append(i) + target.append(j) + value.append(count) + + for i, nct_id in enumerate(nct_ids, start=len(conditions)): + for j, study_id in enumerate(study_ids, start=len(conditions) + len(nct_ids)): + count = df[(df['NCTId'] == nct_id) & (df['OrgStudyId'] == study_id)].shape[0] + if count > 0: + source.append(i) + target.append(j) + value.append(count) + + for i, study_id in enumerate(study_ids, start=len(conditions) + len(nct_ids)): + for j, phase in enumerate(phases, start=len(conditions) + len(nct_ids) + len(study_ids)): + count = df[(df['OrgStudyId'] == study_id) & (df['Phase'] == phase)].shape[0] + if count > 0: + source.append(i) + target.append(j) + value.append(count) + + num_conditions = len(conditions) + if num_conditions <= 2: + height = 400 + elif num_conditions <= 10: + height = 800 + elif num_conditions <= 30: + height = 1000 + else: + height = 1200 + height = min(height, 1000) + + fig = go.Figure(data=[go.Sankey( + node=dict( + pad=15, + thickness=15, + line=dict(color="black", width=0.5), + label=labels, + color=colors + ), + link=dict( + source=source, + target=target, + value=value + ) + )]) + + fig.update_layout(title_text="Conditions, Trial IDs, Study IDs, Phases for Sponsor", + font_size=10, height=height, autosize=True) + return fig +######################################### Conditions############################### + +##################################################################################### + +import plotly.graph_objects as go + +def plot_condition_treemap_nct_old(df): + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + #print("tamer") + df = df[df['StudyType'] == "INTERVENTIONAL"] + #print(df) + # Fill missing values in the 'Phase' column with a placeholder string + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Map NCTId to its Title + # Create a dictionary to map NCTId to BriefTitle + nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict() + # Create a dictionary to map NCTId to OrgStudyId + nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict() + + # Create a new dataframe with the required columns + table_df = df[['Condition', 'NCTId', 'Phase']] + + # Add the BriefTitle and OrgStudyId columns + table_df['BriefTitle'] = table_df['NCTId'].map(nctid_to_brieftitle) + table_df['OrgStudyId'] = table_df['NCTId'].map(nctid_to_orgstudyid) + print(table_df) + # Sort the dataframe by Condition alphabetically + table_df = table_df.sort_values('Condition') + + + # Create a Plotly Table + fig = go.Figure(data=[go.Table( + header=dict( + values=['Condition', 'NCTId', 'OrgStudyId', 'BriefTitle', 'Phase'], + fill_color='paleturquoise', + align='left', + font=dict(size=16, color='black') + ), + cells=dict( + values=[table_df.Condition, table_df.NCTId, table_df.OrgStudyId, table_df.BriefTitle, table_df.Phase], + align='left', + font=dict(size=14, color='black') + ) + )]) + + fig.update_layout( + autosize=True, + height=1000, + title_text="Conditions with NCTIds and Phases", + title_x=0.5, + font=dict(size=18) + ) + + return fig + + + + + +###################### Claude modified Jan 14/2025############################# +def plot_condition_sunburst_nct(df): + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Fill missing values in the 'Phase' column with a placeholder string + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Map NCTId to its Title + nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict() + nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict() + + # Create separate dataframes for each level + condition_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle']) + nctid_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle']) + phase_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle']) + + # Add the "Conditions" root node + condition_df = pd.concat([condition_df, pd.DataFrame({ + 'ids': ["Conditions"], + 'labels': ["Conditions"], + 'parents': [""], + 'brieftitle': [None] + })], ignore_index=True) + + # Sort conditions alphabetically and add them + conditions = sorted(df['Condition'].unique(), key=str.lower) + for condition in conditions: + condition_df = pd.concat([condition_df, pd.DataFrame({ + 'ids': [condition], + 'labels': [condition], + 'parents': ["Conditions"], + 'brieftitle': [None] + })], ignore_index=True) + + # Add NCTId level + for condition in conditions: + temp_df = df[df['Condition'] == condition] + nctids = sorted(temp_df['NCTId'].unique()) + for nctid in nctids: + nctid_df = pd.concat([nctid_df, pd.DataFrame({ + 'ids': [f"{condition}-{nctid}"], + 'labels': [f"{nctid} ({nctid_to_orgstudyid[nctid]})"], + 'parents': [condition], + 'brieftitle': [nctid_to_brieftitle[nctid]] + })], ignore_index=True) + + # Add Phase level + for condition in conditions: + temp_df = df[df['Condition'] == condition].sort_values('NCTId') + for _, row in temp_df.iterrows(): + phase_df = pd.concat([phase_df, pd.DataFrame({ + 'ids': [f"{condition}-{row['NCTId']}-{row['Phase']}"], + 'labels': [row['Phase']], + 'parents': [f"{condition}-{row['NCTId']}"], + 'brieftitle': [None] + })], ignore_index=True) + + # Concatenate all dataframes + sunburst_df = pd.concat([condition_df, nctid_df, phase_df], ignore_index=True) + + # Create the Sunburst figure + fig = go.Figure(go.Sunburst( + ids=sunburst_df.ids, + labels=sunburst_df.labels, + parents=sunburst_df.parents, + maxdepth=3, # Limit the depth to 3 levels + branchvalues="total", + hovertext=sunburst_df['brieftitle'], + hoverinfo='text', + hoverlabel=dict(namelength=-1), + textfont=dict(size=14, family="Arial") + )) + + # Update layout + fig.update_layout( + width=1200, + height=1200, + title={ + 'text': "Clinical Trials by Condition, NCTId, and Phase", + 'y':0.95, + 'x':0.5, + 'xanchor': 'center', + 'yanchor': 'top', + 'font': dict(size=20) + } + ) + + return fig + +#################################################################################### +def plot_condition_treemap_nct(df): + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Fill missing values in the 'Phase' column with a placeholder string + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Map NCTId to its Title + nctid_to_brieftitle = df.set_index('NCTId')['BriefTitle'].to_dict() + nctid_to_orgstudyid = df.set_index('NCTId')['OrgStudyId'].to_dict() + + # Create separate dataframes for each level + condition_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle', 'level']) + nctid_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle', 'level']) + phase_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'brieftitle', 'level']) + + # Add the "Conditions" root node + condition_df = pd.concat([condition_df, pd.DataFrame({ + 'ids': ["Conditions"], + 'labels': ["Conditions"], + 'parents': [""], + 'brieftitle': [None], + 'level': [0] + })], ignore_index=True) + + # Add Condition level with sorting + conditions = sorted(df['Condition'].unique(), key=str.lower) + for condition in conditions: + condition_df = pd.concat([condition_df, pd.DataFrame({ + 'ids': [condition], + 'labels': [condition], + 'parents': ["Conditions"], + 'brieftitle': [None], + 'level': [1] + })], ignore_index=True) + + # Add NCTId level + for condition in conditions: + temp_df = df[df['Condition'] == condition] + nctids = sorted(temp_df['NCTId'].unique()) + for nctid in nctids: + nctid_df = pd.concat([nctid_df, pd.DataFrame({ + 'ids': [f"{condition}-{nctid}"], + 'labels': [f"{nctid} ({nctid_to_orgstudyid[nctid]})"], + 'parents': [condition], + 'brieftitle': [nctid_to_brieftitle[nctid]], + 'level': [2] + })], ignore_index=True) + + # Add Phase level + for condition in conditions: + temp_df = df[df['Condition'] == condition].sort_values('NCTId') + for _, row in temp_df.iterrows(): + phase_df = pd.concat([phase_df, pd.DataFrame({ + 'ids': [f"{condition}-{row['NCTId']}-{row['Phase']}"], + 'labels': [row['Phase']], + 'parents': [f"{condition}-{row['NCTId']}"], + 'brieftitle': [None], + 'level': [3] + })], ignore_index=True) + + # Concatenate all dataframes and sort by level and labels + icicle_df = pd.concat([condition_df, nctid_df, phase_df], ignore_index=True) + + # Sort the dataframe to ensure conditions appear in alphabetical order + # First sort by level to maintain hierarchy + icicle_df = icicle_df.sort_values(['level', 'labels'], + key=lambda x: x.str.lower() if x.name == 'labels' else x) + + # Create the figure with sorted data + fig = go.Figure(go.Icicle( + ids=icicle_df.ids, + labels=icicle_df.labels, + parents=icicle_df.parents, + root_color="lightgrey", + textfont=dict(size=34, family="Arial"), + hovertext=icicle_df['brieftitle'], + hoverinfo='text', + hoverlabel=dict(namelength=-1) + )) + + fig.update_layout(autosize=True, height=1000) + + return fig + +############################################################ + + + +########################################################################################################################### +import re + +def insert_line_break(text, max_length=30): + if len(text) <= max_length: + return text + + nearest_space = text.rfind(' ', 0, max_length) + if nearest_space == -1: + nearest_space = max_length + + return text[:nearest_space] + '
' + insert_line_break(text[nearest_space:].strip(), max_length) + + + +########################################################### ####################################################################### +def plot_nct2org_icicle(df): + icicle_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'hovertext']) + + # Add the "Trials" root node + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': ["Trials"], + 'labels': ["Trials"], + 'parents': [""], + 'hovertext': [""] + })], ignore_index=True) + + # Create a dictionary of NCTId-BriefTitle pairs + nctid_brieftitle = df[['NCTId', 'BriefTitle']].drop_duplicates().set_index('NCTId').to_dict()['BriefTitle'] + + # Add the NCTId level with BriefTitle as hover text + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': df['NCTId'].unique(), + 'labels': df['NCTId'].unique(), + 'parents': ["Trials"] * len(df['NCTId'].unique()), + 'hovertext': [nctid_brieftitle[nctid] for nctid in df['NCTId'].unique()] + })], ignore_index=True) + + # Add the OrgStudyId level + for nctid in df['NCTId'].unique(): + temp_df = df[df['NCTId'] == nctid] + orgstudyids = temp_df['OrgStudyId'].unique() + for orgstudyid in orgstudyids: + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': [f"{nctid}-{orgstudyid}"], + 'labels': [orgstudyid], + 'parents': [nctid], + 'hovertext': [""] + })], ignore_index=True) + + # Add the Condition level + for index, row in df.iterrows(): + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': [f"{row['NCTId']}-{row['OrgStudyId']}-{row['Condition']}-{index}"], + 'labels': [row['Condition']], + 'parents': [f"{row['NCTId']}-{row['OrgStudyId']}"], + 'hovertext': [""] + })], ignore_index=True) + + fig = go.Figure(go.Icicle( + ids=icicle_df.ids, + labels=icicle_df.labels, + parents=icicle_df.parents, + hovertext=icicle_df.hovertext, + root_color="lightgrey", + textfont=dict(size=34, family="Arial") + )) + + fig.update_layout(autosize=True, height=1000) + + return fig + + + +###################################################################################################################################### + + +################################################################################################################# + + +############################## Scatter Plot for Country Timelines ###################################### + +import pandas as pd + +import numpy as np +import plotly.express as px + + +import plotly.graph_objs as go +from plotly.subplots import make_subplots + + +def split_condition(text): + split_text = text.split(',', 1)[0].split('|', 1)[0] + return split_text.strip() + +################################################################################################################################# +import plotly.graph_objs as go + + + +import plotly.graph_objs as go +import plotly.subplots as sp +import pandas as pd +import numpy as np + + + +################################################################### COUNTRY PLOTS ################################################################ +def plot_trial_country_map(df): + df = df[df['StudyType'] == "INTERVENTIONAL"] + df['Phase'] = df['Phase'].fillna('UNKNOWN') + df = df.sort_values(by='Phase') + + # Split the conditions + df = split_conditions(df, 'Condition') + + ## Root Country Node + icicle_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'hover_text']) + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': ["Country"], + 'labels': ["Country"], + 'parents': [""], + 'hover_text': ["Country"] + })], ignore_index=True) + + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': df['Country'].unique(), + 'labels': df['Country'].unique(), + 'parents': ["Country"] * len(df['Country'].unique()), + 'hover_text': [f"({len(df[df['Country'] == country]['NCTId'].unique())} Trials)" for country in df['Country'].unique()] + })], ignore_index=True) + + ### Country and Conditions + + for country in df['Country'].unique(): + temp_df = df[df['Country'] == country] + conditions = temp_df['Condition'].unique() + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': [f"{country}__{condition}" for condition in conditions], + 'labels': conditions, + 'parents': [country] * len(conditions), + 'hover_text': [f"({len(temp_df[temp_df['Condition'] == condition]['NCTId'].unique())} Trials)" for condition in conditions] + })], ignore_index=True) + + ### Country with Conditions and Trials NCTId and OrgStudId + + for country_condition in icicle_df['ids'][icicle_df['parents'].isin(df['Country'].unique())]: + country, condition = country_condition.split('__') + temp_df = df[(df['Country'] == country) & (df['Condition'] == condition)] + trials = temp_df['NCTId'].unique() + trial_labels = [f"
{insert_line_break(temp_df[temp_df['NCTId'] == trial]['BriefTitle'].iloc[0])}" for trial in trials] + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': [f"{country_condition}__{trial}" for trial in trials], + 'labels': trials, + 'parents': [country_condition] * len(trials), + 'hover_text': trial_labels + })], ignore_index=True) + + fig = go.Figure(go.Icicle( + ids=icicle_df.ids, + labels=icicle_df.labels, + parents=icicle_df.parents, + textinfo='label', + hovertext=icicle_df.hover_text, + root_color="lightgrey", + textfont=dict(size=30, family="Arial") + )) + + fig.update_layout( + autosize=True,height = 800 + ) + + return fig + + +#################### + +################################################################ SITES ##################################################### + + + + + +################################################################ TRIAL SITES ########################################### +def plot_trial_sites(df): + def insert_line_break(text, max_length=30): + if len(text) <= max_length: + return text + + nearest_space = text.rfind(' ', 0, max_length) + if nearest_space == -1: + nearest_space = max_length + + return text[:nearest_space] + '
' + insert_line_break(text[nearest_space:].strip(), max_length) + + df = df[df['StudyType'] == "INTERVENTIONAL"] + df['Phase'] = df['Phase'].fillna('UNKNOWN') + df = df.sort_values(by='Phase') + + ## Root Site Node + icicle_df = pd.DataFrame(columns=['ids', 'labels', 'parents', 'hover_text']) + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': ["Sites"], + 'labels': ["Sites"], + 'parents': [""], + 'hover_text': ["Sites"] + })], ignore_index=True) + + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': df['City'].unique(), + 'labels': df['City'].unique(), + 'parents': ["Sites"] * len(df['City'].unique()), + 'hover_text': [f"({len(df[df['City'] == city]['NCTId'].unique())} Trials)" for city in df['City'].unique()] + })], ignore_index=True) + + ### City and Site + + for city in df['City'].unique(): + temp_df = df[df['City'] == city] + sites = temp_df['Site'].unique() + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': [f"{city}__{site}" for site in sites], + 'labels': sites, + 'parents': [city] * len(sites), + 'hover_text': [f"({len(temp_df[temp_df['Site'] == site]['NCTId'].unique())} Trials)" for site in sites] + })], ignore_index=True) + + ### Site and Trials (NCTId, OrgStudyId, BriefTitle) + + for city_site in icicle_df['ids'][icicle_df['parents'].isin(df['City'].unique())]: + city, site = city_site.split('__') + temp_df = df[(df['City'] == city) & (df['Site'] == site)] + trials = temp_df[['NCTId', 'OrgStudyId']].apply(lambda x: f"{x['NCTId']}
{x['OrgStudyId']}", axis=1).unique() + + for trial in trials: + nctid = trial.split('
')[0] + icicle_df = pd.concat([icicle_df, pd.DataFrame({ + 'ids': [f"{city_site}__{nctid}"], + 'labels': [trial], + 'parents': [city_site], + 'hover_text': [""] + })], ignore_index=True) + + fig = go.Figure(go.Icicle( + ids=icicle_df.ids, + labels=icicle_df.labels, + parents=icicle_df.parents, + textinfo='label', + hovertext=icicle_df.hover_text, + root_color="lightgrey", + textfont=dict(size=30, family="Arial") + )) + + fig.update_layout(autosize=True, height=800) + + return fig + + + +############################################################################################################################################# +def plot_trial_site_map(df): + def insert_line_break(text, max_length=30): + if len(text) <= max_length: + return text + + nearest_space = text.rfind(' ', 0, max_length) + if nearest_space == -1: + nearest_space = max_length + + return text[:nearest_space] + '
' + insert_line_break(text[nearest_space:].strip(), max_length) + + df = df[df['StudyType'] == "INTERVENTIONAL"] + df['Phase'] = df['Phase'].fillna('UNKNOWN') + df = df.sort_values(by='Phase') + + # Split the conditions + df = split_conditions(df, 'Condition') + + #df_count = df.groupby([ 'Site', 'NCTId', 'BriefTitle','Condition']).size().reset_index(name='Count') + #df_count = df.groupby([ 'Site', 'NCTId', 'BriefTitle','Condition']).size().reset_index(name='Count') + + df_count = df.groupby([ 'Site', 'City' ,'NCTId', 'Condition']).size().reset_index(name='Count') + + + #df_count['BriefTitle'] = df_count['BriefTitle'].apply(insert_line_break) + + # fig = px.treemap(df_count, path=['Site', 'NCTId', 'BriefTitle','Condition'], values='Count', color='Site') + fig = px.treemap(df_count, path=['Site', 'City','NCTId', 'Condition'], values='Count', color='Site') + + # Customize font and textinfo for Sponsor, Country, Site, and Condition + fig.update_traces( + textfont=dict(family="Arial", size=30, color='black'), + selector=dict(depth=0) # Apply customization to Sponsor grid + ) + fig.update_traces( + textfont=dict(family="Arial", size=30, color='black'), + selector=dict(depth=1) # Apply customization to Country grid + ) + fig.update_traces( + textfont=dict(family="Arial", size=30, color='black'), + selector=dict(depth=2) # Apply customization to Site grid + ) + + fig.update_layout(autosize=True, height=800) + + return fig + + +############################################################ + +############################################################################################################################################################### + + +########################################################### Timelines ########################################################################################################### + + +import numpy as np +import plotly.graph_objs as go +import matplotlib.pyplot as plt + + +def generate_colors(n): + colors = plt.cm.rainbow(np.linspace(0, 1, n)) + hex_colors = ['#%02x%02x%02x' % (int(r * 255), int(g * 255), int(b * 255)) for r, g, b, _ in colors] + return hex_colors + +def get_marker_size(enrollment_count): + if enrollment_count < 100: + return 20 + elif 100 <= enrollment_count < 300: + return 40 + elif 300 <= enrollment_count < 500: + return 60 + elif 500 <= enrollment_count < 1000: + return 70 + else: + return 100 + +def plot_trial_bubblemap(df): + scatter_plot_start_traces = [] + scatter_plot_end_traces = [] + scatter_plot_lines = [] + + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Fill missing values in the 'Phase' column with a placeholder string + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Sort by Phase + df = df.sort_values(by='Phase') + ## address correct date formats + #df['StartDate'] = pd.to_datetime(df['StartDate']) + df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce') + #df['CompletionDate'] = pd.to_datetime(df['CompletionDate']) + df['CompletionDate'] = pd.to_datetime(df['CompletionDate'], errors='coerce') + + + # Split the conditions + df = split_conditions(df, 'Condition') + + # Assign an ID to each unique condition + #condition_ids = {condition: idx for idx, condition in enumerate(df['Condition'].unique())} + # Create a dictionary of unique conditions with their IDs starting from 1 + condition_ids = {condition: i for i, condition in enumerate(df['Condition'].unique(), start=1)} + + # Create a dictionary that maps each NCTId to a list of condition IDs + nctid_condition_map = df.groupby('NCTId')['Condition'].apply(lambda x: [condition_ids[cond] for cond in x]).to_dict() + + # Define the marker size function + df['MarkerSize'] = df['EnrollmentCount'].apply(get_marker_size) + + # Update the hovertemplate to display original Conditions associated with the NCTId + #hovertemplate_start = 'NCTId: %{y}
Conditions: %{text}
Type: %{customdata[0]}
OrgStudyId: %{customdata[1]}
Phase: %{customdata[2]}
Start Date: %{x}
Enrollment Count: %{customdata[3]}' + #hovertemplate_end = 'NCTId: %{y}
Conditions: %{text}
Type: %{customdata[0]}
OrgStudyId: %{customdata[1]}
NCTId: %{customdata[2]}
Phase: %{customdata[3]}
Completion Date: %{x}
Enrollment Count: %{customdata[4]}' + + # Update the hovertemplate to display original Conditions associated with the NCTId + hovertemplate_start = 'NCTId: %{y}
Conditions: %{text}
Type: %{customdata[0]}
BriefTitle: %{customdata[1]}
OrgStudyId: %{customdata[2]}
Phase: %{customdata[3]}\ +
Start Date: %{x}
Enrollment Count: %{customdata[4]}' + + #hovertemplate_end = 'NCTId: %{y}
Conditions: %{text}
Type: %{customdata[0]}
BriefTitle: %{customdata[1]}
OrgStudyId: %{customdata[2]}
NCTId: %{customdata[3]}\ + #
Phase: %{customdata[4]}
Completion Date: %{x}
Enrollment Count: %{customdata[5]}' + + hovertemplate_end = 'NCTId: %{y}
Conditions: %{text}
Type: %{customdata[0]}
BriefTitle: %{customdata[1]}
OrgStudyId: %{customdata[2]}\ +
Phase: %{customdata[3]}
Completion Date: %{x}
Enrollment Count: %{customdata[4]}' + + for nctid in df['NCTId'].unique(): + df_filtered = df[df['NCTId'] == nctid] + + # Replace the text parameter with original Conditions + text = [', '.join(df_filtered['Condition'].unique()) for _ in range(len(df_filtered))] + + # Get the first condition ID for the current NCTId + first_condition_id = nctid_condition_map[nctid][0] + color = f'rgb({first_condition_id * 10 % 256}, {(first_condition_id * 20) % 256}, {(first_condition_id * 30) % 256})' + + # color = ['rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)'][first_condition_id % 3] + + # Start traces (square) + start_trace = go.Scatter(x=df_filtered['StartDate'], + y=df_filtered['NCTId'], + mode='markers', + marker=dict(size=10, symbol='square', color=color), + text=text, + #customdata=df_filtered[['StudyType', 'OrgStudyId', 'Phase', 'EnrollmentCount']], + customdata=df_filtered[['Condition', 'BriefTitle','OrgStudyId', 'Phase', 'EnrollmentCount']], + hovertemplate=hovertemplate_start, + showlegend=False) + scatter_plot_start_traces.append(start_trace) + + # End traces (circle) + end_trace = go.Scatter(x=df_filtered['CompletionDate'], + y=df_filtered['NCTId'], + mode='markers', + marker=dict(size=df_filtered['MarkerSize'], symbol='circle', color=color, sizemode='diameter'), + text=text, + #customdata=df_filtered[['StudyType', 'OrgStudyId', 'NCTId', 'Phase', 'EnrollmentCount']], + customdata=df_filtered[['Condition', 'BriefTitle','OrgStudyId', 'Phase', 'EnrollmentCount']], + hovertemplate=hovertemplate_end, + showlegend=False) + scatter_plot_end_traces.append(end_trace) + + # Line traces connecting start and end dates + line_trace = go.Scatter(x=[df_filtered['StartDate'].iloc[0], df_filtered['CompletionDate'].iloc[0]], + y=[nctid, nctid], + mode='lines', + line=dict(color='black', width=1), + showlegend=False) + scatter_plot_lines.append(line_trace) + + # Create legend traces for unique conditions with their IDs + legend_traces = [go.Scatter(x=[None], y=[None], + mode='markers', + marker=dict(size=10, symbol='circle', color=f'rgb({condition_id * 10 % 256}, {(condition_id * 20) % 256}, {(condition_id * 30) % 256})'), + name=f'{condition_id}: {condition}', + showlegend=True) for condition, condition_id in condition_ids.items()] + + # Combine all traces + data = scatter_plot_start_traces + scatter_plot_end_traces + scatter_plot_lines + legend_traces + + # Update the layout + layout = go.Layout(yaxis=dict(title='NCTId', + showgrid=False, + tickvals=df['NCTId'].unique(), + ticktext=df['NCTId'].unique(), + tickangle=0), + xaxis=dict(title='Start-End Dates', + showgrid=False, + range=[pd.to_datetime('2020-01-01'), pd.to_datetime('2028-12-31')], + tickvals=[pd.to_datetime(f'{year}-01-01') for year in range(2020, 2029)]), + # tickvals=[pd.to_datetime(f'{year}') for year in range(2020, 2029)], + showlegend=True, + legend=dict(title='Conditions', x=1.05, y=1, traceorder='normal', bgcolor='rgba(255,255,255,0.5)', font=dict(color='#000000')), + margin=dict(l=150), + plot_bgcolor='#ffffff', + paper_bgcolor='#ffffff', + font=dict(family='Segoe UI', color='#000000')) + + fig = go.Figure(data=data, layout=layout) + + + # Calculate the height based on the number of shortened_conditions + num_trial = len(df['NCTId'].unique()) + if num_trial <= 5: + height = 600 + elif num_trial >= 10: + height = 800 + elif num_trial >= 20: + height = 1000 + else: + height = 1400 # Linearly scale between 700 and 1000, assuming a max of 100 conditions + + height = min(height, 1400) # Cap the height at 1400 + # Set the width and height + fig.update_layout( + title='Trial Start and End Dates by Conditions', + autosize=True, # adjust as per requirement + height=height # adjust as per requirement + ) + + + return fig + + + + + + + +######################################################################################################################################################## +def plot_trial_bubblemap_comp(df): + scatter_plot_start_traces = [] + scatter_plot_end_traces = [] + scatter_plot_lines = [] + + # Filter the dataframe for 'StudyType' equal to "INTERVENTIONAL" + df = df[df['StudyType'] == "INTERVENTIONAL"] + + # Fill missing values in the 'Phase' column with a placeholder string + df['Phase'] = df['Phase'].fillna('UNKNOWN') + + # Sort by Phase + df = df.sort_values(by='Phase') + ## address correct date formats + #df['StartDate'] = pd.to_datetime(df['StartDate']) + df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce') + # df['CompletionDate'] = pd.to_datetime(df['CompletionDate']) + df['CompletionDate'] = pd.to_datetime(df['CompletionDate'], errors='coerce') + + # Split the conditions + df = split_conditions(df, 'Condition') + + # Assign an ID to each unique condition + #condition_ids = {condition: idx for idx, condition in enumerate(df['Condition'].unique())} + # Create a dictionary of unique conditions with their IDs starting from 1 + condition_ids = {condition: i for i, condition in enumerate(df['Condition'].unique(), start=1)} + + # Create a dictionary that maps each NCTId to a list of condition IDs + nctid_condition_map = df.groupby('NCTId')['Condition'].apply(lambda x: [condition_ids[cond] for cond in x]).to_dict() + + # Define the marker size function + df['MarkerSize'] = df['EnrollmentCount'].apply(get_marker_size) + + + # Update the hovertemplate to display original Conditions associated with the NCTId + hovertemplate_start = 'NCTId: %{y}
Conditions: %{text}
Type: %{customdata[0]}
BriefTitle: %{customdata[1]}
OrgStudyId: %{customdata[2]}
Phase: %{customdata[3]}\ +
Start Date: %{x}
Enrollment Count: %{customdata[4]}' + + hovertemplate_end = 'NCTId: %{y}
Conditions: %{text}
Type: %{customdata[0]}
BriefTitle: %{customdata[1]}
OrgStudyId: %{customdata[2]}\ +
Phase: %{customdata[3]}
Completion Date: %{x}
Enrollment Count: %{customdata[4]}' + + for nctid in df['NCTId'].unique(): + df_filtered = df[df['NCTId'] == nctid] + + # Replace the text parameter with original Conditions + text = [', '.join(df_filtered['Condition'].unique()) for _ in range(len(df_filtered))] + + # Get the first condition ID for the current NCTId + first_condition_id = nctid_condition_map[nctid][0] + color = f'rgb({first_condition_id * 10 % 256}, {(first_condition_id * 20) % 256}, {(first_condition_id * 30) % 256})' + + # color = ['rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)'][first_condition_id % 3] + + # Start traces (square) + start_trace = go.Scatter(x=df_filtered['StartDate'], + y=df_filtered['NCTId'], + mode='markers', + marker=dict(size=10, symbol='square', color=color), + text=text, + #customdata=df_filtered[['StudyType', 'OrgStudyId', 'Phase', 'EnrollmentCount']], + customdata=df_filtered[['Condition', 'BriefTitle','OrgStudyId', 'Phase', 'EnrollmentCount']], + hovertemplate=hovertemplate_start, + showlegend=False) + scatter_plot_start_traces.append(start_trace) + + # End traces (circle) + end_trace = go.Scatter(x=df_filtered['CompletionDate'], + y=df_filtered['NCTId'], + mode='markers', + marker=dict(size=df_filtered['MarkerSize'], symbol='circle', color=color, sizemode='diameter'), + text=text, + #customdata=df_filtered[['StudyType', 'OrgStudyId', 'NCTId', 'Phase', 'EnrollmentCount']], + customdata=df_filtered[['Condition', 'BriefTitle','OrgStudyId', 'Phase', 'EnrollmentCount']], + hovertemplate=hovertemplate_end, + showlegend=False) + scatter_plot_end_traces.append(end_trace) + + # Line traces connecting start and end dates + line_trace = go.Scatter(x=[df_filtered['StartDate'].iloc[0], df_filtered['CompletionDate'].iloc[0]], + y=[nctid, nctid], + mode='lines', + line=dict(color='black', width=1), + showlegend=False) + scatter_plot_lines.append(line_trace) + + # Create legend traces for unique conditions with their IDs + legend_traces = [go.Scatter(x=[None], y=[None], + mode='markers', + marker=dict(size=10, symbol='circle', color=f'rgb({condition_id * 10 % 256}, {(condition_id * 20) % 256}, {(condition_id * 30) % 256})'), + name=f'{condition_id}: {condition}', + showlegend=True) for condition, condition_id in condition_ids.items()] + + # Combine all traces + data = scatter_plot_start_traces + scatter_plot_end_traces + scatter_plot_lines + legend_traces + + # Update the layout + layout = go.Layout(yaxis=dict(title='NCTId', + showgrid=False, + tickvals=df['NCTId'].unique(), + ticktext=df['NCTId'].unique(), + tickangle=0), + xaxis=dict(title='Start-End Dates', + showgrid=False, + range=[pd.to_datetime('2010-01-01'), pd.to_datetime('2023-12-31')], + tickvals=[pd.to_datetime(f'{year}-01-01') for year in range(2010, 2023)]), + # tickvals=[pd.to_datetime(f'{year}') for year in range(2020, 2029)], + showlegend=True, + legend=dict(title='Conditions', x=1.05, y=1, traceorder='normal', bgcolor='rgba(255,255,255,0.5)', font=dict(color='#000000')), + margin=dict(l=150), + plot_bgcolor='#ffffff', + paper_bgcolor='#ffffff', + font=dict(family='Segoe UI', color='#000000')) + + fig = go.Figure(data=data, layout=layout) + + + # Calculate the height based on the number of shortened_conditions + num_trial = len(df['NCTId'].unique()) + if num_trial <= 5: + height = 600 + elif num_trial >= 10: + height = 800 + elif num_trial >= 20: + height = 1000 + else: + height = 1400 # Linearly scale between 700 and 1000, assuming a max of 100 conditions + + height = min(height, 1400) # Cap the height at 1400 + # Set the width and height + fig.update_layout( + title='Trial Start and End Dates by Conditions', + autosize=True, # adjust as per requirement + height=height # adjust as per requirement + ) + + + return fig + + + +####################################################################################### + + +####################################################################################### + + +############################################ Trial Site Map without Zip code now ############## + +import geopandas as gpd + +def plot_trial_site_world_map(df, country_filter=None): + df.loc[(df['City'] == 'Multiple Locations') & (df['Country'] == 'Germany'), 'City'] = 'Berlin' + unique_cities = df[['City', 'Country']].drop_duplicates().copy() + + geocode_cache = {} # Create an empty dictionary to store geocoded results + + def geocode_with_cache(city, country): + key = (city, country) + if key not in geocode_cache: + geocode_cache[key] = gpd.tools.geocode(f"{city}, {country}").geometry[0] + return geocode_cache[key] + + unique_cities['Coordinates'] = unique_cities.apply(lambda row: geocode_with_cache(row['City'], row['Country']), axis=1) + unique_cities[['Latitude', 'Longitude']] = unique_cities['Coordinates'].apply(lambda coord: pd.Series({'Latitude': coord.y, 'Longitude': coord.x})) + + df = df.merge(unique_cities, on=['City', 'Country']) + + # Create a new column combining 'Site' and 'Country' + df['SiteCountry'] = df['Site'] + ', ' + df['Country'] + + df_count = df.groupby(['Country', 'City', 'SiteCountry', 'Condition', 'NCTId','BriefTitle', 'Latitude', 'Longitude']).size().reset_index(name='Count') + + if country_filter: + df_count = df_count[df_count['Country'] == country_filter] + + + + fig = px.scatter_geo(df_count, + lat='Latitude', + lon='Longitude', + hover_name='SiteCountry', + hover_data={'Latitude':False, 'Longitude':False, 'NCTId':False,'BriefTitle':False, 'Condition':False, 'City':True, 'Country':True}, + size='Count', + color='SiteCountry', + projection='mercator') + + fig.update_layout(title='Trial Sites Map', + geo=dict(showframe=False, showcoastlines=False, showcountries=True), + width=1200, + height=800) + + return fig + + + +############################################################################################################# + +############################################################# Gradio Function as Views #################################### + + +### ######################### Find Sponspors + ############################################################################################################################################# + + + + +def select_sponsor(sponsor_input, academia_input): + if sponsor_input: + return sponsor_input + else: + return academia_input + +def select_disease(disease_input, disease_input_text): + if disease_input_text: + return disease_input_text.strip() + else: + return disease_input + +#summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drug + +#async def disease_view (condition, condition_text, sponsor_input, academia_input): +async def disease_view (condition, sponsor_input): + # condition = condition.strip() # Remove leading and trailing spaces + #sponsor = select_sponsor(sponsor_input, academia_input) + #condition = select_disease(condition, condition_text) + sponsor = sponsor_input + condition = condition + + ################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None,None, None, None, None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None,None, None, None, None, None + +################################################################################################ + + status = "Recruiting" + #print("In Gradio") + # Call gradio_wrapper_nct with appropriate arguments + if condition and sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, sponsor=sponsor, status = status ) + elif sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(sponsor=sponsor, status = status ) + elif condition: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, status = status ) + + +#### error traps + if html_table_conditions is None: + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None, None, None, None, None + #### error traps + if html_table_conditions_collb is None: + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None, None, None, None, None + + # Convert the HTML table to a pandas DataFrame + df = pd.read_html(html_table_conditions)[0] + + #df2 = pd.read_html(html_table_conditions_collb)[0] + df2 = [] + try: + df2 = pd.read_html(html_table_conditions_collb)[0] + except (ValueError, IndexError): + df2 = pd.DataFrame() + + +#### error traps + if df.empty and df2.empty: + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None, None, None, None, None +####### Address Runtime API Issue to not connecting or fetching from Clinical Trials.gov + + # Display the DataFrame + # evaluate if need to change to collaborator other than top 20 ???? + # condition_other = plot_condition_others(df) + #### Sponsor Only + # condition_sunburst = plot_condition_sunburst(df) + + ################################################################################ + sponsor_tree = plot_sponsor_tree(df) + + collaborator_tree = None # Initialize to None or any other default value + + if not df2.empty: + + collaborator_tree = plot_collaborator_icicle(df2) + return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb,sponsor_tree, collaborator_tree + + # return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb, condition_other, condition_sunburst ,sponsor_tree, collaborator_tree + + +##################### Assets ################################################################################### + + +def select_sponsor(s_sponsor_input, s_academia_input): + if s_sponsor_input: + return s_sponsor_input + else: + return s_academia_input + +def select_condition(s_disease_input, s_disease_input_type): + if s_disease_input_type.strip(): + return s_disease_input_type.strip() + else: + return s_disease_input + + +async def drug_view(condition, condition_type, s_sponsor_input, s_academia_input): + sponsor = select_sponsor(s_sponsor_input, s_academia_input) + condition = select_condition(condition, condition_type) +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + +################################################################################################ + + + status = "Recruiting" + # Call gradio_wrapper_nct with appropriate arguments + if condition and sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, sponsor=sponsor, status = status) + elif sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(sponsor=sponsor, status = status) + elif condition: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, status = status) +#### error traps + if html_table_drugs is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None +###### Convert the HTML table to a pandas DataFrame + df = pd.read_html(html_table_drugs)[0] +####### Address Runtime API Issue to not connecting or fetching from Clinical Trials.gov +#### error traps + if df.empty : + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None + # Display the DataFrame + sankey_map_drug = plot_drug_sankey(df) + + return summary_stats,html_table_drugs, sankey_map_drug + + +########################### Condition################### +################## ######################################################################################## + + + + +def select_sponsor_phc(s_sponsor_input_phc, s_academia_input_phc): + if s_sponsor_input_phc: + return s_sponsor_input_phc + else: + return s_academia_input_phc + +def select_condition_phc(s_disease_input_phc, s_disease_input_type_phc): + if s_disease_input_type_phc.strip(): + return s_disease_input_type_phc.strip() + else: + return s_disease_input_phc + + +#async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input): +async def disease_view_phc(condition, s_sponsor_input): + #sponsor = select_sponsor_phc(s_sponsor_input, s_academia_input ) + # condition = select_condition_phc(condition, condition_type) + sponsor = s_sponsor_input + condition = condition + + +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + +################################################################################################ + + + + status = "Recruiting" + # Call gradio_wrapper_nct with appropriate arguments + if condition and sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, sponsor=sponsor, status = status) + elif sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(sponsor=sponsor, status = status) + elif condition: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, status = status) + +#### error traps + if html_table_conditions is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + + + + # Convert the HTML table to a pandas DataFrame + df = pd.read_html(html_table_conditions)[0] + #print(df) +#### error traps + if df.empty : + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None + ####### Address Runtime API Issue to not connecting or fteching from Clinical Trials.gov + + tree_map_cond_nct = plot_condition_treemap_nct(df) + + sunburst_map_cond_nct = plot_condition_sunburst_nct(df) + + return summary_stats, html_table_conditions, tree_map_cond_nct, sunburst_map_cond_nct + + # return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map + + +################## Trial ######################################################################################## + + + + +def select_sponsor_phs(s_sponsor_input_phs, s_academia_input_phs): + if s_sponsor_input_phs: + return s_sponsor_input_phs + else: + return s_academia_input_phs + +def select_condition_phs(s_disease_input_phs, s_disease_input_type_phs): + if s_disease_input_type_phs.strip(): + return s_disease_input_type_phs.strip() + else: + return s_disease_input_phs + + + +async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academia_input): + sponsor = select_sponsor_phs(s_sponsor_input, s_academia_input ) + condition = select_condition_phs(condition, condition_type) +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + +################################################################################################ + + + status = "Recruiting" + # Call gradio_wrapper_nct with appropriate arguments + if condition and sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, sponsor=sponsor, status = status) + elif sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(sponsor=sponsor, status = status) + elif condition: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, status = status) + +#### error traps + if html_table_conditions is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + + # Convert the HTML table to a pandas DataFrame + df = pd.read_html(html_table_conditions)[0] + #print(df) +#### error traps + if df.empty : + return "The Sponsor Name did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None + ####### Address Runtime API Issue to not connecting or fteching from Clinical Trials.gov + + #tree_map_cond_nct = plot_condition_treemap_nct(df) + + nct_org_map = plot_nct2org_icicle(df) + + return summary_stats, html_table_conditions, nct_org_map + + # return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map + + + +##################################################### New Trials ###################################### + +def select_sponsor_phs_n(s_sponsor_input_phs, s_academia_input_phs): + if s_sponsor_input_phs: + return s_sponsor_input_phs + else: + return s_academia_input_phs + +def select_condition_phs_n(s_disease_input_phs, s_disease_input_type_phs): + if s_disease_input_type_phs.strip(): + return s_disease_input_type_phs.strip() + else: + return s_disease_input_phs + + +#################################################################################### +async def disease_view_phs_n(condition, condition_type, s_sponsor_input, s_academia_input): + sponsor = select_sponsor_phs_n(s_sponsor_input, s_academia_input ) + condition = select_condition_phs_n(condition, condition_type) + +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None + +################################################################################################ + + + + + status = "Not yet recruiting" + # Call gradio_wrapper_nct with appropriate arguments + if condition and sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, sponsor=sponsor, status = status) + elif sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(sponsor=sponsor, status = status) + elif condition: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, status = status) + +#### error traps + if html_table_conditions is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None, None + + + + # Convert the HTML table to a pandas DataFrame + # df = pd.read_html(html_table_conditions)[0] + + ####### Address Runtime API Issue to not connecting or fteching from Clinical Trials.gov + + #tree_map_cond_nct = plot_condition_treemap_nct(df) + # tree_map_cond_nct = plot_condition_treemap_nct(df) + ###### Convert the HTML table to a pandas DataFrame + df = pd.read_html(html_table_drugs)[0] +#### error traps + if df.empty : + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None +####### Address Runtime API Issue to not connecting or fetching from Clinical Trials.gov + + # Display the DataFrame + tree_map_cond_nct = plot_drug_sankey(df) + + # nct_org_map = plot_nct2org_icicle(df) + +######################################### error traps + # if html_table_add is None: + # return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + df2 = pd.read_html(html_table_conditions)[0] + bubble_map_trials = plot_trial_bubblemap(df2) + # return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map,bubble_map_trials + return summary_stats, html_table_conditions, tree_map_cond_nct, bubble_map_trials + + +############################################### Completed Trials #################################################### +def select_sponsor_phs_c(s_sponsor_input_phs, s_academia_input_phs): + if s_sponsor_input_phs: + return s_sponsor_input_phs + else: + return s_academia_input_phs + +def select_condition_phs_c(s_disease_input_phs, s_disease_input_type_phs): + if s_disease_input_type_phs.strip(): + return s_disease_input_type_phs.strip() + else: + return s_disease_input_phs + +async def disease_view_phs_c(condition, condition_type, s_sponsor_input, s_academia_input): + sponsor = select_sponsor_phs_c(s_sponsor_input, s_academia_input ) + condition = select_condition_phs_c(condition, condition_type) +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None, None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None, None, None + +################################################################################################ + + + status = "Completed" + # Call gradio_wrapper_nct with appropriate arguments + if condition and sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, sponsor=sponsor, status = status) + elif sponsor: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(sponsor=sponsor, status = status) + elif condition: + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(condition=condition, status = status) + +#### error traps + if html_table_conditions is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None, None, None, None + + + + # Convert the HTML table to a pandas DataFrame + df = pd.read_html(html_table_conditions)[0] + + + ####### Address Runtime API Issue to not connecting or fteching from Clinical Trials.gov + + tree_map_cond_nct = plot_condition_treemap_nct(df) + + nct_org_map = plot_nct2org_icicle(df) + +######################################### error traps + # if html_table_add is None: + # return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + df3 = pd.read_html(html_table_conditions)[0] + bubble_map_trials = plot_trial_bubblemap_comp(df3) + + +###### Convert the HTML table to a pandas DataFrame + df2 = pd.read_html(html_table_drugs)[0] +####### Address Runtime API Issue to not connecting or fteching from Clinical Trials.gov +#### error traps + if df.empty and df2.empty and df3.empty: + return "The selection did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None, None, None + # Display the DataFrame + sankey_map_drug = plot_drug_sankey(df2) + + #return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map,bubble_map_trials + return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map,sankey_map_drug,bubble_map_trials + + + + +### ############### Country ######################################################### + +def select_sponsor_con(sponsor_input_con, academia_input_con): + if sponsor_input_con: + return sponsor_input_con + else: + return academia_input_con + +def select_condition_con(condition_input, condition_input_type): + if condition_input_type.strip(): + return condition_input_type.strip() + else: + return condition_input + +async def condition_view(condition, country, condition_type, sponsor_input_con, academia_input_con): + condition = select_condition_con(condition, condition_type) + sponsor = select_sponsor_con(sponsor_input_con, academia_input_con) +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + +################################################################################################ + + + status = "Recruiting" + summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status) + # Convert the HTML table to a pandas DataFrame + # Check if html_table_add is None before converting to DataFrame +#### error traps + if html_table_add is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + + df = pd.read_html(html_table_add)[0] + df2 = pd.read_html(html_table_add)[0] + +#### error traps + if df.empty and df2.empty: + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None + #print(df) + trial_country = plot_trial_country_map(df2) + + + return summary_stats_sites, html_table_add,trial_country + + + + + +############### Site ######################################################################################################### + + +def select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s): + if sponsor_input_con_s: + return sponsor_input_con_s + else: + return academia_input_con_s + + +def select_condition_con(condition_input, condition_input_type): + if condition_input_type.strip(): + return condition_input_type.strip() + else: + return condition_input + +async def condition_view_s(condition, country, condition_type, sponsor_input_con_s, academia_input_con_s): + condition = select_condition_con(condition, condition_type) + sponsor = select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s) +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None + +################################################################################################ + + + + + status = "Recruiting" + summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status) + + #### error traps + if html_table_add is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None,None + #### error traps + if html_table_add is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None, None + #### error traps + if html_table_add is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None, None + + + # print(html_table_add) + df = pd.read_html(html_table_add)[0] + #print(df) + +#### error traps + if df.empty : + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None + + + + + site_cond = plot_trial_sites(df) + country_site = plot_trial_site_map(df) + + return summary_stats_sites, html_table_add, site_cond,country_site + + + + + + +###################################### Timelines ################################################################### + + + +def select_sponsor_cont(sponsor_input_con, academia_input_con): + if sponsor_input_con: + return sponsor_input_con + else: + return academia_input_con + +def select_condition_cont(condition_input, condition_input_type): + if condition_input_type.strip(): + return condition_input_type.strip() + else: + return condition_input + +async def condition_viewt(condition, country, condition_type, sponsor_input_con, academia_input_con): + condition = select_condition_cont(condition, condition_type) + sponsor = select_sponsor_cont(sponsor_input_con, academia_input_con) +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + #print(type(sponsor)) + condition = ' '.join(condition) if isinstance(condition, list) else condition + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + if condition is not None and isinstance(condition, str): + if len(condition) > 50 or not re.match(allowed_chars, condition): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + +################################################################################################ + + + + status = "Recruiting" + summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status) + # Convert the HTML table to a pandas DataFrame + +######################################### error traps + if html_table_add is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + df = pd.read_html(html_table_add)[0] + + +#### error traps + if df.empty : + return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None + + bubble_map_trials = plot_trial_bubblemap(df) + + return summary_stats_sites, html_table,bubble_map_trials + + + + + +############### Find Site Map ######################################################################################################### + + +def select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map): + if sponsor_input_con_map: + return sponsor_input_con_map + else: + return academia_input_con_map + +async def condition_view_map(condition, country, sponsor_input_con_map, academia_input_con_map): + #condition = condition.strip() # Remove leading and trailing spaces + sponsor = select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map) +################# ### List data type errors in type conversion to string needed for regualr expression + sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor + + #print(type(condition)) +############################CHECK Sponsor and Condition Inputs ######################################################### + import re + +################ # def check_input(condition, sponsor): + allowed_chars = r'^[A-Za-z .,&/()-]*$' + + + + if sponsor is not None and isinstance(sponsor, str): + if len(sponsor) > 50 or not re.match(allowed_chars, sponsor): + return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None + +################################################################################################ +################################################################################################ + + + + + + status = "Recruiting" + summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(condition=condition, sponsor=sponsor, country=country, status = status ) + # print(html_table_add) + #### error traps + if html_table_add is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + df = pd.read_html(html_table_add)[0] + +#### error traps + if df.empty : + return "The Trial Id did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None + + world_map = plot_trial_site_world_map(df) + + if world_map is None: + return "Sorry, the plot could not be generated. Please try again by slecting a country!", None, None + + return summary_stats_sites, html_table_add, world_map + + + +### ########################################Find Trial Eligibility########################################################################### + + +############################################################################ END VIEWS######################## + +#### To remove the inclusion exclusion numbers duplicating in text + +import re + +def format_html_list(html_string): + # Split the input string by numbers followed by a period and a space + items = re.split(r'(\d+\.\s)', html_string) + + # Combine the split items into a list of strings, keeping the original numbers + formatted_items = [number + text for number, text in zip(items[1::2], items[2::2])] + + # Remove unwanted characters from each item + formatted_items = [re.sub(r':\.', '', item) for item in formatted_items] + formatted_items = [re.sub(r'General\.', '', item) for item in formatted_items] + + # Filter out empty list items + formatted_items = [item for item in formatted_items if item.strip()] + + # Check if the first item is empty and remove it if so + if formatted_items[0].split('. ', 1)[1].strip() == '': + formatted_items = formatted_items[1:] + + # Renumber the items + # formatted_items = [f"{i+1}. {item.split('. ', 1)[1]}" for i, item in enumerate(formatted_items)] + # Renumber the items + formatted_items = [ + f"{i+1}. {item.split('. ', 1)[1]}" if len(item.split('. ', 1)) > 1 else item + for i, item in enumerate(formatted_items) + ] + + # Remove extra periods + formatted_items = [re.sub(r'\.{2,}', '.', item) for item in formatted_items] + + # Join the list items with line breaks to create an HTML string + formatted_html = "
".join(formatted_items) + + return formatted_html +######################################################################################## + + + + + +##################################################################################### + +############################################################################################################################################# +async def trial_view_map(nctID): + nctID = nctID.strip() # Remove leading and trailing spaces + ###### # Check if nctID is valid + + if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12): + return "Not a Valid NCT ID has been entered", None, None + + status = "Recruiting" + #summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(NCTId=nctID) + summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add,html_table_drugs = await gradio_wrapper_nct_spn(NCTId=nctID, status = status) + #### error traps + + #trial_buttons.click(trial_view_map, inputs=[nctID_inputs], outputs=[summary_block_trial_map, world_map]) + +################################################################################################################################# + if html_table_add is None: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + df = pd.read_html(html_table_add)[0] + world_map = plot_trial_site_world_map(df) + if world_map is None: + return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None + + return summary_stats_sites, world_map, html_table_add + #return html_table, formatted_html_inclusions,formatted_html_exclusions,world_map + + +#################################################################################################################################################### +import plotly.graph_objects as go + +def split_numbered_criteria(text): + """Split text into list of criteria based on numbered lines""" + if not text: + return [] + + criteria = [] + current = [] + + for line in text.split('\n'): + line = line.strip() + if line: + # Check if line starts with a number followed by period + if line[0].isdigit() and '. ' in line[:4]: + if current: + criteria.append(' '.join(current)) + current = [line] + else: + current.append(line) + + # Add the last criteria + if current: + criteria.append(' '.join(current)) + + return criteria if criteria else ["No criteria available"] + +def display_criteria_table(inclusion_text, exclusion_text): + """ + Create a two-column Plotly table with inclusion and exclusion criteria + split into separate rows based on numbering + """ + try: + # Split both texts into lists of criteria + inclusion_list = split_numbered_criteria(inclusion_text) + exclusion_list = split_numbered_criteria(exclusion_text) + + # Make lists equal length by padding with empty strings + max_length = max(len(inclusion_list), len(exclusion_list)) + inclusion_list.extend([''] * (max_length - len(inclusion_list))) + exclusion_list.extend([''] * (max_length - len(exclusion_list))) + + # Create the table + fig = go.Figure(data=[go.Table( + columnwidth=[500, 500], # Equal width columns + header=dict( + values=['Inclusion Criteria', 'Exclusion Criteria'], + fill_color='#e6f3ff', + align=['left', 'left'], + font=dict(size=14, color='black'), + height=40 + ), + cells=dict( + values=[inclusion_list, exclusion_list], + fill_color=[['white', '#f9f9f9'] * max_length], # Alternating row colors + align=['left', 'left'], + font=dict(size=12), + height=None, + line=dict(color='lightgrey', width=1) # Add light borders + ) + )]) + + # Update layout + fig.update_layout( + title="Trial Eligibility Criteria", + width=1200, + height=max(400, max_length * 30 + 100), # Dynamic height based on content + margin=dict(l=20, r=20, t=40, b=20) + ) + + return fig + + except Exception as e: + print(f"Error in display_criteria_table: {str(e)}") + return None + +async def trial_view(nctID): + nctID = nctID.strip() + + if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12): + return "Not a Valid NCT ID has been entered", None, None + + status = "Recruiting" + summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drugs = await gradio_wrapper_nct(NCTId=nctID, status = status) + + formatted_inclusions = get_formatted_inclusion_criteria(nctID) + formatted_exclusions = get_formatted_exclusion_criteria(nctID) + + if not formatted_inclusions and not formatted_exclusions: + return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None + + # Create single table with both criteria + criteria_table = display_criteria_table(formatted_inclusions, formatted_exclusions) + + return html_table, criteria_table + + + +############################### Design the interface#################################################################################### + +## Added after Spet 27 Failure +from gradio.components import Dropdown + + +############################################################################################################################################################################### + +trial_app = gr.Blocks() +with trial_app: + gr.Markdown("
Trial Connect
") + gr.Markdown("
Data Source: ClinicalTrials.gov
") + #gr.Markdown("

Now Recruiting Trials:

") + with gr.Tabs(): + + + + ############################################################################################################################################## + ################################################################ Conditions ############################################################################################### + with gr.TabItem("Trials"): + + # 1st Row +#################################################################################################################################################### +##################################################################################################################################################### + + + + with gr.Row(): + gr.HTML(''' +

'Now Recruiting' Trials for Conditions:

+

1. Select a Condition, for example, 'Pancreatic Cancer', 'Chronic Kidney Disease', 'MASH' etc.

+

2. Select a Sponsor'.

+

3. Click 'Show Trials'.

+ ''') + + + +##################################################################################################################################################### + + + + with gr.Row(): + +################################################################### + with gr.Column(): + s_disease_input_phc = gr.Dropdown( + choices=["Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ + "Cancer","Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ + "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ + "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ + "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ + "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ + "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ + "Urothelial Carcinoma",\ + "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ + "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ + " Major","Metabolic", "Generalized Pustular Psoriasis",\ + "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ + "Liver Cirrhosis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "Psychological Trauma","Renal", "Respiratory",\ + "Schizophrenia", "PTSD", \ + "Venous Thromboembolism", "Wet"], + label="Select Condition" + ) +################################################################### + with gr.Column(): + +#### ######################################################################################################################################################################################################### + + s_sponsor_input_phc = gr.Dropdown( + + choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ + "CSL Behring", "Daiichi Sankyo, Inc.",\ + "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ + "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ + "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], + label="Select Sponsor" + ) +###################################################################################################################################################################### + # 3rd Row + with gr.Row(): #academia_input = gr.inputs.Dropdown( + s_button_phc = gr.Button("Show Trials") + + # Then, create the clear button and add the dropdown input to it + clear_btn_phc = gr.ClearButton() + clear_btn_phc.add(s_sponsor_input_phc) + + clear_btn_phc.add(s_disease_input_phc) + + # with gr.Column(): +################# # 3rd row################################################################# + # with gr.Row(): +################################################################### + # with gr.Column(): + # s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:") + # clear_btn_phc.add(s_academia_input_phc) + +################################################################### + # with gr.Column(): + + + # s_disease_input_type_phc = gr.Textbox(lines=1, label="Filter by typing a Condition:") + # clear_btn_phc.add(s_disease_input_type_phc) +############################################################################################################################################ +###################################################################################################################################################################### + +######################################################################################################################################################################### + with gr.Row(): + + summary_block_phc = gr.HTML(label="Clinical Trials Now Recruiting for Conditions :" ) +############################################################################################################################################################# + # with gr.Row(): + # nct_org_map = gr.Plot() +########################################################################################################################################################## +#################################################################################################################################################### + # with gr.Row(): + # gr.HTML('

Conditions by Trials and Phase

') + with gr.Row(): + # with gr.Column(): + tree_map_cond_nct = gr.Plot() +#################################################################################################################################################### + # with gr.Row(): + # gr.HTML('

Conditions by Trials and Phase

') + with gr.Row(): + # with gr.Column(): + + sunburst_map_cond_nct = gr.Plot() + + + + + with gr.Row(): + output_block_conditions_phc = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") + + clear_btn_phc.add(summary_block_phc) + clear_btn_phc.add(output_block_conditions_phc) + clear_btn_phc.add(tree_map_cond_nct) + clear_btn_phc.add(sunburst_map_cond_nct) + + #clear_btn_phs.add(nct_org_map) + +######################################################################### + ################################################################ Trials ############################################################################################### + # with gr.TabItem("Trials"): + + # 1st Row +#################################################################################################################################################### + # with gr.Row(): + # gr.HTML(''' + #

Trials 'Now Recruiting':

+ #

1. Select a Sponsor and click 'Show Trials'.

+ #

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.

+ # ''') + + +##################################################################################################################################################### + + # with gr.Row(): + # with gr.Column(): + +#### ######################################################################################################################################################################################################### + + # s_sponsor_input_phs = gr.Dropdown( + +############################################################################ + # choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ + # "CSL Behring", "Daiichi Sankyo, Inc.",\ + # "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ + # "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ + # "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], + # label="Select a Sponsor" + # ) + ############################################################################################################## + # with gr.Column(): + + +###################################################################################################################################################################### + # 3rd Row + # with gr.Row(): #academia_input = gr.inputs.Dropdown( + # s_button_phs = gr.Button("Show Trials") + + # Then, create the clear button and add the dropdown input to it + # clear_btn_phs = gr.ClearButton() + # clear_btn_phs.add(s_sponsor_input_phs) + + # clear_btn_phs.add(s_disease_input_phs) + +###################################################################################################################################################################### +################# # 3rd row################################################################# + # with gr.Row(): + ################################################################################################################################################################# + # with gr.Column(): + # s_academia_input_phs = gr.Textbox(lines=1, label="Type a Sponsor Name:") + # clear_btn_phs.add(s_academia_input_phs) +################################################################################################################################################################# + # with gr.Column(): + + + # s_disease_input_type_phs = gr.Textbox(lines=1, label="Filter by typing a Condition:") + # clear_btn_phs.add(s_disease_input_type_phs) +############################################################################################################################################ + +######################################################################################################################################################################### + # with gr.Row(): + + # summary_block_phs = gr.HTML(label="Conditions and Sponsors Now Recruiting for Clinical Trials:" ) +############################################################################################################################################################# + #with gr.Row(): + # nct_org_map = gr.Plot() +########################################################################################################################################################## +#################################################################################################################################################### + + # with gr.Row(): + # output_block_conditions_phs = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") + + # clear_btn_phs.add(summary_block_phs) + # clear_btn_phs.add(output_block_conditions_phs) + # clear_btn_phs.add(nct_org_map) + +######################################################################### +############################################################ ASSETS ############################################################### + with gr.TabItem("Drugs"): +############################################################################################ + # 1st Row +#################################################################################################################################################### + with gr.Row(): +#################################################################################################################################################### + + gr.HTML(''' +

Drugs for 'Now Recruiting' Trials:

+ +

1. Select a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.

+

2. Select a Sponsor and click 'Show Drugs'.

+ ''') + +##################################################################################################################################################### + with gr.Row(): + +##################################################################################################################################################################################### + with gr.Column(): + s_disease_input = gr.Dropdown( + choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ + "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ + "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ + "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ + "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ + "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ + "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ + "Urothelial Carcinoma",\ + "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ + "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ + " Major","Metabolic", "Generalized Pustular Psoriasis",\ + "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ + "Liver Cirrhosis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "Psychological Trauma","Renal", "Respiratory",\ + "Schizophrenia", "PTSD", \ + "Venous Thromboembolism", "Wet"], + label= "Filter by a Condition" + ) +######################################################################################################################################## + with gr.Column(): + +############################################################################################################################################# + s_sponsor_input = gr.Dropdown( +############################################################################ + choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ + "CSL Behring", "Daiichi Sankyo, Inc.",\ + "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ + "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ + "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], + label="Select a Sponsor" + ) +############################################################################################ +############################################################################################ + with gr.Row(): + s_drug_button = gr.Button("Show Drugs") + # Then, create the clear button and add the dropdown input to it + clear_btn = gr.ClearButton() + clear_btn.add(s_sponsor_input) + + clear_btn.add(s_disease_input) + + + + ## with gr.Row(): +##################################################################################################################################################################################### +################# # 3rd row################################################################# + with gr.Row(): +##################################################################################################################################################################### + with gr.Column(): + s_academia_input = gr.Textbox(lines=1, label="Type a Sponsor Name:") + clear_btn.add(s_academia_input) +################################################################################################################################################################# + with gr.Column(): + s_disease_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:") + clear_btn.add(s_disease_input_type) +##################################################################################################################################################################################### + + + + + with gr.Row(): + drug_summary_block = gr.HTML(label="Conditions and Drug Assets, Sponsors Now Recruiting for Clinical Trials:" ) + with gr.Row(): + sankey_map_drug = gr.Plot() + with gr.Row(): + drug_output_block_conditions = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors") + + clear_btn.add(drug_summary_block) + clear_btn.add(drug_output_block_conditions) + clear_btn.add(sankey_map_drug) + ############################################################################################################################################################################################ + +##################################################################### Country##################################################### + + + # with gr.TabItem("Countries"): +########################################################################## + # 1st Row +#################################################################################################################################################### + # with gr.Row(): + # gr.HTML(''' + #

Countries 'Now Recruiting':

+ #

1. Select a Sponsor, a Condition Name and click 'Show Countries'.

+ #

2. Filter by a Country, for example, 'United States','Germany' etc.

+ # ''') + + +##################################################################################################################################################### + # with gr.Row(): +############################################################################### + # with gr.Column(): + # sponsor_input_con = gr.Dropdown( +############################################################################ + # choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ + # "CSL Behring", "Daiichi Sankyo, Inc.",\ + # "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ + # "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ + # "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], + # label="Select a Sponsor" + # ) + +############################################################################################################################################################################################### + # with gr.Column(): + + # condition_input_con = gr.Dropdown( + # choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ + # "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ + # "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ + # "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ + # "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ + # "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ + ## "Urothelial Carcinoma",\ + # "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ + ## " Major","Metabolic", "Generalized Pustular Psoriasis",\ + # "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ + # "Liver Cirrhosis", \ + # "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + # "Psychological Trauma","Renal", "Respiratory",\ + # "Schizophrenia", "PTSD", \ + # "Venous Thromboembolism", "Wet"], + # label= "Select a Condition") + +############################################################################### + # with gr.Column(): + # country_input_tr = gr.Dropdown( + # choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\ + # "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\ + # "Malaysia","Mexico","Netherlands", \ + # "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\ + # "United Kingdom"\ + # ], + # label="Filter by a Country") +########################################################################################################################################################################################### +########################################################################################################################################################### + # with gr.Row(): + + + # condition_button = gr.Button("Show Countries") + # Then, create the clear button and add the dropdown input to it + # clear_cn_btn = gr.ClearButton() + # clear_cn_btn.add(condition_input_con) + #clear_cn_btn.add(sponsor_input_con) + + + # clear_cn_btn.add(country_input_tr) +################# # 3rd row################################################################# + # with gr.Row(): + ################################################################ ############## ############################################################################### + # with gr.Column(): + # academia_input_con = gr.Textbox(lines=1, label="Type a Sponsor Name:") + # clear_cn_btn.add(academia_input_con) + ################################################################ ############## ############################################################################### + # with gr.Column(): + # condition_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:") + # clear_cn_btn.add(condition_input_type) +############################################################################### + + + +############################################################################################################################################################################## + # with gr.Row(): + # summary_block_cond = gr.HTML(label="Countries with Recruiting Clinical Trials:" ) + # with gr.Row(): + #bubble_map_trial = gr.Plot() + # with gr.Row(): + # trial_countries = gr.Plot() + # with gr.Row(): + # condition_output = gr.HTML(label="List of Recruiting Trials") + # condition_output = gr.Textbox(label="List of Recruiting Trials") + ## clear output ? + # clear_cn_btn.add(summary_block_cond) + # clear_cn_btn.add(trial_countries) + #clear_cn_btn.add(bubble_map_trial) + # clear_cn_btn.add(condition_output) + + ############################################################ Site ############################# ##################################################################### + with gr.TabItem("Locations"): +############################################################### +#################################################################################################################################################### + with gr.Row(): + gr.HTML(''' +

Sites 'Now Recruiting':

+

1. Select a Condition, Sponsor and Country and click 'Show Sites'.

+

2. Review each Site and Cities with the Trial Ids and the Conditions.

+ ''') + + with gr.Row(): + +################################################################ + with gr.Column(): + condition_input_s = gr.Dropdown( + choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ + "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ + "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ + "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ + "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ + "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ + "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\ + "Urothelial Carcinoma",\ + "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ + "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ + " Major","Metabolic", "Generalized Pustular Psoriasis",\ + "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ + "Liver Cirrhosis", \ + "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + "Psychological Trauma","Renal", "Respiratory",\ + "Schizophrenia", "PTSD", \ + "Venous Thromboembolism", "Wet"], + label="Select a Condition") + +################################################################ + with gr.Column(): + +########################################################################################################################## + sponsor_input_con_s = gr.Dropdown( +############################################################################ +############################################################################ + choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ + "CSL Behring", "Daiichi Sankyo, Inc.",\ + "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ + "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ + "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], + label="Select a Sponsor" + ) + +################################################################################################################### +################################################################################################################################################# +################################################################ + +################################################################ + with gr.Column(): + country_input_s = gr.Dropdown( + choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\ + "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\ + "Malaysia","Mexico","Netherlands", \ + "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\ + "United Kingdom"\ + ], + label="Select a Country") + +############################################################### + with gr.Row(): + # with gr.Column(): + condition_button_s = gr.Button("Show Sites") + # Then, create the clear button and add the dropdown input to it + clear_cn_btn = gr.ClearButton() + clear_cn_btn.add(condition_input_s) + + clear_cn_btn.add(sponsor_input_con_s) + + clear_cn_btn.add(country_input_s) + +################# # 3rd row################################################################# + with gr.Row(): + ################################################################ + with gr.Column(): + academia_input_con_s = gr.Textbox(lines=1, label="Type a Sponsor Name:") + clear_cn_btn.add(academia_input_con_s) + +################################################################ + with gr.Column(): + condition_input_site = gr.Textbox(lines=1, label="Filter by typing a Condition:") + clear_cn_btn.add(condition_input_site) +############################################################################################################################################# + + +################################################################################################################################################## + +################################################################################################################################# + + with gr.Row(): + #summary_block = gr.outputs.Textbox(label="Conditions and Sponsors Now Recruiting for Clinical Trials:") + summary_block_cond_s = gr.HTML(label="Sites where Sponsors Now Recruiting for Clinical Trials:" ) + #with gr.Row(): + + #world_map = gr.Plot() + + with gr.Row(): + site_cond = gr.Plot() +#################################################################################################################################################### + with gr.Row(): + gr.HTML('

Recruiting Sites with Trial Ids and Conditions

') + with gr.Row(): + + country_site =gr.Plot() + + + with gr.Row(): + + condition_output_s = gr.HTML(label="List of Recruiting Trials for Country, Sites") + + ## clear output ? + clear_cn_btn.add(summary_block_cond_s) + clear_cn_btn.add(condition_output_s) + clear_cn_btn.add(country_site) + clear_cn_btn.add(site_cond) + + + + +############################################################################# TIMELINES ############################################################################# + + # with gr.TabItem("Timeline"): +############################################################## + # with gr.Row(): +#################################################################################################################################################### + + # gr.HTML(''' + #

Timelines for 'Now Recruiting' Trials:

+ #

1. Select a Sponsor and click 'Show Timelines'.

+ #

2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.

+ # ''') + + + +########################################################################################### + # with gr.Row(): + + # with gr.Column(): + ########################################################################################################################################## + # sponsor_input_cont = gr.Dropdown( +############################################################################ + # choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \ + # "CSL Behring", "Daiichi Sankyo, Inc.",\ + # "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \ + # "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\ + # "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"], + # label="Select a Sponsor" + # ) +############################################################################################################################################################### + # with gr.Column(): + # condition_input_cont= gr.Dropdown( + + # choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\ + # "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\ + # "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\ + # "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\ + # "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\ + # "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\ + ## "Urothelial Carcinoma",\ + # "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \ + # "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\ + # " Major","Metabolic", "Generalized Pustular Psoriasis",\ + # "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\ + # "Liver Cirrhosis", \ + # "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis", \ + # "Psychological Trauma","Renal", "Respiratory",\ + # "Schizophrenia", "PTSD", \ + # "Venous Thromboembolism", "Wet"], + # label="Filter by a Condition") + +############################################################################################################################################################### + # with gr.Column(): + # country_input_trt = gr.Dropdown( + # choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\ + # "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\ + # "Malaysia","Mexico","Netherlands", \ + # "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\ + # "United Kingdom"\ + # ], + # label="Filter by a Country") + +########################################################################################### + # with gr.Row(): + # condition_button_t = gr.Button("Show Timelines") + # Then, create the clear button and add the dropdown input to it + # clear_cn_btn = gr.ClearButton() + # clear_cn_btn.add(condition_input_cont) + # clear_cn_btn.add(sponsor_input_cont) + + # clear_cn_btn.add(country_input_trt) +########################################################################################### + # with gr.Row(): + ############################################################################################################################################################### + # with gr.Column(): + # academia_input_cont = gr.Textbox(lines=1, label="Type a Sponsor Name:") + # clear_cn_btn.add(academia_input_cont) + ############################################################################################################################################################### + #with gr.Column(): + # condition_input_typet = gr.Textbox(lines=1, label="Filter by typing a Condition:") + # clear_cn_btn.add(condition_input_typet) + + + + + ################################################################################################################################## +########################################################################################### + # with gr.Row(): + # summary_block_condt = gr.HTML(label="Countries with Recruiting Clinical Trials:" ) + ########################################################################################### + # with gr.Row(): + # bubble_map_trial = gr.Plot() +########################################################################################### + # with gr.Row(): + # condition_outputt = gr.HTML(label="List of Recruiting Trials") + # condition_output = gr.Textbox(label="List of Recruiting Trials") + ## clear output ? + # clear_cn_btn.add(summary_block_condt) + # clear_cn_btn.add(bubble_map_trial) + # clear_cn_btn.add(condition_outputt) + + ############################################################ Eligibility ############## + + + + with gr.TabItem("Eligibility"): + +#################################################################################################################################################### + with gr.Row(): + gr.HTML(''' +

Eligibility Crietria for a Trial:

+

1. Type a single Trial's NCT Id,For Example: NCT05512377 or NCT04924075 or NCT04419506 etc. and click 'Show Eligibility'.

+

2. Inclusion and Exclusion Criteria for that single Trial are displayed with the Diseases, Diagnostic Procedures and Medications highlighted.

+

3. Wait time approximately 30 seconds for the model to run and highlight eligibility text.

+ ''') + + + + with gr.Row(): + + #nctID_input = gr.inputs.Textbox(lines=1, label="Type Trial NctId:") + nctID_input = gr.Textbox(lines=1, label="Type a Trial NCT Id: ") + trial_button = gr.Button("Show Eligibility") + #Then, create the clear button and add the dropdown input to it + clear_tn_btn = gr.ClearButton() + clear_tn_btn.add(nctID_input ) + + # with gr.Row(): + # with gr.Column(): + # formatted_inclusions_output = gr.HTML(label="Inclusions") + # with gr.Column(): + # formatted_exclusions_output = gr.HTML(label="Exclusions") + + ################################################################################################################################ + ############################################################################################### + with gr.Row(): + trial_output = gr.HTML(label="Detail of Recruiting Trials") +################################################ + with gr.Row(): + # with gr.Column(): + eligibilities_plot = gr.Plot() + # with gr.Column(): + # concept_exclusion= gr.HighlightedText(label="Display of Exclusion Concepts") + + clear_tn_btn.add(trial_output) + # clear_tn_btn.add(formatted_inclusions_output) + # clear_tn_btn.add(formatted_exclusions_output) + clear_tn_btn.add(eligibilities_plot) + # clear_tn_btn.add(concept_exclusion) + + + ############################################################################################################################################## + + + + + + ################################ EVENT BUTTONS at GRADIO ################################################################################################################################ + + ## Sponsors + #sponsor_button.click(disease_view, inputs=[disease_input,disease_input_text, sponsor_input, academia_input], outputs=[summary_block,summary_block_collbs,\ + # sponsor_button.click(disease_view, inputs=[disease_input, sponsor_input], outputs=[summary_block,summary_block_collbs,\ + # output_block_conditions,output_block_conditions_collbs,\ + #condition_others,\ + #condition_sunbursts, + # sponsor_trees\ + # ,collaborator_trees\ + # ]) + + ## Conditions + # s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc,s_disease_input_type_phc, s_sponsor_input_phc,s_academia_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\ + s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc, s_sponsor_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\ + tree_map_cond_nct,sunburst_map_cond_nct]) + + + + ## Trials + + # s_button_phs.click(disease_view_phs, inputs=[s_disease_input_phs,s_disease_input_type_phs, s_sponsor_input_phs,s_academia_input_phs], outputs=[summary_block_phs, output_block_conditions_phs,\ + # nct_org_map]) + + + #s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\ + # tree_map_cond_nct_n, nct_org_map_n,trial_plot]) + + # s_button_phs_n.click(disease_view_phs_n, inputs=[s_disease_input_phs_n,s_disease_input_type_phs_n, s_sponsor_input_phs_n,s_academia_input_phs_n], outputs=[summary_block_phs_n, output_block_conditions_phs_n,\ + # tree_map_cond_nct_n, trial_plot]) + + + + + + # s_button_phs_c.click(disease_view_phs_c, inputs=[s_disease_input_phs_c,s_disease_input_type_phs_c, s_sponsor_input_phs_c,s_academia_input_phs_c], outputs=[summary_block_phs_c, output_block_conditions_phs_c,\ + # tree_map_cond_nct_c, nct_org_map_c,trial_plot_c, time_plot_c]) + + ### Drugs + + s_drug_button.click(drug_view, inputs=[s_disease_input, s_disease_input_type, s_sponsor_input, s_academia_input], outputs=[drug_summary_block,drug_output_block_conditions, sankey_map_drug ]) + + + ## Country + # condition_button.click(condition_view, inputs=[condition_input_con, country_input_tr,condition_input_type, sponsor_input_con, academia_input_con], outputs=[summary_block_cond,condition_output,trial_countries]) + + ## Site + + condition_button_s.click(condition_view_s, inputs=[condition_input_s, country_input_s, condition_input_site,sponsor_input_con_s, academia_input_con_s], \ + outputs=[summary_block_cond_s,condition_output_s, site_cond,country_site]) + + ##Timelines + + condition_button_t.click(condition_viewt, inputs=[condition_input_cont, country_input_trt,condition_input_typet, sponsor_input_cont, academia_input_cont], outputs=[summary_block_condt,condition_outputt,bubble_map_trial]) + + ## Map + + # Test this way NCT04419506 + # trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output, formatted_inclusions_output,formatted_exclusions_output,concept_inclusion,concept_exclusion]) + # Test this way NCT04419506 + # trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output,concept_inclusion,concept_exclusion]) + trial_button.click(trial_view, inputs=[nctID_input], outputs=[trial_output,eligibilities_plot]) + + + + # trial_buttons.click(trial_view_map, inputs=[nctID_inputs], outputs=[summary_block_trial_map, world_map,trial_output_map]) + +trial_app.launch(share=True) + + +#trial_app.launch(share=True, debug = "TRUE") + +import requests +import json + +# Define the base URL for the new API +base_url = "https://clinicaltrials.gov/api/v2/studies" + +# Define the parameters for the API call +params = { + "query.lead": "Boehringer Ingelheim", # Query the lead sponsor field + "filter.overallStatus": "RECRUITING", # Filter by the overall status + "fields": "protocolSection.identificationModule.nctId" # Use the correct field name for the NCTId +} + +# Send the API request +response = requests.get(base_url, params=params) +# Check the API response status +#print("API response status code:", response.status_code) +# Check the API response content +#print("API response content:", response.text) + +# If the status code is 200, parse the JSON response +if response.status_code == 200: + data = response.json() + # Extract the NCTIDs from the response + nct_ids = [study['protocolSection']['identificationModule']['nctId'] for study in data['studies']] + # Create a DataFrame from the list + df = pd.DataFrame(nct_ids, columns=['NCTId']) + + # Print the DataFrame + print(df) \ No newline at end of file