#pip install  gradio
#transformers


########### AGENT: ClinicalTrials.gov ###################################################################################################
##Gradio App: TRIAL CONNECT
#Author: Tamer Chowdhury, Nov 2024
#tamer.chowdhury@gmail.com
##################################################################################################################################

import gradio as gr
from gradio import Interface
from gradio import Dropdown
import io
import re
import pandas as pd
import textwrap
from IPython.display import display
import requests
#from _plotly_utils.utils import NotEncodable
from IPython.core.display import display_markdown


######################### from CLAUDE###########################################
import aiohttp
import asyncio
import pandas as pd
import io
import json

async def fetch(session, url, params):
    """GET ``url`` with ``params`` through ``session`` and return the parsed JSON body.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async context
            manager exposing ``status`` and an awaitable ``text()``.
        url: Endpoint to request.
        params: Query parameters forwarded to ``session.get``.

    Returns:
        The decoded JSON payload when the response status is 200, otherwise
        ``None`` (non-200 status, undecodable body, or any raised exception).
        A diagnostic line is printed for every failure path.
    """
    try:
        async with session.get(url, params=params) as resp:
            # Guard clause: anything but 200 is reported and abandoned.
            if resp.status != 200:
                print(f"HTTP Error: {resp.status}")
                print(f"Response text: {await resp.text()}")
                return None

            body = await resp.text()
            try:
                return json.loads(body)
            except json.JSONDecodeError:
                print(f"Failed to decode JSON: {body[:200]}...")
                # Re-raised on purpose: the outer handler logs it and returns None.
                raise
    except Exception as exc:
        print(f"Error in fetch: {str(exc)}")
        return None

async def get_nct_ids(lead_sponsor_name=None, disease_area=None, overall_status=None,
                     location_country=None, NCTId=None, max_records=None, blocks=30):
    """Query the ClinicalTrials.gov v2 ``/studies`` endpoint and flatten the results.

    Pages through the endpoint with ``pageToken`` until there are no more
    pages, a page comes back empty or invalid, or ``max_records`` is reached.

    Args:
        lead_sponsor_name: Optional search term sent as ``query.lead``.
        disease_area: Optional search term sent as ``query.cond``.
        overall_status: Optional status, upper-cased and sent as
            ``filter.overallStatus``.
        location_country: Optional search term sent as ``query.locn``.
        NCTId: Optional trial identifier sent as ``query.id``. When given, the
            other search terms and the status filter are not sent.
        max_records: Optional positive integer cap on the number of studies
            requested and returned.
        blocks: Unused; kept so callers that pass it keep working.

    Returns:
        list[dict]: One flat dict per study with the keys ``NCTId``, ``Phase``,
        ``OrgStudyId``, ``Status``, ``Condition``, ``CompletionDate``,
        ``EnrollmentCount``, ``StudyType``, ``Arm``, ``Drug``, ``Country``,
        ``City``, ``Site``, ``StudyPopulation``, ``Sponsor``, ``Collaborator``,
        ``StartDate``, ``PrimaryMeasure``, ``Purpose`` and ``BriefTitle``.
        Empty when the request fails or nothing matches.
    """
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    # Define the fields we want to retrieve (mapped to v2 API structure)
    fields = [
        "protocolSection.identificationModule.nctId",
        "protocolSection.identificationModule.orgStudyIdInfo",
        "protocolSection.identificationModule.briefTitle",
        "protocolSection.conditionsModule.conditions",
        "protocolSection.designModule.phases",
        "protocolSection.statusModule.overallStatus",
        "protocolSection.statusModule.primaryCompletionDateStruct",
        "protocolSection.designModule.enrollmentInfo",
        "protocolSection.designModule.studyType",
        "protocolSection.eligibilityModule.studyPopulation",
        "protocolSection.contactsLocationsModule.locations",
        "protocolSection.designModule.designInfo",
        "protocolSection.armsInterventionsModule.armGroups",
        "protocolSection.sponsorCollaboratorsModule.leadSponsor",
        # Needed by _get_collaborators(); without it the 'Collaborator'
        # value is always empty because the API only returns listed fields.
        "protocolSection.sponsorCollaboratorsModule.collaborators",
        "protocolSection.armsInterventionsModule.interventions",
        "protocolSection.outcomesModule.primaryOutcomes",
        "protocolSection.statusModule.startDateStruct"
    ]

    # Only a positive integer is treated as a usable cap.
    record_limit = max_records if isinstance(max_records, int) and max_records > 0 else None

    # Build base parameters; do not ask for a bigger page than the cap allows.
    params = {
        "format": "json",
        "fields": ",".join(fields),
        "pageSize": str(min(1000, record_limit)) if record_limit else "1000",
        "countTotal": "true"
    }

    print("Constructing query...")

    # Build query parameters
    # NOTE(review): these values go through `params`, which the HTTP client
    # URL-encodes itself, so the literal '+' inserted here is presumably sent
    # as '%2B' rather than as a space -- confirm the replace() is still wanted.
    if NCTId:
        params["query.id"] = NCTId
    else:
        if disease_area:
            params["query.cond"] = disease_area.replace(" ", "+")
        if lead_sponsor_name:
            params["query.lead"] = lead_sponsor_name.replace(" ", "+")
        if location_country:
            params["query.locn"] = location_country.replace(" ", "+")
        if overall_status:
            params["filter.overallStatus"] = overall_status.upper()

    print(f"Full parameters: {params}")

    all_studies = []
    next_page_token = None

    async with aiohttp.ClientSession() as session:
        while True:
            try:
                if next_page_token:
                    params["pageToken"] = next_page_token

                response_data = await fetch(session, base_url, params)

                if not response_data or not isinstance(response_data, dict):
                    print("Invalid response data")
                    break

                studies = response_data.get('studies', [])
                if not studies:
                    print("No more studies found")
                    break

                all_studies.extend(studies)
                print(f"Retrieved {len(studies)} studies. Total so far: {len(all_studies)}")

                # Debugging aid: show the first study once, after the first page only.
                if len(all_studies) == len(studies):
                    first_study = all_studies[0]
                    print("\nFirst study example:")
                    print(f"NCT ID: {_get_nested_value(first_study, ['protocolSection', 'identificationModule', 'nctId'])}")
                    print(f"Status: {_get_nested_value(first_study, ['protocolSection', 'statusModule', 'overallStatus'])}")

                next_page_token = response_data.get('nextPageToken')
                if not next_page_token or (max_records and len(all_studies) >= max_records):
                    break

            except Exception as e:
                print(f"Error processing page: {str(e)}")
                break

    # The last page may overshoot the cap; trim so the cap is actually honoured.
    if record_limit:
        all_studies = all_studies[:record_limit]

    # Convert all studies to the required format
    recruiting_trials_list = []

    for study in all_studies:
        try:
            # Status filtering is handled by the API, so no filtering happens here.
            trial_info = {
                'NCTId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'nctId']),
                'Phase': _get_first_item(study, ['protocolSection', 'designModule', 'phases']),
                'OrgStudyId': _get_nested_value(study, ['protocolSection', 'identificationModule', 'orgStudyIdInfo', 'id']),
                'Status': _get_nested_value(study, ['protocolSection', 'statusModule', 'overallStatus']),
                'Condition': '|'.join(_get_nested_value(study, ['protocolSection', 'conditionsModule', 'conditions'], [])),
                'CompletionDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'primaryCompletionDateStruct', 'date']),
                'EnrollmentCount': _get_nested_value(study, ['protocolSection', 'designModule', 'enrollmentInfo', 'count']),
                'StudyType': _get_nested_value(study, ['protocolSection', 'designModule', 'studyType']),
                'Arm': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'armGroups'], 'label'),
                'Drug': _get_first_item(study, ['protocolSection', 'armsInterventionsModule', 'interventions'], 'name'),
                'Country': _get_location_info(study, 'country'),
                'City': _get_location_info(study, 'city'),
                'Site': _get_location_info(study, 'facility'),
                'StudyPopulation': _get_nested_value(study, ['protocolSection', 'eligibilityModule', 'studyPopulation']),
                'Sponsor': _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'leadSponsor', 'name']),
                'Collaborator': _get_collaborators(study),
                'StartDate': _get_nested_value(study, ['protocolSection', 'statusModule', 'startDateStruct', 'date']),
                'PrimaryMeasure': _get_first_item(study, ['protocolSection', 'outcomesModule', 'primaryOutcomes'], 'measure'),
                'Purpose': _get_nested_value(study, ['protocolSection', 'designModule', 'designInfo', 'primaryPurpose']),
                'BriefTitle': _get_nested_value(study, ['protocolSection', 'identificationModule', 'briefTitle'])
            }
            recruiting_trials_list.append(trial_info)

        except Exception as e:
            # A malformed study is reported and skipped rather than aborting the batch.
            print(f"Error processing study: {str(e)}")
            continue

    print(f"Total studies processed: {len(recruiting_trials_list)}")
    return recruiting_trials_list

# Helper functions for pulling values out of the nested study JSON
def _get_nested_value(obj, path, default=None):
    try:
        current = obj
        for key in path:
            if current is None:
                return default
            current = current.get(key)
        return current if current is not None else default
    except (KeyError, TypeError, AttributeError):
        return default

def _get_first_item(obj, path, field=None):
    """Return the first element of the list found at ``path`` (or one field of it).

    Args:
        obj: Root object handed to ``_get_nested_value``.
        path: Keys leading to the list.
        field: When truthy, ``.get(field)`` of the first element is returned
            instead of the element itself.

    Returns:
        The first element (or its ``field``), or ``None`` when the value at
        ``path`` is not a non-empty list or the lookup raises
        IndexError / AttributeError.
    """
    try:
        entries = _get_nested_value(obj, path, [])
        if not (entries and isinstance(entries, list)):
            return None
        head = entries[0]
        return head.get(field) if field else head
    except (IndexError, AttributeError):
        return None

def _get_location_info(study, info_type):
    """Join one attribute of every study location into a ``|``-separated string.

    Args:
        study: Study dict; locations are read from
            ``protocolSection.contactsLocationsModule.locations``.
        info_type: Key to read from each location (the callers in this file
            pass ``'country'``, ``'city'`` and ``'facility'``).

    Returns:
        str | None: The truthy values joined with ``|`` (empty string when
        there are none), or ``None`` if anything raises.
    """
    # NOTE(review): locations lacking this attribute are skipped, so the
    # positions in the Country / City / Site strings are not guaranteed to
    # line up with each other -- confirm downstream code does not rely on that.
    try:
        sites = _get_nested_value(study, ['protocolSection', 'contactsLocationsModule', 'locations'], [])
        found = []
        for site in sites:
            value = site.get(info_type)
            if value:
                found.append(value)
        return '|'.join(found)
    except Exception:
        return None

def _get_collaborators(study):
    """Join the names of a study's collaborators into a ``|``-separated string.

    Args:
        study: Study dict; collaborators are read from
            ``protocolSection.sponsorCollaboratorsModule.collaborators``.

    Returns:
        str | None: Non-empty collaborator names joined with ``|`` (empty
        string when there are none), or ``None`` if anything raises.
    """
    try:
        partners = _get_nested_value(study, ['protocolSection', 'sponsorCollaboratorsModule', 'collaborators'], [])
        names = []
        for partner in partners:
            if partner.get('name'):
                names.append(partner.get('name', ''))
        return '|'.join(names)
    except Exception:
        return None
########### ClinicalTrials.gov API for study fields with Recruiting Trials Only ###################################

################# FROM CLAUDE API FOR ELIGIBILITY###############################
import requests
import re

def get_formatted_inclusion_criteria(nct_id):
    """
    Get and format inclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    The eligibility text is cut at the first "Exclusion Criteria" heading, the
    part before it is split into lines, the "Inclusion Criteria" header and
    bullet markers are removed, and the remaining lines are numbered.

    Args:
        nct_id (str): The NCT ID of the trial

    Returns:
        str: Formatted inclusion criteria as a numbered list (possibly empty
        when no usable lines remain), or None if the trial is not found or
        the request / parsing fails
    """
    # V2 API endpoint
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    # Parameters for the API request
    params = {
        "format": "json",
        "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
        "query.id": nct_id
    }

    try:
        # Without a timeout a stalled connection would block the caller forever;
        # a timeout surfaces as a RequestException handled below.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        # Extract the eligibility criteria text from the v2 API response
        if not data.get('studies'):
            print(f"No data found for Trial NCT ID: {nct_id}")
            return None

        eligibility_criteria = data['studies'][0]['protocolSection']['eligibilityModule']['eligibilityCriteria']

        # Keep only the text before the "Exclusion Criteria" heading
        inclusion_criteria = re.split(r'\b(?:Exclusion\s+Criteria:?)\b', eligibility_criteria, flags=re.IGNORECASE)[0].strip()

        # One candidate criterion per line (handles \n and \r\n, collapses blank lines)
        inclusions = re.split(r'\r?\n+', inclusion_criteria)

        cleaned_inclusions = []
        for inclusion in inclusions:
            inclusion = inclusion.strip()
            # Skip blanks, the "Inclusion Criteria" header and bullet-only lines
            if (inclusion and
                not re.search(r'^\s*inclusion\s+criteria:?\s*$', inclusion, flags=re.IGNORECASE) and
                not re.search(r'^\s*[-•*]\s*$', inclusion)):

                # Remove bullet points and dashes at the start of lines
                inclusion = re.sub(r'^\s*[-•*]\s*', '', inclusion)

                # Add to cleaned list if not empty after cleanup
                if inclusion:
                    cleaned_inclusions.append(inclusion)

        # Format the list as a numbered list, each item ending with a period
        formatted_inclusions = []
        for i, inclusion in enumerate(cleaned_inclusions, 1):
            if not inclusion.endswith('.'):
                inclusion = inclusion + '.'
            formatted_inclusions.append(f"{i}. {inclusion}")

        # Join the list into a single string
        return "\n".join(formatted_inclusions)

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except (IndexError, KeyError) as e:
        # The response lacked the expected nesting (e.g. no eligibilityModule)
        print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
        return None


#########################################################################################################


## ############################API For Exclusions###################################################################################################################################################
def get_formatted_exclusion_criteria(nct_id):
    """
    Get and format exclusion criteria for a clinical trial using ClinicalTrials.gov API v2

    Everything after the first "Exclusion Criteria" heading in the eligibility
    text is split into lines, bullet markers are removed, and the remaining
    lines are numbered.

    Args:
        nct_id (str): The NCT ID of the trial

    Returns:
        str: Formatted exclusion criteria as a numbered list (possibly empty
        when no usable lines remain), or None if the trial or the exclusion
        section is not found, or the request / parsing fails
    """
    # V2 API endpoint
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    # Parameters for the API request
    params = {
        "format": "json",
        "fields": "protocolSection.eligibilityModule.eligibilityCriteria",
        "query.id": nct_id
    }

    try:
        # Without a timeout a stalled connection would block the caller forever;
        # a timeout surfaces as a RequestException handled below.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()

        # Extract the eligibility criteria text from the v2 API response
        if not data.get('studies'):
            print(f"No data found for Trial NCT ID: {nct_id}")
            return None

        eligibility_criteria = data['studies'][0]['protocolSection']['eligibilityModule']['eligibilityCriteria']

        # Split at the first "Exclusion Criteria" heading and keep the rest.
        # - The optional colon sits *after* the word boundary so it is consumed
        #   with the heading; previously it was left behind and became a
        #   bogus "1. :." entry.
        # - maxsplit=1 keeps the whole remainder even when the phrase appears
        #   again later in the text.
        try:
            exclusion_criteria = re.split(
                r'\bExclusion\s+Criteria\b\s*:?',
                eligibility_criteria,
                maxsplit=1,
                flags=re.IGNORECASE,
            )[1].strip()
        except IndexError:
            # Try the looser line-anchored pattern if the first one doesn't match
            try:
                exclusion_criteria = re.split(
                    r'(?i)(?:^|\n)\s*exclusion criteria\s*[:|-]?',
                    eligibility_criteria,
                    maxsplit=1,
                )[1].strip()
            except IndexError:
                print(f"Could not find exclusion criteria section for Trial NCT ID: {nct_id}")
                return None

        # One candidate criterion per line (handles \n and \r\n, collapses blank lines)
        exclusions = re.split(r'\r?\n+', exclusion_criteria)

        cleaned_exclusions = []
        for exclusion in exclusions:
            exclusion = exclusion.strip()
            # Skip blank lines and lines that hold only a bullet marker
            if exclusion and not re.search(r'^\s*[-•*]\s*$', exclusion):

                # Remove bullet points and dashes at the start of lines
                exclusion = re.sub(r'^\s*[-•*]\s*', '', exclusion)

                # Add to cleaned list if not empty after cleanup
                if exclusion:
                    cleaned_exclusions.append(exclusion)

        # Format the list as a numbered list, each item ending with a period
        formatted_exclusions = []
        for i, exclusion in enumerate(cleaned_exclusions, 1):
            if not exclusion.endswith('.'):
                exclusion = exclusion + '.'
            formatted_exclusions.append(f"{i}. {exclusion}")

        # Join the list into a single string
        return "\n".join(formatted_exclusions)

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except (IndexError, KeyError) as e:
        # The response lacked the expected nesting (e.g. no eligibilityModule)
        print(f"Error processing data for Trial NCT ID {nct_id}: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error for Trial NCT ID {nct_id}: {str(e)}")
        return None


#################################  Apply CSS Style to HTML Table ##############################################################################################################

def dataframe_to_html_table(df):
    """Render ``df`` as an HTML table wrapped in a scrollable container.

    The returned markup is an inline ``<style>`` block followed by a
    ``<div class="table-container">`` holding ``df.to_html(...)`` output
    (classes ``table table-striped``, no index column, ``border=0``).

    Args:
        df: DataFrame to render.

    Returns:
        str: The stylesheet and the wrapped table as one HTML string.
    """
    stylesheet = """
    <style>
        .table-container {
            width: 100%;
            max-width: 100%;
            margin-bottom: 1rem;
            overflow-x: auto;
            overflow-y: auto;
            max-height: 400px;
        }
        .table {
            width: 100%;
            max-width: 100%;
            margin-bottom: 1rem;
            border-collapse: collapse;
            white-space: nowrap;
        }
        .table-striped tbody tr:nth-of-type(odd) {
            background-color: rgba(0, 0, 0, 0.05);
        }
        th, td {
            padding: 0.75rem;
            vertical-align: top;
            border-top: 1px solid #dee2e6;
            white-space: normal;
            overflow-wrap: break-word;
            max-width: 150px;
        }
        /* Set the width of the 'BriefTitle' and 'Arm' columns */
        .table td:nth-child(3), .table td:nth-child(11) {
            width: 300px;
        }
        thead th {
            position: sticky;
            top: 0;
            background-color: white;
            z-index: 1;
        }
        thead th::before {
            content: "";
            position: absolute;
            left: 0;
            width: 100%;
            height: 100%;
            border-right: 1px solid #dee2e6;
            background-color: white;
            z-index: -1;
        }
    </style>
    """
    table_markup = df.to_html(classes="table table-striped", index=False, border=0)
    return f'{stylesheet}<div class="table-container">{table_markup}</div>'

##################################################################################################################################
def format_summary_stats(summary):
    """Wrap ``summary`` in a bold, 20px, black ``<div id="summary-output">``.

    Args:
        summary: Text (may contain HTML such as ``<br>``) inserted verbatim
            into the div.

    Returns:
        str: An inline ``<style>`` block followed by the div.
    """
    style_block = (
        "\n    <style>\n"
        "        #summary-output {\n"
        "            font-weight: bold;\n"
        "            font-size: 20px;\n"
        "            color: black;\n"
        "        }\n"
        "    </style>\n"
    )
    summary_div = f'    <div id="summary-output">{summary}</div>\n    '
    return style_block + summary_div
############################ End of Style #############################################################################################


############### Functions to Process the Dataframes of Disease, Conditions, Trial Details####################################


# parse the conditions
import re
import pandas as pd
#######################################################################################################
def split_conditions(df, column_to_split):
    """Expand ``df`` so every comma- or pipe-separated value gets its own row.

    Args:
        df: Source DataFrame (not modified).
        column_to_split: Name of the column whose cells are split on ``,``
            or ``|``.

    Returns:
        pandas.DataFrame: A new frame with the same columns as ``df``; each
        input row is repeated once per split value, with that value
        (whitespace-stripped) in ``column_to_split``. An empty input yields an
        empty frame that still has the input's columns.
    """
    new_rows = []

    for _, row in df.iterrows():
        # str() so non-string cells can be split too; raw-string pattern
        # avoids the invalid "\|" escape of a plain string literal.
        for condition in re.split(r'[,|]', str(row[column_to_split])):
            new_row = {col: row[col] for col in df.columns}
            new_row[column_to_split] = condition.strip()
            new_rows.append(new_row)

    # Passing columns keeps the schema even when there are no rows at all.
    return pd.DataFrame(new_rows, columns=df.columns)

#########################################################################################################################
def split_drug(df, column_to_split):
    """Expand ``df`` so every comma- or pipe-separated drug gets its own row.

    Args:
        df: Source DataFrame (not modified).
        column_to_split: Name of the column whose cells are split on ``,``
            or ``|``.

    Returns:
        pandas.DataFrame: A new frame with the same columns as ``df``; each
        input row is repeated once per split value, with that value
        (whitespace-stripped) in ``column_to_split``. An empty input yields an
        empty frame that still has the input's columns.
    """
    new_rows = []

    for _, row in df.iterrows():
        # str() so non-string cells can be split too; raw-string pattern
        # avoids the invalid "\|" escape of a plain string literal.
        for drug in re.split(r'[,|]', str(row[column_to_split])):
            new_row = {col: row[col] for col in df.columns}
            new_row[column_to_split] = drug.strip()
            new_rows.append(new_row)

    # Passing columns keeps the schema even when there are no rows at all.
    return pd.DataFrame(new_rows, columns=df.columns)

############################################################################################################################################
#############################################################################################

def split_columns(df, columns_to_split):
    """Expand pipe-separated cells of several columns into parallel rows.

    For each input row every column in ``columns_to_split`` is split on ``|``;
    the row is repeated as many times as the longest split, and the i-th
    output row receives the i-th piece of each column (``None`` where a column
    has fewer pieces). All other columns are copied unchanged.

    Args:
        df: Source DataFrame (not modified).
        columns_to_split: Names of the columns to split in lock-step.

    Returns:
        pandas.DataFrame: A new frame with the same column order as ``df`` and
        a fresh 0..n-1 index. An empty ``df`` yields an empty frame with the
        same columns; an empty ``columns_to_split`` yields one output row per
        input row.
    """
    expanded_rows = []

    for _, row in df.iterrows():
        # Split each cell once and reuse the pieces for every output row.
        pieces_by_col = {col: str(row[col]).split('|') for col in columns_to_split}
        # default=1: with nothing to split, the row is still emitted once.
        max_splits = max((len(pieces) for pieces in pieces_by_col.values()), default=1)

        for i in range(max_splits):
            new_row = {col: row[col] for col in df.columns}
            for col, pieces in pieces_by_col.items():
                new_row[col] = pieces[i] if i < len(pieces) else None
            expanded_rows.append(new_row)

    # A single constructor call replaces per-row frames + concat, which raised
    # on an empty input; columns= preserves both order and schema.
    return pd.DataFrame(expanded_rows, columns=df.columns)


################## INTERVENTIONAL, OBSERVATIONAL Trials Lead Sponsor Counts##################################################
def _study_type_summary_lines(df):
    """Return one summary sentence each for INTERVENTIONAL and OBSERVATIONAL trials.

    Each sentence reports the number of distinct ``NCTId`` values, the number
    of distinct conditions (ignoring missing/blank values and the
    placeholders 'Healthy' and 'Adult'), and the planned patients, counted
    once per trial from ``EnrollmentCount``.
    """
    ignored_conditions = ('Healthy', 'Adult')
    lines = []

    for study_type in ("INTERVENTIONAL", "OBSERVATIONAL"):
        subset = df[df['StudyType'] == study_type].copy()

        # Non-numeric enrollment values become NaN and are skipped by the sum.
        subset['EnrollmentCount'] = pd.to_numeric(subset['EnrollmentCount'], errors='coerce')

        num_trials = subset['NCTId'].nunique()

        num_conditions = sum(
            1
            for condition in subset['Condition'].dropna().unique()
            if str(condition).strip() and condition not in ignored_conditions
        )

        # A trial can span several rows; take its enrollment once.
        total_patients = subset.groupby('NCTId')['EnrollmentCount'].first().sum()

        lines.append(
            f"{num_trials} {study_type} Trials, "
            f"{num_conditions} Conditions, "
            f"{int(total_patients):,} Planned Patients."
        )

    return lines


def calculate_summary_stats(df, sponsor):
    """Build the lead-sponsor summary as an HTML snippet.

    Args:
        df: DataFrame with ``StudyType``, ``NCTId``, ``Condition`` and
            ``EnrollmentCount`` columns.
        sponsor: Sponsor name for the heading; a falsy value shows
            "All Lead Sponsors".

    Returns:
        str: Heading plus one line per study type, separated by ``<br>``.
    """
    sponsor_name = sponsor if sponsor else "All Lead Sponsors"
    return f"{sponsor_name} - As Lead Sponsor: <br>" + "<br>".join(_study_type_summary_lines(df))

############################################################################################################################################

def calculate_summary_stats_collb(df, sponsor):
    """Build the collaborator summary as an HTML snippet.

    Args:
        df: DataFrame with ``StudyType``, ``NCTId``, ``Condition`` and
            ``EnrollmentCount`` columns.
        sponsor: Sponsor name for the heading; a falsy value shows
            "All Collaborators".

    Returns:
        str: Heading plus one line per study type, separated by ``<br>``.
    """
    sponsor_name = sponsor if sponsor else "All Collaborators"
    return f"{sponsor_name} - With Collaborators Recruiting For: <br>" + "<br>".join(_study_type_summary_lines(df))


##################################################################################################################

def calculate_summary_stats_sites(df, sponsor, country):
    """Build the per-site summary (trials, countries, sites, patients) as HTML.

    Args:
        df: DataFrame with ``NCTId``, ``StudyType``, ``Country``, ``City``,
            ``Site`` and ``EnrollmentCount`` columns.
        sponsor: Sponsor name for the heading; a falsy value shows
            "All Sponsors".
        country: When truthy, only rows whose ``Country`` equals it are used.

    Returns:
        str: Heading, then the INTERVENTIONAL line (with planned patients) and
        the OBSERVATIONAL line (without), separated by ``<br>``.
    """
    # Filter the data frame by the country if a country is provided
    if country:
        df = df[df['Country'] == country]

    # One row per trial so enrollment is not counted once per site.
    per_trial = df.groupby(['NCTId', 'StudyType']).first().reset_index()
    per_trial['EnrollmentCount'] = pd.to_numeric(per_trial['EnrollmentCount'], errors='coerce')

    def _counts(study_type):
        """Return (trials, countries, sites) for one study type."""
        rows = df[df['StudyType'] == study_type]
        trials = per_trial.loc[per_trial['StudyType'] == study_type, 'NCTId'].nunique()
        countries = rows['Country'].nunique()
        # A site is a distinct (Country, City, Site) combination.
        sites = len(rows.groupby(['Country', 'City', 'Site']).size())
        return trials, countries, sites

    int_trials, int_countries, int_sites = _counts('INTERVENTIONAL')
    obs_trials, obs_countries, obs_sites = _counts('OBSERVATIONAL')

    int_patients = int(
        per_trial.loc[per_trial['StudyType'] == 'INTERVENTIONAL', 'EnrollmentCount'].sum()
    )

    sponsor_name = sponsor if sponsor else "All Sponsors"

    # Built from adjacent literals so no source indentation leaks into the output.
    return (
        f"{sponsor_name} <br> {int_trials} INTERVENTIONAL Trials, "
        f"in {int_countries} Country, at {int_sites} Sites, "
        f"Recruiting: {int_patients:,} Planned Patients. <br> "
        f"{obs_trials} OBSERVATIONAL Trials, "
        f"in {obs_countries} Country, at {obs_sites} Sites"
    )

 #{OBSERVATIONAL_count} OBSERVATIONAL Trials, in {OBSERVATIONAL_countries} Country, at {OBSERVATIONAL_grouped} Sites, Recruiting: {formatted_OBSERVATIONAL_patients} Planned Patients."


################################################  GRADIO STARTS HERE #########################################################

#Wrapper function called from the Gradio Interface to map inputs to outputs
async def gradio_wrapper_nct(sponsor=None, condition=None, NCTId=None, country=None, status = None):
    """Build the condition/drug/collaborator views for a trial search.

    The search is dispatched on the first non-empty input in this order:
    condition + sponsor, condition, sponsor, NCTId.

    Args:
        sponsor: Lead sponsor name. When given, the condition and drug tables
            are restricted to rows whose 'Sponsor' equals it exactly, and the
            collaborator views are produced.
        condition: Disease area forwarded to get_nct_ids.
        NCTId: Trial identifier, only used when neither condition nor sponsor
            is given.
        country: Accepted for signature parity with gradio_wrapper_nct_spn;
            not used by this wrapper.
        status: Overall status forwarded to get_nct_ids.

    Returns:
        A 6-tuple ``(summary_stats, summary_stats_collb, html_table_conditions,
        html_table_conditions_collb, html_table, html_table_drugs)``.
        Six ``None`` values are returned when the search yields no trials.
        When no search input is given, the first element carries an
        explanatory message and the rest are ``None``.
    """
    records = await _fetch_trial_records(sponsor, condition, NCTId, status)
    if records is None:
        # Previously this path referenced `pd` before a function-local
        # `import pandas as pd` and raised UnboundLocalError; it also returned
        # two values while every other path returns six.
        return _NO_SEARCH_INPUT_MESSAGE, None, None, None, None, None
    if not records:
        return None, None, None, None, None, None

    html_table = _trial_overview_html(records)

    conditions = _split_frame(
        records,
        ['NCTId', 'BriefTitle', 'OrgStudyId', 'Phase', 'Status', 'StudyType', 'Condition', 'Drug',
         'StartDate', 'CompletionDate', 'EnrollmentCount', 'Sponsor', 'Collaborator'],
        'Condition',
    )
    # The original column list named 'Status' twice, which produced a
    # duplicated column; only the first occurrence is kept.
    drugs = _split_frame(
        records,
        ['Status', 'NCTId', 'BriefTitle', 'OrgStudyId', 'Phase', 'StudyType', 'Condition', 'Drug',
         'StartDate', 'CompletionDate', 'EnrollmentCount', 'Sponsor', 'Collaborator'],
        'Drug',
    )

    conditions_collb = None
    if sponsor:
        # The collaborator view must be derived before narrowing to the
        # sponsor-led rows, because it also needs trials led by others.
        conditions_collb = _collaboration_rows(conditions, sponsor)
        conditions = conditions[conditions['Sponsor'] == sponsor]
        drugs = drugs[drugs['Sponsor'] == sponsor]

    html_table_conditions = dataframe_to_html_table(conditions)
    html_table_drugs = dataframe_to_html_table(drugs)

    if sponsor:
        html_table_conditions_collb = dataframe_to_html_table(conditions_collb)
    else:
        html_table_conditions_collb = pd.DataFrame().to_html(
            index=False, header=True, border=0, table_id="empty_table")

    summary_stats = format_summary_stats(calculate_summary_stats(conditions, sponsor))
    if sponsor:
        summary_stats_collb = format_summary_stats(
            calculate_summary_stats_collb(conditions_collb, sponsor))
    else:
        summary_stats_collb = ''

    return (summary_stats, summary_stats_collb, html_table_conditions,
            html_table_conditions_collb, html_table, html_table_drugs)


# Wrapper function called from the Gradio interface (sponsor / site view)
async def gradio_wrapper_nct_spn(sponsor=None, condition=None, NCTId=None, country=None, status = None):
    """Build the condition/drug/site views for a trial search.

    The search is dispatched exactly as in gradio_wrapper_nct. In addition to
    the condition and drug tables, the 'Site', 'Country' and 'City' columns
    are split with split_columns to produce a per-site table.

    Args:
        sponsor: Lead sponsor name; when given, every table is restricted to
            rows whose 'Sponsor' equals it exactly.
        condition: Disease area forwarded to get_nct_ids.
        NCTId: Trial identifier, only used when neither condition nor sponsor
            is given.
        country: When given, the site table is restricted to rows whose
            'Country' equals it exactly.
        status: Overall status forwarded to get_nct_ids.

    Returns:
        A 6-tuple ``(summary_stats, html_table_conditions, html_table,
        summary_stats_sites, html_table_add, html_table_drugs)``.
        Six ``None`` values are returned when the search yields no trials.
        When no search input is given, the first element carries an
        explanatory message and the rest are ``None``.
    """
    records = await _fetch_trial_records(sponsor, condition, NCTId, status)
    if records is None:
        # Same fix as in gradio_wrapper_nct: no UnboundLocalError, six values.
        return _NO_SEARCH_INPUT_MESSAGE, None, None, None, None, None
    if not records:
        return None, None, None, None, None, None

    html_table = _trial_overview_html(records)

    sites = pd.DataFrame(
        records,
        columns=['StudyType', 'Phase', 'NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Site', 'Country',
                 'City', 'Condition', 'Sponsor', 'Collaborator', 'Drug', 'StartDate', 'CompletionDate',
                 'EnrollmentCount'],
    ).fillna("Not Available")
    sites = sites.sort_values(
        by=['StudyType', 'Phase', 'EnrollmentCount', 'CompletionDate', 'Country'],
        ascending=[True, False, False, True, True])

    split_view_columns = ['NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Phase', 'StudyType',
                          'Condition', 'Drug', 'EnrollmentCount', 'Sponsor', 'Collaborator']
    conditions = _split_frame(records, split_view_columns, 'Condition')
    drugs = _split_frame(records, split_view_columns, 'Drug')

    country_site_city_df = split_columns(sites, ['Site', 'Country', 'City'])
    # Make sure the split did not introduce new missing values.
    country_site_city_df = country_site_city_df.fillna("Not Available")
    if country:
        country_site_city_df = country_site_city_df[country_site_city_df['Country'] == country]

    if sponsor:
        # Only the sponsor-led rows are reported by this wrapper. The original
        # also assembled collaborator frames that were never used (and whose
        # unescaped regex could raise on sponsor names containing regex
        # metacharacters); that dead work is dropped.
        conditions = conditions[conditions['Sponsor'] == sponsor]
        drugs = drugs[drugs['Sponsor'] == sponsor]
        country_site_city_df = country_site_city_df[country_site_city_df['Sponsor'] == sponsor]

    html_table_add = dataframe_to_html_table(country_site_city_df)
    html_table_conditions = dataframe_to_html_table(conditions)
    html_table_drugs = dataframe_to_html_table(drugs)

    summary_stats = format_summary_stats(calculate_summary_stats(conditions, sponsor))
    summary_stats_sites = format_summary_stats(
        calculate_summary_stats_sites(country_site_city_df, sponsor, country))

    return summary_stats, html_table_conditions, html_table, summary_stats_sites, html_table_add, html_table_drugs


# ---- private helpers shared by both wrappers --------------------------------

# Message returned when the caller supplies nothing to search on.
_NO_SEARCH_INPUT_MESSAGE = "No condition, sponsor, or trial NCT Id provided"

# Keys copied out of every trial record returned by get_nct_ids.
_TRIAL_FIELDS = (
    'Sponsor', 'Collaborator', 'Status', 'Drug', 'StudyType', 'Phase',
    'Site', 'Country', 'City', 'NCTId', 'OrgStudyId', 'Condition',
    'StartDate', 'CompletionDate', 'EnrollmentCount', 'PrimaryMeasure',
    'Purpose', 'Arm', 'BriefTitle',
)


async def _fetch_trial_records(sponsor, condition, NCTId, status):
    """Query get_nct_ids and return one plain dict per trial.

    Returns ``None`` when none of condition, sponsor or NCTId is provided, and
    an empty list when the query produced no trials.
    """
    if condition and sponsor:
        trials = await get_nct_ids(disease_area=condition, lead_sponsor_name=sponsor, overall_status=status)
    elif condition:
        trials = await get_nct_ids(disease_area=condition, overall_status=status)
    elif sponsor:
        trials = await get_nct_ids(lead_sponsor_name=sponsor, overall_status=status)
    elif NCTId:
        trials = await get_nct_ids(NCTId=NCTId, overall_status=status)
    else:
        return None

    if trials is None:
        # Treat a missing result like an empty one instead of failing on
        # iteration.
        trials = []
    return [{field: trial[field] for field in _TRIAL_FIELDS} for trial in trials]


def _trial_overview_html(records):
    """Render the one-row-per-record overview table, sorted for display."""
    overview = pd.DataFrame(
        records,
        columns=['NCTId', 'OrgStudyId', 'Status', 'BriefTitle', 'Condition', 'Drug', 'Phase', 'StudyType',
                 'StartDate', 'CompletionDate', 'EnrollmentCount', 'Arm', 'Purpose', 'PrimaryMeasure',
                 'Sponsor', 'Collaborator'],
    ).fillna("Not Available")
    overview = overview.sort_values(
        by=['StudyType', 'Phase', 'CompletionDate', 'EnrollmentCount'],
        ascending=[True, False, True, False])
    return dataframe_to_html_table(overview)


def _split_frame(records, columns, split_on):
    """Build a frame from records, split column `split_on`, then fill NaN."""
    frame = split_conditions(pd.DataFrame(records, columns=columns), split_on)
    return frame.fillna("Not Available")


def _collaboration_rows(frame, sponsor):
    """Collect the rows of `frame` in which `sponsor` works with a partner.

    Three groups are concatenated, in this order:
      1. rows led by `sponsor` that name a collaborator;
      2. rows whose 'Collaborator' is exactly `sponsor`; the lead sponsor is
         moved into 'Collaborator' so the partner is what gets displayed;
      3. rows whose '|'-separated 'Collaborator' contains `sponsor`
         (case-insensitive) without being exactly equal to it; the lead
         sponsor is prepended to the collaborator list.
    """
    led = frame[frame['Sponsor'] == sponsor]
    led_with_partner = led[(led['Collaborator'] != 'Not Available') & led['Collaborator'].notnull()]

    # .copy() so the column rewrites below act on independent frames rather
    # than on slices of `frame` (avoids SettingWithCopyWarning).
    sole_collaborator = frame[frame['Collaborator'] == sponsor].copy()
    sole_collaborator['Collaborator'] = sole_collaborator['Sponsor']

    # Raw string + re.escape: the sponsor name is matched literally between
    # '|' separators; non-capturing groups avoid pandas' match-group warning.
    pattern = rf'(?:^|\|){re.escape(sponsor)}(?:\||$)'
    listed = frame[frame['Collaborator'].str.contains(pattern, na=False, flags=re.IGNORECASE, regex=True)]
    listed = listed[listed['Collaborator'] != sponsor].copy()
    listed['Collaborator'] = listed['Sponsor'] + '|' + listed['Collaborator']

    return pd.concat([led_with_partner, sole_collaborator, listed], ignore_index=True)


###############################################################################################################################################################
##### ##################                 Start Gradio Interface           #########################################################################

################################## Condition Icicle and Sponsor  Map ########################

###################################################################

import plotly.graph_objects as go
import pandas as pd
import numpy as np

################################################ TOP 20 Conditions######################################################


##########################################################################################################


def plot_condition_sunburst(df, top_n=20):
    """Plot the most frequent conditions among interventional trials.

    Despite the function name, the figure is an icicle chart built with
    ``px.icicle``.

    Args:
        df: DataFrame with at least 'StudyType', 'Condition' and 'NCTId'
            columns. It is not modified.
        top_n: Number of conditions to show, ranked by the count of unique
            'NCTId' values. Defaults to 20, the previously hard-coded value.

    Returns:
        The figure produced by ``px.icicle``.
    """
    # Generic labels that carry no disease information. The original list
    # contained "CHRONIC'" (stray apostrophe), which could never match.
    excluded = ["OTHER", "OTHERS", "HEALTHY", "ADULT", "CHRONIC"]

    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame, so the upper-casing never writes into a
    # slice of the caller's DataFrame (no SettingWithCopyWarning).
    interventional = interventional.assign(Condition=interventional['Condition'].str.upper())
    interventional = interventional[~interventional['Condition'].isin(excluded)]

    # Unique trials per condition, most frequent first.
    trial_counts = interventional.groupby('Condition')['NCTId'].nunique().reset_index()
    trial_counts = trial_counts.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_conditions = (
        trial_counts.head(top_n)
        .rename(columns={'NCTId': 'Number of Trials'})
        .assign(Display=f'TOP {top_n}')
    )

    icicle_fig = px.icicle(top_conditions, path=['Condition'], values='Number of Trials',
                color='Condition', color_continuous_scale='RdBu',
                custom_data=['Condition', 'Number of Trials'])

    # Show the condition and its trial count on hover.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title=f'Top {top_n} Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )

    return icicle_fig


############################################################  Conditions OTHERS  ###########           ############################################
def plot_condition_others(df, top_n=20):
    """Plot the conditions of interventional trials that fall outside the top ranks.

    Conditions are ranked by their number of unique 'NCTId' values; the
    `top_n` most frequent ones are left out and the remainder is drawn as an
    icicle chart.

    Args:
        df: DataFrame with at least 'StudyType', 'Condition' and 'NCTId'
            columns. It is not modified.
        top_n: Number of leading conditions to leave out. Defaults to 20, the
            previously hard-coded value.

    Returns:
        The figure produced by ``px.icicle``.
    """
    excluded = ["OTHER", "OTHERS", "HEALTHY", "ADULT"]

    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame instead of writing into a filtered slice of
    # the caller's DataFrame (avoids SettingWithCopyWarning).
    interventional = interventional.assign(Condition=interventional['Condition'].str.upper())
    interventional = interventional[~interventional['Condition'].isin(excluded)]

    # Unique trials per condition, most frequent first.
    trial_counts = interventional.groupby('Condition')['NCTId'].nunique().reset_index()
    trial_counts = trial_counts.sort_values('NCTId', ascending=False).reset_index(drop=True)

    leading_conditions = trial_counts.head(top_n)['Condition']

    # Everything that is not among the leading conditions.
    other_conditions = (
        trial_counts[~trial_counts['Condition'].isin(leading_conditions)]
        .assign(Display='OTHERS')
        .rename(columns={'NCTId': 'Number of Trials'})
    )

    icicle_fig = px.icicle(other_conditions, path=['Condition'], values='Number of Trials',
                color='Condition', color_continuous_scale='RdBu',
                custom_data=['Condition', 'Number of Trials'])

    # Show the condition and its trial count on hover.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title='Other Conditions',
        font=dict(family="Arial", size=14, color='black'),
        width=400,
        height=1000,
        margin=dict(t=50, l=25, r=25, b=25)
    )

    return icicle_fig
###################################################################################################################################################

def wrap_text(text, max_chars_per_line):
    """Greedily wrap `text` on whitespace and join the lines with '<br>'.

    Words are never broken: a word longer than `max_chars_per_line` is placed
    on a line of its own.

    NOTE: a later ``wrap_text`` definition in this module (based on
    ``textwrap``) rebinds this name, so this version is only in effect until
    that definition is executed.

    Args:
        text: String to wrap.
        max_chars_per_line: Maximum line length, in characters.

    Returns:
        The wrapped text with '<br>' between lines; '' for empty or
        whitespace-only input.
    """
    lines = []
    current_line = []
    current_length = 0  # length of ' '.join(current_line)

    for word in text.split():
        # Length the line would have with this word appended.
        candidate_length = current_length + len(word) + (1 if current_line else 0)
        if candidate_length <= max_chars_per_line:
            current_line.append(word)
            current_length = candidate_length
        else:
            # Only flush a non-empty line: an over-long first word used to
            # emit an empty line, i.e. a leading '<br>' in the output.
            if current_line:
                lines.append(' '.join(current_line))
            current_line = [word]
            current_length = len(word)

    lines.append(' '.join(current_line))
    return '<br>'.join(lines)


#####################################################  Sponsor Counts ###########################################


###################################################   ############################################################

def wrap_text(text, max_chars_per_line):
    """Wrap `text` to `max_chars_per_line` columns, joining lines with '<br>'.

    Wrapping is delegated to ``textwrap`` with its default settings, so words
    longer than the width are broken across lines.
    """
    wrapper = textwrap.TextWrapper(width=max_chars_per_line)
    wrapped_lines = wrapper.wrap(text)
    return '<br>'.join(wrapped_lines)

def plot_sponsor_collaborator_tree_map(df):
    """Draw a treemap of the 30 busiest sponsor/collaborator pairs.

    Only rows whose 'StudyType' is "INTERVENTIONAL" are considered. Pairs are
    ranked by their number of unique 'NCTId' values; sponsor and collaborator
    names are wrapped with wrap_text so they fit inside the tiles.

    Returns:
        The figure produced by ``px.treemap``.
    """
    interventional = df[df['StudyType'] == "INTERVENTIONAL"]

    # Unique trials per (sponsor, collaborator) pair, largest first.
    pair_counts = (
        interventional.groupby(['Sponsor', 'Collaborator'])['NCTId']
        .nunique()
        .reset_index()
        .sort_values('NCTId', ascending=False)
        .reset_index(drop=True)
    )

    busiest_pairs = pair_counts.head(30).rename(columns={'NCTId': 'Number of Trials'})

    wrap_width = 10  # maximum characters per label line
    for source_column, wrapped_column in (('Sponsor', 'Wrapped Sponsor'),
                                          ('Collaborator', 'Wrapped Collaborator')):
        busiest_pairs[wrapped_column] = busiest_pairs[source_column].apply(
            lambda label: wrap_text(label, wrap_width))

    figure = px.treemap(
        busiest_pairs,
        path=['Wrapped Sponsor', 'Wrapped Collaborator'],
        values='Number of Trials',
        color='Sponsor',
        color_continuous_scale='RdBu',
        custom_data=['Wrapped Sponsor', 'Wrapped Collaborator', 'Number of Trials'],
    )

    # Hover shows sponsor, collaborator and the trial count.
    figure.update_traces(
        hovertemplate='%{customdata[0]}<br>%{customdata[1]}<br>Number of Trials: %{customdata[2]}')

    figure.update_layout(
        title='Lead Sponsors and Collaborators',
        font={'family': "Arial", 'size': 14, 'color': 'black'},
        width=600,
        height=1000,
    )

    # The tiles themselves display only the number of trials.
    figure.update_traces(textinfo='value')

    return figure


#########################################################################################################

def plot_sponsor_tree(df):
    """Draw an icicle chart of the 30 sponsors with the most interventional trials.

    Trials are counted as unique 'NCTId' values per ('Phase', 'Sponsor') pair
    and then summed per sponsor. Missing phases are labelled 'UNKNOWN' first
    so that those trials are not dropped by the grouping.

    Args:
        df: DataFrame with at least 'StudyType', 'Phase', 'Sponsor' and
            'NCTId' columns. It is not modified.

    Returns:
        The figure produced by ``px.icicle``.
    """
    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame instead of writing into a filtered slice of
    # the caller's DataFrame (avoids SettingWithCopyWarning).
    interventional = interventional.assign(Phase=interventional['Phase'].fillna('UNKNOWN'))

    # Unique trials per (phase, sponsor). The original re-grouped this result
    # by the same two keys and summed, which left it unchanged.
    per_phase = interventional.groupby(['Phase', 'Sponsor'])['NCTId'].nunique().reset_index()

    # Total per sponsor across phases, largest first.
    per_sponsor = per_phase.groupby('Sponsor')['NCTId'].sum().reset_index()
    per_sponsor = per_sponsor.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_30_sponsors = per_sponsor.head(30).rename(columns={'NCTId': 'Number of Trials'})

    max_chars_per_line = 10  # maximum characters per label line
    top_30_sponsors['Wrapped Sponsor'] = top_30_sponsors['Sponsor'].apply(
        lambda x: wrap_text(x, max_chars_per_line))

    icicle_fig = px.icicle(top_30_sponsors, path=['Wrapped Sponsor'], values='Number of Trials',
                color='Sponsor', color_continuous_scale='RdBu',
                custom_data=['Wrapped Sponsor', 'Number of Trials'])

    # Show the sponsor and its trial count on hover.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title='Sponsor',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000
    )

    return icicle_fig

######################################################################################################################################


def plot_collaborator_icicle(df):
    """Draw an icicle chart of the 30 collaborators with the most interventional trials.

    Trials are counted as unique 'NCTId' values per ('Phase', 'Collaborator')
    pair and then summed per collaborator. Missing phases are labelled
    'UNKNOWN' first so that those trials are not dropped by the grouping.

    Args:
        df: DataFrame with at least 'StudyType', 'Phase', 'Collaborator' and
            'NCTId' columns. It is not modified.

    Returns:
        The figure produced by ``px.icicle``.
    """
    interventional = df[df['StudyType'] == "INTERVENTIONAL"]
    # assign() returns a new frame instead of writing into a filtered slice of
    # the caller's DataFrame (avoids SettingWithCopyWarning).
    interventional = interventional.assign(Phase=interventional['Phase'].fillna('UNKNOWN'))

    # Unique trials per (phase, collaborator). The original re-grouped this
    # result by the same two keys and summed, which left it unchanged.
    per_phase = interventional.groupby(['Phase', 'Collaborator'])['NCTId'].nunique().reset_index()

    # Total per collaborator across phases, largest first.
    per_collaborator = per_phase.groupby('Collaborator')['NCTId'].sum().reset_index()
    per_collaborator = per_collaborator.sort_values('NCTId', ascending=False).reset_index(drop=True)

    top_30_collaborators = per_collaborator.head(30).rename(columns={'NCTId': 'Number of Trials'})

    max_chars_per_line = 10  # maximum characters per label line
    top_30_collaborators['Collaborators'] = top_30_collaborators['Collaborator'].apply(
        lambda x: wrap_text(x, max_chars_per_line))

    icicle_fig = px.icicle(top_30_collaborators, path=['Collaborators'], values='Number of Trials',
                color='Collaborator', color_continuous_scale='RdBu',
                custom_data=['Collaborators', 'Number of Trials'])

    # Show the collaborator and its trial count on hover.
    icicle_fig.update_traces(hovertemplate='%{customdata[0]}<br>Number of Trials: %{customdata[1]}')

    icicle_fig.update_layout(
        title='Collaborators',
        font=dict(family="Arial", size=14, color='black'),
        width=600,
        height=1000
    )

    return icicle_fig

####################################  DRUGS  ########################################################################

####################  Sankey Diagram for Conditions to Drugs to Phase /NCTId#############################################


import pandas as pd
import plotly.graph_objects as go
import random

def random_color():
    """Return a random CSS colour string of the form 'rgb(R, G, B)'.

    Each channel is drawn independently with ``random.randint(0, 255)``, in
    the order red, green, blue.
    """
    red, green, blue = (random.randint(0, 255) for _ in range(3))
    return f'rgb({red}, {green}, {blue})'
##############################################################################################################
def plot_drug_sankey(df):
    """Build a Sankey diagram Condition -> Drug -> NCTId -> OrgStudyId -> Phase.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used. Missing
    ``Phase`` values are shown as ``'UNKNOWN'`` and the frame is passed through
    ``split_conditions(df, 'Condition')`` before the nodes are collected.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``Condition``, ``Drug``, ``NCTId`` and ``OrgStudyId``.

    Returns:
        A ``plotly.graph_objects.Figure`` holding a single Sankey trace. Each
        link's value is the number of rows sharing that pair of adjacent-level
        values.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    # Node levels from left to right; nodes of one level are laid out
    # contiguously in ``labels`` in order of first appearance.
    level_columns = ['Condition', 'Drug', 'NCTId', 'OrgStudyId', 'Phase']
    level_values = [df[column].unique().tolist() for column in level_columns]
    labels = [node for nodes in level_values for node in nodes]

    # Assign random colors to each node
    colors = [random_color() for _ in labels]

    # Index in ``labels`` of the first node of every level.
    level_offsets = []
    next_offset = 0
    for nodes in level_values:
        level_offsets.append(next_offset)
        next_offset += len(nodes)

    source = []
    target = []
    value = []
    for depth in range(len(level_columns) - 1):
        left_index = {node: level_offsets[depth] + pos
                      for pos, node in enumerate(level_values[depth])}
        right_index = {node: level_offsets[depth + 1] + pos
                       for pos, node in enumerate(level_values[depth + 1])}

        # One grouped pass replaces filtering the whole frame once per
        # (left, right) node pair. Rows with a missing key are dropped by
        # groupby, matching the equality filters (NaN never compares equal).
        pair_counts = df.groupby(
            [level_columns[depth], level_columns[depth + 1]], sort=False
        ).size()
        links = sorted(
            (left_index[left], right_index[right], int(count))
            for (left, right), count in pair_counts.items()
            if count > 0
        )
        # Sorting by (source, target) keeps the link order of a nested
        # left-then-right iteration over the node lists.
        for src, dst, count in links:
            source.append(src)
            target.append(dst)
            value.append(count)

    # Figure height grows with the number of conditions and tops out at 1000.
    num_conditions = len(level_values[0])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])

    fig.update_layout(title_text="Conditions, Drugs, Trial IDs, Phases for Sponsor",
                      font_size=10, height=height, autosize=True)
    return fig

###########################################################################################


###########################################################################################################################

####################################################################    TRIALS                 ##############################


########################################

def random_color():
    """Return a random ``rgb(r, g, b)`` colour string.

    The three channels are independent ``random.randint(0, 255)`` draws,
    taken in red, green, blue order.
    """
    channels = [random.randint(0, 255) for _ in range(3)]
    return "rgb({}, {}, {})".format(*channels)


def plot_condition_treemap_nct_old(df):
    """Build a Sankey diagram Condition -> NCTId -> OrgStudyId -> Phase.

    NOTE: a later top-level ``def plot_condition_treemap_nct_old`` in this file
    rebinds the name, so this Sankey variant is replaced once the module has
    finished loading.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used. Missing
    ``Phase`` values are shown as ``'UNKNOWN'`` and the frame is passed through
    ``split_conditions(df, 'Condition')`` before the nodes are collected.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``Condition``, ``NCTId`` and ``OrgStudyId``.

    Returns:
        A ``plotly.graph_objects.Figure`` holding a single Sankey trace. Each
        link's value is the number of rows sharing that pair of adjacent-level
        values.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')
    df = split_conditions(df, 'Condition')

    # Node levels from left to right; nodes of one level are laid out
    # contiguously in ``labels`` in order of first appearance.
    level_columns = ['Condition', 'NCTId', 'OrgStudyId', 'Phase']
    level_values = [df[column].unique().tolist() for column in level_columns]
    labels = [node for nodes in level_values for node in nodes]
    colors = [random_color() for _ in labels]

    # Index in ``labels`` of the first node of every level.
    level_offsets = []
    next_offset = 0
    for nodes in level_values:
        level_offsets.append(next_offset)
        next_offset += len(nodes)

    source = []
    target = []
    value = []
    for depth in range(len(level_columns) - 1):
        left_index = {node: level_offsets[depth] + pos
                      for pos, node in enumerate(level_values[depth])}
        right_index = {node: level_offsets[depth + 1] + pos
                       for pos, node in enumerate(level_values[depth + 1])}

        # One grouped pass replaces filtering the whole frame once per
        # (left, right) node pair. Rows with a missing key are dropped by
        # groupby, matching the equality filters (NaN never compares equal).
        pair_counts = df.groupby(
            [level_columns[depth], level_columns[depth + 1]], sort=False
        ).size()
        links = sorted(
            (left_index[left], right_index[right], int(count))
            for (left, right), count in pair_counts.items()
            if count > 0
        )
        # Sorting by (source, target) keeps the link order of a nested
        # left-then-right iteration over the node lists.
        for src, dst, count in links:
            source.append(src)
            target.append(dst)
            value.append(count)

    # Figure height grows with the number of conditions and tops out at 1000.
    num_conditions = len(level_values[0])
    if num_conditions <= 2:
        height = 400
    elif num_conditions <= 10:
        height = 800
    else:
        height = 1000

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])

    fig.update_layout(title_text="Conditions, Trial IDs, Study IDs, Phases for Sponsor",
                      font_size=10, height=height,  autosize=True)
    return fig
######################################### Conditions###############################

#####################################################################################

import plotly.graph_objects as go

def plot_condition_treemap_nct_old(df):
    """Render interventional trials as a Plotly table sorted by condition.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are shown and
    missing ``Phase`` values are displayed as ``'UNKNOWN'``.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``Condition``, ``NCTId``, ``BriefTitle`` and ``OrgStudyId``.

    Returns:
        A ``plotly.graph_objects.Figure`` holding one Table trace with the
        columns Condition, NCTId, OrgStudyId, BriefTitle and Phase.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()

    # Fill missing values in the 'Phase' column with a placeholder string
    df['Phase'] = df['Phase'].fillna('UNKNOWN')

    # Per-NCTId lookups; when an NCTId occurs on several rows the dict keeps
    # the value of the last one.
    by_nctid = df.set_index('NCTId')
    nctid_to_brieftitle = by_nctid['BriefTitle'].to_dict()
    nctid_to_orgstudyid = by_nctid['OrgStudyId'].to_dict()

    # Independent copy of the displayed columns so the two lookup columns can
    # be added without writing into a slice of ``df``.
    table_df = df[['Condition', 'NCTId', 'Phase']].copy()
    table_df['BriefTitle'] = table_df['NCTId'].map(nctid_to_brieftitle)
    table_df['OrgStudyId'] = table_df['NCTId'].map(nctid_to_orgstudyid)

    # Sort the dataframe by Condition alphabetically
    table_df = table_df.sort_values('Condition')

    # Create a Plotly Table
    fig = go.Figure(data=[go.Table(
        header=dict(
            values=['Condition', 'NCTId', 'OrgStudyId', 'BriefTitle', 'Phase'],
            fill_color='paleturquoise',
            align='left',
            font=dict(size=16, color='black')
        ),
        cells=dict(
            values=[table_df.Condition, table_df.NCTId, table_df.OrgStudyId, table_df.BriefTitle, table_df.Phase],
            align='left',
            font=dict(size=14, color='black')
        )
    )])

    fig.update_layout(
        autosize=True,
        height=1000,
        title_text="Conditions with NCTIds and Phases",
        title_x=0.5,
        font=dict(size=18)
    )

    return fig


###################### Claude modified Jan 14/2025#############################
def plot_condition_sunburst_nct(df):
    """Build a sunburst chart Conditions -> Condition -> NCTId -> Phase.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used and
    missing ``Phase`` values are shown as ``'UNKNOWN'``. NCTId nodes are
    labelled ``"<NCTId> (<OrgStudyId>)"`` and carry the trial's ``BriefTitle``
    as hover text.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``Condition``, ``NCTId``, ``BriefTitle`` and ``OrgStudyId``.

    Returns:
        A 1200x1200 ``plotly.graph_objects.Figure`` with one Sunburst trace.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()

    # Fill missing values in the 'Phase' column with a placeholder string
    df['Phase'] = df['Phase'].fillna('UNKNOWN')

    # Per-NCTId lookups (the last row wins when an NCTId repeats).
    by_nctid = df.set_index('NCTId')
    nctid_to_brieftitle = by_nctid['BriefTitle'].to_dict()
    nctid_to_orgstudyid = by_nctid['OrgStudyId'].to_dict()

    # Rows are gathered in plain lists and turned into one DataFrame at the
    # end; concatenating a frame per node is quadratic and starts from empty
    # frames, which pandas has deprecated for concat.
    condition_rows = [{
        'ids': "Conditions",
        'labels': "Conditions",
        'parents': "",
        'brieftitle': None
    }]
    nctid_rows = []
    phase_rows = []

    # Missing conditions are dropped first: ``str.lower`` cannot be applied to
    # NaN and such rows never match an equality filter anyway.
    conditions = sorted(df['Condition'].dropna().unique(), key=str.lower)
    for condition in conditions:
        condition_rows.append({
            'ids': condition,
            'labels': condition,
            'parents': "Conditions",
            'brieftitle': None
        })

        trials = df[df['Condition'] == condition]
        for nctid in sorted(trials['NCTId'].unique()):
            nctid_rows.append({
                'ids': f"{condition}-{nctid}",
                'labels': f"{nctid} ({nctid_to_orgstudyid[nctid]})",
                'parents': condition,
                'brieftitle': nctid_to_brieftitle[nctid]
            })

        # One Phase node per (NCTId, Phase): repeated rows for the same trial
        # would otherwise produce duplicate node ids, and a hierarchy trace
        # needs every id to be unique.
        trial_phases = (trials[['NCTId', 'Phase']]
                        .drop_duplicates()
                        .sort_values('NCTId'))
        for nctid, phase in trial_phases.itertuples(index=False, name=None):
            phase_rows.append({
                'ids': f"{condition}-{nctid}-{phase}",
                'labels': phase,
                'parents': f"{condition}-{nctid}",
                'brieftitle': None
            })

    # Level order is kept: root and conditions, then trials, then phases.
    sunburst_df = pd.DataFrame(
        condition_rows + nctid_rows + phase_rows,
        columns=['ids', 'labels', 'parents', 'brieftitle']
    )

    # Create the Sunburst figure
    fig = go.Figure(go.Sunburst(
        ids=sunburst_df.ids,
        labels=sunburst_df.labels,
        parents=sunburst_df.parents,
        maxdepth=3,  # Limit the depth to 3 levels
        branchvalues="total",
        hovertext=sunburst_df['brieftitle'],
        hoverinfo='text',
        hoverlabel=dict(namelength=-1),
        textfont=dict(size=14, family="Arial")
    ))

    # Update layout
    fig.update_layout(
        width=1200,
        height=1200,
        title={
            'text': "Clinical Trials by Condition, NCTId, and Phase",
            'y':0.95,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(size=20)
        }
    )

    return fig

####################################################################################
def plot_condition_treemap_nct(df):
    """Build an icicle chart Conditions -> Condition -> NCTId -> Phase.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used and
    missing ``Phase`` values are shown as ``'UNKNOWN'``. NCTId nodes are
    labelled ``"<NCTId> (<OrgStudyId>)"`` and carry the trial's ``BriefTitle``
    as hover text. Nodes are ordered by hierarchy level and then
    case-insensitively by label.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``Condition``, ``NCTId``, ``BriefTitle`` and ``OrgStudyId``.

    Returns:
        A ``plotly.graph_objects.Figure`` (height 1000) with one Icicle trace.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()

    # Fill missing values in the 'Phase' column with a placeholder string
    df['Phase'] = df['Phase'].fillna('UNKNOWN')

    # Per-NCTId lookups (the last row wins when an NCTId repeats).
    by_nctid = df.set_index('NCTId')
    nctid_to_brieftitle = by_nctid['BriefTitle'].to_dict()
    nctid_to_orgstudyid = by_nctid['OrgStudyId'].to_dict()

    # Rows are gathered in plain lists and turned into one DataFrame at the
    # end; concatenating a frame per node is quadratic and starts from empty
    # frames, which pandas has deprecated for concat.
    condition_rows = [{
        'ids': "Conditions",
        'labels': "Conditions",
        'parents': "",
        'brieftitle': None,
        'level': 0
    }]
    nctid_rows = []
    phase_rows = []

    # Missing conditions are dropped first: ``str.lower`` cannot be applied to
    # NaN and such rows never match an equality filter anyway.
    conditions = sorted(df['Condition'].dropna().unique(), key=str.lower)
    for condition in conditions:
        condition_rows.append({
            'ids': condition,
            'labels': condition,
            'parents': "Conditions",
            'brieftitle': None,
            'level': 1
        })

        trials = df[df['Condition'] == condition]
        for nctid in sorted(trials['NCTId'].unique()):
            nctid_rows.append({
                'ids': f"{condition}-{nctid}",
                'labels': f"{nctid} ({nctid_to_orgstudyid[nctid]})",
                'parents': condition,
                'brieftitle': nctid_to_brieftitle[nctid],
                'level': 2
            })

        # One Phase node per (NCTId, Phase): repeated rows for the same trial
        # would otherwise produce duplicate node ids, and a hierarchy trace
        # needs every id to be unique.
        trial_phases = (trials[['NCTId', 'Phase']]
                        .drop_duplicates()
                        .sort_values('NCTId'))
        for nctid, phase in trial_phases.itertuples(index=False, name=None):
            phase_rows.append({
                'ids': f"{condition}-{nctid}-{phase}",
                'labels': phase,
                'parents': f"{condition}-{nctid}",
                'brieftitle': None,
                'level': 3
            })

    icicle_df = pd.DataFrame(
        condition_rows + nctid_rows + phase_rows,
        columns=['ids', 'labels', 'parents', 'brieftitle', 'level']
    )

    # Sort by level first to keep the hierarchy order, then alphabetically
    # (ignoring case) by label within each level.
    icicle_df = icicle_df.sort_values(['level', 'labels'],
                                     key=lambda x: x.str.lower() if x.name == 'labels' else x)

    # Create the figure with sorted data
    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial"),
        hovertext=icicle_df['brieftitle'],
        hoverinfo='text',
        hoverlabel=dict(namelength=-1)
    ))

    fig.update_layout(autosize=True, height=1000)

    return fig

############################################################


###########################################################################################################################
import re

def insert_line_break(text, max_length=30):
    """Wrap ``text`` for HTML display by inserting ``<br>`` between chunks.

    While the remaining text is longer than ``max_length`` it is cut at the
    last space found before index ``max_length``; if there is none, it is cut
    at exactly ``max_length`` characters. Whitespace around the cut is
    stripped from the remainder.

    Args:
        text: The string to wrap.
        max_length: Length above which the remaining text is split again.
            Non-positive values cannot make progress, so the text is
            returned unchanged in that case.

    Returns:
        The chunks joined with ``'<br>'``; ``text`` itself when it already
        fits.
    """
    if max_length <= 0:
        return text

    # Iterative on purpose: one stack frame per chunk would hit Python's
    # recursion limit on very long text.
    chunks = []
    remaining = text
    while len(remaining) > max_length:
        cut = remaining.rfind(' ', 0, max_length)
        if cut == -1:
            cut = max_length
        chunks.append(remaining[:cut])
        remaining = remaining[cut:].strip()
    chunks.append(remaining)
    return '<br>'.join(chunks)


###########################################################                   #######################################################################
def plot_nct2org_icicle(df):
    """Build an icicle chart Trials -> NCTId -> OrgStudyId -> Condition.

    NCTId nodes show the trial's ``BriefTitle`` as hover text. One Condition
    node is emitted per input row; its id ends with the row's index label.

    Args:
        df: DataFrame providing at least the columns ``NCTId``,
            ``BriefTitle``, ``OrgStudyId`` and ``Condition``.

    Returns:
        A ``plotly.graph_objects.Figure`` (height 1000) with one Icicle trace.
    """
    # Create a dictionary of NCTId-BriefTitle pairs
    nctid_brieftitle = df[['NCTId', 'BriefTitle']].drop_duplicates().set_index('NCTId').to_dict()['BriefTitle']

    # Rows are gathered in a list and turned into one DataFrame at the end;
    # concatenating a frame per node is quadratic and starts from an empty
    # frame, which pandas has deprecated for concat.
    rows = [{
        'ids': "Trials",
        'labels': "Trials",
        'parents': "",
        'hovertext': ""
    }]

    # NCTId level with BriefTitle as hover text
    for nctid in df['NCTId'].unique():
        rows.append({
            'ids': nctid,
            'labels': nctid,
            'parents': "Trials",
            'hovertext': nctid_brieftitle[nctid]
        })

    # OrgStudyId level; sort=False keeps trials in order of first appearance.
    for nctid, trial_rows in df.groupby('NCTId', sort=False):
        for orgstudyid in trial_rows['OrgStudyId'].unique():
            rows.append({
                'ids': f"{nctid}-{orgstudyid}",
                'labels': orgstudyid,
                'parents': nctid,
                'hovertext': ""
            })

    # Condition level: one leaf per input row.
    for index, nctid, orgstudyid, condition in zip(
            df.index, df['NCTId'], df['OrgStudyId'], df['Condition']):
        rows.append({
            'ids': f"{nctid}-{orgstudyid}-{condition}-{index}",
            'labels': condition,
            'parents': f"{nctid}-{orgstudyid}",
            'hovertext': ""
        })

    icicle_df = pd.DataFrame(rows, columns=['ids', 'labels', 'parents', 'hovertext'])

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        hovertext=icicle_df.hovertext,
        root_color="lightgrey",
        textfont=dict(size=34, family="Arial")
    ))

    fig.update_layout(autosize=True, height=1000)

    return fig


######################################################################################################################################


#################################################################################################################


############################## Scatter Plot for Country Timelines ######################################

import pandas as pd

import numpy as np
import plotly.express as px


import plotly.graph_objs as go
from plotly.subplots import make_subplots


def split_condition(text):
    """Return the part of ``text`` before its first ``,`` or ``|``, stripped.

    The text is cut at the first comma, the result is cut again at the first
    pipe, and surrounding whitespace is removed.
    """
    before_comma, _, _ = text.partition(',')
    before_pipe, _, _ = before_comma.partition('|')
    return before_pipe.strip()

#################################################################################################################################
import plotly.graph_objs as go


import plotly.graph_objs as go
import plotly.subplots as sp
import pandas as pd
import numpy as np


###################################################################  COUNTRY PLOTS   ################################################################
def plot_trial_country_map(df):
    """Build an icicle chart Country -> Condition -> trial (NCTId).

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used; the
    frame is sorted by ``Phase`` (missing values become ``'UNKNOWN'``) and
    passed through ``split_conditions(df, 'Condition')``. Country and
    condition nodes show their number of distinct NCTIds as hover text; trial
    nodes show the trial's line-wrapped ``BriefTitle``.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``Condition``, ``Country``, ``NCTId`` and ``BriefTitle``.

    Returns:
        A ``plotly.graph_objects.Figure`` (height 800) with one Icicle trace.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Split the conditions
    df = split_conditions(df, 'Condition')

    # Rows are gathered per level and assembled into one DataFrame at the end
    # (root, all countries, all country/condition nodes, all trials) instead
    # of concatenating a new frame for every node.
    root_rows = [{
        'ids': "Country",
        'labels': "Country",
        'parents': "",
        'hover_text': "Country"
    }]
    country_rows = []
    condition_rows = []
    trial_rows = []

    for country in df['Country'].unique():
        country_df = df[df['Country'] == country]
        country_rows.append({
            'ids': country,
            'labels': country,
            'parents': "Country",
            'hover_text': f"({len(country_df['NCTId'].unique())} Trials)"
        })

        for condition in country_df['Condition'].unique():
            # The country and condition values are used directly for the
            # filter; recovering them with ``node_id.split('__')`` fails as
            # soon as either name itself contains '__'.
            node_id = f"{country}__{condition}"
            condition_df = country_df[country_df['Condition'] == condition]
            condition_rows.append({
                'ids': node_id,
                'labels': condition,
                'parents': country,
                'hover_text': f"({len(condition_df['NCTId'].unique())} Trials)"
            })

            # First row of every trial supplies the title shown on hover.
            first_rows = condition_df.drop_duplicates('NCTId')
            for trial, title in zip(first_rows['NCTId'], first_rows['BriefTitle']):
                trial_rows.append({
                    'ids': f"{node_id}__{trial}",
                    'labels': trial,
                    'parents': node_id,
                    'hover_text': f"<br>{insert_line_break(title)}"
                })

    icicle_df = pd.DataFrame(
        root_rows + country_rows + condition_rows + trial_rows,
        columns=['ids', 'labels', 'parents', 'hover_text']
    )

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        textinfo='label',
        hovertext=icicle_df.hover_text,
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial")
    ))

    fig.update_layout(
     autosize=True,height = 800
     )

    return fig


####################

################################################################    SITES        #####################################################


################################################################ TRIAL SITES   ###########################################
def plot_trial_sites(df):
    """Build an icicle chart Sites -> City -> Site -> trial.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used and the
    frame is sorted by ``Phase`` (missing values become ``'UNKNOWN'``). City
    and site nodes show their number of distinct NCTIds as hover text; trial
    nodes are labelled ``"<NCTId><br><OrgStudyId>"``.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``City``, ``Site``, ``NCTId`` and ``OrgStudyId``.

    Returns:
        A ``plotly.graph_objects.Figure`` (height 800) with one Icicle trace.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Rows are gathered per level and assembled into one DataFrame at the end
    # (root, all cities, all city/site nodes, all trials) instead of
    # concatenating a new frame for every node.
    root_rows = [{
        'ids': "Sites",
        'labels': "Sites",
        'parents': "",
        'hover_text': "Sites"
    }]
    city_rows = []
    site_rows = []
    trial_rows = []

    for city in df['City'].unique():
        city_df = df[df['City'] == city]
        city_rows.append({
            'ids': city,
            'labels': city,
            'parents': "Sites",
            'hover_text': f"({len(city_df['NCTId'].unique())} Trials)"
        })

        for site in city_df['Site'].unique():
            # The city and site values are used directly for the filter;
            # recovering them with ``node_id.split('__')`` fails when a name
            # contains '__', and a missing site would lead to calling
            # ``.unique()`` on the empty frame returned by ``apply(axis=1)``.
            node_id = f"{city}__{site}"
            site_df = city_df[city_df['Site'] == site]
            site_rows.append({
                'ids': node_id,
                'labels': site,
                'parents': city,
                'hover_text': f"({len(site_df['NCTId'].unique())} Trials)"
            })

            # One node per distinct (NCTId, OrgStudyId) pair at this site.
            trial_pairs = site_df[['NCTId', 'OrgStudyId']].drop_duplicates()
            for nctid, orgstudyid in trial_pairs.itertuples(index=False, name=None):
                trial_rows.append({
                    'ids': f"{node_id}__{nctid}",
                    'labels': f"{nctid}<br>{orgstudyid}",
                    'parents': node_id,
                    'hover_text': ""
                })

    icicle_df = pd.DataFrame(
        root_rows + city_rows + site_rows + trial_rows,
        columns=['ids', 'labels', 'parents', 'hover_text']
    )

    fig = go.Figure(go.Icicle(
        ids=icicle_df.ids,
        labels=icicle_df.labels,
        parents=icicle_df.parents,
        textinfo='label',
        hovertext=icicle_df.hover_text,
        root_color="lightgrey",
        textfont=dict(size=30, family="Arial")
    ))

    fig.update_layout(autosize=True, height=800)

    return fig


#############################################################################################################################################
def plot_trial_site_map(df):
    """Build a treemap Site -> City -> NCTId -> Condition sized by row count.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used; the
    frame is sorted by ``Phase`` (missing values become ``'UNKNOWN'``) and
    passed through ``split_conditions(df, 'Condition')``. Tiles are coloured
    by ``Site``.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``Condition``, ``Site``, ``City`` and ``NCTId``.

    Returns:
        A ``plotly.graph_objects.Figure`` (height 800) created by
        ``px.treemap``.
    """
    # Copy the filtered rows: assigning a column to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Split the conditions
    df = split_conditions(df, 'Condition')

    # Number of rows for every Site / City / NCTId / Condition combination.
    df_count = df.groupby(['Site', 'City', 'NCTId', 'Condition']).size().reset_index(name='Count')

    fig = px.treemap(df_count, path=['Site', 'City', 'NCTId', 'Condition'], values='Count', color='Site')

    # Apply the font to the whole treemap. A trace selector such as
    # ``dict(depth=0)`` never matches because treemap traces have no ``depth``
    # property, so per-depth selectors leave the font untouched.
    fig.update_traces(textfont=dict(family="Arial", size=30, color='black'))

    fig.update_layout(autosize=True, height=800)

    return fig


############################################################

###############################################################################################################################################################


###########################################################    Timelines                    ###########################################################################################################


import numpy as np
import plotly.graph_objs as go
import matplotlib.pyplot as plt


def generate_colors(n):
    """Return ``n`` ``#rrggbb`` strings sampled from matplotlib's rainbow colormap.

    The colormap is evaluated at ``n`` evenly spaced points in ``[0, 1]``;
    each RGB channel is scaled by 255 and truncated to an integer, and the
    alpha channel is ignored.
    """
    sample_points = np.linspace(0, 1, n)
    rgba_rows = plt.cm.rainbow(sample_points)

    palette = []
    for red, green, blue, _alpha in rgba_rows:
        palette.append(
            f'#{int(red * 255):02x}{int(green * 255):02x}{int(blue * 255):02x}'
        )
    return palette

def get_marker_size(enrollment_count):
    """Map an enrollment count to a marker diameter.

    Counts below 100 give 20, below 300 give 40, below 500 give 60 and
    below 1000 give 70. Anything that is not below 1000 (including values for
    which every ``<`` comparison is false, such as NaN) gives 100.
    """
    # (exclusive upper bound, marker size), checked in ascending order.
    size_bands = ((100, 20), (300, 40), (500, 60), (1000, 70))
    for upper_bound, marker_size in size_bands:
        if enrollment_count < upper_bound:
            return marker_size
    return 100

def plot_trial_bubblemap(df):
    """Plot every interventional trial as a start/end timeline row.

    For each NCTId a square marker is drawn at ``StartDate``, a circle sized
    by ``get_marker_size(EnrollmentCount)`` at ``CompletionDate`` and a black
    line between the two. Markers are coloured by the first condition listed
    for the trial, and one legend entry is added per condition.

    Only rows whose ``StudyType`` equals ``"INTERVENTIONAL"`` are used; the
    frame is sorted by ``Phase`` (missing values become ``'UNKNOWN'``), dates
    that cannot be parsed become ``NaT`` and the frame is passed through
    ``split_conditions(df, 'Condition')``.

    Args:
        df: DataFrame providing at least the columns ``StudyType``, ``Phase``,
            ``StartDate``, ``CompletionDate``, ``Condition``, ``NCTId``,
            ``BriefTitle``, ``OrgStudyId`` and ``EnrollmentCount``.

    Returns:
        A ``plotly.graph_objects.Figure`` whose height grows with the number
        of trials (600 to 1400).
    """
    def condition_color(condition_id):
        # Deterministic colour derived from the 1-based condition id, shared
        # by the trial markers and the legend entries.
        return f'rgb({condition_id * 10 % 256}, {(condition_id * 20) % 256}, {(condition_id * 30) % 256})'

    scatter_plot_start_traces = []
    scatter_plot_end_traces = []
    scatter_plot_lines = []

    # Copy the filtered rows: assigning columns to a boolean-mask slice of the
    # caller's frame raises SettingWithCopyWarning.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()

    # Fill missing values in the 'Phase' column with a placeholder string
    df['Phase'] = df['Phase'].fillna('UNKNOWN')

    # Sort by Phase
    df = df.sort_values(by='Phase')

    # Unparseable dates become NaT instead of raising.
    df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce')
    df['CompletionDate'] = pd.to_datetime(df['CompletionDate'], errors='coerce')

    # Split the conditions
    df = split_conditions(df, 'Condition')

    # Create a dictionary of unique conditions with their IDs starting from 1
    condition_ids = {condition: i for i, condition in enumerate(df['Condition'].unique(), start=1)}

    # Create a dictionary that maps each NCTId to a list of condition IDs
    nctid_condition_map = df.groupby('NCTId')['Condition'].apply(lambda x: [condition_ids[cond] for cond in x]).to_dict()

    # Marker diameter of the completion bubble, derived from enrollment.
    df['MarkerSize'] = df['EnrollmentCount'].apply(get_marker_size)

    # NOTE(review): customdata[0] is the row's Condition, yet both templates
    # label it "Type:" - confirm whether StudyType was intended there.
    hovertemplate_start = 'NCTId: %{y}<br>Conditions: %{text}<br>Type: %{customdata[0]}<br>BriefTitle: %{customdata[1]}<br>OrgStudyId: %{customdata[2]}<br>Phase: %{customdata[3]}\
    <br>Start Date: %{x}<br>Enrollment Count: %{customdata[4]}<extra></extra>'

    hovertemplate_end = 'NCTId: %{y}<br>Conditions: %{text}<br>Type: %{customdata[0]}<br>BriefTitle: %{customdata[1]}<br>OrgStudyId: %{customdata[2]}\
    <br>Phase: %{customdata[3]}<br>Completion Date: %{x}<br>Enrollment Count: %{customdata[4]}<extra></extra>'

    trial_ids = df['NCTId'].unique()
    hover_columns = ['Condition', 'BriefTitle', 'OrgStudyId', 'Phase', 'EnrollmentCount']

    for nctid in trial_ids:
        df_filtered = df[df['NCTId'] == nctid]

        # Every point of the trial shows the trial's full list of conditions.
        all_conditions = ', '.join(df_filtered['Condition'].unique())
        text = [all_conditions] * len(df_filtered)

        # The trial is coloured by its first condition.
        color = condition_color(nctid_condition_map[nctid][0])

        # Start traces (square)
        start_trace = go.Scatter(x=df_filtered['StartDate'],
                                 y=df_filtered['NCTId'],
                                 mode='markers',
                                 marker=dict(size=10, symbol='square', color=color),
                                 text=text,
                                 customdata=df_filtered[hover_columns],
                                 hovertemplate=hovertemplate_start,
                                 showlegend=False)
        scatter_plot_start_traces.append(start_trace)

        # End traces (circle)
        end_trace = go.Scatter(x=df_filtered['CompletionDate'],
                               y=df_filtered['NCTId'],
                               mode='markers',
                               marker=dict(size=df_filtered['MarkerSize'], symbol='circle', color=color, sizemode='diameter'),
                               text=text,
                               customdata=df_filtered[hover_columns],
                               hovertemplate=hovertemplate_end,
                               showlegend=False)
        scatter_plot_end_traces.append(end_trace)

        # Line traces connecting start and end dates
        line_trace = go.Scatter(x=[df_filtered['StartDate'].iloc[0], df_filtered['CompletionDate'].iloc[0]],
                                y=[nctid, nctid],
                                mode='lines',
                                line=dict(color='black', width=1),
                                showlegend=False)
        scatter_plot_lines.append(line_trace)

    # Legend-only traces (no data points), one per condition.
    legend_traces = [go.Scatter(x=[None], y=[None],
                                mode='markers',
                                marker=dict(size=10, symbol='circle', color=condition_color(condition_id)),
                                name=f'{condition_id}: {condition}',
                                showlegend=True) for condition, condition_id in condition_ids.items()]

    # Combine all traces
    data = scatter_plot_start_traces + scatter_plot_end_traces + scatter_plot_lines + legend_traces

    # NOTE(review): the x axis is fixed to 2020-2028; trials outside that
    # window are not visible without zooming out.
    layout = go.Layout(yaxis=dict(title='NCTId',
                                   showgrid=False,
                                   tickvals=trial_ids,
                                   ticktext=trial_ids,
                                   tickangle=0),
                       xaxis=dict(title='Start-End Dates',
                                  showgrid=False,
                                  range=[pd.to_datetime('2020-01-01'), pd.to_datetime('2028-12-31')],
                                  tickvals=[pd.to_datetime(f'{year}-01-01') for year in range(2020, 2029)]),
                       showlegend=True,
                       legend=dict(title='Conditions', x=1.05, y=1, traceorder='normal', bgcolor='rgba(255,255,255,0.5)', font=dict(color='#000000')),
                       margin=dict(l=150),
                       plot_bgcolor='#ffffff',
                       paper_bgcolor='#ffffff',
                       font=dict(family='Segoe UI', color='#000000'))

    fig = go.Figure(data=data, layout=layout)

    # Height grows with the number of trials. The thresholds are checked in
    # ascending order so that every branch is reachable and more trials never
    # produce a shorter figure.
    num_trial = len(trial_ids)
    if num_trial <= 5:
        height = 600
    elif num_trial <= 10:
        height = 800
    elif num_trial <= 20:
        height = 1000
    else:
        height = 1400

    fig.update_layout(
        title='Trial Start and End Dates by Conditions',
        autosize=True,
        height=height
    )

    return fig


########################################################################################################################################################
def plot_trial_bubblemap_comp(df):
    """Plot a start-to-completion timeline of interventional trials.

    Each NCTId gets one row on the y axis: a square marks the start date, a
    circle (sized from ``EnrollmentCount`` via ``get_marker_size``) marks the
    completion date, and a thin black line joins the two.  Marker colour is
    derived from the ID of the first condition mapped to the trial; the legend
    lists every condition together with its ID.

    Args:
        df: DataFrame with the columns ``StudyType``, ``Phase``, ``StartDate``,
            ``CompletionDate``, ``Condition``, ``NCTId``, ``EnrollmentCount``,
            ``BriefTitle`` and ``OrgStudyId``.

    Returns:
        The assembled ``go.Figure``.
    """
    # Fixed display window of the x axis (years, inclusive).
    first_year, last_year = 2010, 2023

    def condition_color(condition_id):
        # Deterministic colour per condition ID, shared by markers and legend.
        return (f'rgb({condition_id * 10 % 256}, '
                f'{(condition_id * 20) % 256}, {(condition_id * 30) % 256})')

    # Keep interventional studies only; copy so the column assignments below
    # operate on an independent frame instead of a slice of the caller's data.
    df = df[df['StudyType'] == "INTERVENTIONAL"].copy()

    # Missing phases get a placeholder so sorting by phase works.
    df['Phase'] = df['Phase'].fillna('UNKNOWN')
    df = df.sort_values(by='Phase')

    # Unparseable dates become NaT instead of raising.
    df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce')
    df['CompletionDate'] = pd.to_datetime(df['CompletionDate'], errors='coerce')

    # Split the conditions
    df = split_conditions(df, 'Condition')

    # Unique conditions numbered from 1, in order of first appearance.
    condition_ids = {condition: i for i, condition in enumerate(df['Condition'].unique(), start=1)}

    # NCTId -> list of condition IDs attached to that trial.
    nctid_condition_map = (
        df.groupby('NCTId')['Condition']
        .apply(lambda conds: [condition_ids[cond] for cond in conds])
        .to_dict()
    )

    df['MarkerSize'] = df['EnrollmentCount'].apply(get_marker_size)

    # NOTE(review): the "Type" label below is fed customdata[0], which is the
    # 'Condition' column (not 'StudyType') -- confirm which one is intended.
    # The four spaces before the second '<br>' are part of the original
    # template text and are kept as-is.
    hovertemplate_start = (
        'NCTId: %{y}<br>Conditions: %{text}<br>Type: %{customdata[0]}<br>BriefTitle: %{customdata[1]}'
        '<br>OrgStudyId: %{customdata[2]}<br>Phase: %{customdata[3]}'
        '    <br>Start Date: %{x}<br>Enrollment Count: %{customdata[4]}<extra></extra>'
    )
    hovertemplate_end = (
        'NCTId: %{y}<br>Conditions: %{text}<br>Type: %{customdata[0]}<br>BriefTitle: %{customdata[1]}'
        '<br>OrgStudyId: %{customdata[2]}'
        '    <br>Phase: %{customdata[3]}<br>Completion Date: %{x}<br>Enrollment Count: %{customdata[4]}<extra></extra>'
    )
    hover_columns = ['Condition', 'BriefTitle', 'OrgStudyId', 'Phase', 'EnrollmentCount']

    scatter_plot_start_traces = []
    scatter_plot_end_traces = []
    scatter_plot_lines = []

    for nctid in df['NCTId'].unique():
        df_filtered = df[df['NCTId'] == nctid]

        # Every point of the trial shows the full list of its conditions.
        text = [', '.join(df_filtered['Condition'].unique())] * len(df_filtered)

        # Colour the trial by its first condition ID.
        color = condition_color(nctid_condition_map[nctid][0])

        # Start traces (square)
        scatter_plot_start_traces.append(
            go.Scatter(x=df_filtered['StartDate'],
                       y=df_filtered['NCTId'],
                       mode='markers',
                       marker=dict(size=10, symbol='square', color=color),
                       text=text,
                       customdata=df_filtered[hover_columns],
                       hovertemplate=hovertemplate_start,
                       showlegend=False)
        )

        # End traces (circle, sized by enrollment)
        scatter_plot_end_traces.append(
            go.Scatter(x=df_filtered['CompletionDate'],
                       y=df_filtered['NCTId'],
                       mode='markers',
                       marker=dict(size=df_filtered['MarkerSize'], symbol='circle',
                                   color=color, sizemode='diameter'),
                       text=text,
                       customdata=df_filtered[hover_columns],
                       hovertemplate=hovertemplate_end,
                       showlegend=False)
        )

        # Line traces connecting start and end dates
        scatter_plot_lines.append(
            go.Scatter(x=[df_filtered['StartDate'].iloc[0], df_filtered['CompletionDate'].iloc[0]],
                       y=[nctid, nctid],
                       mode='lines',
                       line=dict(color='black', width=1),
                       showlegend=False)
        )

    # Legend-only traces (no data points): one entry per condition with its ID.
    legend_traces = [
        go.Scatter(x=[None], y=[None],
                   mode='markers',
                   marker=dict(size=10, symbol='circle', color=condition_color(condition_id)),
                   name=f'{condition_id}: {condition}',
                   showlegend=True)
        for condition, condition_id in condition_ids.items()
    ]

    # Combine all traces
    data = scatter_plot_start_traces + scatter_plot_end_traces + scatter_plot_lines + legend_traces

    nct_ticks = df['NCTId'].unique()
    layout = go.Layout(yaxis=dict(title='NCTId',
                                   showgrid=False,
                                   tickvals=nct_ticks,
                                   ticktext=nct_ticks,
                                   tickangle=0),
                       xaxis=dict(title='Start-End Dates',
                                  showgrid=False,
                                  range=[pd.to_datetime(f'{first_year}-01-01'),
                                         pd.to_datetime(f'{last_year}-12-31')],
                                  # One tick per year, including the last year of the window.
                                  tickvals=[pd.to_datetime(f'{year}-01-01')
                                            for year in range(first_year, last_year + 1)]),
                       showlegend=True,
                       legend=dict(title='Conditions', x=1.05, y=1, traceorder='normal',
                                   bgcolor='rgba(255,255,255,0.5)', font=dict(color='#000000')),
                       margin=dict(l=150),
                       plot_bgcolor='#ffffff',
                       paper_bgcolor='#ffffff',
                       font=dict(family='Segoe UI', color='#000000'))

    fig = go.Figure(data=data, layout=layout)

    # Figure height grows with the number of trials (rows on the y axis).
    # Thresholds are tested in ascending order so every tier is reachable.
    num_trial = len(nct_ticks)
    if num_trial <= 5:
        height = 600
    elif num_trial <= 10:
        height = 800
    elif num_trial <= 20:
        height = 1000
    else:
        height = 1400

    fig.update_layout(
        title='Trial Start and End Dates by Conditions',
        autosize=True,
        height=height
    )

    return fig


#######################################################################################


#######################################################################################


############################################   Trial Site Map without Zip code now ##############

import geopandas as gpd

def plot_trial_site_world_map(df, country_filter=None):
    """Plot trial sites on a world map, one bubble per site.

    Each distinct (City, Country) pair is geocoded with
    ``gpd.tools.geocode``; rows are then counted per
    Country/City/Site/Condition/NCTId/BriefTitle/coordinates group and drawn
    with ``px.scatter_geo``, bubble size being the group count.

    Args:
        df: DataFrame with the columns ``City``, ``Country``, ``Site``,
            ``Condition``, ``NCTId`` and ``BriefTitle``.  It is not modified.
        country_filter: optional country name; when given, only rows whose
            ``Country`` equals it are geocoded and plotted.

    Returns:
        The plotly figure produced by ``px.scatter_geo``.
    """
    # Work on a copy so the city fix-up below does not leak into the caller's frame.
    df = df.copy()
    df.loc[(df['City'] == 'Multiple Locations') & (df['Country'] == 'Germany'), 'City'] = 'Berlin'

    # Filter before geocoding: the plotted rows are the same as filtering
    # afterwards, but cities that will never be shown are not looked up.
    if country_filter:
        df = df[df['Country'] == country_filter]

    # Rows here are already unique, so each pair is geocoded exactly once.
    unique_cities = df[['City', 'Country']].drop_duplicates().copy()

    latitudes = []
    longitudes = []
    for city, country in zip(unique_cities['City'], unique_cities['Country']):
        point = gpd.tools.geocode(f"{city}, {country}").geometry[0]
        if point is None or point.is_empty:
            # No geocoding hit: leave the coordinates missing. The groupby
            # below drops groups with missing keys, so the site is omitted
            # instead of crashing the whole map.
            latitudes.append(float('nan'))
            longitudes.append(float('nan'))
        else:
            latitudes.append(point.y)
            longitudes.append(point.x)
    unique_cities['Latitude'] = latitudes
    unique_cities['Longitude'] = longitudes

    df = df.merge(unique_cities, on=['City', 'Country'])

    # Create a new column combining 'Site' and 'Country'
    df['SiteCountry'] = df['Site'] + ', ' + df['Country']

    df_count = (
        df.groupby(['Country', 'City', 'SiteCountry', 'Condition', 'NCTId',
                    'BriefTitle', 'Latitude', 'Longitude'])
        .size()
        .reset_index(name='Count')
    )

    fig = px.scatter_geo(df_count,
                         lat='Latitude',
                         lon='Longitude',
                         hover_name='SiteCountry',
                         hover_data={'Latitude':False, 'Longitude':False, 'NCTId':False,'BriefTitle':False, 'Condition':False, 'City':True, 'Country':True},
                         size='Count',
                         color='SiteCountry',
                         projection='mercator')

    fig.update_layout(title='Trial Sites Map',
                      geo=dict(showframe=False, showcoastlines=False, showcountries=True),
                      width=1200,
                      height=800)

    return fig


#############################################################################################################

#############################################################           Gradio Function as Views ####################################


###  #########################                             Find Sponsors
 #############################################################################################################################################


def select_sponsor(sponsor_input, academia_input):
    """Return ``sponsor_input`` when it is truthy, otherwise ``academia_input``.

    NOTE: a later definition of ``select_sponsor`` in this file rebinds the name.
    """
    return sponsor_input or academia_input

def select_disease(disease_input, disease_input_text):
    """Pick the disease to search for.

    The free-text entry wins when it contains something other than
    whitespace; otherwise the dropdown selection is used.

    Args:
        disease_input: value chosen from the dropdown.
        disease_input_text: free-text value (may be ``None`` or empty).

    Returns:
        The stripped free text, or ``disease_input`` when no text was typed.
    """
    # Strip before testing so a whitespace-only entry does not override the
    # dropdown selection with an empty string.
    typed = (disease_input_text or "").strip()
    return typed if typed else disease_input

#summary_stats,summary_stats_collb, html_table_conditions, html_table_conditions_collb, html_table,html_table_drug

#async def disease_view (condition, condition_text, sponsor_input, academia_input):
async def disease_view(condition, sponsor_input):
    """Build the sponsor/collaborator overview for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct`` with status
    "Recruiting", and renders the sponsor tree and (when collaborator data
    exists) the collaborator icicle chart.

    Args:
        condition: condition name, or a list of names (joined with spaces).
        sponsor_input: sponsor name, or a list of names (joined with spaces).

    Returns:
        A 6-tuple ``(summary_stats, summary_stats_collb,
        html_table_conditions, html_table_conditions_collb, sponsor_tree,
        collaborator_tree)``.  On any error the first element is a message
        string and the remaining five are ``None``.
    """
    def error_result(message):
        # Error returns must have the same arity as the success return (6).
        return (message,) + (None,) * 5

    # List inputs are flattened to one string for the regular-expression check.
    sponsor = ' '.join(sponsor_input) if isinstance(sponsor_input, list) else sponsor_input
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Check sponsor and condition inputs: length and allowed characters.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return error_result("Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].")

    # Without this guard no query would run and the result names below would
    # be unbound.
    if not condition and not sponsor:
        return error_result("Error: Please provide a Sponsor or a Condition.")

    # Only pass the filters that were actually supplied.
    query = {'status': "Recruiting"}
    if condition:
        query['condition'] = condition
    if sponsor:
        query['sponsor'] = sponsor
    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    no_match = "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!"

    #### error traps
    if html_table_conditions is None or html_table_conditions_collb is None:
        return error_result(no_match)

    # Convert the HTML tables to pandas DataFrames. The markup is wrapped in
    # StringIO because passing literal HTML to read_html is deprecated.
    df = pd.read_html(io.StringIO(html_table_conditions))[0]
    try:
        df2 = pd.read_html(io.StringIO(html_table_conditions_collb))[0]
    except (ValueError, IndexError):
        # No collaborator table could be parsed: treat as "no collaborators".
        df2 = pd.DataFrame()

    #### error traps
    if df.empty and df2.empty:
        return error_result(no_match)

    sponsor_tree = plot_sponsor_tree(df)

    # The collaborator chart is optional and stays None without data.
    collaborator_tree = plot_collaborator_icicle(df2) if not df2.empty else None

    return summary_stats, summary_stats_collb, html_table_conditions, html_table_conditions_collb, sponsor_tree, collaborator_tree

   # return summary_stats,summary_stats_collb, html_table_conditions,html_table_conditions_collb, condition_other, condition_sunburst ,sponsor_tree, collaborator_tree


##################### Assets ###################################################################################


def select_sponsor(s_sponsor_input, s_academia_input):
    """Prefer the sponsor selection; use the academia selection when it is falsy."""
    return s_sponsor_input if s_sponsor_input else s_academia_input

def select_condition(s_disease_input, s_disease_input_type):
    """Pick the condition to search for.

    Args:
        s_disease_input: value chosen from the dropdown.
        s_disease_input_type: free-text value (may be ``None`` or empty).

    Returns:
        The stripped free text when it is non-blank, otherwise
        ``s_disease_input``.
    """
    # A missing text value (None) is treated like an empty one instead of
    # raising AttributeError on .strip().
    typed = (s_disease_input_type or "").strip()
    return typed if typed else s_disease_input


async def drug_view(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the drug (asset) view for recruiting trials.

    Resolves the sponsor and condition from the paired inputs, validates
    them, queries ``gradio_wrapper_nct`` with status "Recruiting" and renders
    the drug Sankey diagram.

    Args:
        condition: condition chosen from the dropdown.
        condition_type: free-text condition; overrides ``condition`` when set.
        s_sponsor_input: sponsor selection.
        s_academia_input: academia selection, used when no sponsor is given.

    Returns:
        A 3-tuple ``(summary_stats, html_table_drugs, sankey_map_drug)``.  On
        any error the first element is a message string and the other two
        are ``None``.
    """
    sponsor = select_sponsor(s_sponsor_input, s_academia_input)
    condition = select_condition(condition, condition_type)

    # List inputs are flattened to one string for the regular-expression check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Check sponsor and condition inputs: length and allowed characters.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None

    # Without this guard no query would run and the result names below would
    # be unbound.
    if not condition and not sponsor:
        return "Error: Please provide a Sponsor or a Condition.", None, None

    # Only pass the filters that were actually supplied.
    query = {'status': "Recruiting"}
    if condition:
        query['condition'] = condition
    if sponsor:
        query['sponsor'] = sponsor
    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    #### error traps
    if html_table_drugs is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Convert the HTML table to a pandas DataFrame. The markup is wrapped in
    # StringIO because passing literal HTML to read_html is deprecated.
    df = pd.read_html(io.StringIO(html_table_drugs))[0]

    #### error traps
    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    sankey_map_drug = plot_drug_sankey(df)

    return summary_stats, html_table_drugs, sankey_map_drug


########################### Condition###################
##################    ########################################################################################


def select_sponsor_phc(s_sponsor_input_phc, s_academia_input_phc):
    """Return the sponsor selection, or the academia selection when the sponsor is falsy."""
    if not s_sponsor_input_phc:
        return s_academia_input_phc
    return s_sponsor_input_phc

def select_condition_phc(s_disease_input_phc, s_disease_input_type_phc):
    """Pick the condition to search for.

    Args:
        s_disease_input_phc: value chosen from the dropdown.
        s_disease_input_type_phc: free-text value (may be ``None`` or empty).

    Returns:
        The stripped free text when it is non-blank, otherwise
        ``s_disease_input_phc``.
    """
    # A missing text value (None) is treated like an empty one instead of
    # raising AttributeError on .strip().
    typed = (s_disease_input_type_phc or "").strip()
    return typed if typed else s_disease_input_phc


#async def disease_view_phc(condition, condition_type, s_sponsor_input, s_academia_input):
async def disease_view_phc(condition, s_sponsor_input):
    """Build the condition view (treemap and sunburst) for recruiting trials.

    Validates the inputs, queries ``gradio_wrapper_nct`` with status
    "Recruiting" and renders the condition treemap and sunburst charts.

    Args:
        condition: condition name, or a list of names (joined with spaces).
        s_sponsor_input: sponsor name, or a list of names (joined with spaces).

    Returns:
        A 4-tuple ``(summary_stats, html_table_conditions, tree_map_cond_nct,
        sunburst_map_cond_nct)``.  On any error the first element is a
        message string and the remaining three are ``None``.
    """
    def error_result(message):
        # Error returns must have the same arity as the success return (4).
        return (message,) + (None,) * 3

    # List inputs are flattened to one string for the regular-expression check.
    sponsor = ' '.join(s_sponsor_input) if isinstance(s_sponsor_input, list) else s_sponsor_input
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Check sponsor and condition inputs: length and allowed characters.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return error_result("Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].")

    # Without this guard no query would run and the result names below would
    # be unbound.
    if not condition and not sponsor:
        return error_result("Error: Please provide a Sponsor or a Condition.")

    # Only pass the filters that were actually supplied.
    query = {'status': "Recruiting"}
    if condition:
        query['condition'] = condition
    if sponsor:
        query['sponsor'] = sponsor
    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    #### error traps
    if html_table_conditions is None:
        return error_result("No data matched from Clinical Trials.Gov, Please try with new selection !")

    # Convert the HTML table to a pandas DataFrame. The markup is wrapped in
    # StringIO because passing literal HTML to read_html is deprecated.
    df = pd.read_html(io.StringIO(html_table_conditions))[0]

    #### error traps
    if df.empty:
        return error_result("The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!")

    tree_map_cond_nct = plot_condition_treemap_nct(df)
    sunburst_map_cond_nct = plot_condition_sunburst_nct(df)

    return summary_stats, html_table_conditions, tree_map_cond_nct, sunburst_map_cond_nct

   # return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map


##################  Trial  ########################################################################################


def select_sponsor_phs(s_sponsor_input_phs, s_academia_input_phs):
    """Return the sponsor selection when truthy, else the academia selection."""
    return s_sponsor_input_phs or s_academia_input_phs

def select_condition_phs(s_disease_input_phs, s_disease_input_type_phs):
    """Pick the condition to search for.

    Args:
        s_disease_input_phs: value chosen from the dropdown.
        s_disease_input_type_phs: free-text value (may be ``None`` or empty).

    Returns:
        The stripped free text when it is non-blank, otherwise
        ``s_disease_input_phs``.
    """
    # A missing text value (None) is treated like an empty one instead of
    # raising AttributeError on .strip().
    typed = (s_disease_input_type_phs or "").strip()
    return typed if typed else s_disease_input_phs


async def disease_view_phs(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the trial view (NCT-to-organisation icicle) for recruiting trials.

    Resolves the sponsor and condition from the paired inputs, validates
    them, queries ``gradio_wrapper_nct`` with status "Recruiting" and renders
    the icicle chart from the conditions table.

    Args:
        condition: condition chosen from the dropdown.
        condition_type: free-text condition; overrides ``condition`` when set.
        s_sponsor_input: sponsor selection.
        s_academia_input: academia selection, used when no sponsor is given.

    Returns:
        A 3-tuple ``(summary_stats, html_table_conditions, nct_org_map)``.  On
        any error the first element is a message string and the other two
        are ``None``.
    """
    sponsor = select_sponsor_phs(s_sponsor_input, s_academia_input)
    condition = select_condition_phs(condition, condition_type)

    # List inputs are flattened to one string for the regular-expression check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Check sponsor and condition inputs: length and allowed characters.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None

    # Without this guard no query would run and the result names below would
    # be unbound.
    if not condition and not sponsor:
        return "Error: Please provide a Sponsor or a Condition.", None, None

    # Only pass the filters that were actually supplied.
    query = {'status': "Recruiting"}
    if condition:
        query['condition'] = condition
    if sponsor:
        query['sponsor'] = sponsor
    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    #### error traps
    if html_table_conditions is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Convert the HTML table to a pandas DataFrame. The markup is wrapped in
    # StringIO because passing literal HTML to read_html is deprecated.
    df = pd.read_html(io.StringIO(html_table_conditions))[0]

    #### error traps
    if df.empty:
        return "The Sponsor Name did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    nct_org_map = plot_nct2org_icicle(df)

    return summary_stats, html_table_conditions, nct_org_map

   # return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map


#####################################################  New Trials              ######################################

def select_sponsor_phs_n(s_sponsor_input_phs, s_academia_input_phs):
    """Prefer the sponsor selection; fall back to the academia selection when it is falsy."""
    return s_sponsor_input_phs if s_sponsor_input_phs else s_academia_input_phs

def select_condition_phs_n(s_disease_input_phs, s_disease_input_type_phs):
    """Pick the condition to search for.

    Args:
        s_disease_input_phs: value chosen from the dropdown.
        s_disease_input_type_phs: free-text value (may be ``None`` or empty).

    Returns:
        The stripped free text when it is non-blank, otherwise
        ``s_disease_input_phs``.
    """
    # A missing text value (None) is treated like an empty one instead of
    # raising AttributeError on .strip().
    typed = (s_disease_input_type_phs or "").strip()
    return typed if typed else s_disease_input_phs


####################################################################################
async def disease_view_phs_n(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the "new trials" view for trials that are not yet recruiting.

    Resolves the sponsor and condition from the paired inputs, validates
    them, queries ``gradio_wrapper_nct`` with status "Not yet recruiting",
    then renders the drug Sankey diagram (from the drugs table) and the trial
    bubble map (from the conditions table).

    Args:
        condition: condition chosen from the dropdown.
        condition_type: free-text condition; overrides ``condition`` when set.
        s_sponsor_input: sponsor selection.
        s_academia_input: academia selection, used when no sponsor is given.

    Returns:
        A 4-tuple ``(summary_stats, html_table_conditions, drug_sankey,
        bubble_map_trials)``.  On any error the first element is a message
        string and the remaining three are ``None``.
    """
    no_data = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = select_sponsor_phs_n(s_sponsor_input, s_academia_input)
    condition = select_condition_phs_n(condition, condition_type)

    # List inputs are flattened to one string for the regular-expression check.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Check sponsor and condition inputs: length and allowed characters.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None, None

    # Without this guard no query would run and the result names below would
    # be unbound.
    if not condition and not sponsor:
        return "Error: Please provide a Sponsor or a Condition.", None, None, None

    # Only pass the filters that were actually supplied.
    query = {'status': "Not yet recruiting"}
    if condition:
        query['condition'] = condition
    if sponsor:
        query['sponsor'] = sponsor
    (summary_stats, summary_stats_collb, html_table_conditions,
     html_table_conditions_collb, html_table, html_table_drugs) = await gradio_wrapper_nct(**query)

    #### error traps
    # Both tables are parsed below, so both must be present.
    if html_table_conditions is None or html_table_drugs is None:
        return no_data, None, None, None

    # Convert the drugs HTML table to a pandas DataFrame. The markup is
    # wrapped in StringIO because passing literal HTML to read_html is
    # deprecated.
    df = pd.read_html(io.StringIO(html_table_drugs))[0]

    #### error traps
    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None

    drug_sankey = plot_drug_sankey(df)

    # The bubble map is drawn from the conditions table.
    df2 = pd.read_html(io.StringIO(html_table_conditions))[0]
    bubble_map_trials = plot_trial_bubblemap(df2)

    return summary_stats, html_table_conditions, drug_sankey, bubble_map_trials


###############################################        Completed Trials                ####################################################
def select_sponsor_phs_c(s_sponsor_input_phs, s_academia_input_phs):
    """Return the sponsor selection, defaulting to the academia selection when it is falsy."""
    chosen = s_academia_input_phs
    if s_sponsor_input_phs:
        chosen = s_sponsor_input_phs
    return chosen

def select_condition_phs_c(s_disease_input_phs, s_disease_input_type_phs):
    """Pick the condition to search for.

    Args:
        s_disease_input_phs: value chosen from the dropdown.
        s_disease_input_type_phs: free-text value (may be ``None`` or empty).

    Returns:
        The stripped free text when it is non-blank, otherwise
        ``s_disease_input_phs``.
    """
    # A missing text value (None) is treated like an empty one instead of
    # raising AttributeError on .strip().
    typed = (s_disease_input_type_phs or "").strip()
    return typed if typed else s_disease_input_phs

async def disease_view_phs_c(condition, condition_type, s_sponsor_input, s_academia_input):
    """Build the "Completed" trials view for a condition and/or sponsor.

    Args:
        condition: Condition value (string or list of strings).
        condition_type: Alternative condition text; when non-blank it
            overrides ``condition`` (see ``select_condition_phs_c``).
        s_sponsor_input: Preferred sponsor value (string or list of strings).
        s_academia_input: Fallback sponsor value used when
            ``s_sponsor_input`` is empty.

    Returns:
        A 6-tuple ``(summary_stats, html_table_conditions, tree_map_cond_nct,
        nct_org_map, sankey_map_drug, bubble_map_trials)``. On any error the
        first element is a user-facing message and the remaining five are
        ``None``.
    """
    invalid_input_msg = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."
    no_data_msg = "No data matched from Clinical Trials.Gov, Please try with new selection !"

    sponsor = select_sponsor_phs_c(s_sponsor_input, s_academia_input)
    condition = select_condition_phs_c(condition, condition_type)

    # List inputs are flattened to one string because the regex check below
    # only works on str values.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Validate both inputs: at most 50 characters, restricted character set.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input_msg, None, None, None, None, None

    status = "Completed"
    if condition and sponsor:
        results = await gradio_wrapper_nct(condition=condition, sponsor=sponsor, status=status)
    elif sponsor:
        results = await gradio_wrapper_nct(sponsor=sponsor, status=status)
    elif condition:
        results = await gradio_wrapper_nct(condition=condition, status=status)
    else:
        # Nothing was selected. Without this branch the result variables were
        # never bound and the function crashed with UnboundLocalError.
        return no_data_msg, None, None, None, None, None

    (summary_stats, _summary_stats_collb, html_table_conditions,
     _html_table_conditions_collb, _html_table, html_table_drugs) = results

    # Either table missing means there is nothing to parse or plot.
    if html_table_conditions is None or html_table_drugs is None:
        return no_data_msg, None, None, None, None, None

    # Literal HTML strings passed to read_html are deprecated in recent pandas;
    # wrapping in StringIO works on old and new versions alike.
    df = pd.read_html(io.StringIO(html_table_conditions))[0]
    df2 = pd.read_html(io.StringIO(html_table_drugs))[0]

    # Check for empty results BEFORE plotting so the plot helpers are never
    # handed empty frames.
    if df.empty and df2.empty:
        return "The selection did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None, None, None

    # The bubble map gets its own copy (instead of re-parsing the same HTML)
    # so it is unaffected if the other plot helpers modify ``df`` in place.
    df3 = df.copy()

    tree_map_cond_nct = plot_condition_treemap_nct(df)
    nct_org_map = plot_nct2org_icicle(df)
    bubble_map_trials = plot_trial_bubblemap_comp(df3)
    sankey_map_drug = plot_drug_sankey(df2)

    return summary_stats, html_table_conditions, tree_map_cond_nct, nct_org_map, sankey_map_drug, bubble_map_trials


###   ###############   Country      #########################################################

def select_sponsor_con(sponsor_input_con, academia_input_con):
    """Return ``sponsor_input_con`` when truthy, else ``academia_input_con``.

    The fallback is returned unchanged, even when it is itself empty or
    ``None``.
    """
    return sponsor_input_con or academia_input_con

def select_condition_con(condition_input, condition_input_type):
    """Pick the condition to search for, preferring the typed text.

    Args:
        condition_input: Fallback condition value.
        condition_input_type: Typed condition text; may be ``None`` or blank,
            in which case the fallback is used.

    Returns:
        The typed text stripped of surrounding whitespace when non-blank,
        otherwise ``condition_input`` unchanged.
    """
    # None is treated like blank text instead of raising on ``.strip()``.
    typed = (condition_input_type or "").strip()
    return typed if typed else condition_input

async def condition_view(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the country view of "Recruiting" trials.

    Args:
        condition: Condition value (string or list of strings).
        country: Country filter, passed through to ``gradio_wrapper_nct_spn``.
        condition_type: Alternative condition text; when non-blank it
            overrides ``condition`` (see ``select_condition_con``).
        sponsor_input_con: Preferred sponsor value.
        academia_input_con: Fallback sponsor value used when
            ``sponsor_input_con`` is empty.

    Returns:
        A 3-tuple ``(summary_stats_sites, html_table_add, trial_country)``.
        On any error the first element is a user-facing message and the other
        two are ``None``.
    """
    invalid_input_msg = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    condition = select_condition_con(condition, condition_type)
    sponsor = select_sponsor_con(sponsor_input_con, academia_input_con)

    # List inputs are flattened to one string because the regex check below
    # only works on str values.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Validate both inputs: at most 50 characters, restricted character set.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input_msg, None, None

    status = "Recruiting"
    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status=status)

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # One parse is enough: the original parsed the same HTML twice and only
    # plotted one of the frames. StringIO avoids pandas' deprecated
    # literal-HTML input path.
    df = pd.read_html(io.StringIO(html_table_add))[0]

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    trial_country = plot_trial_country_map(df)

    return summary_stats_sites, html_table_add, trial_country


###############          Site         #########################################################################################################


def select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s):
    """Return ``sponsor_input_con_s`` when truthy, else ``academia_input_con_s``.

    The fallback is returned unchanged, even when it is itself empty or
    ``None``.
    """
    return sponsor_input_con_s or academia_input_con_s


def select_condition_con(condition_input, condition_input_type):
    """Pick the condition to search for, preferring the typed text.

    NOTE: a function with this same name is also defined earlier in the
    file; being later, this definition is the one the module ends up with.

    Args:
        condition_input: Fallback condition value.
        condition_input_type: Typed condition text; may be ``None`` or blank,
            in which case the fallback is used.

    Returns:
        The typed text stripped of surrounding whitespace when non-blank,
        otherwise ``condition_input`` unchanged.
    """
    # None is treated like blank text instead of raising on ``.strip()``.
    typed = (condition_input_type or "").strip()
    return typed if typed else condition_input

async def condition_view_s(condition, country, condition_type, sponsor_input_con_s, academia_input_con_s):
    """Build the site view of "Recruiting" trials.

    Args:
        condition: Condition value (string or list of strings).
        country: Country filter, passed through to ``gradio_wrapper_nct_spn``.
        condition_type: Alternative condition text; when non-blank it
            overrides ``condition`` (see ``select_condition_con``).
        sponsor_input_con_s: Preferred sponsor value.
        academia_input_con_s: Fallback sponsor value used when
            ``sponsor_input_con_s`` is empty.

    Returns:
        A 4-tuple ``(summary_stats_sites, html_table_add, site_cond,
        country_site)``. On any error the first element is a user-facing
        message and the other three are ``None``.
    """
    invalid_input_msg = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    condition = select_condition_con(condition, condition_type)
    sponsor = select_sponsor_con_s(sponsor_input_con_s, academia_input_con_s)

    # List inputs are flattened to one string because the regex check below
    # only works on str values.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Validate both inputs: at most 50 characters, restricted character set.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input_msg, None, None, None

    status = "Recruiting"
    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status=status)

    # Single "no data" trap (the original repeated this identical check
    # three times in a row).
    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None, None

    # StringIO avoids pandas' deprecated literal-HTML input path.
    df = pd.read_html(io.StringIO(html_table_add))[0]

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None, None

    site_cond = plot_trial_sites(df)
    country_site = plot_trial_site_map(df)

    return summary_stats_sites, html_table_add, site_cond, country_site


###################################### Timelines ###################################################################


def select_sponsor_cont(sponsor_input_con, academia_input_con):
    """Return ``sponsor_input_con`` when truthy, else ``academia_input_con``.

    The fallback is returned unchanged, even when it is itself empty or
    ``None``.
    """
    return sponsor_input_con or academia_input_con

def select_condition_cont(condition_input, condition_input_type):
    """Pick the condition to search for, preferring the typed text.

    Args:
        condition_input: Fallback condition value.
        condition_input_type: Typed condition text; may be ``None`` or blank,
            in which case the fallback is used.

    Returns:
        The typed text stripped of surrounding whitespace when non-blank,
        otherwise ``condition_input`` unchanged.
    """
    # None is treated like blank text instead of raising on ``.strip()``.
    typed = (condition_input_type or "").strip()
    return typed if typed else condition_input

async def condition_viewt(condition, country, condition_type, sponsor_input_con, academia_input_con):
    """Build the timelines view of "Recruiting" trials.

    Args:
        condition: Condition value (string or list of strings).
        country: Country filter, passed through to ``gradio_wrapper_nct_spn``.
        condition_type: Alternative condition text; when non-blank it
            overrides ``condition`` (see ``select_condition_cont``).
        sponsor_input_con: Preferred sponsor value.
        academia_input_con: Fallback sponsor value used when
            ``sponsor_input_con`` is empty.

    Returns:
        A 3-tuple ``(summary_stats_sites, html_table, bubble_map_trials)``.
        On any error the first element is a user-facing message and the other
        two are ``None``.
    """
    invalid_input_msg = "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -]."

    condition = select_condition_cont(condition, condition_type)
    sponsor = select_sponsor_cont(sponsor_input_con, academia_input_con)

    # List inputs are flattened to one string because the regex check below
    # only works on str values.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor
    condition = ' '.join(condition) if isinstance(condition, list) else condition

    # Validate both inputs: at most 50 characters, restricted character set.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    for value in (condition, sponsor):
        if isinstance(value, str) and (len(value) > 50 or not re.match(allowed_chars, value)):
            return invalid_input_msg, None, None

    status = "Recruiting"
    (_summary_stats, _html_table_conditions, html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status=status)

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # StringIO avoids pandas' deprecated literal-HTML input path.
    df = pd.read_html(io.StringIO(html_table_add))[0]

    if df.empty:
        return "The Sponsor Name typed did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    bubble_map_trials = plot_trial_bubblemap(df)

    # NOTE(review): this view returns ``html_table`` while the plot is built
    # from ``html_table_add`` (and the sibling views return
    # ``html_table_add``) -- confirm this is intentional.
    return summary_stats_sites, html_table, bubble_map_trials


###############               Find Site Map          #########################################################################################################


def select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map):
    """Return ``sponsor_input_con_map`` when truthy, else ``academia_input_con_map``.

    The fallback is returned unchanged, even when it is itself empty or
    ``None``.
    """
    return sponsor_input_con_map or academia_input_con_map

async def condition_view_map(condition, country, sponsor_input_con_map, academia_input_con_map):
    """Build the site world-map view of "Recruiting" trials.

    Args:
        condition: Condition value, passed unmodified to
            ``gradio_wrapper_nct_spn`` (it is not validated here).
        country: Country filter, passed through to ``gradio_wrapper_nct_spn``.
        sponsor_input_con_map: Preferred sponsor value.
        academia_input_con_map: Fallback sponsor value used when
            ``sponsor_input_con_map`` is empty.

    Returns:
        A 3-tuple ``(summary_stats_sites, html_table_add, world_map)``. On
        any error the first element is a user-facing message and the other
        two are ``None``.
    """
    sponsor = select_sponsor_con_map(sponsor_input_con_map, academia_input_con_map)

    # A list input is flattened to one string because the regex check below
    # only works on str values.
    sponsor = ' '.join(sponsor) if isinstance(sponsor, list) else sponsor

    # Validate the sponsor: at most 50 characters, restricted character set.
    allowed_chars = r'^[A-Za-z .,&/()-]*$'
    if isinstance(sponsor, str) and (len(sponsor) > 50 or not re.match(allowed_chars, sponsor)):
        return "Error: Invalid input. Please limit your input within 50 characters and use only English letters with allowed special characters [. , & / -].", None, None

    status = "Recruiting"
    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        condition=condition, sponsor=sponsor, country=country, status=status)

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # StringIO avoids pandas' deprecated literal-HTML input path.
    df = pd.read_html(io.StringIO(html_table_add))[0]

    if df.empty:
        return "The Trial Id did not match with ClinicalTrials.Gov, Please try with new selection again!", None, None

    world_map = plot_trial_site_world_map(df)

    if world_map is None:
        # Message typo fixed ("slecting" -> "selecting").
        return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None

    return summary_stats_sites, html_table_add, world_map


### ########################################Find Trial Eligibility###########################################################################


############################################################################  END VIEWS########################

#### Removes duplicated numbering from inclusion/exclusion criteria text

import re

def format_html_list(html_string):
    """Renumber a run of "N. text" items and join them with ``<br>``.

    The input is split at every "<digits>." followed by one whitespace
    character. Any text before the first such marker is discarded. Each item
    has ``:.`` and ``General.`` removed, a leading item with no text is
    dropped, the items are renumbered from 1, and runs of two or more periods
    are collapsed to one.

    Args:
        html_string: Text containing numbered items; may be empty or ``None``.

    Returns:
        The items joined by ``<br>``, or ``""`` when the input is empty or
        contains no numbered items.
    """
    if not html_string:
        return ""

    # With a capturing group, re.split yields
    # [prefix, number1, text1, number2, text2, ...].
    pieces = re.split(r'(\d+\.\s)', html_string)
    items = [number + text for number, text in zip(pieces[1::2], pieces[2::2])]

    # Remove unwanted fragments from each item.
    items = [re.sub(r':\.', '', item) for item in items]
    items = [re.sub(r'General\.', '', item) for item in items]

    # Filter out blank items.
    items = [item for item in items if item.strip()]

    # No numbered items at all: previously this raised IndexError on items[0].
    if not items:
        return ""

    # Drop the first item when it is only a number with no text. The length
    # check guards items whose number is followed by non-space whitespace
    # (e.g. "1.\n"), which do not contain the '. ' separator.
    head = items[0].split('. ', 1)
    if len(head) > 1 and head[1].strip() == '':
        items = items[1:]

    # Renumber sequentially; items without a '. ' separator are kept as-is.
    renumbered = []
    for position, item in enumerate(items, start=1):
        parts = item.split('. ', 1)
        renumbered.append(f"{position}. {parts[1]}" if len(parts) > 1 else item)

    # Collapse runs of periods left behind by the removals above.
    renumbered = [re.sub(r'\.{2,}', '.', item) for item in renumbered]

    return "<br>".join(renumbered)
########################################################################################


#####################################################################################

#############################################################################################################################################
async def trial_view_map(nctID):
    """Build the site world map for a single trial looked up by NCT ID.

    Args:
        nctID: Trial identifier; surrounding whitespace is ignored. It must
            start with ``NCT`` and be 10 to 12 characters long. ``None`` is
            treated as an empty (invalid) ID.

    Returns:
        A 3-tuple ``(summary_stats_sites, world_map, html_table_add)``. On
        any error the first element is a user-facing message and the other
        two are ``None``.
    """
    # Treat a missing value like an empty string instead of crashing on
    # ``.strip()``.
    nctID = (nctID or "").strip()

    if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12):
        return "Not a Valid NCT ID has been entered", None, None

    status = "Recruiting"
    (_summary_stats, _html_table_conditions, _html_table,
     summary_stats_sites, html_table_add, _html_table_drugs) = await gradio_wrapper_nct_spn(
        NCTId=nctID, status=status)

    if html_table_add is None:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # StringIO avoids pandas' deprecated literal-HTML input path.
    df = pd.read_html(io.StringIO(html_table_add))[0]

    world_map = plot_trial_site_world_map(df)
    if world_map is None:
        return "Sorry, the plot could not be generated. Please try again by selecting a country!", None, None

    return summary_stats_sites, world_map, html_table_add


####################################################################################################################################################
import plotly.graph_objects as go

def split_numbered_criteria(text):
    """Group the lines of ``text`` into one string per numbered criterion.

    A line opens a new criterion when its first character is a digit and
    ". " occurs within its first four characters. Every other non-blank line
    is appended (space-separated) to the criterion currently being built.
    Lines that appear before the first numbered line form their own entry.

    Returns ``[]`` for falsy input and ``["No criteria available"]`` when the
    text contains only blank lines.
    """
    if not text:
        return []

    groups = []
    for raw_line in text.split('\n'):
        entry = raw_line.strip()
        if not entry:
            continue

        opens_new = entry[0].isdigit() and '. ' in entry[:4]
        if opens_new or not groups:
            # Either a numbered line, or the very first non-blank line.
            groups.append([entry])
        else:
            groups[-1].append(entry)

    merged = [' '.join(group) for group in groups]
    return merged or ["No criteria available"]

def display_criteria_table(inclusion_text, exclusion_text):
    """
    Build a two-column Plotly table of inclusion vs. exclusion criteria.

    Both texts are split into individual criteria with
    ``split_numbered_criteria``; the shorter list is padded with empty
    strings so each column has the same number of rows.

    Returns the Plotly figure, or ``None`` if anything raises (the error is
    printed).
    """
    try:
        included = split_numbered_criteria(inclusion_text)
        excluded = split_numbered_criteria(exclusion_text)

        # Pad the shorter column so both have the same row count.
        row_count = max(len(included), len(excluded))
        included += [''] * (row_count - len(included))
        excluded += [''] * (row_count - len(excluded))

        header_spec = {
            'values': ['<b>Inclusion Criteria</b>', '<b>Exclusion Criteria</b>'],
            'fill_color': '#e6f3ff',
            'align': ['left', 'left'],
            'font': {'size': 14, 'color': 'black'},
            'height': 40,
        }
        cell_spec = {
            'values': [included, excluded],
            # white / #f9f9f9 pattern repeated row_count times; presumably
            # meant as alternating row shading -- TODO confirm rendering.
            'fill_color': [['white', '#f9f9f9'] * row_count],
            'align': ['left', 'left'],
            'font': {'size': 12},
            'height': None,
            'line': {'color': 'lightgrey', 'width': 1},  # light borders
        }

        criteria_table = go.Table(
            columnwidth=[500, 500],  # equal-width columns
            header=header_spec,
            cells=cell_spec,
        )
        fig = go.Figure(data=[criteria_table])

        # Figure height grows with the number of rows, with a floor of 400.
        fig.update_layout(
            title="Trial Eligibility Criteria",
            width=1200,
            height=max(400, row_count * 30 + 100),
            margin={'l': 20, 'r': 20, 't': 40, 'b': 20},
        )

        return fig

    except Exception as e:
        print(f"Error in display_criteria_table: {str(e)}")
        return None

async def trial_view(nctID):
    """Build the eligibility view (trial table + criteria table) for one trial.

    Args:
        nctID: Trial identifier; surrounding whitespace is ignored. It must
            start with ``NCT`` and be 10 to 12 characters long. ``None`` is
            treated as an empty (invalid) ID.

    Returns:
        On success ``(html_table, criteria_table)``. On error a 3-tuple whose
        first element is a user-facing message and whose other elements are
        ``None``.

    NOTE(review): the success path returns 2 values while the error paths
    return 3. The number of Gradio outputs wired to this handler is not
    visible here -- confirm which arity is correct and align the other.
    """
    # Treat a missing value like an empty string instead of crashing on
    # ``.strip()``.
    nctID = (nctID or "").strip()

    if not nctID.startswith('NCT') or not (10 <= len(nctID) <= 12):
        return "Not a Valid NCT ID has been entered", None, None

    status = "Recruiting"
    # Only the trial table is used from the wrapper's six results.
    (_summary_stats, _summary_stats_collb, _html_table_conditions,
     _html_table_conditions_collb, html_table, _html_table_drugs) = await gradio_wrapper_nct(
        NCTId=nctID, status=status)

    formatted_inclusions = get_formatted_inclusion_criteria(nctID)
    formatted_exclusions = get_formatted_exclusion_criteria(nctID)

    if not formatted_inclusions and not formatted_exclusions:
        return "No data matched from Clinical Trials.Gov, Please try with new selection !", None, None

    # Single table holding both inclusion and exclusion criteria.
    criteria_table = display_criteria_table(formatted_inclusions, formatted_exclusions)

    return html_table, criteria_table


############################### Design the interface####################################################################################

## Added after the Sept 27 failure
from gradio.components import Dropdown


###############################################################################################################################################################################

trial_app = gr.Blocks()
with trial_app:
 gr.Markdown("<center style='font-size: 36px;'><b style='color: green;'>Trial Connect</b></center>")
 gr.Markdown("<center style='font-size: 20px;'><b style='color: green;'>Data Source: ClinicalTrials.gov </b></center>")
 #gr.Markdown("<p style='text-align: left; font-size: 20px; color: green;'>Now Recruiting Trials:</p>")
 with gr.Tabs():


  ##############################################################################################################################################
   ################################################################    Conditions                ###############################################################################################
        with gr.TabItem("Trials"):

        # 1st Row
####################################################################################################################################################
#####################################################################################################################################################


            with gr.Row():
               gr.HTML('''
                      <h1 style="font-size:16px;font-weight:normal;color:green; ">'Now Recruiting' Trials for Conditions:</h1>
                      <p style="font-size:16px;color:green; ">1. Select a Condition, for example, 'Pancreatic Cancer', 'Chronic Kidney Disease', 'MASH' etc.</p>
                      <p style="font-size:16px;color:green; ">2. Select a Sponsor'. </p>
                      <p style="font-size:16px;color:green; ">3. Click 'Show Trials'. </p>
                      ''')


#####################################################################################################################################################


            with gr.Row():

###################################################################
               with gr.Column():
                s_disease_input_phc = gr.Dropdown(
                      choices=["Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
                              "Cancer","Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
                              "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
                              "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
                              "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
                                "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
                                "Solid Tumor","Stomach Cancer","Rectal Cancer","Triple Negative Breast Cancer","Thyroid Cancer",\
                                "Urothelial Carcinoma",\
                                "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
                              "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\
                                " Major","Metabolic",  "Generalized Pustular Psoriasis",\
                                "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
                                "Liver Cirrhosis", \
                              "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis",  \
                              "Psychological Trauma","Renal", "Respiratory",\
                              "Schizophrenia", "PTSD", \
                              "Venous Thromboembolism", "Wet"],
                      label="Select Condition"
                )
###################################################################
               with gr.Column():

#### #########################################################################################################################################################################################################

                  s_sponsor_input_phc = gr.Dropdown(

                    choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
                                            "CSL Behring", "Daiichi Sankyo, Inc.",\
                                            "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
                                              "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
                                              "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
                                    label="Select Sponsor"
                                    )
######################################################################################################################################################################
          # 3rd Row
            with gr.Row():  #academia_input = gr.inputs.Dropdown(
                s_button_phc = gr.Button("Show Trials")

                # Then, create the clear button and add the dropdown input to it
                clear_btn_phc = gr.ClearButton()
                clear_btn_phc.add(s_sponsor_input_phc)

                clear_btn_phc.add(s_disease_input_phc)

             # with gr.Column():
################# # 3rd row#################################################################
          #  with gr.Row():
###################################################################
           #    with gr.Column():
            #     s_academia_input_phc = gr.Textbox(lines=1, label="Type a Sponsor Name:")
             #    clear_btn_phc.add(s_academia_input_phc)

###################################################################
              # with gr.Column():


               #   s_disease_input_type_phc = gr.Textbox(lines=1, label="Filter by typing a Condition:")
                #  clear_btn_phc.add(s_disease_input_type_phc)
############################################################################################################################################
######################################################################################################################################################################

#########################################################################################################################################################################
            with gr.Row():

               summary_block_phc = gr.HTML(label="Clinical Trials Now Recruiting for Conditions :" )
#############################################################################################################################################################
          #  with gr.Row():
             #    nct_org_map = gr.Plot()
##########################################################################################################################################################
####################################################################################################################################################
           # with gr.Row():
           #    gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Conditions by Trials and Phase</h1>')
            with gr.Row():
              # with gr.Column():
                 tree_map_cond_nct = gr.Plot()
####################################################################################################################################################
           # with gr.Row():
           #    gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Conditions by Trials and Phase</h1>')
            with gr.Row():
              # with gr.Column():
                 
                sunburst_map_cond_nct = gr.Plot()


            with gr.Row():
               output_block_conditions_phc = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")

            clear_btn_phc.add(summary_block_phc)
            clear_btn_phc.add(output_block_conditions_phc)
            clear_btn_phc.add(tree_map_cond_nct)
            clear_btn_phc.add(sunburst_map_cond_nct)

            #clear_btn_phs.add(nct_org_map)

#########################################################################
 ################################################################    Trials                     ###############################################################################################
       # with gr.TabItem("Trials"):

        # 1st Row
####################################################################################################################################################
        #    with gr.Row():
         #     gr.HTML('''
          #            <h1 style="font-size:16px;font-weight:normal;color:green; ">Trials 'Now Recruiting':</h1>
           #           <p style="font-size:16px;color:green; ">1. Select a Sponsor and click 'Show Trials'. </p>
            #          <p style="font-size:16px;color:green; ">2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.</p>
             #         ''')


#####################################################################################################################################################

         #   with gr.Row():
          #    with gr.Column():

#### #########################################################################################################################################################################################################

          #     s_sponsor_input_phs = gr.Dropdown(

############################################################################
           #         choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
            #                                "CSL Behring", "Daiichi Sankyo, Inc.",\
             #                               "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
              #                                "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
               #                               "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
                #                     label="Select a Sponsor"
                 #                   )
                              ##############################################################################################################
           #   with gr.Column():


######################################################################################################################################################################
          # 3rd Row
         #   with gr.Row():  #academia_input = gr.inputs.Dropdown(
          #      s_button_phs = gr.Button("Show Trials")

                # Then, create the clear button and add the dropdown input to it
           #     clear_btn_phs = gr.ClearButton()
            #    clear_btn_phs.add(s_sponsor_input_phs)

             #   clear_btn_phs.add(s_disease_input_phs)

######################################################################################################################################################################
################# # 3rd row#################################################################
          #  with gr.Row():
              #################################################################################################################################################################
           #   with gr.Column():
            #     s_academia_input_phs = gr.Textbox(lines=1, label="Type a Sponsor Name:")
             #    clear_btn_phs.add(s_academia_input_phs)
#################################################################################################################################################################
         #     with gr.Column():


          #       s_disease_input_type_phs = gr.Textbox(lines=1, label="Filter by typing a Condition:")
           #      clear_btn_phs.add(s_disease_input_type_phs)
############################################################################################################################################

#########################################################################################################################################################################
        #    with gr.Row():

         #      summary_block_phs = gr.HTML(label="Conditions and Sponsors Now Recruiting for Clinical Trials:" )
#############################################################################################################################################################
            #with gr.Row():
             #    nct_org_map = gr.Plot()
##########################################################################################################################################################
####################################################################################################################################################

      #      with gr.Row():
       #        output_block_conditions_phs = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")

        #    clear_btn_phs.add(summary_block_phs)
         #   clear_btn_phs.add(output_block_conditions_phs)
          #  clear_btn_phs.add(nct_org_map)

#########################################################################
############################################################   ASSETS  ###############################################################
        with gr.TabItem("Drugs"):
            # "Drugs" tab layout, top to bottom: usage instructions, the
            # condition/sponsor dropdowns, the action buttons, two free-text
            # inputs, and finally the result widgets.

            # Entries offered by the condition dropdown.
            # NOTE(review): " Major" carries a leading space and looks like a
            # fragment of a longer condition name - confirm before changing it.
            _drugs_tab_conditions = [
                "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
                "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer",
                "Endometrial Cancer", "Esophageal Cancer", "Gallbladder Carcinoma",
                "Gastric Cancer", "Glioblastoma", "Head and Neck Cancer",
                "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
                "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma",
                "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
                "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer", "Renal Cancer",
                "Solid Tumor", "Stomach Cancer", "Rectal Cancer",
                "Triple Negative Breast Cancer", "Thyroid Cancer",
                "Urothelial Carcinoma",
                "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder",
                "Bronchiectasis", "Cognitive Deficit", "COPD",
                "Chronic Kidney Diseases", "Crohn Disease", "Diabetes",
                "Diabetic Retinopathy", "Depression", "Depressive Disorder",
                " Major", "Metabolic", "Generalized Pustular Psoriasis",
                "Heart Failure", "Hepatic Insufficiency", "Hypertension",
                "Idiopathic Pulmonary Fibrosis", "Interstitial",
                "Liver Cirrhosis",
                "MASH", "Non-alcoholic Fatty Liver Disease", "Obesity",
                "Pancreatic Diseases", "Psoriasis",
                "Psychological Trauma", "Renal", "Respiratory",
                "Schizophrenia", "PTSD",
                "Venous Thromboembolism", "Wet",
            ]

            # Entries offered by the sponsor dropdown.
            _drugs_tab_sponsors = [
                "AbbVie", "Amgen", "AstraZeneca", "Bayer", "Biogen",
                "Bristol-Myers Squibb", "Boehringer Ingelheim",
                "CSL Behring", "Daiichi Sankyo, Inc.",
                "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences",
                "GlaxoSmithKline", "Hoffmann-La Roche",
                "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center",
                "Merck Sharp & Dohme LLC", "ModernaTX, Inc.", "National Cancer Institute",
                "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer",
                "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
            ]

            # Row 1: static usage instructions.
            with gr.Row():
                gr.HTML('''
                      <h1 style="font-size:16px;font-weight:normal;color:green; ">Drugs for 'Now Recruiting' Trials:</h1>
                    
                      <p style="font-size:16px;color:green; ">1. Select a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc. </p>
                      <p style="font-size:16px;color:green; ">2. Select a Sponsor and click 'Show Drugs'. </p>
                      ''')

            # Row 2: condition dropdown (left column) and sponsor dropdown (right column).
            with gr.Row():
                with gr.Column():
                    s_disease_input = gr.Dropdown(
                        choices=_drugs_tab_conditions,
                        label= "Filter by a Condition",
                    )
                with gr.Column():
                    s_sponsor_input = gr.Dropdown(
                        choices=_drugs_tab_sponsors,
                        label="Select a Sponsor",
                    )

            # Row 3: action button plus a clear button that is wired to every
            # input and output widget of this tab as each one is created.
            with gr.Row():
                s_drug_button = gr.Button("Show Drugs")
                clear_btn = gr.ClearButton()
                for _cleared in (s_sponsor_input, s_disease_input):
                    clear_btn.add(_cleared)

            # Row 4: free-text alternatives to the two dropdowns.
            with gr.Row():
                with gr.Column():
                    s_academia_input = gr.Textbox(lines=1, label="Type a Sponsor Name:")
                    clear_btn.add(s_academia_input)
                with gr.Column():
                    s_disease_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:")
                    clear_btn.add(s_disease_input_type)

            # Result widgets: summary HTML, a plot, and the detailed HTML listing.
            with gr.Row():
                drug_summary_block = gr.HTML(label="Conditions and Drug Assets, Sponsors Now Recruiting for Clinical Trials:" )
            with gr.Row():
                sankey_map_drug = gr.Plot()
            with gr.Row():
                drug_output_block_conditions = gr.HTML(label="Outputs: List of Conditions, Trial Ids and Sponsors")

            # The outputs are reset by the same clear button as the inputs.
            for _cleared in (drug_summary_block, drug_output_block_conditions, sankey_map_drug):
                clear_btn.add(_cleared)
 ############################################################################################################################################################################################

#####################################################################  Country#####################################################


     #  with gr.TabItem("Countries"):
##########################################################################
        # 1st Row
####################################################################################################################################################
      #      with gr.Row():
       #        gr.HTML('''
        #              <h1 style="font-size:16px;font-weight:normal;color:green; ">Countries 'Now Recruiting':</h1>
         #             <p style="font-size:16px;color:green; ">1. Select a Sponsor, a Condition Name and click 'Show Countries'. </p>
          #            <p style="font-size:16px;color:green; ">2. Filter by a Country, for example, 'United States','Germany' etc.  </p>
           #           ''')


#####################################################################################################################################################
          #  with gr.Row():
###############################################################################
           #   with gr.Column():
            #     sponsor_input_con = gr.Dropdown(
############################################################################
             #       choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
              #                              "CSL Behring", "Daiichi Sankyo, Inc.",\
               #                             "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
                #                              "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
                 #                             "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
                  #                   label="Select a Sponsor"
                   #                 )

###############################################################################################################################################################################################
            # with gr.Column():

             #    condition_input_con = gr.Dropdown(
              #       choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
               #              "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
                #             "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
                 #            "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
                  #           "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
                   #           "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
                    ##         "Urothelial Carcinoma",\
                      #        "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
                       ##      " Major","Metabolic",  "Generalized Pustular Psoriasis",\
                         #     "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
                          #    "Liver Cirrhosis", \
                           #  "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis",  \
                           #  "Psychological Trauma","Renal", "Respiratory",\
                           #  "Schizophrenia", "PTSD", \
                           #  "Venous Thromboembolism", "Wet"],
                   # label= "Select a Condition")

###############################################################################
         #     with gr.Column():
          #       country_input_tr = gr.Dropdown(
           #      choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\
            #                 "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\
             #                "Malaysia","Mexico","Netherlands", \
              #               "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\
               #              "United Kingdom"\
                #             ],
                 # label="Filter by a Country")
###########################################################################################################################################################################################
###########################################################################################################################################################
          #  with gr.Row():


           #   condition_button = gr.Button("Show Countries")
                    # Then, create the clear button and add the dropdown input to it
            #  clear_cn_btn = gr.ClearButton()
             # clear_cn_btn.add(condition_input_con)
              #clear_cn_btn.add(sponsor_input_con)


             # clear_cn_btn.add(country_input_tr)
################# # 3rd row#################################################################
           # with gr.Row():
 ################################################################    ##############            ###############################################################################
            #  with gr.Column():
             #    academia_input_con = gr.Textbox(lines=1, label="Type a Sponsor Name:")
              #   clear_cn_btn.add(academia_input_con)
 ################################################################    ##############            ###############################################################################
             # with gr.Column():
              #  condition_input_type = gr.Textbox(lines=1, label="Filter by typing a Condition:")
               # clear_cn_btn.add(condition_input_type)
###############################################################################


##############################################################################################################################################################################
      #      with gr.Row():
       #        summary_block_cond = gr.HTML(label="Countries with Recruiting Clinical Trials:" )
         #  with gr.Row():
               #bubble_map_trial = gr.Plot()
        #    with gr.Row():
         #      trial_countries = gr.Plot()
          #  with gr.Row():
           #  condition_output = gr.HTML(label="List of Recruiting Trials")
             # condition_output = gr.Textbox(label="List of Recruiting Trials")
              ## clear output ?
            # clear_cn_btn.add(summary_block_cond)
            # clear_cn_btn.add(trial_countries)
             #clear_cn_btn.add(bubble_map_trial)
            # clear_cn_btn.add(condition_output)

 ############################################################ Site  ############################# #####################################################################
        with gr.TabItem("Locations"):
            # "Locations" tab layout, top to bottom: usage instructions, the
            # condition/sponsor/country dropdowns, the action buttons, two
            # free-text inputs, and finally the result widgets.

            # Row 1: static usage instructions.
            with gr.Row():
                  gr.HTML('''
                      <h1 style="font-size:16px;font-weight:normal;color:green; ">Sites 'Now Recruiting':</h1>
                      <p style="font-size:16px;color:green; ">1. Select a Condition, Sponsor and Country and click 'Show Sites'. </p>
                      <p style="font-size:16px;color:green; ">2. Review each Site and Cities with the Trial Ids and the Conditions.  </p>
                      ''')

            # Row 2: one column each for the condition, sponsor and country pickers.
            with gr.Row():
                with gr.Column():
                    # NOTE(review): " Major" carries a leading space and looks like
                    # a fragment of a longer condition name - confirm before changing.
                    condition_input_s = gr.Dropdown(
                        choices=[
                            "Cancer", "Breast Cancer", "Biliary Tract Cancer", "Bladder Cancer",
                            "Carcinoma", "Cervical Cancer", "Colon Cancer", "Colorectal Cancer",
                            "Endometrial Cancer", "Esophageal Cancer", "Gallbladder Carcinoma",
                            "Gastric Cancer", "Glioblastoma", "Head and Neck Cancer",
                            "Head and Neck Squamous Cell Carcinoma (HNSCC)", "Hepatic Cancer",
                            "Kidney Cancer", "Liver Cancer", "Lung Cancer", "Melanoma",
                            "Non-Hodgkin Lymphoma", "Non-Small Cell Lung Cancer",
                            "Ovarian Cancer", "Pancreatic Cancer", "Prostate Cancer", "Renal Cancer",
                            "Solid Tumor", "Stomach Cancer", "Rectal Cancer",
                            "Triple Negative Breast Cancer", "Thyroid Cancer",
                            "Urothelial Carcinoma",
                            "Alzheimer", "Asthma", "Attention Deficit Hyperactivity Disorder",
                            "Bronchiectasis", "Cognitive Deficit", "COPD",
                            "Chronic Kidney Diseases", "Crohn Disease", "Diabetes",
                            "Diabetic Retinopathy", "Depression", "Depressive Disorder",
                            " Major", "Metabolic", "Generalized Pustular Psoriasis",
                            "Heart Failure", "Hepatic Insufficiency", "Hypertension",
                            "Idiopathic Pulmonary Fibrosis", "Interstitial",
                            "Liver Cirrhosis",
                            "MASH", "Non-alcoholic Fatty Liver Disease", "Obesity",
                            "Pancreatic Diseases", "Psoriasis",
                            "Psychological Trauma", "Renal", "Respiratory",
                            "Schizophrenia", "PTSD",
                            "Venous Thromboembolism", "Wet",
                        ],
                        label="Select a Condition",
                    )

                with gr.Column():
                    sponsor_input_con_s = gr.Dropdown(
                        choices=[
                            "AbbVie", "Amgen", "AstraZeneca", "Bayer", "Biogen",
                            "Bristol-Myers Squibb", "Boehringer Ingelheim",
                            "CSL Behring", "Daiichi Sankyo, Inc.",
                            "Eli Lilly and Company", "Eisai Inc.", "Gilead Sciences",
                            "GlaxoSmithKline", "Hoffmann-La Roche",
                            "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center",
                            "Merck Sharp & Dohme LLC", "ModernaTX, Inc.", "National Cancer Institute",
                            "Novartis Pharmaceuticals", "Novo Nordisk A/S", "Pfizer",
                            "Regeneron Pharmaceuticals", "Sanofi", "Takeda",
                        ],
                        label="Select a Sponsor",
                    )

                with gr.Column():
                    # "Colombia" is the country; the previous spelling "Columbia"
                    # does not name a country and could not select its trials.
                    country_input_s = gr.Dropdown(
                        choices=[
                            "United States", "Argentina", "Australia", "Austria", "Belgium",
                            "Brazil", "Bulgaria", "Canada", "Colombia", "China", "Chile",
                            "Croatia", "Czechia", "Denmark", "Finland", "France", "Greece",
                            "Germany", "Hungary",
                            "India", "Ireland", "Israel", "Italy", "Japan", "Korea", "Latvia",
                            "Malaysia", "Mexico", "Netherlands",
                            "New Zealand", "Norway", "Poland", "Portugal", "Romania", "Serbia",
                            "Singapore", "Slovakia", "Spain", "South Africa", "Sweden",
                            "Switzerland", "Taiwan", "Turkey",
                            "United Kingdom",
                        ],
                        label="Select a Country",
                    )

            # Row 3: action button plus a clear button that is wired to every
            # input and output widget of this tab as each one is created.
            with gr.Row():
                condition_button_s = gr.Button("Show Sites")
                clear_cn_btn = gr.ClearButton()
                clear_cn_btn.add(condition_input_s)
                clear_cn_btn.add(sponsor_input_con_s)
                clear_cn_btn.add(country_input_s)

            # Row 4: free-text alternatives for the sponsor and the condition.
            with gr.Row():
                with gr.Column():
                    academia_input_con_s = gr.Textbox(lines=1, label="Type a Sponsor Name:")
                    clear_cn_btn.add(academia_input_con_s)

                with gr.Column():
                    condition_input_site = gr.Textbox(lines=1, label="Filter by typing a Condition:")
                    clear_cn_btn.add(condition_input_site)

            # Result widgets: summary HTML, a plot, a heading, a second plot,
            # and the detailed HTML listing.
            with gr.Row():
                summary_block_cond_s = gr.HTML(label="Sites where Sponsors Now Recruiting for Clinical Trials:" )

            with gr.Row():
                site_cond = gr.Plot()

            with gr.Row():
                gr.HTML('<h1 style="font-size:24px; color:black; font-weight:bold;">Recruiting Sites with Trial Ids and Conditions </h1>')

            with gr.Row():
                country_site = gr.Plot()

            with gr.Row():
                condition_output_s = gr.HTML(label="List of Recruiting Trials for Country, Sites")

                # The outputs are reset by the same clear button as the inputs.
                clear_cn_btn.add(summary_block_cond_s)
                clear_cn_btn.add(condition_output_s)
                clear_cn_btn.add(country_site)
                clear_cn_btn.add(site_cond)


############################################################################# TIMELINES     #############################################################################

      #  with gr.TabItem("Timeline"):
##############################################################
       #      with gr.Row():
####################################################################################################################################################

        #      gr.HTML('''
         #             <h1 style="font-size:16px;font-weight:normal;color:green; ">Timelines for 'Now Recruiting' Trials:</h1>
          #            <p style="font-size:16px;color:green; ">1. Select a Sponsor and click 'Show Timelines'. </p>
           #           <p style="font-size:16px;color:green; ">2. Filter by a Condition Name, for example, 'Lung Cancer', 'Chronic Kidney Disease', 'MASH' etc.</p>
            #          ''')


###########################################################################################
           #  with gr.Row():

            #  with gr.Column():
             ##########################################################################################################################################
               #  sponsor_input_cont = gr.Dropdown(
############################################################################
               # choices=[ "AbbVie", "Amgen","AstraZeneca","Bayer", "Biogen","Bristol-Myers Squibb", "Boehringer Ingelheim", \
                #                            "CSL Behring", "Daiichi Sankyo, Inc.",\
                 #                           "Eli Lilly and Company","Eisai Inc.","Gilead Sciences","GlaxoSmithKline", "Hoffmann-La Roche", \
                  #                            "Janssen Research & Development, LLC", "M.D. Anderson Cancer Center","Merck Sharp & Dohme LLC","ModernaTX, Inc.", "National Cancer Institute",\
                   #                           "Novartis Pharmaceuticals", "Novo Nordisk A/S","Pfizer", "Regeneron Pharmaceuticals", "Sanofi","Takeda"],
                    #                label="Select a Sponsor"
                     #               )
###############################################################################################################################################################
            #  with gr.Column():
             #     condition_input_cont= gr.Dropdown(

              #       choices=["Cancer","Breast Cancer","Biliary Tract Cancer", "Bladder Cancer",\
               #              "Carcinoma","Cervical Cancer","Colon Cancer","Colorectal Cancer","Endometrial Cancer",\
                #             "Esophageal Cancer","Gallbladder Carcinoma","Gastric Cancer","Glioblastoma", "Head and Neck Cancer",\
                 #            "Head and Neck Squamous Cell Carcinoma (HNSCC)","Hepatic Cancer",\
                  #           "Kidney Cancer","Liver Cancer", "Lung Cancer","Melanoma","Non-Hodgkin Lymphoma","Non-Small Cell Lung Cancer",\
                   #           "Ovarian Cancer","Pancreatic Cancer","Prostate Cancer","Renal Cancer",\
                    ##         "Urothelial Carcinoma",\
                      #        "Alzheimer","Asthma","Attention Deficit Hyperactivity Disorder","Bronchiectasis","Cognitive Deficit", "COPD", \
                       #      "Chronic Kidney Diseases","Crohn Disease","Diabetes", "Diabetic Retinopathy","Depression","Depressive Disorder",\
                       #       " Major","Metabolic",  "Generalized Pustular Psoriasis",\
                        #      "Heart Failure","Hepatic Insufficiency","Hypertension","Idiopathic Pulmonary Fibrosis", "Interstitial",\
                        #      "Liver Cirrhosis", \
                        #     "MASH","Non-alcoholic Fatty Liver Disease", "Obesity", "Pancreatic Diseases","Psoriasis",  \
                        #     "Psychological Trauma","Renal", "Respiratory",\
                        #     "Schizophrenia", "PTSD", \
                        #     "Venous Thromboembolism", "Wet"],
                       # label="Filter by a Condition")

###############################################################################################################################################################
           #   with gr.Column():
            #     country_input_trt = gr.Dropdown(
             #    choices=["United States", "Argentina","Australia", "Austria","Belgium","Brazil","Bulgaria","Canada","Columbia","China", "Chile","Croatia","Czechia","Denmark","Finland","France", "Greece","Germany","Hungary",\
              #               "India","Ireland","Israel","Italy","Japan","Korea","Latvia",\
               #              "Malaysia","Mexico","Netherlands", \
                #             "New Zealand","Norway","Poland","Portugal","Romania", "Serbia","Singapore","Slovakia","Spain", "South Africa","Sweden", "Switzerland","Taiwan","Turkey",\
                 #            "United Kingdom"\
                 #            ],
                 # label="Filter by a Country")

###########################################################################################
            # with gr.Row():
             #   condition_button_t = gr.Button("Show Timelines")
                      # Then, create the clear button and add the dropdown input to it
              #  clear_cn_btn = gr.ClearButton()
              # clear_cn_btn.add(condition_input_cont)
              #  clear_cn_btn.add(sponsor_input_cont)

              #  clear_cn_btn.add(country_input_trt)
###########################################################################################
            # with gr.Row():
                ###############################################################################################################################################################
             # with gr.Column():
              #      academia_input_cont = gr.Textbox(lines=1, label="Type a Sponsor Name:")
               #     clear_cn_btn.add(academia_input_cont)
  ###############################################################################################################################################################
              #with gr.Column():
               #   condition_input_typet = gr.Textbox(lines=1, label="Filter by typing a Condition:")
                #  clear_cn_btn.add(condition_input_typet)


  ##################################################################################################################################
###########################################################################################
           #  with gr.Row():
            #    summary_block_condt = gr.HTML(label="Countries with Recruiting Clinical Trials:" )
 ###########################################################################################
            # with gr.Row():
             #     bubble_map_trial = gr.Plot()
###########################################################################################
            # with gr.Row():
             # condition_outputt = gr.HTML(label="List of Recruiting Trials")
              # condition_output = gr.Textbox(label="List of Recruiting Trials")
                ## clear output ?
             # clear_cn_btn.add(summary_block_condt)
             # clear_cn_btn.add(bubble_map_trial)
             # clear_cn_btn.add(condition_outputt)

 ############################################################ Eligibility ##############


        with gr.TabItem("Eligibility"):
            # "Eligibility" tab: the user types a single NCT id and clicks
            # "Show Eligibility"; results are rendered into an HTML panel and
            # a plot. The click handler for trial_button is registered later
            # in this file, together with the other event handlers.

            # Static usage instructions shown at the top of the tab.
            with gr.Row():
                gr.HTML('''
                      <h1 style="font-size:16px;font-weight:normal;color:green; ">Eligibility Criteria for a Trial:</h1>
                      <p style="font-size:16px;color:green; ">1. Type a single Trial's NCT Id, for example: NCT05512377 or NCT04924075 or NCT04419506 etc. and click 'Show Eligibility'. </p>
                      <p style="font-size:16px;color:green; ">2. Inclusion and Exclusion Criteria for that single Trial are displayed with the Diseases, Diagnostic Procedures and Medications highlighted. </p>
                      <p style="font-size:16px;color:green; ">3. Wait time approximately 30 seconds for the model to run and highlight eligibility text. </p>
                      ''')

            # Input row: NCT id textbox, the action button and a clear button.
            with gr.Row():
                nctID_input = gr.Textbox(lines=1, label="Type a Trial NCT Id: ")
                trial_button = gr.Button("Show Eligibility")
                # The clear button is created first and components are
                # registered with it afterwards, as they come into existence.
                clear_tn_btn = gr.ClearButton()
                clear_tn_btn.add(nctID_input)

            # Output row 1: HTML panel for the trial output.
            with gr.Row():
                trial_output = gr.HTML(label="Detail of Recruiting Trials")

            # Output row 2: plot for the eligibility output.
            with gr.Row():
                eligibilities_plot = gr.Plot()

            # Register both outputs with the clear button as well, so the
            # textbox, the HTML panel and the plot are all attached to it.
            clear_tn_btn.add(trial_output)
            clear_tn_btn.add(eligibilities_plot)


  ##############################################################################################################################################


  ################################ EVENT BUTTONS at GRADIO ################################################################################################################################

            ## Sponsors
            #sponsor_button.click(disease_view, inputs=[disease_input,disease_input_text, sponsor_input, academia_input], outputs=[summary_block,summary_block_collbs,\
           # sponsor_button.click(disease_view, inputs=[disease_input, sponsor_input], outputs=[summary_block,summary_block_collbs,\
                                                                                                                               #   output_block_conditions,output_block_conditions_collbs,\
                                                                                                                                  #condition_others,\
                                                                                                                                  #condition_sunbursts,
                                                                                                                                 # sponsor_trees\
                                                                                                                                 # ,collaborator_trees\
                                                                                                                                #  ])

          ## Conditions
           # s_button_phc.click(disease_view_phc, inputs=[s_disease_input_phc,s_disease_input_type_phc, s_sponsor_input_phc,s_academia_input_phc], outputs=[summary_block_phc, output_block_conditions_phc,\
            # Each registration below connects one button to its view function
            # and lists the components read as inputs and written as outputs.
            # NOTE: the handlers for the Trials, Country and Timelines views
            # are currently disabled; only the four below are active.

            # Conditions
            s_button_phc.click(
                disease_view_phc,
                inputs=[
                    s_disease_input_phc,
                    s_sponsor_input_phc,
                ],
                outputs=[
                    summary_block_phc,
                    output_block_conditions_phc,
                    tree_map_cond_nct,
                    sunburst_map_cond_nct,
                ],
            )

            # Drugs
            s_drug_button.click(
                drug_view,
                inputs=[
                    s_disease_input,
                    s_disease_input_type,
                    s_sponsor_input,
                    s_academia_input,
                ],
                outputs=[
                    drug_summary_block,
                    drug_output_block_conditions,
                    sankey_map_drug,
                ],
            )

            # Site
            condition_button_s.click(
                condition_view_s,
                inputs=[
                    condition_input_s,
                    country_input_s,
                    condition_input_site,
                    sponsor_input_con_s,
                    academia_input_con_s,
                ],
                outputs=[
                    summary_block_cond_s,
                    condition_output_s,
                    site_cond,
                    country_site,
                ],
            )

            # Eligibility (a sample id to try it with: NCT04419506)
            trial_button.click(
                trial_view,
                inputs=[nctID_input],
                outputs=[
                    trial_output,
                    eligibilities_plot,
                ],
            )


          #  trial_buttons.click(trial_view_map, inputs=[nctID_inputs], outputs=[summary_block_trial_map, world_map,trial_output_map])

# Start the Gradio server for the app object built above (trial_app is
# defined earlier in the file). Per Gradio's launch() documentation,
# share=True additionally requests a temporary public share link.
# NOTE(review): when run as a plain script, launch() presumably blocks here,
# so the statements further down would only run after the server stops —
# confirm whether this file is meant to be executed cell-by-cell.
trial_app.launch(share=True)


#trial_app.launch(share=True, debug = "TRUE")

import requests
import json

# Standalone check of the ClinicalTrials.gov v2 "studies" endpoint: request the
# NCT ids of recruiting studies for one fixed lead sponsor and print them as a
# single-column DataFrame.
base_url = "https://clinicaltrials.gov/api/v2/studies"

# Query parameters sent with the request.
params = {
    "query.lead": "Boehringer Ingelheim",  # search the lead sponsor field
    "filter.overallStatus": "RECRUITING",  # keep only this overall status
    "fields": "protocolSection.identificationModule.nctId",  # return the NCT id only
}

# Without a timeout, requests waits indefinitely on an unresponsive server;
# bound the wait so the script fails fast instead of hanging.
response = requests.get(base_url, params=params, timeout=30)

# Only a 200 response is parsed; any other status is silently skipped here.
if response.status_code == 200:
    data = response.json()
    # Pull the NCT id out of every returned study. A payload without a
    # "studies" key yields an empty list rather than raising KeyError.
    nct_ids = [
        study["protocolSection"]["identificationModule"]["nctId"]
        for study in data.get("studies", [])
    ]
    # One row per study, single "NCTId" column.
    df = pd.DataFrame(nct_ids, columns=["NCTId"])

    print(df)