Spaces:
Sleeping
Sleeping
| import pandas as pd | |
# Number of each age unit in one year; used to express every age bound in years.
# NOTE(review): assumes ages are written as "<number> <unit>" (e.g. "18 Years",
# "6 Months"), which is what the original split-on-whitespace parser implied.
_AGE_UNITS_PER_YEAR = {
    "year": 1.0,
    "month": 12.0,
    "week": 365.25 / 7,
    "day": 365.25,
    "hour": 365.25 * 24,
    "minute": 365.25 * 24 * 60,
}


def _age_to_years(age_text):
    """Convert an age string such as "6 Months" to a number of years, or None."""
    if pd.isnull(age_text):
        return None
    parts = str(age_text).split()
    try:
        amount = float(parts[0])
    except (IndexError, ValueError):
        # Empty or non-numeric text (e.g. "N/A") cannot be used as a bound.
        return None
    # Strip a plural "s" so "Years" and "Year" map to the same key; an unknown
    # or missing unit is treated as years, matching the previous behaviour.
    unit = parts[1].lower().rstrip("s") if len(parts) > 1 else "year"
    return amount / _AGE_UNITS_PER_YEAR.get(unit, 1.0)


def _normalize_keywords(user_keywords):
    """Return the user's keywords as a list of stripped, lower-cased strings."""
    if isinstance(user_keywords, str):
        raw_keywords = user_keywords.split(",")
    elif isinstance(user_keywords, (list, tuple, set, frozenset)):
        raw_keywords = user_keywords
    else:
        return []
    cleaned = (str(keyword).strip().lower() for keyword in raw_keywords)
    return [keyword for keyword in cleaned if keyword]


def _keyword_mask(frame, keywords):
    """Return a boolean Series marking rows whose text contains any keyword."""
    if not keywords:
        return pd.Series(True, index=frame.index, dtype=bool)
    # Built with a plain loop rather than DataFrame.apply(axis=1) so that an
    # empty frame still yields an (empty) boolean Series.
    matches = []
    for row in frame.itertuples(index=False, name=None):
        row_text = " ".join(str(value).lower() for value in row if pd.notnull(value))
        matches.append(any(keyword in row_text for keyword in keywords))
    return pd.Series(matches, index=frame.index, dtype=bool)


def search_trials(user_age, user_sex, user_state, user_keywords,
                  csv_path: str = "clinical_trials_cleaned_merged.csv") -> pd.DataFrame:
    """
    Search for recruiting US clinical trials matching the user's demographics & optional keywords.

    Returns ALL available columns from the dataset.

    Args:
        user_age: The user's age in years; converted with ``int()``.
        user_sex: The user's sex; matched case-insensitively against the
            ``Sex`` column. Trials whose ``Sex`` is "all" always match.
        user_state: State name, compared case-insensitively with
            ``LocationState``.
        user_keywords: Either a comma-separated string or a list/tuple/set of
            keywords. A trial matches when ANY keyword occurs as a substring
            of the row's text. Any other value (e.g. ``None``) disables
            keyword filtering.
        csv_path: Path of the CSV file to load.

    Returns:
        A DataFrame (index reset) with the dataset's own columns, containing
        only the trials that pass every filter.

    Raises:
        ValueError: If ``user_age`` cannot be converted to an integer.
        KeyError: If a required column is missing from the CSV.
    """
    # === Load dataset ===
    df = pd.read_csv(csv_path)

    # Rows without age bounds, sex or status cannot be matched and are dropped.
    df = df.dropna(subset=["MinimumAge", "MaximumAge", "Sex", "OverallStatus"])

    # Keep only US & recruiting trials
    is_us = df["LocationCountry"] == "United States"
    is_recruiting = df["OverallStatus"].str.lower() == "recruiting"
    df = df[is_us & is_recruiting]

    # Age bounds in years, held as standalone Series so the frame itself is
    # never mutated (no helper columns to drop, no SettingWithCopyWarning) and
    # the keyword search below sees only the dataset's own columns.
    min_years = pd.to_numeric(df["MinimumAge"].map(_age_to_years), errors="coerce")
    max_years = pd.to_numeric(df["MaximumAge"].map(_age_to_years), errors="coerce")

    keywords = _normalize_keywords(user_keywords)

    # === Create masks ===
    age = int(user_age)
    sex_mask = df["Sex"].str.lower().isin([str(user_sex).lower(), "all"])
    # Unparseable bounds are NaN and compare False, excluding the trial.
    age_mask = (min_years <= age) & (max_years >= age)
    state_mask = df["LocationState"].str.lower() == str(user_state).lower()
    keyword_mask = _keyword_mask(df, keywords)

    # Apply all filters and return ALL columns
    return df[sex_mask & age_mask & state_mask & keyword_mask].reset_index(drop=True)