Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from gspread_dataframe import set_with_dataframe | |
| def load_data(gs, start_date, end_date, d_type): | |
| """ | |
| load data from google sheet | |
| Args: | |
| gs (Gsheet instance ): GSheet instance with access to google sheet | |
| start_date (str): Date str format(YYYY-mm-dd) | |
| end_date (str): Date str format(YYYY-mm-dd) | |
| d_type (str):sheet type (prescription or diagnosis) | |
| Raises: | |
| gr.Error: 403 permission denied | |
| Returns: | |
| data (DataFrame): loaded data from google sheet | |
| """ | |
| sheets_dict = { | |
| "diagnosis": "Diagnosis", | |
| "prescription": "Prescriptions" | |
| } | |
| # open data and super sheet | |
| try: | |
| data_sheet = gs.worksheet(sheets_dict[d_type]) | |
| except: | |
| raise gr.Error(message='Permission denied. Please add IAM user to the sheet and try again') | |
| # read data from sheets | |
| all_data = pd.DataFrame(data_sheet.get_all_records()) | |
| all_data.columns = all_data.columns.str.strip() | |
| # transform date fields | |
| all_data['date_cleaned'] = pd.to_datetime(all_data['date_cleaned'], infer_datetime_format=True) | |
| all_data['date_reviewed'] = pd.to_datetime(all_data['date_reviewed'], infer_datetime_format=True) | |
| data = all_data[(all_data['date_cleaned'] >= start_date) & (all_data['date_cleaned'] <= end_date)] | |
| return data | |
| def preprocess_data(data, d_type): | |
| """ | |
| preprocess loaded data from google sheet | |
| Args: | |
| data (DataFrame): google sheet data as a dataframe | |
| d_type (Str): sheet type (prescription or diagnosis) | |
| Raises: | |
| gr.Error (ValueError): Wrong column | |
| Returns: | |
| Overlap (DataFrame): Overlap data | |
| """ | |
| data.columns = data.columns.str.strip() | |
| overlap_cols = { | |
| "prescription": ["RX Norm [Super Generic]_x", "RX Norm [Super Generic]_y" ], | |
| "diagnosis": ["ICD10 Diagnosis_x", "ICD10 Diagnosis_y"] | |
| } | |
| try: | |
| # extract the interns | |
| intern_1 = data['Intern'].unique()[0] | |
| intern_2 = data['Intern'].unique()[1] | |
| # intern 1 and intern 2 data | |
| intern_1_data = data[data['Intern'] == intern_1.strip()] | |
| intern_2_data = data[data['Intern'] == intern_2.strip()] | |
| # extract the overlap | |
| overlap = pd.merge(intern_1_data, intern_2_data, on='Unstructured Name') | |
| # remove all white spaces | |
| col = overlap_cols[d_type] | |
| overlap[col[0]] = overlap[col[0]].str.strip() | |
| overlap[col[1]] = overlap[col[1]].str.strip() | |
| except: | |
| raise gr.Error(message="No Intern Column") | |
| return overlap | |
| def post_process(data, d_type): | |
| """ | |
| post process data | |
| Args: | |
| data (DataFrame): matched data as dataframe | |
| d_type (str): sheet type (prescription or diagnosis) | |
| Returns: | |
| data (DataFrame): postprocessed data as dataframe | |
| """ | |
| drop_cols = { | |
| "prescription": ['Type_y','Supervisor_y', 'date_cleaned_y', 'date_reviewed_y'], | |
| "diagnosis": ['Supervisor_y', 'date_cleaned_y', 'date_reviewed_y'] | |
| } | |
| post_cols = { | |
| "prescription": ['Unstructured Name', 'Intern_x', 'Status_x', 'RX Norm [Super Generic]_x', | |
| 'Intern_y', 'Status_y', 'RX Norm [Super Generic]_y', 'match_status', 'Type_x', | |
| 'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x'], | |
| "diagnosis": ['Unstructured Name', 'Intern_x', 'Status_x', 'ICD10 Diagnosis_x', | |
| 'Intern_y', 'Status_y', 'ICD10 Diagnosis_y', 'match_status', | |
| 'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x'] | |
| } | |
| d_cols = drop_cols[d_type] | |
| p_cols = post_cols[d_type] | |
| data = data.drop(columns=d_cols) | |
| data = data[p_cols] | |
| data = data.rename(columns={ | |
| 'date_cleaned_x': 'date_cleaned', | |
| 'date_reviewed_x': 'date_reviewed' | |
| }) | |
| return data | |
| def output_data(gc, data, week_no, dtype): | |
| """ | |
| outputs data to a google sheet | |
| Args: | |
| gc (GSheet instance): Gsheet permission instance | |
| data (DataFrame): Data to write to google sheet | |
| week_no (str): Week no e.g. week_1 | |
| d_type (str): sheet type (prescription or diagnosis) | |
| Raises: | |
| gr.exception.Error: Sheet Error | |
| """ | |
| out_sheet_dict = { | |
| "diagnosis": "https://docs.google.com/spreadsheets/d/1UJ2PHR62mcz11D2qeX-Wk2fs1357BW78o8g76lwG93w/edit#gid=2123373592", | |
| "prescription": "https://docs.google.com/spreadsheets/d/1Fo9V6J_L9eWX3qEScITP2L8nI9exk_VslGMgk-1jCJw/edit#gid=1287467149" | |
| } | |
| outsheet_url = out_sheet_dict[dtype] | |
| out_gs = gc.open_by_url(outsheet_url) | |
| # write output to sheet | |
| try: | |
| out_worksheet = out_gs.worksheet(week_no) | |
| # clear the worksheet | |
| out_worksheet.clear() | |
| except: | |
| raise gr.Error(f"Result sheet for {week_no} does not exist") | |
| # write dataframe to work sheet | |
| set_with_dataframe( | |
| worksheet=out_worksheet, | |
| dataframe=data, | |
| include_index=False, | |
| include_column_header=True, | |
| resize=True) | |