import gradio as gr import pandas as pd from gspread_dataframe import set_with_dataframe def load_data(gs, start_date, end_date, d_type): """ load data from google sheet Args: gs (Gsheet instance ): GSheet instance with access to google sheet start_date (str): Date str format(YYYY-mm-dd) end_date (str): Date str format(YYYY-mm-dd) d_type (str):sheet type (prescription or diagnosis) Raises: gr.Error: 403 permission denied Returns: data (DataFrame): loaded data from google sheet """ sheets_dict = { "diagnosis": "Diagnosis", "prescription": "Prescriptions" } # open data and super sheet try: data_sheet = gs.worksheet(sheets_dict[d_type]) except: raise gr.Error(message='Permission denied. Please add IAM user to the sheet and try again') # read data from sheets all_data = pd.DataFrame(data_sheet.get_all_records()) all_data.columns = all_data.columns.str.strip() # transform date fields all_data['date_cleaned'] = pd.to_datetime(all_data['date_cleaned'], infer_datetime_format=True) all_data['date_reviewed'] = pd.to_datetime(all_data['date_reviewed'], infer_datetime_format=True) data = all_data[(all_data['date_cleaned'] >= start_date) & (all_data['date_cleaned'] <= end_date)] return data def preprocess_data(data, d_type): """ preprocess loaded data from google sheet Args: data (DataFrame): google sheet data as a dataframe d_type (Str): sheet type (prescription or diagnosis) Raises: gr.Error (ValueError): Wrong column Returns: Overlap (DataFrame): Overlap data """ data.columns = data.columns.str.strip() overlap_cols = { "prescription": ["RX Norm [Super Generic]_x", "RX Norm [Super Generic]_y" ], "diagnosis": ["ICD10 Diagnosis_x", "ICD10 Diagnosis_y"] } try: # extract the interns intern_1 = data['Intern'].unique()[0] intern_2 = data['Intern'].unique()[1] # intern 1 and intern 2 data intern_1_data = data[data['Intern'] == intern_1.strip()] intern_2_data = data[data['Intern'] == intern_2.strip()] # extract the overlap overlap = pd.merge(intern_1_data, intern_2_data, on='Unstructured Name') # remove all white spaces col = overlap_cols[d_type] overlap[col[0]] = overlap[col[0]].str.strip() overlap[col[1]] = overlap[col[1]].str.strip() except: raise gr.Error(message="No Intern Column") return overlap def post_process(data, d_type): """ post process data Args: data (DataFrame): matched data as dataframe d_type (str): sheet type (prescription or diagnosis) Returns: data (DataFrame): postprocessed data as dataframe """ drop_cols = { "prescription": ['Type_y','Supervisor_y', 'date_cleaned_y', 'date_reviewed_y'], "diagnosis": ['Supervisor_y', 'date_cleaned_y', 'date_reviewed_y'] } post_cols = { "prescription": ['Unstructured Name', 'Intern_x', 'Status_x', 'RX Norm [Super Generic]_x', 'Intern_y', 'Status_y', 'RX Norm [Super Generic]_y', 'match_status', 'Type_x', 'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x'], "diagnosis": ['Unstructured Name', 'Intern_x', 'Status_x', 'ICD10 Diagnosis_x', 'Intern_y', 'Status_y', 'ICD10 Diagnosis_y', 'match_status', 'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x'] } d_cols = drop_cols[d_type] p_cols = post_cols[d_type] data = data.drop(columns=d_cols) data = data[p_cols] data = data.rename(columns={ 'date_cleaned_x': 'date_cleaned', 'date_reviewed_x': 'date_reviewed' }) return data def output_data(gc, data, week_no, dtype): """ outputs data to a google sheet Args: gc (GSheet instance): Gsheet permission instance data (DataFrame): Data to write to google sheet week_no (str): Week no e.g. week_1 d_type (str): sheet type (prescription or diagnosis) Raises: gr.exception.Error: Sheet Error """ out_sheet_dict = { "diagnosis": "https://docs.google.com/spreadsheets/d/1UJ2PHR62mcz11D2qeX-Wk2fs1357BW78o8g76lwG93w/edit#gid=2123373592", "prescription": "https://docs.google.com/spreadsheets/d/1Fo9V6J_L9eWX3qEScITP2L8nI9exk_VslGMgk-1jCJw/edit#gid=1287467149" } outsheet_url = out_sheet_dict[dtype] out_gs = gc.open_by_url(outsheet_url) # write output to sheet try: out_worksheet = out_gs.worksheet(week_no) # clear the worksheet out_worksheet.clear() except: raise gr.Error(f"Result sheet for {week_no} does not exist") # write dataframe to work sheet set_with_dataframe( worksheet=out_worksheet, dataframe=data, include_index=False, include_column_header=True, resize=True)