Spaces:
Sleeping
Sleeping
File size: 5,104 Bytes
4ff22c7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import gradio as gr
import pandas as pd
from gspread_dataframe import set_with_dataframe
def load_data(gs, start_date, end_date, d_type):
"""
load data from google sheet
Args:
gs (Gsheet instance ): GSheet instance with access to google sheet
start_date (str): Date str format(YYYY-mm-dd)
end_date (str): Date str format(YYYY-mm-dd)
d_type (str):sheet type (prescription or diagnosis)
Raises:
gr.Error: 403 permission denied
Returns:
data (DataFrame): loaded data from google sheet
"""
sheets_dict = {
"diagnosis": "Diagnosis",
"prescription": "Prescriptions"
}
# open data and super sheet
try:
data_sheet = gs.worksheet(sheets_dict[d_type])
except:
raise gr.Error(message='Permission denied. Please add IAM user to the sheet and try again')
# read data from sheets
all_data = pd.DataFrame(data_sheet.get_all_records())
all_data.columns = all_data.columns.str.strip()
# transform date fields
all_data['date_cleaned'] = pd.to_datetime(all_data['date_cleaned'], infer_datetime_format=True)
all_data['date_reviewed'] = pd.to_datetime(all_data['date_reviewed'], infer_datetime_format=True)
data = all_data[(all_data['date_cleaned'] >= start_date) & (all_data['date_cleaned'] <= end_date)]
return data
def preprocess_data(data, d_type):
"""
preprocess loaded data from google sheet
Args:
data (DataFrame): google sheet data as a dataframe
d_type (Str): sheet type (prescription or diagnosis)
Raises:
gr.Error (ValueError): Wrong column
Returns:
Overlap (DataFrame): Overlap data
"""
data.columns = data.columns.str.strip()
overlap_cols = {
"prescription": ["RX Norm [Super Generic]_x", "RX Norm [Super Generic]_y" ],
"diagnosis": ["ICD10 Diagnosis_x", "ICD10 Diagnosis_y"]
}
try:
# extract the interns
intern_1 = data['Intern'].unique()[0]
intern_2 = data['Intern'].unique()[1]
# intern 1 and intern 2 data
intern_1_data = data[data['Intern'] == intern_1.strip()]
intern_2_data = data[data['Intern'] == intern_2.strip()]
# extract the overlap
overlap = pd.merge(intern_1_data, intern_2_data, on='Unstructured Name')
# remove all white spaces
col = overlap_cols[d_type]
overlap[col[0]] = overlap[col[0]].str.strip()
overlap[col[1]] = overlap[col[1]].str.strip()
except:
raise gr.Error(message="No Intern Column")
return overlap
def post_process(data, d_type):
"""
post process data
Args:
data (DataFrame): matched data as dataframe
d_type (str): sheet type (prescription or diagnosis)
Returns:
data (DataFrame): postprocessed data as dataframe
"""
drop_cols = {
"prescription": ['Type_y','Supervisor_y', 'date_cleaned_y', 'date_reviewed_y'],
"diagnosis": ['Supervisor_y', 'date_cleaned_y', 'date_reviewed_y']
}
post_cols = {
"prescription": ['Unstructured Name', 'Intern_x', 'Status_x', 'RX Norm [Super Generic]_x',
'Intern_y', 'Status_y', 'RX Norm [Super Generic]_y', 'match_status', 'Type_x',
'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x'],
"diagnosis": ['Unstructured Name', 'Intern_x', 'Status_x', 'ICD10 Diagnosis_x',
'Intern_y', 'Status_y', 'ICD10 Diagnosis_y', 'match_status',
'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x']
}
d_cols = drop_cols[d_type]
p_cols = post_cols[d_type]
data = data.drop(columns=d_cols)
data = data[p_cols]
data = data.rename(columns={
'date_cleaned_x': 'date_cleaned',
'date_reviewed_x': 'date_reviewed'
})
return data
def output_data(gc, data, week_no, dtype):
"""
outputs data to a google sheet
Args:
gc (GSheet instance): Gsheet permission instance
data (DataFrame): Data to write to google sheet
week_no (str): Week no e.g. week_1
d_type (str): sheet type (prescription or diagnosis)
Raises:
gr.exception.Error: Sheet Error
"""
out_sheet_dict = {
"diagnosis": "https://docs.google.com/spreadsheets/d/1UJ2PHR62mcz11D2qeX-Wk2fs1357BW78o8g76lwG93w/edit#gid=2123373592",
"prescription": "https://docs.google.com/spreadsheets/d/1Fo9V6J_L9eWX3qEScITP2L8nI9exk_VslGMgk-1jCJw/edit#gid=1287467149"
}
outsheet_url = out_sheet_dict[dtype]
out_gs = gc.open_by_url(outsheet_url)
# write output to sheet
try:
out_worksheet = out_gs.worksheet(week_no)
# clear the worksheet
out_worksheet.clear()
except:
raise gr.Error(f"Result sheet for {week_no} does not exist")
# write dataframe to work sheet
set_with_dataframe(
worksheet=out_worksheet,
dataframe=data,
include_index=False,
include_column_header=True,
resize=True)
|