File size: 5,104 Bytes
4ff22c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr 
import pandas as pd
from gspread_dataframe import set_with_dataframe  

def load_data(gs, start_date, end_date, d_type):
    """
    load data from google sheet

    Args:
        gs (Gsheet instance ): GSheet instance with access to google sheet
        start_date (str): Date str format(YYYY-mm-dd)
        end_date (str): Date str format(YYYY-mm-dd)
        d_type (str):sheet type (prescription or diagnosis)

    Raises:
        gr.Error: 403 permission denied

    Returns:
        data (DataFrame): loaded data from google sheet
    """
    sheets_dict = {
        "diagnosis": "Diagnosis",
        "prescription": "Prescriptions"
    }

    # open data and super sheet
    try:
        data_sheet = gs.worksheet(sheets_dict[d_type])
    except:
        raise gr.Error(message='Permission denied. Please add IAM user to the sheet and try again')
    
    # read data from sheets
    all_data = pd.DataFrame(data_sheet.get_all_records())
    all_data.columns = all_data.columns.str.strip()

    # transform date fields
    all_data['date_cleaned'] = pd.to_datetime(all_data['date_cleaned'], infer_datetime_format=True)
    all_data['date_reviewed'] = pd.to_datetime(all_data['date_reviewed'], infer_datetime_format=True)

    data = all_data[(all_data['date_cleaned'] >= start_date) & (all_data['date_cleaned'] <= end_date)]

    return data

def preprocess_data(data, d_type):
    """
    preprocess loaded data from google sheet

    Args:
        data (DataFrame):  google sheet data as a dataframe
        d_type (Str): sheet type (prescription or diagnosis)

    Raises:
        gr.Error (ValueError): Wrong column

    Returns:
        Overlap (DataFrame): Overlap data
    """

    data.columns = data.columns.str.strip()

    overlap_cols = {
       "prescription": ["RX Norm [Super Generic]_x", "RX Norm [Super Generic]_y" ], 
       "diagnosis": ["ICD10 Diagnosis_x", "ICD10 Diagnosis_y"]
    }

    try:
        # extract the interns
        intern_1 = data['Intern'].unique()[0]
        intern_2 = data['Intern'].unique()[1]

        # intern 1 and intern 2 data 
        intern_1_data = data[data['Intern'] == intern_1.strip()]
        intern_2_data = data[data['Intern'] == intern_2.strip()]

        # extract the overlap
        overlap = pd.merge(intern_1_data, intern_2_data, on='Unstructured Name')

        # remove all white spaces 
        col = overlap_cols[d_type]
        overlap[col[0]] = overlap[col[0]].str.strip()
        overlap[col[1]] = overlap[col[1]].str.strip()
    except:
       raise gr.Error(message="No Intern Column")

    return overlap

def post_process(data, d_type):
    """
   post process data

    Args:
        data (DataFrame): matched data as dataframe
        d_type (str): sheet type (prescription or diagnosis)

    Returns:
        data (DataFrame): postprocessed data as dataframe
    """
    drop_cols = {
        "prescription": ['Type_y','Supervisor_y', 'date_cleaned_y', 'date_reviewed_y'], 
        "diagnosis": ['Supervisor_y', 'date_cleaned_y', 'date_reviewed_y']
    }
    post_cols = {
        "prescription": ['Unstructured Name', 'Intern_x', 'Status_x', 'RX Norm [Super Generic]_x', 
                            'Intern_y', 'Status_y', 'RX Norm [Super Generic]_y',  'match_status', 'Type_x', 
                            'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x'],
        "diagnosis":   ['Unstructured Name', 'Intern_x', 'Status_x', 'ICD10 Diagnosis_x', 
                        'Intern_y', 'Status_y', 'ICD10 Diagnosis_y',  'match_status',
                        'Supervisor_x', 'date_cleaned_x', 'date_reviewed_x']
        }
    
    d_cols = drop_cols[d_type]
    p_cols = post_cols[d_type]

    data = data.drop(columns=d_cols)
    data = data[p_cols]

    data = data.rename(columns={
        'date_cleaned_x': 'date_cleaned',
        'date_reviewed_x': 'date_reviewed'
        })
    
    return data

def output_data(gc, data, week_no, dtype):
    """
    outputs data to a google sheet

    Args:
        gc (GSheet instance): Gsheet permission instance
        data (DataFrame):  Data to write to google sheet
        week_no (str): Week no e.g. week_1
        d_type (str): sheet type (prescription or diagnosis)
    
    Raises:
        gr.exception.Error: Sheet Error
    """
    out_sheet_dict = {
        "diagnosis": "https://docs.google.com/spreadsheets/d/1UJ2PHR62mcz11D2qeX-Wk2fs1357BW78o8g76lwG93w/edit#gid=2123373592", 
        "prescription": "https://docs.google.com/spreadsheets/d/1Fo9V6J_L9eWX3qEScITP2L8nI9exk_VslGMgk-1jCJw/edit#gid=1287467149"
    }

    outsheet_url = out_sheet_dict[dtype]
    out_gs = gc.open_by_url(outsheet_url)

    # write output to sheet 
    try:
        out_worksheet = out_gs.worksheet(week_no)
        # clear the worksheet 
        out_worksheet.clear()
    except:
        raise gr.Error(f"Result sheet for {week_no} does not exist")

    # write dataframe to work sheet
    set_with_dataframe(
        worksheet=out_worksheet, 
        dataframe=data, 
        include_index=False,
        include_column_header=True, 
        resize=True)