from datetime import datetime import gradio as gr import gspread import validators from validators import ValidationFailure from google.oauth2.service_account import Credentials from pydrive.auth import GoogleAuth from pydrive.drive import GoogleDrive import etl # test sheet - https://docs.google.com/spreadsheets/d/1iJ0-882HsWkAth0e0P_kf21aL6583Z7m1LwloaRCVEc/edit#gid=0 week_list = [f"week_{i}" for i in range(1, 51)] # credentials file cred_file_path = 'heliumhealth-a05d595e5991.json' # google auth scopes scopes = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive'] # create credentials credentials = Credentials.from_service_account_file( cred_file_path, scopes=scopes ) # authorize google spreadsheet gc = gspread.authorize(credentials) gauth = GoogleAuth() drive = GoogleDrive(gauth) def overlap_matching(row, d_type): """ matching overlap """ match_cols = { "prescription": ['RX Norm [Super Generic]_x', 'RX Norm [Super Generic]_y'], "diagnosis": ['ICD10 Diagnosis_x','ICD10 Diagnosis_y'] } cols = match_cols[d_type] row['match_status'] = 'match' if row[cols[0]] == row[cols[1]] else 'no match' return row def overlap_check(gs, start_date, end_date, week_no, d_type): """ load, preprocess, check overlap, postprocess and output data to google sheet Args: gs (GSheet instance): Gsheet instance access to google sheet start_date (str): Date str format(YYYY-mm-dd) end_date (_type_): Date str format(YYYY-mm-dd) d_type (_type_): sheet type (prescription or diagnosis) """ # load data all_data = etl.load_data(gs, start_date, end_date, d_type) # preprocess and return overlap data overlap_data = etl.preprocess_data(all_data, d_type) # do overlap matching overlap_data = overlap_data.apply(overlap_matching, axis=1, args=[d_type]) # post process overlap_data = etl.post_process(overlap_data, d_type) # write to sheet etl.output_data(gc, overlap_data, week_no, d_type) def overlap_check_main(sheet_type, start_date_str, end_date_str, week_str, sheet_url): """ overlap check main function Args: sheet_type (str): sheet type (prescription or diagnosis) start_date_str (str): start date string: e.g. 2023-03-21 end_date_str (str): end date string: e.g. 2023-03-24 week_str (str): week string e.g. week_1 sheet_url (url): _description_ Raises: gr.exceptions.Error: Date Format Error - either start_date or end date are bad format gr.exceptions.Error: Date Error - when start_date is greater than end date gr.exceptions.Error: URL Error - Bad url format """ # format date from string try: start_date = datetime.strptime(start_date_str.strip(), "%Y-%m-%d").date() end_date = datetime.strptime(end_date_str.strip(), "%Y-%m-%d").date() except: raise gr.Error(message="Wrong date format") # raise error when start date is greater end date if start_date >= end_date: raise gr.Error(message="Start date cannot be greater end date") # Check if the input is valid url # ToDO: Error message displayed is not explanatory - Fix it url_check = validators.url(sheet_url) if isinstance(url_check, ValidationFailure): raise gr.Error(message="Please enter a valid URL") # open the google sheet for reading gs = gc.open_by_url(sheet_url) # if sheet_type == 'prescription': try: overlap_check(gs, start_date_str, end_date_str, week_str, sheet_type) except: gr.Error(message="Permission denied. Please add IAM user to the sheet and try again") return f"Successfully ran {sheet_type} overlap check for {week_str.replace('_', ' ').title()} ({start_date_str} - {end_date_str})" with gr.Blocks() as demo: gr.Markdown( """ ## Overlap Check App * Add IAM User to sheet you want to test * Gsheet tabs required for diagnosis: * Diagnosis * Gsheet tabs required for prescription: * Prescriptions * Data headers required for diagnosis * Unstructured Name, ICD10 Diagnosis, Intern * Data headers required for prescription * Unstructured Name, RX Norm [Super Generic], Intern """ ) # inputs sheet_type = gr.Dropdown(['prescription', 'diagnosis'], label="QA Type") start_date = gr.Textbox(label="Start Date", placeholder="YYYY-MM-DD") end_date = gr.Textbox(label="End Date", placeholder="YYYY-MM-DD") week_input = gr.Dropdown(week_list, label="Week") url = gr.Textbox(label="URL", placeholder="Enter sheet url ...") # outputs output = gr.Textbox(label="Output Box") run_btn = gr.Button("Run") run_btn.click( fn=overlap_check_main, inputs=[ sheet_type, start_date, end_date, week_input, url ], outputs=output, api_name="Overlap_check" ) demo.launch()