import pandas as pd
import numpy as np
import streamlit as st
import base64  # Standard Python Module
from io import StringIO, BytesIO  # Standard Python Module

# hide menu and footer
st.set_page_config(page_title=None, page_icon=None, layout="wide", initial_sidebar_state="auto", menu_items=None)
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)  # unsafe_allow_html allows us to embed html code


def generate_excel_download_link(df):
    # Credit Excel: https://discuss.streamlit.io/t/how-to-add-a-download-excel-csv-function-to-a-button/4474/5
    towrite = BytesIO()
    df.to_excel(towrite, encoding="utf-8", index=False, header=True)  # write to BytesIO buffer
    towrite.seek(0)  # reset pointer
    b64 = base64.b64encode(towrite.read()).decode()
    href = f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="facility_ranking_SSM.xlsx">Download </a>'
    return st.markdown(href, unsafe_allow_html=True)


# DE=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/DE.csv')
# FPE=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/FPE.csv')
# sd=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/SD.csv')

# width in full screen


st.title('AI Ranking-CPHC Project Jhpiego')
# add two columns
col1, col2 = st.columns(2)
with col1:        
    counts=0

    # file upload
    upload_file_FPE = st.file_uploader("Upload Facility Profile Entry Data in CSV format", type=['csv'])

    # if upload_file_DE is not None highlight "file not uploaded"


    if upload_file_FPE:
        FPE = pd.read_csv(upload_file_FPE)
        
        # check the columns heading available in the file
        required_col=['NIN_2_HFI', 'HFI_Name', 'PHC_CHC_Type',
            'State_Name', 'District_Name', 'Taluka_Name', 'Block_Name',
            'Proposed Date', 'Progressive Date',
            'Total Population in catchment area of facility']
        if set(required_col).issubset(FPE.columns):
            st.subheader("Perfect")
            st.write(FPE.head(3))
        else:
            st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(FPE.columns)}")
            counts=1


    # file upload
    upload_file_DE = st.file_uploader("Upload Daily Entry Data in CSV format", type=['csv'])
    if upload_file_DE:
        DE = pd.read_csv(upload_file_DE)
        
        required_col=['NIN ID',  'Entry Date', 'Footfall Male', 'Footfall Female ',
        'Footfall Others ', 'Footfall Total', 'Patients received medicines ',
        'Patients availed diagnostic tests',
        ' Patients availed tele-consulation services ',
        'Wellness sessions conducted ',
        'Total participants of Wellness session']
        if set(required_col).issubset(DE.columns):
            st.subheader("Perfect")
            st.write(DE.head(3))
        else:
            st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(DE.columns)}")
            counts=1


    # file upload
    upload_file_SD = st.file_uploader("Upload Service Delivery Data in CSV format", type=['csv'])
    if upload_file_SD:
        sd = pd.read_csv(upload_file_SD)
        
        required_col=['NIN ID',  'Entry Month', 
        'HTN Individuals screened Male', 'HTN Individuals screened Female',
        'HTN Individuals screened Other', 'HTN Newly diagnosed Male',
        'HTN Newly diagnosed Female', 'HTN Newly diagnosed Other',
        'HTN On treatment Male', 'HTN On treatment Female',
        'HTN On treatment Other', 'DM Individuals screened Male',
        'DM Individuals screened Female', 'DM Individuals screened Other',
        'DM Newly diagnosed Male', 'DM Newly diagnosed Female',
        'DM Newly diagnosed Other', 'DM On treatment Male',
        'DM On treatment Female', 'DM On treatment Other',
        'OC Individuals screened Male', 'OC Individuals screened Female',
        'OC Individuals screened Other', 'OC Newly diagnosed Male',
        'OC Newly diagnosed Female', 'OC Newly diagnosed Other',
        'OC On treatment male', 'OC On treatment Female',
        'OC On treatment Other', 'BC Individuals screened female',
        'BC Newly diagnosed female', 'BC On treatment female',
        'CC Individuals screened female', 'CC Newly diagnosed female',
        'CC On treatment female', 'Individuals referred for screening male',
        'Individuals referred for screening female',
        'Individuals referred for screening other', 'Newly diagnosed Male',
        'Newly diagnosed Female', 'Newly diagnosed Other', 'On treatment Male',
        'On treatment Female', 'On treatment Other',
        'Total Patients received antihypertensive medicines at this centre',
        'Total Patients received ant-diabetic medicines at this centre',
        'Medicines_TPR_AO_M', 'Total TB patients received DOTS from the centre',
        'Medicines_CSOM_EOM', 'Availability of functional BP apparatus',
        'Availability of functional glucometer', 'Closing stock of glucostrips',
        'Performance/team based incentives for MO/SN/CHO',
        'Team based incentives for ASHA/MPW']
        if set(required_col).issubset(sd.columns):
            st.subheader("Perfect")
            st.write(sd.head(3))
        else:
            st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(sd.columns)}")
            counts=1

    # if upload_file_FPE and upload_file_DE and upload_file_SD is not None:
            # markdown processing
        st.markdown('''
        # *************************Processing Done: Please check next section**************************
        ''')
    st.subheader("This tool is developed by : Surendra Mehta Data Analyst Jhpiego India all rights reserved")
with col2:

    if counts==0 and upload_file_FPE and upload_file_DE and upload_file_SD:
    

        # Facility PE cleaning **********************************************************************************************************
        FPE.rename(columns={'NIN_2_HFI':'NIN ID'}, inplace=True)

        # Total Population in catchment area of facility remove any commas and convert in int format
        FPE['Total Population in catchment area of facility']=FPE['Total Population in catchment area of facility'].replace({',': ''}, regex=True)
        FPE['Total Population in catchment area of facility']=FPE['Total Population in catchment area of facility'].astype(int)

        col=['NIN ID', 'HFI_Name', 'PHC_CHC_Type',
            'State_Name', 'District_Name', 'Taluka_Name', 'Block_Name',
            'Proposed Date', 'Progressive Date',
            'Total Population in catchment area of facility']
        FPE=FPE[col]
        geo=['HFI_Name', 'PHC_CHC_Type','State_Name', 'District_Name', 'Taluka_Name', 'Block_Name']

        # fill NA if any value is missing in geo 
        FPE[geo]=FPE[geo].fillna('NA')
        # total facility 
        total_facility=FPE.shape[0]
        st.write(f"Total facility in facility profile entry is :{total_facility}")

        
        # DE cleaning *******************************************************************************************************************
        #Entry Date convert in date format in format of 2022-05-09
        
        DE['Entry Date'] = pd.to_datetime(DE['Entry Date'], format='%Y-%m-%d')
        DE.head(3)
        # add month-year column from entry date
        DE['Month-Year'] = DE['Entry Date'].dt.strftime('%b-%Y')
        DE['Total'] = DE["Footfall Male"] + DE["Footfall Female "]+DE["Footfall Others "]
        # if wellness session conducted is yes then 1 else 0
        DE['Wellness sessions conducted ']=DE['Wellness sessions conducted '].replace({'Yes': 1, 'No': 0})
        DE['Reporting']=1

        DE_col=['NIN ID','Reporting', 'Total',
            ' Patients availed tele-consulation services ',
            'Wellness sessions conducted ',
                'Month-Year']
        DE=DE[DE_col]
        #groupby
        DE=DE.groupby(['NIN ID','Month-Year']).sum().reset_index()


        #total facility in DE
        st.write(f"Total facility in DE is :{DE['NIN ID'].nunique()}") 

        # SD cleaning ********************************************************************************************************************

        #Entry Date convert in date format in format of 2022-11-30
        sd['Entry Month'] = pd.to_datetime(sd['Entry Month'], format='%Y-%m-%d')

        # # add month-year column from entry date
        sd['Month-Year'] = sd['Entry Month'].dt.strftime('%b-%Y')

        #show all the columns in view
        pd.set_option('display.max_columns', None)

        int_col=['Individuals empanelled',
            'Community Based Assessment Checklist filled',
            'HTN Individuals screened Male', 'HTN Individuals screened Female',
            'HTN Individuals screened Other', 'HTN Newly diagnosed Male',
            'HTN Newly diagnosed Female', 'HTN Newly diagnosed Other',
            'HTN On treatment Male', 'HTN On treatment Female',
            'HTN On treatment Other', 'DM Individuals screened Male',
            'DM Individuals screened Female', 'DM Individuals screened Other',
            'DM Newly diagnosed Male', 'DM Newly diagnosed Female',
            'DM Newly diagnosed Other', 'DM On treatment Male',
            'DM On treatment Female', 'DM On treatment Other',
            'OC Individuals screened Male', 'OC Individuals screened Female',
            'OC Individuals screened Other', 'OC Newly diagnosed Male',
            'OC Newly diagnosed Female', 'OC Newly diagnosed Other',
            'OC On treatment male', 'OC On treatment Female',
            'OC On treatment Other', 'BC Individuals screened female',
            'BC Newly diagnosed female', 'BC On treatment female',
            'CC Individuals screened female', 'CC Newly diagnosed female',
            'CC On treatment female', 'Individuals referred for screening male',
            'Individuals referred for screening female',
            'Individuals referred for screening other', 'Newly diagnosed Male',
            'Newly diagnosed Female', 'Newly diagnosed Other', 'On treatment Male',
            'On treatment Female', 'On treatment Other',
            'Total Patients received antihypertensive medicines at this centre',
            'Total Patients received ant-diabetic medicines at this centre','Medicines_TPR_AO_M',"Closing stock of glucostrips"]


        # convert all the int columns in int format
        sd_int=sd[int_col].head()
        # remove any commas in the int columns
        sd[sd_int.columns] = sd[sd_int.columns].replace({',': ''}, regex=True)
        #convert all the int columns in int format
        sd[int_col] = sd[int_col].apply(pd.to_numeric, errors='coerce', axis=1)

        # add total column in sd_int

        #HTN
        sd['HTN screened '] = sd['HTN Individuals screened Male']+ sd['HTN Individuals screened Female']+ sd['HTN Individuals screened Other']
        sd['HTN diagnosed '] = sd['HTN Newly diagnosed Male']+ sd['HTN Newly diagnosed Female']+ sd['HTN Newly diagnosed Other']
        sd['HTN on treatment '] = sd['HTN On treatment Male']+ sd['HTN On treatment Female']+ sd['HTN On treatment Other']

        #DM
        sd['DM screened '] = sd['DM Individuals screened Male']+ sd['DM Individuals screened Female']+ sd['DM Individuals screened Other']
        sd['DM diagnosed '] = sd['DM Newly diagnosed Male']+ sd['DM Newly diagnosed Female']+ sd['DM Newly diagnosed Other']
        sd['DM on treatment '] = sd['DM On treatment Male']+ sd['DM On treatment Female']+ sd['DM On treatment Other']

        #DM
        sd['OC screened '] = sd['OC Individuals screened Male']+ sd['OC Individuals screened Female']+ sd['OC Individuals screened Other']
        sd['OC diagnosed '] = sd['OC Newly diagnosed Male']+ sd['OC Newly diagnosed Female']+ sd['OC Newly diagnosed Other']
        sd['OC on treatment '] = sd['OC On treatment male']+ sd['OC On treatment Female']+ sd['OC On treatment Other']


        #referred
        sd['TB_Referred'] = sd['Individuals referred for screening male']+ sd['Individuals referred for screening female']+ sd['Individuals referred for screening other']

        #Newly diagnosed
        sd['TB_Newly diagnosed'] = sd['Newly diagnosed Male']+ sd['Newly diagnosed Female']+ sd['Newly diagnosed Other']

        #On treatment
        sd['TB_On treatment'] = sd['On treatment Male']+ sd['On treatment Female']+ sd['On treatment Other']
        # if yes than 1 else 0 add both
        sd["equipments_BP_gluco"]=sd["Availability of functional BP apparatus"].replace({'Yes': 1, 'No': 0})+sd["Availability of functional glucometer"].replace({'Yes': 1, 'No': 0})
        sd["pbi_tbi"]=sd["Performance/team based incentives for MO/SN/CHO"].replace({'Yes': 1, 'No': 0})+sd["Team based incentives for ASHA/MPW"].replace({'Yes': 1, 'No': 0})


        selected_col=['NIN ID', 'Facility Name', 'Facility Type', 'State', 'District',
            'Taluka', 'Block', 'Entry Month',
                    'Month-Year',
            'HTN screened ', 'DM screened ',
            'OC screened ','BC Individuals screened female', 'TB_Referred','equipments_BP_gluco',"pbi_tbi"]
        sd=sd[selected_col]


        # group by NIN-ID and month-year sum
        sd=sd.groupby(['NIN ID','Month-Year']).sum().reset_index()

        # total facility in SD
        st.write("Total facility in SD",sd['NIN ID'].nunique())


        df=pd.merge(DE, sd, on=['NIN ID','Month-Year'], how='outer')
        #merge with FPE
        df=pd.merge(FPE,df, on=['NIN ID'], how='outer')


        #df add a target population column with 80 percent of the total population
        df['Target Population']=round(df['Total Population in catchment area of facility']*0.8,0)


        # # all float value in one list
        # float_col=df.select_dtypes(include=['float']).columns.tolist()

        # # convert all the float columns in int format
        # df[float_col] = df[float_col].apply(pd.to_numeric, errors='coerce', axis=1)

        # cal_Reporting map value>=20 than 15, IF value >=10 than 10 else 0
        df['cal_Reporting']=np.where(df['Reporting']>=20,15,np.where(df['Reporting']>=10,10,0))
        # cal_footfall map if 'Total'/'Target Population'>=1, 15,if 'Total'/'Target Population'>=.8, 10,if 'Total'/'Target Population'>=.5,5, else 0

        df['cal_footfall']=np.where((df['Total']/df['Target Population'])>=1,15,np.where((df['Total']/df['Target Population'])>=.8,10,np.where((df['Total']/df['Target Population'])>=.5,5,0)))
        # Equipment value AV4=2,10,IF value =1,5,0
        df['cal_Equipment']=np.where(df['equipments_BP_gluco']==2,10,np.where(df['equipments_BP_gluco']==1,5,0))
        # pbi_tbi value AV4=2,10,IF value =1,5,0
        df['cal_pbi_tbi']=np.where(df['pbi_tbi']==2,10,np.where(df['pbi_tbi']==1,5,0))
        # tb refered value referred/total >=30,10 ,if referred/total >=20,5, else 0
        df['cal_tb']=np.where((df['TB_Referred']/df['Total'])>=.3,10,np.where((df['TB_Referred']/df['Total'])>=.2,5,0))
        # teleconsultation value AM4>=30,5,IF(AM4>=15,3,IF(AM4>=1,1,0
        df['cal_teleconsultation']=np.where(df[' Patients availed tele-consulation services ']>=30,5,np.where(df[' Patients availed tele-consulation services ']>=15,3,np.where(df[' Patients availed tele-consulation services ']>=1,1,0)))
        # wellness and yoga AQ6>=10,5,IF(AQ6>=5,3,0)
        df['cal_wellness']=np.where(df['Wellness sessions conducted ']>=10,5,np.where(df['Wellness sessions conducted ']>=5,3,0))
        # save the df in csv
        

        # pivot table 
        df_pivot=df.pivot_table(index=["NIN ID","HFI_Name","PHC_CHC_Type","State_Name","District_Name","Taluka_Name","Block_Name"],columns='Month-Year',
        values=['HTN screened ','DM screened ','OC screened ','BC Individuals screened female','cal_Reporting','cal_footfall','cal_Equipment',
            'cal_pbi_tbi','cal_tb','cal_teleconsultation','cal_wellness'],aggfunc='sum').reset_index()

        # total unique facility in all 3 uploaded data
        st.write(f"Total facility in all 3 uploaded data {df_pivot['NIN ID'].nunique()}")

        col=[col[0] + '_' + col[1] for col in df_pivot.columns]
        df_pivot.columns = col
        df_pivot = df_pivot.reset_index()


        # filter all columns having "screened" or refered
        screened_col=[col for col in df_pivot.columns if 'screened' in col]

        # import minmaxscaler
        from sklearn.preprocessing import MinMaxScaler

        # apply minmaxscaler on screened_col
        scaler = MinMaxScaler()
        df_pivot[screened_col] = scaler.fit_transform(df_pivot[screened_col])

        # for each month-year calculate the sum of all the columns 
        unique_month=df['Month-Year'].unique()
        #drop blank value
        unique_month=unique_month[~pd.isnull(unique_month)]

        for i in unique_month:
            df_pivot[i+'_sum']=df_pivot[[col for col in df_pivot.columns if i in col]].sum(axis=1)
            # add rank column and based on sum of each month-year rank the facility
            df_pivot[i+'_rank']=df_pivot[i+'_sum'].rank(ascending=False)


        # For Below month ranking will be calculated st.write(unique_month)
        st.subheader('Below month ranking will be calculated')
        st.write(unique_month)
        
        # calculate the sum of all the month-year sum
        df_pivot['Overall Score']=df_pivot[[col for col in df_pivot.columns if 'sum' in col]].sum(axis=1)


        # keep only ranking and overall score columns
        sum=[col for col in df_pivot.columns if 'sum' in col]
        rank=[col for col in df_pivot.columns if 'rank' in col]

        # add both list in one list
        sum_rank=sum+rank


        # all sum and rank columns in df_pivot convert in int format
        df_pivot[sum_rank] = df_pivot[sum_rank].apply(pd.to_numeric, errors='coerce', axis=1)

        df_rank=df_pivot[['NIN ID_','HFI_Name_','PHC_CHC_Type_', 'State_Name_','District_Name_','Taluka_Name_',
        'Block_Name_','Overall Score']+sum_rank]

            # df subheader 
        st.subheader("Download the Ranking")
        generate_excel_download_link(df_rank)
        
            # df subheader pivot table
        st.subheader("Detailed calculation download")
        generate_excel_download_link(df_pivot)
        
            # subheader download all 3 files analysis
        st.subheader("Download all 3 files analysis")
        generate_excel_download_link(df)

        # df subheader