import pandas as pd import numpy as np import streamlit as st import base64 # Standard Python Module from io import StringIO, BytesIO # Standard Python Module # hide menu and footer st.set_page_config(page_title=None, page_icon=None, layout="wide", initial_sidebar_state="auto", menu_items=None) hide_streamlit_style = """ """ st.markdown(hide_streamlit_style, unsafe_allow_html=True) # unsafe_allow_html allows us to embed html code def generate_excel_download_link(df): # Credit Excel: https://discuss.streamlit.io/t/how-to-add-a-download-excel-csv-function-to-a-button/4474/5 towrite = BytesIO() df.to_excel(towrite, encoding="utf-8", index=False, header=True) # write to BytesIO buffer towrite.seek(0) # reset pointer b64 = base64.b64encode(towrite.read()).decode() href = f'Download ' return st.markdown(href, unsafe_allow_html=True) # DE=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/DE.csv') # FPE=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/FPE.csv') # sd=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/SD.csv') # width in full screen st.title('AI Ranking-CPHC Project Jhpiego') # add two columns col1, col2 = st.columns(2) with col1: counts=0 # file upload upload_file_FPE = st.file_uploader("Upload Facility Profile Entry Data in CSV format", type=['csv']) # if upload_file_DE is not None highlight "file not uploaded" if upload_file_FPE: FPE = pd.read_csv(upload_file_FPE) # check the columns heading available in the file required_col=['NIN_2_HFI', 'HFI_Name', 'PHC_CHC_Type', 'State_Name', 'District_Name', 'Taluka_Name', 'Block_Name', 'Proposed Date', 'Progressive Date', 'Total Population in catchment area of facility'] if set(required_col).issubset(FPE.columns): st.subheader("Perfect") st.write(FPE.head(3)) else: st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(FPE.columns)}") counts=1 # file upload upload_file_DE = st.file_uploader("Upload Daily Entry Data in CSV format", type=['csv']) if upload_file_DE: DE = pd.read_csv(upload_file_DE) required_col=['NIN ID', 'Entry Date', 'Footfall Male', 'Footfall Female ', 'Footfall Others ', 'Footfall Total', 'Patients received medicines ', 'Patients availed diagnostic tests', ' Patients availed tele-consulation services ', 'Wellness sessions conducted ', 'Total participants of Wellness session'] if set(required_col).issubset(DE.columns): st.subheader("Perfect") st.write(DE.head(3)) else: st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(DE.columns)}") counts=1 # file upload upload_file_SD = st.file_uploader("Upload Service Delivery Data in CSV format", type=['csv']) if upload_file_SD: sd = pd.read_csv(upload_file_SD) required_col=['NIN ID', 'Entry Month', 'HTN Individuals screened Male', 'HTN Individuals screened Female', 'HTN Individuals screened Other', 'HTN Newly diagnosed Male', 'HTN Newly diagnosed Female', 'HTN Newly diagnosed Other', 'HTN On treatment Male', 'HTN On treatment Female', 'HTN On treatment Other', 'DM Individuals screened Male', 'DM Individuals screened Female', 'DM Individuals screened Other', 'DM Newly diagnosed Male', 'DM Newly diagnosed Female', 'DM Newly diagnosed Other', 'DM On treatment Male', 'DM On treatment Female', 'DM On treatment Other', 'OC Individuals screened Male', 'OC Individuals screened Female', 'OC Individuals screened Other', 'OC Newly diagnosed Male', 'OC Newly diagnosed Female', 'OC Newly diagnosed Other', 'OC On treatment male', 'OC On treatment Female', 'OC On treatment Other', 'BC Individuals screened female', 'BC Newly diagnosed female', 'BC On treatment female', 'CC Individuals screened female', 'CC Newly diagnosed female', 'CC On treatment female', 'Individuals referred for screening male', 'Individuals referred for screening female', 'Individuals referred for screening other', 'Newly diagnosed Male', 'Newly diagnosed Female', 'Newly diagnosed Other', 'On treatment Male', 'On treatment Female', 'On treatment Other', 'Total Patients received antihypertensive medicines at this centre', 'Total Patients received ant-diabetic medicines at this centre', 'Medicines_TPR_AO_M', 'Total TB patients received DOTS from the centre', 'Medicines_CSOM_EOM', 'Availability of functional BP apparatus', 'Availability of functional glucometer', 'Closing stock of glucostrips', 'Performance/team based incentives for MO/SN/CHO', 'Team based incentives for ASHA/MPW'] if set(required_col).issubset(sd.columns): st.subheader("Perfect") st.write(sd.head(3)) else: st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(sd.columns)}") counts=1 # if upload_file_FPE and upload_file_DE and upload_file_SD is not None: # markdown processing st.markdown(''' # *************************Processing Done: Please check next section************************** ''') st.subheader("This tool is developed by : Surendra Mehta Data Analyst Jhpiego India all rights reserved") with col2: if counts==0 and upload_file_FPE and upload_file_DE and upload_file_SD: # Facility PE cleaning ********************************************************************************************************** FPE.rename(columns={'NIN_2_HFI':'NIN ID'}, inplace=True) # Total Population in catchment area of facility remove any commas and convert in int format FPE['Total Population in catchment area of facility']=FPE['Total Population in catchment area of facility'].replace({',': ''}, regex=True) FPE['Total Population in catchment area of facility']=FPE['Total Population in catchment area of facility'].astype(int) col=['NIN ID', 'HFI_Name', 'PHC_CHC_Type', 'State_Name', 'District_Name', 'Taluka_Name', 'Block_Name', 'Proposed Date', 'Progressive Date', 'Total Population in catchment area of facility'] FPE=FPE[col] geo=['HFI_Name', 'PHC_CHC_Type','State_Name', 'District_Name', 'Taluka_Name', 'Block_Name'] # fill NA if any value is missing in geo FPE[geo]=FPE[geo].fillna('NA') # total facility total_facility=FPE.shape[0] st.write(f"Total facility in facility profile entry is :{total_facility}") # DE cleaning ******************************************************************************************************************* #Entry Date convert in date format in format of 2022-05-09 DE['Entry Date'] = pd.to_datetime(DE['Entry Date'], format='%Y-%m-%d') DE.head(3) # add month-year column from entry date DE['Month-Year'] = DE['Entry Date'].dt.strftime('%b-%Y') DE['Total'] = DE["Footfall Male"] + DE["Footfall Female "]+DE["Footfall Others "] # if wellness session conducted is yes then 1 else 0 DE['Wellness sessions conducted ']=DE['Wellness sessions conducted '].replace({'Yes': 1, 'No': 0}) DE['Reporting']=1 DE_col=['NIN ID','Reporting', 'Total', ' Patients availed tele-consulation services ', 'Wellness sessions conducted ', 'Month-Year'] DE=DE[DE_col] #groupby DE=DE.groupby(['NIN ID','Month-Year']).sum().reset_index() #total facility in DE st.write(f"Total facility in DE is :{DE['NIN ID'].nunique()}") # SD cleaning ******************************************************************************************************************** #Entry Date convert in date format in format of 2022-11-30 sd['Entry Month'] = pd.to_datetime(sd['Entry Month'], format='%Y-%m-%d') # # add month-year column from entry date sd['Month-Year'] = sd['Entry Month'].dt.strftime('%b-%Y') #show all the columns in view pd.set_option('display.max_columns', None) int_col=['Individuals empanelled', 'Community Based Assessment Checklist filled', 'HTN Individuals screened Male', 'HTN Individuals screened Female', 'HTN Individuals screened Other', 'HTN Newly diagnosed Male', 'HTN Newly diagnosed Female', 'HTN Newly diagnosed Other', 'HTN On treatment Male', 'HTN On treatment Female', 'HTN On treatment Other', 'DM Individuals screened Male', 'DM Individuals screened Female', 'DM Individuals screened Other', 'DM Newly diagnosed Male', 'DM Newly diagnosed Female', 'DM Newly diagnosed Other', 'DM On treatment Male', 'DM On treatment Female', 'DM On treatment Other', 'OC Individuals screened Male', 'OC Individuals screened Female', 'OC Individuals screened Other', 'OC Newly diagnosed Male', 'OC Newly diagnosed Female', 'OC Newly diagnosed Other', 'OC On treatment male', 'OC On treatment Female', 'OC On treatment Other', 'BC Individuals screened female', 'BC Newly diagnosed female', 'BC On treatment female', 'CC Individuals screened female', 'CC Newly diagnosed female', 'CC On treatment female', 'Individuals referred for screening male', 'Individuals referred for screening female', 'Individuals referred for screening other', 'Newly diagnosed Male', 'Newly diagnosed Female', 'Newly diagnosed Other', 'On treatment Male', 'On treatment Female', 'On treatment Other', 'Total Patients received antihypertensive medicines at this centre', 'Total Patients received ant-diabetic medicines at this centre','Medicines_TPR_AO_M',"Closing stock of glucostrips"] # convert all the int columns in int format sd_int=sd[int_col].head() # remove any commas in the int columns sd[sd_int.columns] = sd[sd_int.columns].replace({',': ''}, regex=True) #convert all the int columns in int format sd[int_col] = sd[int_col].apply(pd.to_numeric, errors='coerce', axis=1) # add total column in sd_int #HTN sd['HTN screened '] = sd['HTN Individuals screened Male']+ sd['HTN Individuals screened Female']+ sd['HTN Individuals screened Other'] sd['HTN diagnosed '] = sd['HTN Newly diagnosed Male']+ sd['HTN Newly diagnosed Female']+ sd['HTN Newly diagnosed Other'] sd['HTN on treatment '] = sd['HTN On treatment Male']+ sd['HTN On treatment Female']+ sd['HTN On treatment Other'] #DM sd['DM screened '] = sd['DM Individuals screened Male']+ sd['DM Individuals screened Female']+ sd['DM Individuals screened Other'] sd['DM diagnosed '] = sd['DM Newly diagnosed Male']+ sd['DM Newly diagnosed Female']+ sd['DM Newly diagnosed Other'] sd['DM on treatment '] = sd['DM On treatment Male']+ sd['DM On treatment Female']+ sd['DM On treatment Other'] #DM sd['OC screened '] = sd['OC Individuals screened Male']+ sd['OC Individuals screened Female']+ sd['OC Individuals screened Other'] sd['OC diagnosed '] = sd['OC Newly diagnosed Male']+ sd['OC Newly diagnosed Female']+ sd['OC Newly diagnosed Other'] sd['OC on treatment '] = sd['OC On treatment male']+ sd['OC On treatment Female']+ sd['OC On treatment Other'] #referred sd['TB_Referred'] = sd['Individuals referred for screening male']+ sd['Individuals referred for screening female']+ sd['Individuals referred for screening other'] #Newly diagnosed sd['TB_Newly diagnosed'] = sd['Newly diagnosed Male']+ sd['Newly diagnosed Female']+ sd['Newly diagnosed Other'] #On treatment sd['TB_On treatment'] = sd['On treatment Male']+ sd['On treatment Female']+ sd['On treatment Other'] # if yes than 1 else 0 add both sd["equipments_BP_gluco"]=sd["Availability of functional BP apparatus"].replace({'Yes': 1, 'No': 0})+sd["Availability of functional glucometer"].replace({'Yes': 1, 'No': 0}) sd["pbi_tbi"]=sd["Performance/team based incentives for MO/SN/CHO"].replace({'Yes': 1, 'No': 0})+sd["Team based incentives for ASHA/MPW"].replace({'Yes': 1, 'No': 0}) selected_col=['NIN ID', 'Facility Name', 'Facility Type', 'State', 'District', 'Taluka', 'Block', 'Entry Month', 'Month-Year', 'HTN screened ', 'DM screened ', 'OC screened ','BC Individuals screened female', 'TB_Referred','equipments_BP_gluco',"pbi_tbi"] sd=sd[selected_col] # group by NIN-ID and month-year sum sd=sd.groupby(['NIN ID','Month-Year']).sum().reset_index() # total facility in SD st.write("Total facility in SD",sd['NIN ID'].nunique()) df=pd.merge(DE, sd, on=['NIN ID','Month-Year'], how='outer') #merge with FPE df=pd.merge(FPE,df, on=['NIN ID'], how='outer') #df add a target population column with 80 percent of the total population df['Target Population']=round(df['Total Population in catchment area of facility']*0.8,0) # # all float value in one list # float_col=df.select_dtypes(include=['float']).columns.tolist() # # convert all the float columns in int format # df[float_col] = df[float_col].apply(pd.to_numeric, errors='coerce', axis=1) # cal_Reporting map value>=20 than 15, IF value >=10 than 10 else 0 df['cal_Reporting']=np.where(df['Reporting']>=20,15,np.where(df['Reporting']>=10,10,0)) # cal_footfall map if 'Total'/'Target Population'>=1, 15,if 'Total'/'Target Population'>=.8, 10,if 'Total'/'Target Population'>=.5,5, else 0 df['cal_footfall']=np.where((df['Total']/df['Target Population'])>=1,15,np.where((df['Total']/df['Target Population'])>=.8,10,np.where((df['Total']/df['Target Population'])>=.5,5,0))) # Equipment value AV4=2,10,IF value =1,5,0 df['cal_Equipment']=np.where(df['equipments_BP_gluco']==2,10,np.where(df['equipments_BP_gluco']==1,5,0)) # pbi_tbi value AV4=2,10,IF value =1,5,0 df['cal_pbi_tbi']=np.where(df['pbi_tbi']==2,10,np.where(df['pbi_tbi']==1,5,0)) # tb refered value referred/total >=30,10 ,if referred/total >=20,5, else 0 df['cal_tb']=np.where((df['TB_Referred']/df['Total'])>=.3,10,np.where((df['TB_Referred']/df['Total'])>=.2,5,0)) # teleconsultation value AM4>=30,5,IF(AM4>=15,3,IF(AM4>=1,1,0 df['cal_teleconsultation']=np.where(df[' Patients availed tele-consulation services ']>=30,5,np.where(df[' Patients availed tele-consulation services ']>=15,3,np.where(df[' Patients availed tele-consulation services ']>=1,1,0))) # wellness and yoga AQ6>=10,5,IF(AQ6>=5,3,0) df['cal_wellness']=np.where(df['Wellness sessions conducted ']>=10,5,np.where(df['Wellness sessions conducted ']>=5,3,0)) # save the df in csv # pivot table df_pivot=df.pivot_table(index=["NIN ID","HFI_Name","PHC_CHC_Type","State_Name","District_Name","Taluka_Name","Block_Name"],columns='Month-Year', values=['HTN screened ','DM screened ','OC screened ','BC Individuals screened female','cal_Reporting','cal_footfall','cal_Equipment', 'cal_pbi_tbi','cal_tb','cal_teleconsultation','cal_wellness'],aggfunc='sum').reset_index() # total unique facility in all 3 uploaded data st.write(f"Total facility in all 3 uploaded data {df_pivot['NIN ID'].nunique()}") col=[col[0] + '_' + col[1] for col in df_pivot.columns] df_pivot.columns = col df_pivot = df_pivot.reset_index() # filter all columns having "screened" or refered screened_col=[col for col in df_pivot.columns if 'screened' in col] # import minmaxscaler from sklearn.preprocessing import MinMaxScaler # apply minmaxscaler on screened_col scaler = MinMaxScaler() df_pivot[screened_col] = scaler.fit_transform(df_pivot[screened_col]) # for each month-year calculate the sum of all the columns unique_month=df['Month-Year'].unique() #drop blank value unique_month=unique_month[~pd.isnull(unique_month)] for i in unique_month: df_pivot[i+'_sum']=df_pivot[[col for col in df_pivot.columns if i in col]].sum(axis=1) # add rank column and based on sum of each month-year rank the facility df_pivot[i+'_rank']=df_pivot[i+'_sum'].rank(ascending=False) # For Below month ranking will be calculated st.write(unique_month) st.subheader('Below month ranking will be calculated') st.write(unique_month) # calculate the sum of all the month-year sum df_pivot['Overall Score']=df_pivot[[col for col in df_pivot.columns if 'sum' in col]].sum(axis=1) # keep only ranking and overall score columns sum=[col for col in df_pivot.columns if 'sum' in col] rank=[col for col in df_pivot.columns if 'rank' in col] # add both list in one list sum_rank=sum+rank # all sum and rank columns in df_pivot convert in int format df_pivot[sum_rank] = df_pivot[sum_rank].apply(pd.to_numeric, errors='coerce', axis=1) df_rank=df_pivot[['NIN ID_','HFI_Name_','PHC_CHC_Type_', 'State_Name_','District_Name_','Taluka_Name_', 'Block_Name_','Overall Score']+sum_rank] # df subheader st.subheader("Download the Ranking") generate_excel_download_link(df_rank) # df subheader pivot table st.subheader("Detailed calculation download") generate_excel_download_link(df_pivot) # subheader download all 3 files analysis st.subheader("Download all 3 files analysis") generate_excel_download_link(df) # df subheader