Spaces:

surendra962
/

ranking

Runtime error

App Files Files Community

ranking / facility_ranking.py

surendra962

Upload 4 files

807049f over 1 year ago

raw

history blame contribute delete

18.8 kB

	import pandas as pd
	import numpy as np
	import streamlit as st
	import base64 # Standard Python Module
	from io import StringIO, BytesIO # Standard Python Module

	# hide menu and footer
	st.set_page_config(page_title=None, page_icon=None, layout="wide", initial_sidebar_state="auto", menu_items=None)
	hide_streamlit_style = """
	<style>
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	</style>
	"""
	st.markdown(hide_streamlit_style, unsafe_allow_html=True) # unsafe_allow_html allows us to embed html code





	def generate_excel_download_link(df):
	# Credit Excel: https://discuss.streamlit.io/t/how-to-add-a-download-excel-csv-function-to-a-button/4474/5
	towrite = BytesIO()
	df.to_excel(towrite, encoding="utf-8", index=False, header=True) # write to BytesIO buffer
	towrite.seek(0) # reset pointer
	b64 = base64.b64encode(towrite.read()).decode()
	href = f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="facility_ranking_SSM.xlsx">Download </a>'
	return st.markdown(href, unsafe_allow_html=True)





	# DE=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/DE.csv')
	# FPE=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/FPE.csv')
	# sd=pd.read_csv('C:/Python/1. Dashboard/Facility ranking/Data/SD.csv')

	# width in full screen


	st.title('AI Ranking-CPHC Project Jhpiego')
	# add two columns
	col1, col2 = st.columns(2)
	with col1:
	counts=0

	# file upload
	upload_file_FPE = st.file_uploader("Upload Facility Profile Entry Data in CSV format", type=['csv'])

	# if upload_file_DE is not None highlight "file not uploaded"


	if upload_file_FPE:
	FPE = pd.read_csv(upload_file_FPE)

	# check the columns heading available in the file
	required_col=['NIN_2_HFI', 'HFI_Name', 'PHC_CHC_Type',
	'State_Name', 'District_Name', 'Taluka_Name', 'Block_Name',
	'Proposed Date', 'Progressive Date',
	'Total Population in catchment area of facility']
	if set(required_col).issubset(FPE.columns):
	st.subheader("Perfect")
	st.write(FPE.head(3))
	else:
	st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(FPE.columns)}")
	counts=1


	# file upload
	upload_file_DE = st.file_uploader("Upload Daily Entry Data in CSV format", type=['csv'])
	if upload_file_DE:
	DE = pd.read_csv(upload_file_DE)

	required_col=['NIN ID', 'Entry Date', 'Footfall Male', 'Footfall Female ',
	'Footfall Others ', 'Footfall Total', 'Patients received medicines ',
	'Patients availed diagnostic tests',
	' Patients availed tele-consulation services ',
	'Wellness sessions conducted ',
	'Total participants of Wellness session']
	if set(required_col).issubset(DE.columns):
	st.subheader("Perfect")
	st.write(DE.head(3))
	else:
	st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(DE.columns)}")
	counts=1


	# file upload
	upload_file_SD = st.file_uploader("Upload Service Delivery Data in CSV format", type=['csv'])
	if upload_file_SD:
	sd = pd.read_csv(upload_file_SD)

	required_col=['NIN ID', 'Entry Month',
	'HTN Individuals screened Male', 'HTN Individuals screened Female',
	'HTN Individuals screened Other', 'HTN Newly diagnosed Male',
	'HTN Newly diagnosed Female', 'HTN Newly diagnosed Other',
	'HTN On treatment Male', 'HTN On treatment Female',
	'HTN On treatment Other', 'DM Individuals screened Male',
	'DM Individuals screened Female', 'DM Individuals screened Other',
	'DM Newly diagnosed Male', 'DM Newly diagnosed Female',
	'DM Newly diagnosed Other', 'DM On treatment Male',
	'DM On treatment Female', 'DM On treatment Other',
	'OC Individuals screened Male', 'OC Individuals screened Female',
	'OC Individuals screened Other', 'OC Newly diagnosed Male',
	'OC Newly diagnosed Female', 'OC Newly diagnosed Other',
	'OC On treatment male', 'OC On treatment Female',
	'OC On treatment Other', 'BC Individuals screened female',
	'BC Newly diagnosed female', 'BC On treatment female',
	'CC Individuals screened female', 'CC Newly diagnosed female',
	'CC On treatment female', 'Individuals referred for screening male',
	'Individuals referred for screening female',
	'Individuals referred for screening other', 'Newly diagnosed Male',
	'Newly diagnosed Female', 'Newly diagnosed Other', 'On treatment Male',
	'On treatment Female', 'On treatment Other',
	'Total Patients received antihypertensive medicines at this centre',
	'Total Patients received ant-diabetic medicines at this centre',
	'Medicines_TPR_AO_M', 'Total TB patients received DOTS from the centre',
	'Medicines_CSOM_EOM', 'Availability of functional BP apparatus',
	'Availability of functional glucometer', 'Closing stock of glucostrips',
	'Performance/team based incentives for MO/SN/CHO',
	'Team based incentives for ASHA/MPW']
	if set(required_col).issubset(sd.columns):
	st.subheader("Perfect")
	st.write(sd.head(3))
	else:
	st.subheader(f"Please upload correct file missing columns are {set(required_col)-set(sd.columns)}")
	counts=1

	# if upload_file_FPE and upload_file_DE and upload_file_SD is not None:
	# markdown processing
	st.markdown('''
	# ***********************Processing Done: Please check next section************************
	''')
	st.subheader("This tool is developed by : Surendra Mehta Data Analyst Jhpiego India all rights reserved")
	with col2:

	if counts==0 and upload_file_FPE and upload_file_DE and upload_file_SD:




	# Facility PE cleaning **********************************************************************************************************
	FPE.rename(columns={'NIN_2_HFI':'NIN ID'}, inplace=True)

	# Total Population in catchment area of facility remove any commas and convert in int format
	FPE['Total Population in catchment area of facility']=FPE['Total Population in catchment area of facility'].replace({',': ''}, regex=True)
	FPE['Total Population in catchment area of facility']=FPE['Total Population in catchment area of facility'].astype(int)

	col=['NIN ID', 'HFI_Name', 'PHC_CHC_Type',
	'State_Name', 'District_Name', 'Taluka_Name', 'Block_Name',
	'Proposed Date', 'Progressive Date',
	'Total Population in catchment area of facility']
	FPE=FPE[col]
	geo=['HFI_Name', 'PHC_CHC_Type','State_Name', 'District_Name', 'Taluka_Name', 'Block_Name']

	# fill NA if any value is missing in geo
	FPE[geo]=FPE[geo].fillna('NA')
	# total facility
	total_facility=FPE.shape[0]
	st.write(f"Total facility in facility profile entry is :{total_facility}")


	# DE cleaning *******************************************************************************************************************
	#Entry Date convert in date format in format of 2022-05-09

	DE['Entry Date'] = pd.to_datetime(DE['Entry Date'], format='%Y-%m-%d')
	DE.head(3)
	# add month-year column from entry date
	DE['Month-Year'] = DE['Entry Date'].dt.strftime('%b-%Y')
	DE['Total'] = DE["Footfall Male"] + DE["Footfall Female "]+DE["Footfall Others "]
	# if wellness session conducted is yes then 1 else 0
	DE['Wellness sessions conducted ']=DE['Wellness sessions conducted '].replace({'Yes': 1, 'No': 0})
	DE['Reporting']=1

	DE_col=['NIN ID','Reporting', 'Total',
	' Patients availed tele-consulation services ',
	'Wellness sessions conducted ',
	'Month-Year']
	DE=DE[DE_col]
	#groupby
	DE=DE.groupby(['NIN ID','Month-Year']).sum().reset_index()


	#total facility in DE
	st.write(f"Total facility in DE is :{DE['NIN ID'].nunique()}")

	# SD cleaning ********************************************************************************************************************

	#Entry Date convert in date format in format of 2022-11-30
	sd['Entry Month'] = pd.to_datetime(sd['Entry Month'], format='%Y-%m-%d')

	# # add month-year column from entry date
	sd['Month-Year'] = sd['Entry Month'].dt.strftime('%b-%Y')

	#show all the columns in view
	pd.set_option('display.max_columns', None)

	int_col=['Individuals empanelled',
	'Community Based Assessment Checklist filled',
	'HTN Individuals screened Male', 'HTN Individuals screened Female',
	'HTN Individuals screened Other', 'HTN Newly diagnosed Male',
	'HTN Newly diagnosed Female', 'HTN Newly diagnosed Other',
	'HTN On treatment Male', 'HTN On treatment Female',
	'HTN On treatment Other', 'DM Individuals screened Male',
	'DM Individuals screened Female', 'DM Individuals screened Other',
	'DM Newly diagnosed Male', 'DM Newly diagnosed Female',
	'DM Newly diagnosed Other', 'DM On treatment Male',
	'DM On treatment Female', 'DM On treatment Other',
	'OC Individuals screened Male', 'OC Individuals screened Female',
	'OC Individuals screened Other', 'OC Newly diagnosed Male',
	'OC Newly diagnosed Female', 'OC Newly diagnosed Other',
	'OC On treatment male', 'OC On treatment Female',
	'OC On treatment Other', 'BC Individuals screened female',
	'BC Newly diagnosed female', 'BC On treatment female',
	'CC Individuals screened female', 'CC Newly diagnosed female',
	'CC On treatment female', 'Individuals referred for screening male',
	'Individuals referred for screening female',
	'Individuals referred for screening other', 'Newly diagnosed Male',
	'Newly diagnosed Female', 'Newly diagnosed Other', 'On treatment Male',
	'On treatment Female', 'On treatment Other',
	'Total Patients received antihypertensive medicines at this centre',
	'Total Patients received ant-diabetic medicines at this centre','Medicines_TPR_AO_M',"Closing stock of glucostrips"]


	# convert all the int columns in int format
	sd_int=sd[int_col].head()
	# remove any commas in the int columns
	sd[sd_int.columns] = sd[sd_int.columns].replace({',': ''}, regex=True)
	#convert all the int columns in int format
	sd[int_col] = sd[int_col].apply(pd.to_numeric, errors='coerce', axis=1)

	# add total column in sd_int

	#HTN
	sd['HTN screened '] = sd['HTN Individuals screened Male']+ sd['HTN Individuals screened Female']+ sd['HTN Individuals screened Other']
	sd['HTN diagnosed '] = sd['HTN Newly diagnosed Male']+ sd['HTN Newly diagnosed Female']+ sd['HTN Newly diagnosed Other']
	sd['HTN on treatment '] = sd['HTN On treatment Male']+ sd['HTN On treatment Female']+ sd['HTN On treatment Other']

	#DM
	sd['DM screened '] = sd['DM Individuals screened Male']+ sd['DM Individuals screened Female']+ sd['DM Individuals screened Other']
	sd['DM diagnosed '] = sd['DM Newly diagnosed Male']+ sd['DM Newly diagnosed Female']+ sd['DM Newly diagnosed Other']
	sd['DM on treatment '] = sd['DM On treatment Male']+ sd['DM On treatment Female']+ sd['DM On treatment Other']

	#DM
	sd['OC screened '] = sd['OC Individuals screened Male']+ sd['OC Individuals screened Female']+ sd['OC Individuals screened Other']
	sd['OC diagnosed '] = sd['OC Newly diagnosed Male']+ sd['OC Newly diagnosed Female']+ sd['OC Newly diagnosed Other']
	sd['OC on treatment '] = sd['OC On treatment male']+ sd['OC On treatment Female']+ sd['OC On treatment Other']


	#referred
	sd['TB_Referred'] = sd['Individuals referred for screening male']+ sd['Individuals referred for screening female']+ sd['Individuals referred for screening other']

	#Newly diagnosed
	sd['TB_Newly diagnosed'] = sd['Newly diagnosed Male']+ sd['Newly diagnosed Female']+ sd['Newly diagnosed Other']

	#On treatment
	sd['TB_On treatment'] = sd['On treatment Male']+ sd['On treatment Female']+ sd['On treatment Other']
	# if yes than 1 else 0 add both
	sd["equipments_BP_gluco"]=sd["Availability of functional BP apparatus"].replace({'Yes': 1, 'No': 0})+sd["Availability of functional glucometer"].replace({'Yes': 1, 'No': 0})
	sd["pbi_tbi"]=sd["Performance/team based incentives for MO/SN/CHO"].replace({'Yes': 1, 'No': 0})+sd["Team based incentives for ASHA/MPW"].replace({'Yes': 1, 'No': 0})


	selected_col=['NIN ID', 'Facility Name', 'Facility Type', 'State', 'District',
	'Taluka', 'Block', 'Entry Month',
	'Month-Year',
	'HTN screened ', 'DM screened ',
	'OC screened ','BC Individuals screened female', 'TB_Referred','equipments_BP_gluco',"pbi_tbi"]
	sd=sd[selected_col]


	# group by NIN-ID and month-year sum
	sd=sd.groupby(['NIN ID','Month-Year']).sum().reset_index()

	# total facility in SD
	st.write("Total facility in SD",sd['NIN ID'].nunique())



	df=pd.merge(DE, sd, on=['NIN ID','Month-Year'], how='outer')
	#merge with FPE
	df=pd.merge(FPE,df, on=['NIN ID'], how='outer')



	#df add a target population column with 80 percent of the total population
	df['Target Population']=round(df['Total Population in catchment area of facility']*0.8,0)



	# # all float value in one list
	# float_col=df.select_dtypes(include=['float']).columns.tolist()

	# # convert all the float columns in int format
	# df[float_col] = df[float_col].apply(pd.to_numeric, errors='coerce', axis=1)

	# cal_Reporting map value>=20 than 15, IF value >=10 than 10 else 0
	df['cal_Reporting']=np.where(df['Reporting']>=20,15,np.where(df['Reporting']>=10,10,0))
	# cal_footfall map if 'Total'/'Target Population'>=1, 15,if 'Total'/'Target Population'>=.8, 10,if 'Total'/'Target Population'>=.5,5, else 0

	df['cal_footfall']=np.where((df['Total']/df['Target Population'])>=1,15,np.where((df['Total']/df['Target Population'])>=.8,10,np.where((df['Total']/df['Target Population'])>=.5,5,0)))
	# Equipment value AV4=2,10,IF value =1,5,0
	df['cal_Equipment']=np.where(df['equipments_BP_gluco']==2,10,np.where(df['equipments_BP_gluco']==1,5,0))
	# pbi_tbi value AV4=2,10,IF value =1,5,0
	df['cal_pbi_tbi']=np.where(df['pbi_tbi']==2,10,np.where(df['pbi_tbi']==1,5,0))
	# tb refered value referred/total >=30,10 ,if referred/total >=20,5, else 0
	df['cal_tb']=np.where((df['TB_Referred']/df['Total'])>=.3,10,np.where((df['TB_Referred']/df['Total'])>=.2,5,0))
	# teleconsultation value AM4>=30,5,IF(AM4>=15,3,IF(AM4>=1,1,0
	df['cal_teleconsultation']=np.where(df[' Patients availed tele-consulation services ']>=30,5,np.where(df[' Patients availed tele-consulation services ']>=15,3,np.where(df[' Patients availed tele-consulation services ']>=1,1,0)))
	# wellness and yoga AQ6>=10,5,IF(AQ6>=5,3,0)
	df['cal_wellness']=np.where(df['Wellness sessions conducted ']>=10,5,np.where(df['Wellness sessions conducted ']>=5,3,0))
	# save the df in csv


	# pivot table
	df_pivot=df.pivot_table(index=["NIN ID","HFI_Name","PHC_CHC_Type","State_Name","District_Name","Taluka_Name","Block_Name"],columns='Month-Year',
	values=['HTN screened ','DM screened ','OC screened ','BC Individuals screened female','cal_Reporting','cal_footfall','cal_Equipment',
	'cal_pbi_tbi','cal_tb','cal_teleconsultation','cal_wellness'],aggfunc='sum').reset_index()

	# total unique facility in all 3 uploaded data
	st.write(f"Total facility in all 3 uploaded data {df_pivot['NIN ID'].nunique()}")

	col=[col[0] + '_' + col[1] for col in df_pivot.columns]
	df_pivot.columns = col
	df_pivot = df_pivot.reset_index()



	# filter all columns having "screened" or refered
	screened_col=[col for col in df_pivot.columns if 'screened' in col]

	# import minmaxscaler
	from sklearn.preprocessing import MinMaxScaler

	# apply minmaxscaler on screened_col
	scaler = MinMaxScaler()
	df_pivot[screened_col] = scaler.fit_transform(df_pivot[screened_col])

	# for each month-year calculate the sum of all the columns
	unique_month=df['Month-Year'].unique()
	#drop blank value
	unique_month=unique_month[~pd.isnull(unique_month)]

	for i in unique_month:
	df_pivot[i+'_sum']=df_pivot[[col for col in df_pivot.columns if i in col]].sum(axis=1)
	# add rank column and based on sum of each month-year rank the facility
	df_pivot[i+'_rank']=df_pivot[i+'_sum'].rank(ascending=False)



	# For Below month ranking will be calculated st.write(unique_month)
	st.subheader('Below month ranking will be calculated')
	st.write(unique_month)

	# calculate the sum of all the month-year sum
	df_pivot['Overall Score']=df_pivot[[col for col in df_pivot.columns if 'sum' in col]].sum(axis=1)





	# keep only ranking and overall score columns
	sum=[col for col in df_pivot.columns if 'sum' in col]
	rank=[col for col in df_pivot.columns if 'rank' in col]

	# add both list in one list
	sum_rank=sum+rank


	# all sum and rank columns in df_pivot convert in int format
	df_pivot[sum_rank] = df_pivot[sum_rank].apply(pd.to_numeric, errors='coerce', axis=1)

	df_rank=df_pivot[['NIN ID_','HFI_Name_','PHC_CHC_Type_', 'State_Name_','District_Name_','Taluka_Name_',
	'Block_Name_','Overall Score']+sum_rank]

	# df subheader
	st.subheader("Download the Ranking")
	generate_excel_download_link(df_rank)

	# df subheader pivot table
	st.subheader("Detailed calculation download")
	generate_excel_download_link(df_pivot)

	# subheader download all 3 files analysis
	st.subheader("Download all 3 files analysis")
	generate_excel_download_link(df)

	# df subheader