db_query / queries /process_site_db.py
DavMelchi's picture
Adding City Adresse Commune and cercle to Physical DB
9d69630
import pandas as pd
from utils.utils_vars import SiteAnalysisData, UtilsVars
GSM_COLUMNS = [
"code",
"site_name",
"site_config_band",
"Region",
"number_trx_per_site",
"Longitude",
"Latitude",
"Hauteur",
"City",
"Adresse",
"Commune",
"Cercle",
]
WCDMA_COLUMNS = [
"code",
"site_name",
"Region",
"site_config_band",
"Longitude",
"Latitude",
"Hauteur",
"City",
"Adresse",
"Commune",
"Cercle",
]
LTE_COLUMNS = [
"code",
"lnbts_name",
"site_config_band",
"Region",
"Longitude",
"Latitude",
"Hauteur",
"City",
"Adresse",
"Commune",
"Cercle",
]
CODE_COLUMNS = [
"code",
"Region",
"Longitude",
"Latitude",
"Hauteur",
"City",
"Adresse",
"Commune",
"Cercle",
]
def clean_bands(bands):
if pd.isna(bands):
return None
parts = [p for p in bands.split("/") if p != "nan"]
return "/".join(parts) if parts else None
def site_db():
gsm_df: pd.DataFrame = UtilsVars.all_db_dfs[0]
wcdma_df: pd.DataFrame = UtilsVars.all_db_dfs[3]
lte_fdd_df: pd.DataFrame = UtilsVars.all_db_dfs[4]
lte_tdd_df: pd.DataFrame = UtilsVars.all_db_dfs[5]
gsm_df = gsm_df[GSM_COLUMNS]
gsm_df = gsm_df.rename(
columns={
"code": "code",
"site_name": "gsm_name",
"site_config_band": "2G_Bands",
}
)
gsm_df.drop_duplicates(subset=["code"], keep="first", inplace=True)
wcdma_df = wcdma_df[WCDMA_COLUMNS]
wcdma_df = wcdma_df.rename(
columns={
"code": "code",
"site_name": "wcdma_name",
"site_config_band": "3G_Bands",
}
)
wcdma_df.drop_duplicates(subset=["code"], keep="first", inplace=True)
lte_fdd_df = lte_fdd_df[LTE_COLUMNS]
lte_tdd_df = lte_tdd_df[LTE_COLUMNS]
lte_df: pd.DataFrame = pd.concat([lte_fdd_df, lte_tdd_df], ignore_index=False)
lte_df = lte_df.rename(
columns={
"code": "code",
"lnbts_name": "lte_name",
"site_config_band": "4G_Bands",
}
)
lte_df.drop_duplicates(subset=["code"], keep="first", inplace=True)
################################# CODE DATAFRAME#############################
gsm_code_df: pd.DataFrame = (
gsm_df[CODE_COLUMNS].copy() if gsm_df is not None else pd.DataFrame()
)
wcdma_code_df: pd.DataFrame = (
wcdma_df[CODE_COLUMNS].copy() if wcdma_df is not None else pd.DataFrame()
)
lte_code_df: pd.DataFrame = (
lte_df[CODE_COLUMNS].copy() if lte_df is not None else pd.DataFrame()
)
code_df: pd.DataFrame = pd.concat(
[gsm_code_df, wcdma_code_df, lte_code_df], ignore_index=True
)
code_df.drop_duplicates(subset=["code"], keep="first", inplace=True)
code_df.dropna(subset=["code"], inplace=True)
# order by code
code_df.sort_values(by=["code"], inplace=True)
# print(code_df)
# ################################# SITE DATAFRAME#############################
gsm_df_final = gsm_df[
[
"code",
"gsm_name",
"2G_Bands",
"number_trx_per_site",
]
].copy()
wcdma_df_final = wcdma_df[["code", "wcdma_name", "3G_Bands"]].copy()
lte_df_final = lte_df[["code", "lte_name", "4G_Bands"]].copy()
site_df = pd.merge(code_df, gsm_df_final, how="left", on="code")
site_df = pd.merge(site_df, wcdma_df_final, how="left", on="code")
site_df = pd.merge(site_df, lte_df_final, how="left", on="code")
# order by code
site_df["site_name"] = (
site_df["gsm_name"].fillna(site_df["wcdma_name"]).fillna(site_df["lte_name"])
)
site_df["all_bands"] = (
(site_df[["2G_Bands", "3G_Bands", "4G_Bands"]])
.astype(str)
.apply("/".join, axis=1)
)
site_df["all_bands"] = site_df["all_bands"].apply(clean_bands)
site_df = site_df[
[
"code",
"site_name",
"Region",
"2G_Bands",
"3G_Bands",
"4G_Bands",
"all_bands",
"number_trx_per_site",
"Longitude",
"Latitude",
"Hauteur",
"City",
"Adresse",
"Commune",
"Cercle",
]
]
site_df.sort_values(by=["code"], inplace=True)
UtilsVars.all_db_dfs.append(site_df)
UtilsVars.all_db_dfs_names.append("SITE")
####################### SITE ANALYSIS ###################################################
SiteAnalysisData.total_number_of_site = len(site_df["code"].unique())
SiteAnalysisData.total_munber_of_gsm_site = site_df["2G_Bands"].notna().sum()
SiteAnalysisData.total_number_of_wcdma_site = site_df["3G_Bands"].notna().sum()
SiteAnalysisData.total_number_of_lte_site = site_df["4G_Bands"].notna().sum()
SiteAnalysisData.gsm_bands_distribution = site_df["2G_Bands"].value_counts(
ascending=True
)
SiteAnalysisData.wcdma_bands_distribution = site_df["3G_Bands"].value_counts(
ascending=True
)
SiteAnalysisData.lte_bands_distribution = site_df["4G_Bands"].value_counts(
ascending=True
)
SiteAnalysisData.all_bands_distribution = site_df["all_bands"].value_counts(
ascending=True
)
SiteAnalysisData.number_of_trx_per_site_distribution = site_df[
"number_trx_per_site"
].value_counts()