fschwartzer's picture
Update app.py
8cba5f8 verified
import streamlit as st
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import plotly.express as px
st.set_page_config(layout="wide")
df = pd.read_csv('last_results_8.csv')
temp_data = pd.read_csv('temp_data(2).csv')
temp_data['Data_Completa'] = pd.to_datetime(temp_data['Data_Completa'])
temp_data.sort_values(['Instituição', 'Conta', 'Data_Completa'], inplace=True)
temp_data['Últimos 12 meses'] = temp_data.groupby(['Instituição', 'Conta'])['Valor'].transform(lambda x: x.rolling(window=12, min_periods=1).sum())
last_dates = temp_data.groupby(['Instituição', 'Conta'])['Data_Completa'].transform(max)
last_rows = temp_data[temp_data['Data_Completa'] == last_dates]
ultimo_ano = last_rows[['Instituição', 'Conta', 'Últimos 12 meses']]
image1 = 'images/rs_pmpa.PNG'
title_html = """
<style>
@font-face {
font-family: 'Quicksand';
src: url('font/Quicksand-VariableFont_wght.ttf') format('truetype');
}
body {
font-family: 'Quicksand', sans-serif;
}
.custom-title {
color: darkgreen;
font-size: 30px;
font-weight: bold;
}
</style>
<span class='custom-title'>PREVISÕES DE RECEITAS</span>
"""
# Set a fixed width for the sidebar
st.markdown(
"""
<style>
.sidebar .sidebar-content {
width: 300px;
}
</style>
""",
unsafe_allow_html=True
)
with st.sidebar:
st.image(image1, use_column_width=True)
st.markdown(title_html, unsafe_allow_html=True)
selected_instituicao = st.selectbox('Seleciona Instituição', df['Instituição'].unique())
selected_conta = st.selectbox('Seleciona Conta', df['Conta'].unique())
# Filter the DataFrame based on selected values
#filtered_df = df[(df['Instituição'] == selected_instituicao) & (df['Conta'] == selected_conta)]
# Initial filtering based on selected 'Instituição'
instituicao_df = df[df['Instituição'] == selected_instituicao]
# Container for adjusted DataFrame rows
adjusted_rows = []
# Iterate through each unique 'Conta' within the selected 'Instituição'
for conta in instituicao_df['Conta'].unique():
conta_df = instituicao_df[instituicao_df['Conta'] == selected_conta]
# Check if 'Linear Regression' is available for this 'Conta'
if len(conta_df['Modelo'].unique()) > 1 and "Linear Regression" in conta_df['Modelo'].unique():
lr_rows = conta_df[conta_df['Modelo'] == 'Linear Regression']
adjusted_rows.append(lr_rows)
else:
# If not, include all models' results for this 'Conta'
adjusted_rows.append(conta_df)
# Combine all adjusted rows back into a single DataFrame
filtered_df = pd.concat(adjusted_rows)
#col1, col2, col3 = st.columns(3) # This divides the page into three equal parts
# Set custom width for columns
col1_width = 500
col2_width = 500
col3_width = 500
col1, col2, col3 = st.columns([col1_width, col2_width, col3_width])
# Display the Forecasts values in the first column
col1.header('Composição da RLIT')
tab_df = df[df['Instituição'] == selected_instituicao]
data = []
for conta in tab_df['Conta'].unique():
# Filter the DataFrame for the current 'Conta'
conta_df = tab_df[tab_df['Conta'] == conta]
if len(conta_df['Modelo'].unique()) > 1 and "Linear Regression" in conta_df['Modelo'].unique():
conta_df = conta_df[conta_df['Modelo'] == "Linear Regression"]
# Initialize a variable to store the sum for the current 'Conta'
conta_sum = 0.0
# Take the first 'Modelo' for simplicity
modelo = conta_df['Modelo'].iloc[0]
# Iterate over each row in the filtered DataFrame for the current 'Conta'
for _, row in conta_df.iterrows():
lines = row['Forecasts'].split('\n')
for line in lines[:-1]: # Skip the summary line
if line.strip():
parts = line.split()
value = parts[-1]
try:
conta_sum += float(value)
except ValueError:
print(f"Skipping line unable to convert to float: {line}")
# Append the data to the list
data.append({'Conta': conta, 'Modelo': modelo, 'Próximos 12 meses': conta_sum})
# Convert the list to a DataFrame
table_data = pd.DataFrame(data)
total_sum = table_data['Próximos 12 meses'].sum()
labels = table_data['Conta']
sizes = [(i / total_sum) * 100 for i in table_data['Próximos 12 meses']]
fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels, autopct='%1.1f%%',)
ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
col1.pyplot(fig1)
# Display the Forecasts values in the first column
col2.header('Valores previstos')
if not filtered_df.empty:
data_string = filtered_df['Forecasts'].iloc[0]
# Split the string into lines
lines = data_string.split('\n')
mes = 0
# Iterate through the lines and extract the values
for line in lines[:-1]: # Skip the last two lines which might not contain forecast data
period, value = line.split()
num_float = float(value)
monetary_value = f'R$ {num_float:,.2f}' # Adding commas for thousands separator
mes += 1
col2.write(f"Mês {mes}: {monetary_value}")
else:
col2.warning('No data available for the selected filters.')
# Display the Forecasts values as line plots in the second column
col3.header('Gráfico com previsões')
if not filtered_df.empty:
data_string = filtered_df['Forecasts'].iloc[0]
# Create a list to store data for each period
data = []
# Split the string into lines
lines = data_string.split('\n')
mes = 0
# Iterate through the lines and extract the values
for line in lines[:-1]:
period, value = line.split()
num_float = float(value)
monetary_value = f'R$ {num_float:,.2f}' # Adding commas for thousands separator
mes += 1
data.append({'Period': int(mes), 'Monetary Value': num_float})
# Create a DataFrame from the list
chart_data = pd.DataFrame(data)
# Sort the DataFrame by 'Period'
chart_data = chart_data.sort_values(by='Period')
# Display line chart with "period" on X-axis and "Monetary Value" on Y-axis
col3.line_chart(chart_data.set_index('Period'))
else:
col3.warning('No data available for the selected filters.')
# Display the table in the third column
#col3 = st.columns(1) # You can use st.columns(1) to create a single column layout
col4_width = 750
col5_width = 750
col4, col5 = st.columns([col4_width, col5_width])
if not filtered_df.empty:
# Filter the DataFrame for the selected institution
tab_df = df[df['Instituição'] == selected_instituicao]
# Create an empty list to store data
data = []
# Iterate through each unique 'Conta' in the filtered DataFrame
for conta in tab_df['Conta'].unique():
# Filter the DataFrame for the current 'Conta'
conta_df = tab_df[tab_df['Conta'] == conta]
if len(conta_df['Modelo'].unique()) > 1 and "Linear Regression" in conta_df['Modelo'].unique():
conta_df = conta_df[conta_df['Modelo'] == "Linear Regression"]
# Initialize a variable to store the sum for the current 'Conta'
conta_sum = 0.0
# Take the first 'Modelo' for simplicity
modelo = conta_df['Modelo'].iloc[0]
# Iterate over each row in the filtered DataFrame for the current 'Conta'
for _, row in conta_df.iterrows():
lines = row['Forecasts'].split('\n')
for line in lines[:-1]: # Skip the summary line
if line.strip():
parts = line.split()
value = parts[-1]
try:
conta_sum += float(value)
except ValueError:
print(f"Skipping line unable to convert to float: {line}")
# Format the sum as a monetary value
monetary_value = f'R$ {conta_sum:,.2f}'
# Append the data to the list
data.append({'Conta': conta, 'Modelo': modelo, 'Próximos 12 meses': monetary_value})
# Convert the list to a DataFrame
table_data = pd.DataFrame(data)
last_df = ultimo_ano[ultimo_ano['Instituição'] == selected_instituicao]
last_df.drop(['Instituição'], axis=1, inplace=True)
print(last_df)
last_sum = last_df.iloc[:,-1].sum()
def format_currency(x):
return "R${:,.2f}".format(x)
last_df['Últimos 12 meses'] = last_df['Últimos 12 meses'].apply(format_currency)
table_data = pd.merge(table_data, last_df)
print(table_data)
try:
# Calculate the grand total sum of 'Próximos 12 meses' and 'Últimos 12 meses' values
total_sum = sum(float(row['Próximos 12 meses'].replace('R$ ', '').replace(',', '')) for row in data)
total_sum_prev = last_sum
# Append the "Total" row
total_row = pd.DataFrame({
'Conta': ['TOTAL (RLIT)'],
'Modelo': [''],
'Próximos 12 meses': [f'R$ {total_sum:,.2f}'],
'Últimos 12 meses': [f'R$ {total_sum_prev:,.2f}']
})
table_data = pd.concat([table_data, total_row], ignore_index=True)
# Additional rows calculations and appending
# Assuming percentages for health and education as previously mentioned
saude_value = total_sum * 0.15
educacao_value = total_sum * 0.25
saude_value_prev = total_sum_prev * 0.15
educacao_value_prev = total_sum_prev * 0.25
saude_row = pd.DataFrame({'Conta': ['Saúde (15% da RLIT)'], 'Modelo': [''], 'Próximos 12 meses': [f'R$ {saude_value:,.2f}'], 'Últimos 12 meses': [f'R$ {saude_value_prev:,.2f}']})
educacao_row = pd.DataFrame({'Conta': ['Educação (25% da RLIT)'], 'Modelo': [''], 'Próximos 12 meses': [f'R$ {educacao_value:,.2f}'], 'Últimos 12 meses': [f'R$ {educacao_value_prev:,.2f}']})
# Append these rows to the table data
table_data = pd.concat([table_data, saude_row, educacao_row], ignore_index=True)
table_data.fillna('-', inplace=True)
# Display the table using Streamlit
st.table(table_data)
except Exception as e:
st.error(f"Error in processing data: {str(e)}")
else:
col4.warning('No data available for the selected filters.')
col5.header('Realizado X Previsto')
for conta in tab_df['Conta'].unique():
# Filter the DataFrame for the current 'Conta'
conta_df = tab_df[tab_df['Conta'] == conta]
if len(conta_df['Modelo'].unique()) > 1 and "Linear Regression" in conta_df['Modelo'].unique():
conta_df = conta_df[conta_df['Modelo'] == "Linear Regression"]
# Initialize a variable to store the sum for the current 'Conta'
conta_sum = 0.0
# Take the first 'Modelo' for simplicity
modelo = conta_df['Modelo'].iloc[0]
# Iterate over each row in the filtered DataFrame for the current 'Conta'
for _, row in conta_df.iterrows():
lines = row['Forecasts'].split('\n')
for line in lines[:-1]: # Skip the summary line
if line.strip():
parts = line.split()
value = parts[-1]
try:
conta_sum += float(value)
except ValueError:
print(f"Skipping line unable to convert to float: {line}")
# Append the data to the list
data.append({'Conta': conta, 'Modelo': modelo, 'Próximos 12 meses': conta_sum})
last_df = ultimo_ano[ultimo_ano['Instituição'] == selected_instituicao]
last_df.drop(['Instituição'], axis=1, inplace=True)
print(last_df)
last_sum = last_df.iloc[:,-1].sum()
total_sum = sum(float(row['Próximos 12 meses'].replace('R$ ', '').replace(',', '')) for row in data)
total_sum_prev = last_sum
saude_value = total_sum * 0.15
educacao_value = total_sum * 0.25
saude_value_prev = total_sum_prev * 0.15
educacao_value_prev = total_sum_prev * 0.25
data = {
"Últimos 12 meses": [saude_value_prev, educacao_value_prev], # Placeholder data for 'Last 12 Months'
"Próximos 12 meses": [saude_value, educacao_value] # Placeholder data for 'Next 12 Months'
}
# Define the index names
index_names = ["Saúde", "Educação"] # 'Health' and 'Education'
df = pd.DataFrame(data, index=index_names).reset_index().melt(id_vars='index', var_name='Period', value_name='Value')
# Create the bar chart
fig = px.bar(df, x='index', y='Value', color='Period', barmode='group')
col5.write(fig)
st.markdown("""
<b>Observação:</b> Previsões realizadas com dados extraídos do Relatório Resumido de Execução Orçamentária (RREO) até o 6º bimestre de 2023 no Sistema de Informações Contábeis e Fiscais do Setor Público Brasileiro (SICONFI).
[Link](https://siconfi.tesouro.gov.br/)
""", unsafe_allow_html=True)