import base64
import os
import sys
from io import BytesIO

import folium
import googlemaps
import numpy as np
import pandas as pd
import pydeck as pdk
import streamlit as st
import streamlit.components.v1 as components
from branca.colormap import LinearColormap
from folium.plugins import MarkerCluster
from geopy.distance import geodesic
from geopy.exc import GeocoderTimedOut
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import NearestNeighbors
from streamlit_folium import st_folium
from ydata_profiling import ProfileReport
# Use the wide page layout.
st.set_page_config(layout="wide")

# Log the interpreter version to stdout.
print("Python version")
print(sys.version)
print("Version info.")
print(sys.version_info)

image1 = 'images/avalia-removebg-preview.png'
css_file = "style.css"

# Read the stylesheet and inject it into the page. The stylesheet text was
# previously read but never used (the markdown payload was an empty string),
# so no custom CSS was ever applied.
with open(css_file, "r") as css:
    css_style = css.read()
st.markdown(f'<style>{css_style}</style>', unsafe_allow_html=True)
# Draw one value-coloured circle per data point on a folium map.
def add_heatmap_layer(map_obj, data, column_name, colormap_name, radius=15):
    """Attach a named layer of colour-coded circle markers to ``map_obj``.

    Rows of ``data`` with a missing ``latitude``, ``longitude`` or
    ``column_name`` value are skipped. Every remaining row becomes a
    borderless, half-transparent circle whose fill colour comes from a
    blue-white-red scale spanning the column's minimum and maximum.

    Args:
        map_obj: Object the finished layer is added to.
        data: DataFrame holding ``latitude``, ``longitude`` and
            ``column_name`` columns.
        column_name: Column that drives the fill colour and popup text.
        colormap_name: Accepted for interface compatibility; not used here.
        radius: Radius passed to each ``folium.CircleMarker``.
    """
    points = data[['latitude', 'longitude', column_name]].dropna()
    values = points[column_name]
    color_scale = LinearColormap(
        colors=['blue', 'white', 'red'],
        vmin=values.min(),
        vmax=values.max(),
    )
    layer = folium.FeatureGroup(name=f'Variável - {column_name}')

    for _, point in points.iterrows():
        value = point[column_name]
        marker = folium.CircleMarker(
            location=[point['latitude'], point['longitude']],
            radius=radius,
            fill=True,
            fill_color=color_scale(value),
            fill_opacity=0.5,
            weight=0,  # no outline stroke
            popup=f"{column_name}: {value:.2f}",
        )
        marker.add_to(layer)

    layer.add_to(map_obj)
# Geodesic distance helper used for the radius filter.
def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the geodesic distance, in meters, between two (lat, lon) points."""
    return geodesic((lat1, lon1), (lat2, lon2)).meters
def knn_predict(df, target_column, features_columns, k=5):
    """Predict ``target_column`` for every row of ``df`` with a KNN regressor.

    The regressor is fitted on ``df`` itself and then asked to predict the
    very same rows.

    Args:
        df: DataFrame containing the feature and target columns.
        target_column: Name of the column to predict.
        features_columns: List of column names used as features.
        k: Number of neighbours for the regressor.

    Returns:
        Array with one prediction per row of ``df``; an array of zeros of
        that length when ``df`` has fewer than ``k`` rows.
    """
    features = df[features_columns]
    targets = df[target_column]
    n_rows = len(features)

    # Not enough samples to form k neighbours: signal "no prediction" with zeros.
    if n_rows < k:
        return np.zeros(n_rows)

    model = KNeighborsRegressor(n_neighbors=k)
    model.fit(features, targets)
    return model.predict(df[features_columns])
# Load the listings from the spreadsheet shipped next to the script.
data = pd.read_excel('data_nexus.xlsx')

# Defaults defined up front so later code never hits a NameError.
custom_address_initial = 'Centro, Lajeado - RS, Brazil'  # initial custom address
# Default search centre: fixed coordinates (the dataset medians are not used).
custom_lat, custom_lon = -29.45880114339262, -51.97011580843118
radius_in_meters = 150000
radius_visible = True
zoom_level = 13  # fixed initial zoom level for the map
filtered_data = data  # start from the entire dataset
# Sidebar controls: listing purpose and type, search address, radius and
# area ranges. The selections are then applied to the dataset.
with st.sidebar:
    st.markdown(f'', unsafe_allow_html=True)
    st.image(image1, width=200)

    # Dropdown for "Fonte"; preselect 'Venda' when present, otherwise fall
    # back to the first option instead of raising ValueError.
    fonte_options = data['Fonte'].unique().tolist()
    selected_fonte = st.selectbox(
        'Finalidade',
        fonte_options,
        index=fonte_options.index('Venda') if 'Venda' in fonte_options else 0,
    )
    data = data[data['Fonte'] == selected_fonte]

    # Dropdown for "Tipo"; same fallback logic for 'Apartamento'.
    tipo_options = data['Tipo'].unique().tolist()
    selected_tipo = st.selectbox(
        'Tipo de imóvel',
        tipo_options,
        index=tipo_options.index('Apartamento') if 'Apartamento' in tipo_options else 0,
    )
    data_tipo = data[data['Tipo'] == selected_tipo]

    custom_address = st.text_input('Informe o endereço', custom_address_initial)
    radius_visible = True  # show the radius input for custom coordinates

    # SECURITY: an API key is committed in source. It can now be overridden
    # through the GOOGLE_MAPS_API_KEY environment variable; the literal is
    # kept only as a fallback so existing deployments keep working. It should
    # be rotated and removed from the repository.
    gmaps_api_key = os.environ.get(
        'GOOGLE_MAPS_API_KEY',
        'AIzaSyDoJ6C7NE2CHqFcaHTnhreOfgJeTk4uSH0',
    )
    gmaps = googlemaps.Client(key=gmaps_api_key)
    try:
        # Ensure the address ends with " - RS, Brazil" before geocoding.
        custom_address = custom_address.strip()
        if not custom_address.endswith(" - RS, Brazil"):
            custom_address += " - RS, Brazil"
        location = gmaps.geocode(custom_address)[0]['geometry']['location']
        custom_lat, custom_lon = location['lat'], location['lng']
    except (
        IndexError,  # geocoder returned no results
        GeocoderTimedOut,  # kept for backward compatibility
        googlemaps.exceptions.ApiError,
        googlemaps.exceptions.TransportError,
        googlemaps.exceptions.Timeout,
    ):
        # On failure the previously defined custom_lat/custom_lon stay in use.
        st.error("Erro: Não foi possível geocodificar o endereço fornecido. Por favor, verifique e tente novamente.")

    # Conditionally render the radius input.
    if radius_visible:
        radius_in_meters = st.number_input('Selecione raio (em metros)', min_value=0, max_value=100000, value=2000)

    # Bounds of the area columns for the selected property type.
    atotal_lo = float(data_tipo['Atotal'].min())
    atotal_hi = float(data_tipo['Atotal'].max())
    apriv_lo = float(data_tipo['Apriv'].min())
    apriv_hi = float(data_tipo['Apriv'].max())

    # Two side-by-side inputs for the "Atotal" range.
    col1, col2 = st.columns(2)
    with col1:
        atotal_min = st.number_input('Área Total mínima',
                                     min_value=atotal_lo,
                                     max_value=atotal_hi,
                                     value=atotal_lo,
                                     step=0.1)
    with col2:
        atotal_max = st.number_input('Área Total máxima',
                                     min_value=atotal_lo,
                                     max_value=atotal_hi,
                                     value=atotal_hi,
                                     step=0.1)

    # Two side-by-side inputs for the "Apriv" range.
    col3, col4 = st.columns(2)
    with col3:
        apriv_min = st.number_input('Área Privativa mínima',
                                    min_value=apriv_lo,
                                    max_value=apriv_hi,
                                    value=apriv_lo,
                                    step=0.1)
    with col4:
        apriv_max = st.number_input('Área Privativa máxima',
                                    min_value=apriv_lo,
                                    max_value=apriv_hi,
                                    value=apriv_hi,
                                    step=0.1)

# Keep only listings whose areas fall inside the selected ranges.
data_tipo = data_tipo[(data_tipo['Atotal'].between(atotal_min, atotal_max)) &
                      (data_tipo['Apriv'].between(apriv_min, apriv_max))]

# Drop rows with NaN values BEFORE measuring distances: a row with missing
# coordinates would otherwise be handed to the geodesic calculation. The
# surviving rows are the same as with the previous "filter, then dropna" order.
candidates = data_tipo.dropna()
if candidates.empty:
    filtered_data = candidates
else:
    distances_m = candidates.apply(
        lambda x: calculate_distance(x['latitude'], x['longitude'], custom_lat, custom_lon),
        axis=1,
    )
    filtered_data = candidates[distances_m <= radius_in_meters]
# Markdown hook intended for map-container CSS (the payload is currently empty).
st.markdown(f"""""", unsafe_allow_html=True)

# filtered_data is the result of filtering another frame; take an explicit
# copy so the column assignments below act on an independent object and do
# not trigger pandas' SettingWithCopyWarning.
filtered_data = filtered_data.copy()

# Area feature used for prediction: "Apriv" when non-zero, otherwise "Atotal".
filtered_data['area_feature'] = np.where(filtered_data['Apriv'] != 0, filtered_data['Apriv'], filtered_data['Atotal'])

# Target value: "Vunit_priv" when non-zero, otherwise "Vunit_total".
filtered_data['target_column'] = np.where(filtered_data['Vunit_priv'] != 0, filtered_data['Vunit_priv'], filtered_data['Vunit_total'])

# Apply KNN on location + area and store the predictions alongside the data.
predicted_target = knn_predict(filtered_data, 'target_column', ['latitude', 'longitude', 'area_feature'])
filtered_data['Predicted_target'] = predicted_target
# Create the four content tabs.
tab1, tab2, tab3, tab4 = st.tabs(["Mapa", "Planilha", "Análise dos Dados", "Regressão Linear"])

with tab1:
    st.markdown(f'', unsafe_allow_html=True)

    # Centre the view on the mean position of the filtered listings. With no
    # listings the mean is NaN, so fall back to the search coordinates.
    if filtered_data.empty:
        center_lat, center_lon = custom_lat, custom_lon
    else:
        center_lat = filtered_data['latitude'].mean()
        center_lon = filtered_data['longitude'].mean()
    view_state = pdk.ViewState(latitude=center_lat, longitude=center_lon, zoom=zoom_level)

    # One dot per listing.
    layer = pdk.Layer(
        "ScatterplotLayer",
        filtered_data,
        get_position=["longitude", "latitude"],
        get_color="[237, 181, 0, 160]",  # RGBA; the last number controls opacity
        get_radius=100,  # dot size
    )

    # Build the PyDeck map from the layer and view state, then display it.
    deck_map = pdk.Deck(layers=[layer], initial_view_state=view_state, map_style="mapbox://styles/mapbox/light-v9")
    st.pydeck_chart(deck_map)
with tab2:
    st.markdown(f'', unsafe_allow_html=True)
    st.write("Dados:", filtered_data)

    if st.button('Baixar planilha'):
        st.write("Preparando...")

        # Serialise the filtered listings to an in-memory Excel workbook.
        output_df = filtered_data
        excel_buffer = BytesIO()
        with pd.ExcelWriter(excel_buffer, engine="xlsxwriter") as writer:
            output_df.to_excel(writer, index=False, sheet_name="Sheet1")

        # Embed the workbook in a data-URI anchor. Previously the encoded
        # bytes were computed but never used, so only plain text (no link)
        # was rendered.
        # NOTE(review): the download file name "planilha.xlsx" is a chosen
        # default; adjust if a specific name is expected.
        b64 = base64.b64encode(excel_buffer.getvalue()).decode()
        href = (
            '<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,'
            f'{b64}" download="planilha.xlsx">Clique aqui para baixar a planilha</a>'
        )

        # Render the link through a placeholder element.
        download_placeholder = st.empty()
        download_placeholder.markdown(href, unsafe_allow_html=True)
with tab3:
    st.markdown(f'', unsafe_allow_html=True)

    # Number of neighbours used for the analysis.
    k_neighbors = 5

    # NOTE: this definition rebinds the module-level name `knn_predict`
    # (an earlier function with the same name returns predictions only).
    # The name is kept so existing references continue to resolve.
    def knn_predict(data, target_column, feature_columns, k=5):
        """Predict each row's target as the mean target of its k nearest rows.

        Args:
            data: DataFrame with the feature and target columns.
            target_column: Column whose neighbour mean is the prediction.
            feature_columns: Columns used to measure neighbour distance.
            k: Number of neighbours; ``data`` must have at least ``k`` rows.

        Returns:
            Tuple ``(predictions, distances, indices)`` where ``distances``
            and ``indices`` come from ``NearestNeighbors.kneighbors``.
        """
        knn = NearestNeighbors(n_neighbors=k)
        knn.fit(data[feature_columns])
        distances, indices = knn.kneighbors(data[feature_columns])
        # Mean of the neighbours' targets is the predicted value.
        targets = data[target_column]
        predicted_target = np.array([targets.iloc[row].mean() for row in indices])
        return predicted_target, distances, indices

    # Querying k neighbours needs at least k rows; check first so the
    # warning below is shown instead of an exception.
    has_enough_data = len(filtered_data) >= k_neighbors
    if has_enough_data:
        predicted_target, distances, indices = knn_predict(
            filtered_data, 'target_column', ['latitude', 'longitude'], k=k_neighbors
        )
        filtered_data['Predicted_target'] = predicted_target

    # Only draw the map when there are non-zero predictions to show.
    if has_enough_data and not np.all(predicted_target == 0):
        # NOTE(review): the "search" point is the first filtered row, not the
        # address typed in the sidebar — confirm this is intended.
        coord_pesquisa = [filtered_data.iloc[0]['latitude'], filtered_data.iloc[0]['longitude']]

        # Map centred on the search point with a high zoom level.
        mapa = folium.Map(location=coord_pesquisa, zoom_start=15)
        marker_cluster = MarkerCluster().add_to(mapa)

        # Mark the nearest neighbours of the first row and connect each one
        # to the search point.
        for neighbor_idx in indices[0]:
            neighbor_row = filtered_data.iloc[neighbor_idx]
            coord_vizinho = [neighbor_row['latitude'], neighbor_row['longitude']]
            folium.Marker(coord_vizinho, popup=f"Predicted: {neighbor_row['Predicted_target']:.2f}").add_to(marker_cluster)
            folium.PolyLine([coord_pesquisa, coord_vizinho], color='blue', weight=2).add_to(mapa)

        # Display the map in Streamlit.
        st.markdown("## **Mapa dos 5 Vizinhos mais Próximos (KNN)**")
        st.write("O mapa mostra os 5 pontos de dados mais próximos ao ponto de pesquisa.")
        components.html(mapa._repr_html_(), height=500)
    else:
        # Previously referenced an undefined name (k_threshold) -> NameError.
        st.warning(f"**Dados insuficientes para inferência do valor. Mínimo necessário:** {k_neighbors}")
# Fourth tab (labelled "Regressão Linear"): embed the externally hosted page.
with tab4:
    st.markdown(f'', unsafe_allow_html=True)
    components.iframe(
        src="https://davidsb-avalia-se-rl-tabs.hf.space",
        height=600,
        scrolling=True,
    )