# Imports import pandas as pd import streamlit as st import pybase64 as base64 # from modules.tables import RealEstateMaste from sqlalchemy import create_engine import plotly.express as px import io import urllib.request import bs4 as bs import scipy.stats as stats import plotly.figure_factory as ff import scipy.stats as stats import plotly.graph_objects as go import numpy as np from datetime import date from datetime import timedelta from Data.credentials import credentials_postgresql as credpost def formatnum(numero): ''' Esta función permite dar formato a los montos de saldo y valor cuota en las cartolas. ''' return '{:,.0f}'.format(numero).replace(",", "@").replace(".", ",").replace("@", ".") def get_UF(): link = "https://valoruf.cl/" req = urllib.request.Request(link) res = urllib.request.urlopen(req) resData = res.read() soup = bs.BeautifulSoup(resData) uf = soup.find(class_="vpr").contents[0] uf_value = uf.split(' ')[1] uf_value = uf_value.replace('.', '') uf_value = uf_value.replace(',', '.') return uf_value def button_style(): style_button = """ """ st.markdown(style_button, unsafe_allow_html=True) def plot_column(data, column, nom_var_inv, placeholder, sigmas=3, limite=None): data = data.rename(columns=nom_var_inv) column = nom_var_inv[column] limit = data[column].mean() + sigmas * data[column].std() data = data.loc[data[column] <= limit] if limite is not None: data = data.loc[data[column] <= limite] fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud', zoom=10, color=column, hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP'], color_continuous_scale=px.colors.diverging.RdYlBu, width=1000, height=800) placeholder.plotly_chart(fig, use_container_width=True) def plot_column2(data, column, nom_var_inv, placeholder, zoom, sigmas=10, limite=None): data = data.rename(columns=nom_var_inv) column = nom_var_inv[column] limit = data[column].mean() + sigmas * data[column].std() data = data.loc[data[column] <= limit] if limite is not None: data = data.loc[data[column] <= limite] fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud', zoom=zoom, color=column, hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP', 'Comuna'], color_continuous_scale=px.colors.diverging.RdYlBu, width=1000, height=800,size='Superficie',size_max=25) placeholder.plotly_chart(fig, use_container_width=True) def get_comuna(direction): values = direction.split(',') return values[-2] def unit_separator(string): values = string.split(" ") return values[0] def transform_df(scraping_df): # Extract numbers from string # Change datatype # Add 'Comuna' and 'Región' scraping_df[["Comuna"]] = scraping_df[["direccion"]].applymap(lambda x: get_comuna(x)) # Scrap UF value for date #scraping_df[["Valor UF"]] = get_UF() return scraping_df def get_table_excel_link(df, name): towrite = io.BytesIO() downloaded_file = df.to_excel(towrite, encoding='utf-8', index=False, header=True) towrite.seek(0) # reset pointer file_name = 'Data' + name + '.xlsx' style = 'style="color:black;text-decoration: none; font-size:18px;"' name_mark = "Descargar " + name + ".xlsx" b64 = base64.b64encode(towrite.read()).decode() # some strings linko= f'
' return linko @st.cache(allow_output_mutation=True) def query_inmob(): url = credpost["POSTGRESQL"] engine = create_engine(url, echo=False) semana_pasada = date.today() - timedelta(7) semana_pasada = semana_pasada.strftime("%Y-%m-%d") data = pd.read_sql_query("""select * from scraping_inmob where Fecha > '{Fecha} 00:00:00' ORDER BY Fecha desc""".format(Fecha=semana_pasada) , con=engine) return data def run_scrapping(): button_style() px.set_mapbox_access_token('pk.eyJ1IjoibW9rc2VuYmVyZyIsImEiOiJja3QwOTc3dHgyNzBhMnFsczJ2Y2w3bWJlIn0.m8c3duvR5hQVjbjEByorWQ') data = query_inmob() precio_uf = get_UF() data["valor_uf"] = data["valor_peso"]/float(precio_uf) data['predicted_venta_por_m2_UF']=data['predicted_venta_por_m2']/float(precio_uf) nom_var = {"Precio en UF": "valor_uf", "Precio en CLP": "valor_peso", "Superficie": "superficie", "Predicted venta por M2 en CLP": "predicted_venta_por_m2", "Predicted venta por M2 en UF": "predicted_venta_por_m2_UF", "Predicted arriendo por M2": "predicted_arriendo_por_m2", "Predicted yield anual": "predicted_yield_anual" } nom_var_inv = {v: k for k, v in nom_var.items()} var = list(nom_var.keys()) with st.form(key='my_form'): col1, col2, col3, col4 = st.columns((9, 1, 9, 1)) cols = st.columns((9, 1, 5, 5)) pre_selection = col1.selectbox("Variable", var) selection = nom_var[pre_selection] mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"]) if mercado != "Todos": mercado = mercado.lower() data_mercado = data[data["mercado"] == mercado] else: data_mercado = data cifras = len(str(int(max(data_mercado[selection])))) tipo_prop = cols[0].selectbox("Tipo de propiedad", ["Todos", "Casa", "Departamento"]) if tipo_prop != "Todos": tipo_prop2 = tipo_prop.lower() data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2] if cifras > 3: corte = 10 ** (cifras - 3) cota_final = (int(max(data_mercado[selection].dropna()))//corte+1) *corte cota_inf = cols[2].number_input(label="Valor Mínimo", min_value=0, value=0, max_value=cota_final ) cota_sup = cols[3].number_input(label="Valor Máximo", min_value=0, value=cota_final, max_value=cota_final ) else: cota_final = max(data_mercado[selection].dropna())+1 cota_inf = cols[2].number_input(label="Valor Mínimo", min_value=0.0, value=0.0, step=0.1, max_value=cota_final ) cota_sup = cols[3].number_input(label="Valor Máximo", min_value=0.0, step=0.1, value=cota_final, max_value=cota_final ) data_final = data_mercado[data_mercado[selection] < cota_sup] data_final = data_final[data_final[selection] > cota_inf] submit_button = st.form_submit_button(label='Actualizar') col1, col2 = st.columns((0.65, 1)) placeholder = st.empty() placeholder2 = st.empty() placeholder2.markdown(get_table_excel_link(data, "Data Completa"), unsafe_allow_html=True) plot_column(data, selection, nom_var_inv, placeholder) if submit_button: plot_column(data_final, selection, nom_var_inv, placeholder) st.markdown(get_table_excel_link(data_final, " Data filtrada"), unsafe_allow_html=True) def scraping_localizado(): button_style() data = query_inmob() data = transform_df(data) precio_uf = get_UF() data["valor_uf"] = data["valor_peso"]/float(precio_uf) data['predicted_venta_por_m2_UF'] = data['predicted_venta_por_m2']/float(precio_uf) nom_var = {"Precio en UF": "valor_uf", "Precio en CLP": "valor_peso", "Superficie": "superficie", "Predicted venta por M2 en CLP": "predicted_venta_por_m2", "Predicted venta por M2 en UF": "predicted_venta_por_m2_UF", "Predicted arriendo por M2": "predicted_arriendo_por_m2", "Predicted yield anual": "predicted_yield_anual" } nom_var_inv = {v : k for k, v in nom_var.items()} var = list(nom_var.keys()) with st.form(key='my_form'): col1, col2, col3, col4 = st.columns((9, 1, 9, 1)) cols = st.columns((9, 1, 5, 5)) pre_selection = col1.selectbox("Variable", var) selection = nom_var[pre_selection] mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"]) banos = col1.slider("Baños", value=(1, 5), min_value=0, max_value=10) dormitorios = col3.slider("Dormitorios", value=(1, 5), min_value=0, max_value=10) com = ["Todas"] + sorted(list(dict.fromkeys(data["Comuna"]))) comuna = col1.selectbox("Comuna", com) zoom = col3.number_input("Zoom", value=12) if mercado != "Todos": mercado = mercado.lower() data_mercado = data[data["mercado"] == mercado] else: data_mercado = data if comuna != "Todas": data_mercado = data_mercado[data_mercado["Comuna"] == comuna] if len(data_mercado) > 0: cifras = len(str(int(max(data_mercado[selection].dropna())))) else: cifras = 0 tipo_prop = cols[0].selectbox("Tipo de Propiedad", ["Todos", "Casa", "Departamento"]) if tipo_prop != "Todos": tipo_prop2 = tipo_prop.lower() data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2] if cifras > 3: corte = 10 ** (cifras - 3) cota_final = (int(max(data_mercado[selection].dropna()))//corte+1)*corte cota_inf = cols[2].number_input(label="Valor Mínimo", min_value=0, value=0, max_value=cota_final ) cota_sup = cols[3].number_input(label="Valor Máximo", min_value=0, value=cota_final, max_value=cota_final ) else: cota_final = max(data_mercado[selection].dropna())+1 cota_inf = cols[2].number_input(label="Valor Mínimo", min_value=0.0, value=0.0, step=0.1, max_value=cota_final ) cota_sup = cols[3].number_input(label="Valor Máximo", min_value=0.0, step=0.1, value=cota_final, max_value=cota_final ) data_final = data_mercado[data_mercado[selection] < cota_sup] data_final = data_final[data_final[selection] > cota_inf] data_final = data_final[data_final["banos"] > banos[0]] data_final = data_final[data_final["banos"] < banos[1]] data_final = data_final[data_final["dormitorios"] > dormitorios[0]] data_final = data_final[data_final["dormitorios"] < dormitorios[1]] st.write('Stats principales del análisis') st.write('Valor promedio en UF por metro cuadrado: {:.2f} UF'.format((data_final['valor_uf']/data_final['superficie']).mean())) st.write('Superficie promedio: {:.0f} m2'.format(data_final['superficie'].mean())) st.write('N° de dormitorios promedio: {:.2f}'.format(data_final['dormitorios'].mean())) st.write('N° de baños promedio: {:.2f}'.format(data_final['banos'].mean())) submit_button = st.form_submit_button(label='Actualizar') col1, col2 = st.columns((0.65, 1)) placeholder = st.empty() placeholder2 = st.empty() placeholder3 = st.empty() placeholder2.markdown(get_table_excel_link(data, "Data Completa"), unsafe_allow_html=True) plot_column2(data, selection, nom_var_inv, placeholder, zoom) dist_gamma = stats.gamma.rvs(1, scale=250000, size=2965) if submit_button: plot_column2(data_final, selection, nom_var_inv, placeholder, zoom) st.markdown(get_table_excel_link(data_final, " Data filtrada"), unsafe_allow_html=True) data_hist = data_final[data_final["mercado"] == "venta"] data_hist = data_hist[data_hist["valor_peso"] > 40000000] data_hist = data_hist[data_hist["valor_peso"] < 1000000000]["valor_peso"] * 0.2 fig2 = px.histogram((data_hist), x='valor_peso') #placeholder3.plotly_chart(fig2) # streamlit run analisis_inmob.py # scraping_localizado()