Spaces:

A-New-Day-001
/

VN-Housing-App

Sleeping

File size: 8,524 Bytes

5426d51

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter  # Import ScalarFormatter
import plotly.express as px
import numpy as np

# Turn off Streamlit's 'deprecation.showPyplotGlobalUse' option
# (presumably the warning shown when st.pyplot() is called without an
# explicit figure - TODO confirm against the installed Streamlit version).
st.set_option('deprecation.showPyplotGlobalUse', False)

# Paths of the raw listings file and of the enriched copy written later.
# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of this module; consider renaming together with every use site.
input = 'data_3/data_test.csv'
output = 'data_3/data_test_city.csv'
# Load the listings (one row per address) into a DataFrame.
# 'UTF-8-SIG' decodes UTF-8 and strips a leading byte-order mark if present.
addresses_df = pd.read_csv(input, encoding='UTF-8-SIG')
# print(addresses_df.head())

# Load the City/District lookup table used by find_city_district().
cities_districts_df = pd.read_csv('data_3/Cities.csv', encoding='UTF-8-SIG')

# Function to find city and district for each address
def find_city_district(location, cities_districts=None):
    """Return the first (City, District) pair whose names both occur in *location*.

    Matching is a plain, case-sensitive substring test, evaluated in the
    row order of the lookup table; the first row that matches wins.

    Args:
        location: Free-text address. Converted with ``str()`` so that
            missing or non-string cells do not raise.
        cities_districts: DataFrame with "City" and "District" columns.
            Defaults to the module-level ``cities_districts_df``.

    Returns:
        A ``(city, district)`` tuple taken from the lookup table, or
        ``(None, None)`` when no row matches.
    """
    if cities_districts is None:
        cities_districts = cities_districts_df

    location = str(location)  # Ensure location is a string

    # Walk the two columns directly: iterrows() builds a Series per row,
    # which is needlessly slow when this runs once per address.
    for city, district in zip(cities_districts["City"], cities_districts["District"]):
        # A missing cell would stringify to "nan"/"None" and could then
        # match unrelated addresses containing that text.
        if pd.isna(city) or pd.isna(district):
            continue
        if str(city) in location and str(district) in location:
            return city, district
    return None, None

# Annotate every address with its matching (City, District) pair.
# The two columns are built from the list of result tuples in one step;
# expanding each tuple with `.apply(pd.Series)` creates a Series per row
# and yields no columns at all when the input frame is empty.
city_district_pairs = addresses_df["Location"].map(find_city_district).tolist()
addresses_df[["City", "District"]] = pd.DataFrame(
    city_district_pairs,
    columns=["City", "District"],
    index=addresses_df.index,
)

# Save the enriched DataFrame to a CSV file
addresses_df.to_csv(output, index=False)

# Reload the file that was just written (same path as `output`) so the
# dashboard works on exactly what is stored on disk.
data = pd.read_csv(output)
# DataFrame.info() prints its report itself and returns None, so it must
# not be wrapped in print().
data.info()

# Keep only listings that have both a price and a resolved city.
df = data.dropna(subset=['Price', 'City'])
# Drop listings whose price is the literal 'Thỏa thuận'; .copy() gives an
# independent frame so the column assignment below does not write into a
# slice of `data`.
df = df[df['Price'] != 'Thỏa thuận'].copy()
# Strip thousands separators and convert to float. astype(str) keeps this
# working when pandas already parsed the column as numeric; anything that
# still cannot be parsed becomes NaN.
df['Price'] = pd.to_numeric(
    df['Price'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
).astype(float)

df.info()

def plot_minmax_prices(selected_category):
    """Show a Streamlit table of the lowest and highest price per city.

    Only rows of the module-level ``df`` whose 'Category' equals
    *selected_category* are considered.

    Args:
        selected_category: Value compared against the 'Category' column.
    """
    # Restrict the listings to the requested category.
    category_mask = df['Category'] == selected_category
    category_rows = df[category_mask]

    # Aggregate min and max price for every (City, Category) pair.
    price_extremes = category_rows.pivot_table(
        index=['City', 'Category'],
        values='Price',
        aggfunc=['min', 'max'],
    )
    price_extremes = price_extremes.reset_index()
    print(price_extremes.head())

    # Replace the two-level column labels produced by the pivot with flat names.
    price_extremes.columns = ['City', 'Category', 'Min Price', 'Max Price']

    # Render the heading followed by the aggregated table.
    st.subheader('Tổng hợp Giá bất động sản cao nhất và thấp nhất ở các tỉnh thành')
    st.dataframe(price_extremes)

def _show_and_close(fig):
    """Render a Matplotlib figure in Streamlit, then close it so figures do not pile up across reruns."""
    st.pyplot(fig)
    plt.close(fig)


def _show_pie(names, values):
    """Render a Plotly pie chart with the given slice names and values."""
    st.plotly_chart(px.pie(names=names, values=values))


def plot_by_category(selected_category):
    """Render the per-city dashboard for one property category.

    Adds a city selector to the Streamlit sidebar, filters the
    module-level ``df`` to that city and *selected_category*, and draws
    these charts in order: listing count by district, mean price per
    area by district, estate-type share, certification-status share,
    direction counts (only if any direction is recorded), parking
    availability share, parking-slot counts (only if any listing has a
    parking value) and seller-type share.

    Args:
        selected_category: Value compared against the 'Category' column.

    Returns:
        None. When the selection has no rows, a warning is shown and no
        chart is drawn.
    """
    # Get the unique city names and sort them alphabetically
    unique_cities = sorted(df['City'].unique())
    selected_city = st.sidebar.selectbox('Chọn thành phố hoặc tỉnh', unique_cities)

    # .copy() gives an independent frame, so adding the derived column
    # below does not write into a slice of the shared df.
    selection_mask = (df['City'] == selected_city) & (df['Category'] == selected_category)
    filtered_data = df[selection_mask].copy()

    if filtered_data.empty:
        print("filtered_data is empty")
        st.warning(f"No data available for {selected_category} in {selected_city}.")
        return

    # Number of listings per district
    st.subheader(f'Số lượng bất động sản {selected_category} ở {selected_city}')
    fig = plt.figure(figsize=(6, 3))
    sns.countplot(data=filtered_data, y='District')
    plt.xticks(rotation=25)  # Rotate x-axis labels for better readability
    plt.xlabel('Số lượng')
    plt.ylabel('Quận/Huyện')
    _show_and_close(fig)

    # Mean price per unit of area, by district
    st.subheader(f'Giá bất động sản {selected_category} theo M² ở {selected_city}')
    price_per_area = filtered_data['Price'] / filtered_data['Area']
    # A zero area yields +/-inf, which would swamp the district mean;
    # treat those listings as missing instead.
    filtered_data['Price per Area'] = price_per_area.replace([np.inf, -np.inf], np.nan)
    fig = plt.figure(figsize=(6, 3))
    sns.barplot(data=filtered_data, y='District', x='Price per Area')
    plt.xticks(rotation=45)
    plt.xlabel('Giá trung bình')
    plt.ylabel('Quận/Huyện')
    # Show the full number of price instead of scientific notation
    plt.ticklabel_format(style='plain', axis='x')
    _show_and_close(fig)

    # Share of each estate type in the selection
    st.subheader(f'Loại bất động sản ở {selected_city}')
    estate_type_counts = filtered_data['Estate type'].value_counts()
    _show_pie(names=estate_type_counts.index, values=estate_type_counts.values)

    # Share of each certification status. Single-space and missing cells
    # are both reported as "Không xác định". Because every row gets a
    # label, this chart is always drawn for a non-empty selection.
    certification = (
        filtered_data['Certification status']
        .replace(' ', pd.NA)
        .fillna("Không xác định")
    )
    st.subheader(f'Tình trạng pháp lý của bất động sản ở {selected_city}')
    certification_counts = certification.value_counts()
    _show_pie(names=certification_counts.index, values=certification_counts.values)

    # Listing count per direction, skipped when no listing records one
    if filtered_data['Direction'].notna().any():
        st.subheader(f'Hướng bất động sản {selected_category} ở {selected_city}')
        fig = plt.figure(figsize=(6, 3))
        sns.set(style='whitegrid')
        sns.countplot(data=filtered_data, x="Direction", palette="Spectral")
        plt.xlabel('Hướng')
        plt.ylabel('Số lượng')
        _show_and_close(fig)

    # Share of listings with / without a parking-slot value.
    # notna() works for any column dtype (np.isnan raises TypeError on
    # object columns); a single-space cell is counted as "no value",
    # matching the filter used for the count plot below.
    st.subheader(f'Tỷ lệ bất động sản có chỗ đậu xe ở {selected_city}')
    parking = filtered_data['Parking slot']
    has_parking = parking.notna() & (parking != ' ')
    parking_slot_count = int(has_parking.sum())
    non_parking_slot_count = len(filtered_data) - parking_slot_count
    _show_pie(
        names=['Có chỗ đậu xe', 'Không có chỗ đậu xe'],
        values=[parking_slot_count, non_parking_slot_count],
    )

    # Distribution of the number of parking slots per listing
    if parking_slot_count:
        st.subheader(f'Số lượng chỗ đậu xe ở {selected_city}')
        fig = plt.figure(figsize=(6, 3))
        sns.set(style="whitegrid")
        sns.countplot(data=filtered_data[has_parking], x="Parking slot", palette="Spectral")
        plt.xlabel('Số lượng chỗ đậu xe/bất động sản')
        plt.ylabel('Số lượng')
        # Pass the figure explicitly rather than relying on pyplot's
        # global current figure.
        _show_and_close(fig)

    # Share of private sellers versus agencies/brokers
    st.subheader(f'Tỷ lệ người bán ở {selected_city}')
    seller_type = filtered_data['Seller type']
    personal_count = int((seller_type == 'Cá Nhân - Chính Chủ').sum())
    non_personal_count = int((seller_type == 'Công Ty Nhà Đất - Môi Giới BĐS').sum())
    _show_pie(
        names=['Cá Nhân - Chính Chủ', 'Công Ty Nhà Đất - Môi Giới BĐS'],
        values=[personal_count, non_personal_count],
    )