# A-New-Day-001's picture
# Upload 24 files
# 5426d51
# raw
# history blame
# No virus
# 8.52 kB
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter # Import ScalarFormatter
import plotly.express as px
import numpy as np
# Turn off Streamlit's `deprecation.showPyplotGlobalUse` notice.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Raw listings file and the destination for the copy enriched with
# City/District columns. `input_path` replaces the former name `input`, which
# shadowed the builtin; `output` keeps its name because the save step further
# down refers to it.
input_path = 'data_3/data_test.csv'
output = 'data_3/data_test_city.csv'

# Listings to enrich; only the "Location" column is used by the matching step.
addresses_df = pd.read_csv(input_path, encoding='UTF-8-SIG')

# Reference table providing the "City" and "District" names to search for.
cities_districts_df = pd.read_csv('data_3/Cities.csv', encoding='UTF-8-SIG')
# Function to find city and district for each address
def find_city_district(location, cities_districts=None):
    """Return the (City, District) pair whose names both occur in *location*.

    Matching is plain substring containment. When several reference rows
    match, the one with the longest combined name wins, so a district such as
    "Quận 10" is not mistaken for its prefix "Quận 1". Ties keep the row that
    appears first in the reference table.

    Args:
        location: Address text; any value is accepted and converted with
            ``str`` before matching.
        cities_districts: Optional DataFrame with "City" and "District"
            columns. Defaults to the module-level ``cities_districts_df``.

    Returns:
        A ``(city, district)`` tuple taken from the reference table, or
        ``(None, None)`` when no row matches.
    """
    if cities_districts is None:
        cities_districts = cities_districts_df

    location = str(location)  # Ensure location is a string
    best_match = (None, None)
    best_length = -1

    # Iterating the two columns directly avoids building a Series per row,
    # which is what DataFrame.iterrows() does.
    for city, district in zip(cities_districts["City"], cities_districts["District"]):
        city_text = str(city)
        district_text = str(district)
        if city_text in location and district_text in location:
            match_length = len(city_text) + len(district_text)
            # Strict comparison so that equal-length matches keep table order.
            if match_length > best_length:
                best_match = (city, district)
                best_length = match_length

    return best_match
# Label every listing with the (City, District) pair recognised in its address.
# The tuples are expanded through a single DataFrame constructor instead of
# `.apply(pd.Series)`, which builds one Series per row and may leave no columns
# to assign when the input is empty.
matches = addresses_df["Location"].apply(find_city_district)
addresses_df[["City", "District"]] = pd.DataFrame(
    matches.tolist(), index=addresses_df.index, columns=["City", "District"]
)

# Persist the enriched listings, then read them back so that unmatched rows
# (None) come back as NaN and column dtypes are inferred afresh.
addresses_df.to_csv(output, index=False)
data = pd.read_csv(output)
# DataFrame.info() prints its own report and returns None, so it is called
# directly rather than wrapped in print().
data.info()

# Keep only listings that have both a price and a recognised city.
df = data.dropna(subset=['Price', 'City'])
# Drop listings priced as 'Thỏa thuận' (Vietnamese for "negotiable"); the copy
# makes `df` an independent frame so the column assignment below is safe.
df = df[df['Price'] != 'Thỏa thuận'].copy()
# Strip thousands separators and convert to float. Values that still cannot be
# parsed become NaN. `astype(str)` keeps this working even if pandas inferred a
# numeric dtype for the column.
df['Price'] = pd.to_numeric(
    df['Price'].astype(str).str.replace(',', '', regex=False), errors='coerce'
).astype(float)
df.info()
def plot_minmax_prices(selected_category):
    """Show a table with the lowest and highest price per city for a category.

    Reads the module-level ``df`` and writes a subheader plus a table to the
    Streamlit page. If the category has no listings, a warning is shown
    instead.

    Args:
        selected_category: Value of the "Category" column to keep.
    """
    # Filter the data based on the selected category
    filtered_data = df[df['Category'] == selected_category]

    if filtered_data.empty:
        st.warning(f"No data available for {selected_category}.")
        return

    # Named aggregation labels the result columns directly, instead of
    # renaming a MultiIndex by position. Rows without a price are dropped
    # first so that a city with no usable price does not appear as an
    # all-NaN row.
    price_summary = (
        filtered_data.dropna(subset=['Price'])
        .groupby(['City', 'Category'], as_index=False)
        .agg(**{'Min Price': ('Price', 'min'), 'Max Price': ('Price', 'max')})
    )

    st.subheader('Tổng hợp Giá bất động sản cao nhất và thấp nhất ở các tỉnh thành')
    st.dataframe(price_summary)
def _show_and_close(fig):
    """Render a Matplotlib figure in Streamlit, then close it."""
    st.pyplot(fig)
    # pyplot keeps a reference to every open figure; closing it here stops
    # figures from accumulating each time the page is rendered.
    plt.close(fig)


def plot_by_category(selected_category):
    """Render the per-city charts for one listing category.

    Adds a city select box to the sidebar, filters the module-level ``df`` to
    the chosen city and *selected_category*, and draws: listings per district,
    price per unit of area per district, estate-type share, certification
    status share, direction counts, parking availability (plus slot counts
    when any exist) and seller-type share. If nothing matches, a warning is
    shown and no chart is drawn.

    Args:
        selected_category: Value of the "Category" column to keep.
    """
    # Get the unique city names and sort them alphabetically
    unique_cities = sorted(df['City'].unique())
    selected_city = st.sidebar.selectbox('Chọn thành phố hoặc tỉnh', unique_cities)

    # Copy the selection: derived columns are written to it below, and writing
    # to a filtered view of `df` triggers pandas' chained-assignment warnings.
    in_selection = (df['City'] == selected_city) & (df['Category'] == selected_category)
    filtered_data = df[in_selection].copy()

    if filtered_data.empty:
        print("filtered_data is empty")
        st.warning(f"No data available for {selected_category} in {selected_city}.")
        return

    # --- Number of listings per district ---
    st.subheader(f'Số lượng bất động sản {selected_category} ở {selected_city}')
    fig = plt.figure(figsize=(6, 3))
    sns.countplot(data=filtered_data, y='District')
    plt.xticks(rotation=25)  # Rotate x-axis labels for better readability
    plt.xlabel('Số lượng')
    plt.ylabel('Quận/Huyện')
    _show_and_close(fig)

    # --- Price per unit of area, per district ---
    st.subheader(f'Giá bất động sản {selected_category} theo M² ở {selected_city}')
    # A zero area would produce an infinite ratio and distort the bar chart,
    # so it is treated as missing.
    filtered_data['Price per Area'] = (
        filtered_data['Price'] / filtered_data['Area'].replace(0, np.nan)
    )
    fig = plt.figure(figsize=(6, 3))
    sns.barplot(data=filtered_data, y='District', x='Price per Area')
    plt.xticks(rotation=45)
    plt.xlabel('Giá trung bình')
    plt.ylabel('Quận/Huyện')
    # Show the full number of price instead of scientific notation
    plt.ticklabel_format(style='plain', axis='x')
    _show_and_close(fig)

    # --- Share of each estate type ---
    st.subheader(f'Loại bất động sản ở {selected_city}')
    estate_type_counts = filtered_data['Estate type'].value_counts()
    fig = px.pie(
        values=estate_type_counts.values,
        names=estate_type_counts.index,
    )
    st.plotly_chart(fig)

    # --- Share of each certification status ---
    # Blank or whitespace-only entries count as missing, and every missing
    # entry is shown under the "Không xác định" label. Assigning the result
    # back (rather than fillna(inplace=True) on a column selection) is the
    # form pandas supports going forward.
    certification = filtered_data['Certification status'].replace(
        r'^\s*$', np.nan, regex=True
    )
    filtered_data['Certification status'] = certification.fillna("Không xác định")
    # After the fill every row has a status and the frame is non-empty here,
    # so this chart is always drawn.
    st.subheader(f'Tình trạng pháp lý của bất động sản ở {selected_city}')
    certification_counts = filtered_data['Certification status'].value_counts()
    fig = px.pie(
        values=certification_counts.values,
        names=certification_counts.index,
    )
    st.plotly_chart(fig)

    # --- Listings per direction (skipped when no listing has one) ---
    if filtered_data['Direction'].notna().any():
        st.subheader(f'Hướng bất động sản {selected_category} ở {selected_city}')
        fig = plt.figure(figsize=(6, 3))
        sns.set(style='whitegrid')
        sns.countplot(data=filtered_data, x="Direction", palette="Spectral")
        plt.xlabel('Hướng')
        plt.ylabel('Số lượng')
        _show_and_close(fig)

    # --- Share of listings with / without a parking slot ---
    st.subheader(f'Tỷ lệ bất động sản có chỗ đậu xe ở {selected_city}')
    parking = filtered_data['Parking slot']
    # Series.notna() works for any dtype, whereas np.isnan raises on object
    # columns. A lone space is treated as "no value", matching the filter
    # used for the slot-count chart.
    has_parking = parking.notna() & (parking != ' ')
    parking_slot_count = int(has_parking.sum())
    non_parking_slot_count = len(filtered_data) - parking_slot_count
    fig_pie = px.pie(
        names=['Có chỗ đậu xe', 'Không có chỗ đậu xe'],
        values=[parking_slot_count, non_parking_slot_count]
    )
    st.plotly_chart(fig_pie)

    # --- Listings per number of parking slots (only when any exist) ---
    if parking_slot_count:
        st.subheader(f'Số lượng chỗ đậu xe ở {selected_city}')
        # Bind the figure and pass it explicitly rather than relying on
        # pyplot's implicit global figure.
        fig = plt.figure(figsize=(6, 3))
        sns.set(style="whitegrid")
        sns.countplot(data=filtered_data[has_parking], x="Parking slot", palette="Spectral")
        plt.xlabel('Số lượng chỗ đậu xe/bất động sản')
        plt.ylabel('Số lượng')
        _show_and_close(fig)

    # --- Share of listings per seller type ---
    st.subheader(f'Tỷ lệ người bán ở {selected_city}')
    seller_types = ['Cá Nhân - Chính Chủ', 'Công Ty Nhà Đất - Môi Giới BĐS']
    seller_counts = [
        int((filtered_data['Seller type'] == seller_type).sum())
        for seller_type in seller_types
    ]
    fig_pie = px.pie(
        names=seller_types,
        values=seller_counts,
    )
    st.plotly_chart(fig_pie)