Spaces:
Sleeping
Sleeping
File size: 8,524 Bytes
5426d51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter # Import ScalarFormatter
import plotly.express as px
import numpy as np
# Switch off Streamlit's 'deprecation.showPyplotGlobalUse' notice.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Location extraction: raw listings in, listings with City/District columns out.
# NOTE(review): `input` shadows the Python builtin of the same name; the name is
# kept because it is part of this module's top-level namespace.
input = 'data_3/data_test.csv'
output = 'data_3/data_test_city.csv'

# Both CSV files are decoded as UTF-8 with an optional byte-order mark.
_csv_options = {'encoding': 'UTF-8-SIG'}

# Listings to be tagged with a city and a district.
addresses_df = pd.read_csv(input, **_csv_options)

# Lookup table of known City/District pairs.
cities_districts_df = pd.read_csv('data_3/Cities.csv', **_csv_options)
# Function to find city and district for each address
def find_city_district(location):
    """Return the first (City, District) pair of the lookup table found in *location*.

    The module-level ``cities_districts_df`` is scanned in row order; a row
    matches when both its "City" and its "District" text occur as substrings
    of ``str(location)``.

    Args:
        location: Free-text address. Any value is accepted; it is converted
            with ``str()`` before matching.

    Returns:
        A ``(city, district)`` tuple taken from the first matching row, or
        ``(None, None)`` when no row matches.
    """
    location = str(location)  # non-string cells (e.g. NaN) are matched as text

    # Iterate the two columns directly: iterrows() would build a new Series
    # for every lookup row on every call.
    lookup_pairs = zip(cities_districts_df["City"], cities_districts_df["District"])
    for city, district in lookup_pairs:
        # A missing cell would stringify to "nan" and could match by accident.
        if pd.isna(city) or pd.isna(district):
            continue
        if str(city) in location and str(district) in location:
            return city, district
    return None, None
# Tag every listing with the (City, District) pair found in its "Location" text.
# The two columns are built from the list of result tuples in one step; this
# avoids creating one pd.Series per row and also works when there are no rows.
city_district_pairs = addresses_df["Location"].apply(find_city_district)
addresses_df[["City", "District"]] = pd.DataFrame(
    city_district_pairs.tolist(),
    index=addresses_df.index,
    columns=["City", "District"],
)

# Persist the enriched listings, then reload them from that same path.
addresses_df.to_csv(output, index=False)
data = pd.read_csv(output)
data.info()  # info() prints by itself and returns None, so it is not wrapped in print()

# Keep only listings that have both a price and a resolved city.
df = data.dropna(subset=['Price', 'City'])
# Drop listings priced as 'Thỏa thuận'; copy so the assignment below
# writes to an independent frame rather than to a slice of `data`.
df = df[df['Price'] != 'Thỏa thuận'].copy()

# Strip thousands separators and convert to float. astype(str) keeps this
# working when the column was already parsed as numeric; values that still
# cannot be parsed become NaN.
price_text = df['Price'].astype(str).str.replace(',', '', regex=False)
df['Price'] = pd.to_numeric(price_text, errors='coerce').astype(float)
df.info()
def plot_minmax_prices(selected_category):
    """Show a per-city table of the lowest and highest price for one category.

    Rows of the module-level ``df`` whose "Category" equals
    *selected_category* are pivoted on City and Category, and the resulting
    table is rendered in Streamlit under a subheader.

    Args:
        selected_category: Value compared against the "Category" column.
    """
    category_mask = df['Category'] == selected_category
    listings = df[category_mask]

    # One row per (City, Category) with the min and max of "Price".
    price_range = listings.pivot_table(
        values='Price',
        index=['City', 'Category'],
        aggfunc=['min', 'max'],
    )
    price_range = price_range.reset_index()
    print(price_range.head())

    # Replace the two-level pivot header with flat column names.
    price_range.columns = ['City', 'Category', 'Min Price', 'Max Price']

    st.subheader('Tổng hợp Giá bất động sản cao nhất và thấp nhất ở các tỉnh thành')
    st.dataframe(price_range)
def _render_matplotlib_figure(fig):
    """Send a Matplotlib figure to Streamlit, then close it so it is not kept alive by pyplot."""
    st.pyplot(fig)
    plt.close(fig)


def _render_pie(names, values):
    """Draw a Plotly pie chart with the given slice labels and slice sizes."""
    st.plotly_chart(px.pie(values=values, names=names))


def _plot_district_counts(listings, selected_category, selected_city):
    """Bar chart of the number of listings per district."""
    st.subheader(f'Số lượng bất động sản {selected_category} ở {selected_city}')
    fig = plt.figure(figsize=(6, 3))
    sns.countplot(data=listings, y='District')
    plt.xticks(rotation=25)
    plt.xlabel('Số lượng')
    plt.ylabel('Quận/Huyện')
    _render_matplotlib_figure(fig)


def _plot_price_per_area(listings, selected_category, selected_city):
    """Bar chart of Price divided by Area, per district."""
    st.subheader(f'Giá bất động sản {selected_category} theo M² ở {selected_city}')
    # assign() returns a new frame, so the caller's data is not modified.
    priced = listings.assign(**{'Price per Area': listings['Price'] / listings['Area']})
    fig = plt.figure(figsize=(6, 3))
    sns.barplot(data=priced, y='District', x='Price per Area')
    plt.xticks(rotation=45)
    plt.xlabel('Giá trung bình')
    plt.ylabel('Quận/Huyện')
    # Show full numbers on the axis instead of scientific notation.
    plt.ticklabel_format(style='plain', axis='x')
    _render_matplotlib_figure(fig)


def _plot_estate_types(listings, selected_city):
    """Pie chart of the share of each "Estate type" value."""
    st.subheader(f'Loại bất động sản ở {selected_city}')
    estate_type_counts = listings['Estate type'].value_counts()
    _render_pie(names=estate_type_counts.index, values=estate_type_counts.values)


def _plot_certification_status(listings, selected_city):
    """Pie chart of "Certification status", with blanks shown as "Không xác định"."""
    # Work on a derived Series: calling fillna(inplace=True) on a column
    # selection is chained assignment and may leave the frame unchanged.
    status = (
        listings['Certification status']
        .replace(' ', pd.NA)
        .fillna("Không xác định")
    )
    # After fillna every row has a value, so the chart is always drawn.
    st.subheader(f'Tình trạng pháp lý của bất động sản ở {selected_city}')
    certification_counts = status.value_counts()
    _render_pie(names=certification_counts.index, values=certification_counts.values)


def _plot_directions(listings, selected_category, selected_city):
    """Bar chart of listings per "Direction"; writes an empty line when none is set."""
    if not listings['Direction'].notna().any():
        st.write('')
        return
    st.subheader(f'Hướng bất động sản {selected_category} ở {selected_city}')
    fig = plt.figure(figsize=(6, 3))
    sns.set(style='whitegrid')
    sns.countplot(data=listings, x="Direction", palette="Spectral")
    plt.xlabel('Hướng')
    plt.ylabel('Số lượng')
    _render_matplotlib_figure(fig)


def _plot_parking(listings, selected_city):
    """Pie chart of listings with/without a parking slot, plus a count per slot number."""
    st.subheader(f'Tỷ lệ bất động sản có chỗ đậu xe ở {selected_city}')
    # Series.notna() works for any dtype; np.isnan raises on object columns.
    has_slot = listings['Parking slot'].notna()
    parking_slot_count = int(has_slot.sum())
    non_parking_slot_count = int((~has_slot).sum())
    _render_pie(
        names=['Có chỗ đậu xe', 'Không có chỗ đậu xe'],
        values=[parking_slot_count, non_parking_slot_count],
    )

    if parking_slot_count == 0:
        st.write('')
        return
    st.subheader(f'Số lượng chỗ đậu xe ở {selected_city}')
    with_slots = listings[has_slot & (listings['Parking slot'] != ' ')]
    fig = plt.figure(figsize=(6, 3))
    sns.set(style="whitegrid")
    sns.countplot(data=with_slots, x="Parking slot", palette="Spectral")
    plt.xlabel('Số lượng chỗ đậu xe/bất động sản')
    plt.ylabel('Số lượng')
    # Pass the figure explicitly, like every other chart in this module.
    _render_matplotlib_figure(fig)


def _plot_seller_types(listings, selected_city):
    """Pie chart comparing the two "Seller type" values counted below."""
    st.subheader(f'Tỷ lệ người bán ở {selected_city}')
    seller_type = listings['Seller type']
    personal_count = int((seller_type == 'Cá Nhân - Chính Chủ').sum())
    non_personal_count = int((seller_type == 'Công Ty Nhà Đất - Môi Giới BĐS').sum())
    _render_pie(
        names=['Cá Nhân - Chính Chủ', 'Công Ty Nhà Đất - Môi Giới BĐS'],
        values=[personal_count, non_personal_count],
    )


def plot_by_category(selected_category):
    """Render the per-city charts for one listing category.

    Adds a sidebar select box listing the sorted unique "City" values of the
    module-level ``df``, filters ``df`` to the chosen city and
    *selected_category*, and then draws, in order: listings per district,
    price per area per district, estate types, certification status,
    directions, parking slots and seller types. When the filter matches no
    rows a Streamlit warning is shown instead of the charts.

    Args:
        selected_category: Value compared against the "Category" column.
    """
    unique_cities = sorted(df['City'].unique())
    selected_city = st.sidebar.selectbox('Chọn thành phố hoặc tỉnh', unique_cities)

    in_scope = (df['City'] == selected_city) & (df['Category'] == selected_category)
    filtered_data = df[in_scope]

    if filtered_data.empty:
        print("filtered_data is empty")
        st.warning(f"No data available for {selected_category} in {selected_city}.")
        return

    _plot_district_counts(filtered_data, selected_category, selected_city)
    _plot_price_per_area(filtered_data, selected_category, selected_city)
    _plot_estate_types(filtered_data, selected_city)
    _plot_certification_status(filtered_data, selected_city)
    _plot_directions(filtered_data, selected_category, selected_city)
    _plot_parking(filtered_data, selected_city)
    _plot_seller_types(filtered_data, selected_city)
|