Spaces:

A-New-Day-001
/

VN-Housing-App

Sleeping

App Files Files Community

VN-Housing-App / analytics_reports /reports.py

A-New-Day-001

Upload 24 files

5426d51 about 1 year ago

raw

history blame

No virus

8.52 kB

	import streamlit as st
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	from matplotlib.ticker import ScalarFormatter # Import ScalarFormatter
	import plotly.express as px
	import numpy as np

	# Switch off Streamlit's 'deprecation.showPyplotGlobalUse' notice.
	st.set_option("deprecation.showPyplotGlobalUse", False)

	# Source listings CSV and the path the city-tagged copy is written to.
	input = "data_3/data_test.csv"
	output = "data_3/data_test_city.csv"

	# Listings whose "Location" text is searched for a city and district.
	addresses_df = pd.read_csv(filepath_or_buffer=input, encoding="UTF-8-SIG")

	# Reference table providing the "City" / "District" name pairs.
	cities_districts_df = pd.read_csv(
	    filepath_or_buffer="data_3/Cities.csv",
	    encoding="UTF-8-SIG",
	)

	# Function to find city and district for each address
	def find_city_district(location):
	    """Return the (City, District) pair from ``cities_districts_df`` found in *location*.

	    A row matches when both its City and its District text occur as
	    substrings of ``str(location)``. Among all matching rows the one with the
	    longest District name (then the longest City name) wins, so that an
	    address in "Quận 10" is not attributed to "Quận 1" merely because that
	    row comes first. Ties keep the earliest row in the table.

	    Args:
	        location: Free-text address; any value is accepted and stringified.

	    Returns:
	        A ``(city, district)`` tuple taken from the matching row, or
	        ``(None, None)`` when no row matches.
	    """
	    text = str(location)  # Non-string cells (e.g. NaN) must not break `in`.
	    best_pair = (None, None)
	    best_score = (-1, -1)

	    # Walk the two columns directly; building a Series per row per address
	    # (as iterrows does) is needlessly slow.
	    for city, district in zip(cities_districts_df["City"], cities_districts_df["District"]):
	        # A missing name would stringify to "nan" and could match by accident.
	        if pd.isna(city) or pd.isna(district):
	            continue
	        city_name, district_name = str(city), str(district)
	        if city_name in text and district_name in text:
	            score = (len(district_name), len(city_name))
	            # Strictly greater keeps the first row on ties.
	            if score > best_score:
	                best_pair, best_score = (city, district), score

	    return best_pair

	# Tag every listing with the (City, District) pair detected in its address.
	# Building the frame from the list of tuples avoids creating one Series per
	# row, and also works when there are no listings at all.
	_city_district = addresses_df["Location"].apply(find_city_district)
	addresses_df[["City", "District"]] = pd.DataFrame(
	    _city_district.tolist(),
	    index=addresses_df.index,
	    columns=["City", "District"],
	)

	# Persist the enriched listings, then reload them from disk.
	addresses_df.to_csv(output, index=False)

	data = pd.read_csv(output)
	# DataFrame.info() prints its summary itself and returns None, so it is not
	# wrapped in print().
	data.info()

	# Keep only rows that have both a price and a recognised city.
	df = data.dropna(subset=['Price', 'City'])
	# Drop rows whose price is the literal 'Thỏa thuận'; copy so the column
	# assignment below writes to an independent frame rather than a slice of `data`.
	df = df[df['Price'] != 'Thỏa thuận'].copy()
	# Strip thousands separators and convert to float; anything unparsable
	# becomes NaN. astype(str) keeps this working when the column was already
	# parsed as numeric. The float cast is assigned back (it was previously
	# computed and thrown away).
	df['Price'] = pd.to_numeric(
	    df['Price'].astype(str).str.replace(',', '', regex=False),
	    errors='coerce',
	).astype(float)

	df.info()

	def plot_minmax_prices(selected_category):
	    """Show a per-city table of the lowest and highest price for one category.

	    Rows of the module-level ``df`` whose ``Category`` equals
	    *selected_category* are grouped by City and Category, and the minimum and
	    maximum ``Price`` of each group are rendered as a Streamlit table.

	    Args:
	        selected_category: Value compared against the ``Category`` column.

	    Returns:
	        None. Output goes to the Streamlit page (and a preview to stdout).
	    """
	    filtered_data = df[df['Category'] == selected_category]

	    # With no rows the pivot may not produce the four columns renamed below,
	    # so bail out with a warning, as plot_by_category does for empty data.
	    if filtered_data.empty:
	        st.warning(f"No data available for {selected_category}.")
	        return

	    pivot_table = filtered_data.pivot_table(
	        index=['City', 'Category'],
	        values='Price',
	        aggfunc=['min', 'max'],
	    ).reset_index()
	    print(pivot_table.head())
	    # Replace the two-level (aggfunc, value) column labels with flat names.
	    pivot_table.columns = ['City', 'Category', 'Min Price', 'Max Price']

	    st.subheader('Tổng hợp Giá bất động sản cao nhất và thấp nhất ở các tỉnh thành')
	    st.dataframe(pivot_table)

	def _show_and_close(fig):
	    """Render a Matplotlib figure in Streamlit, then close it."""
	    st.pyplot(fig)
	    # pyplot keeps figures it created registered until they are closed;
	    # closing after rendering stops them from accumulating.
	    plt.close(fig)


	def plot_by_category(selected_category):
	    """Render the per-city charts for one property category.

	    A sidebar select box chooses a city from the module-level ``df``; the
	    rows matching that city and *selected_category* then drive, in order:
	    listing counts per district, price per area per district, estate-type
	    shares, certification-status shares, direction counts, parking
	    availability (plus parking-slot counts), and seller-type shares.

	    Args:
	        selected_category: Value compared against the ``Category`` column.

	    Returns:
	        None. All output goes to the Streamlit page; a warning is shown
	        instead of charts when no rows match.
	    """
	    unique_cities = sorted(df['City'].unique())
	    selected_city = st.sidebar.selectbox('Chọn thành phố hoặc tỉnh', unique_cities)

	    # Copy so the derived columns written below go to an independent frame
	    # instead of a slice of df (chained assignment).
	    filtered_data = df[
	        (df['City'] == selected_city) & (df['Category'] == selected_category)
	    ].copy()

	    if filtered_data.empty:
	        print("filtered_data is empty")
	        st.warning(f"No data available for {selected_category} in {selected_city}.")
	        return

	    # --- Number of listings per district ---
	    st.subheader(f'Số lượng bất động sản {selected_category} ở {selected_city}')
	    fig = plt.figure(figsize=(6, 3))
	    sns.countplot(data=filtered_data, y='District')
	    plt.xticks(rotation=25)
	    plt.xlabel('Số lượng')
	    plt.ylabel('Quận/Huyện')
	    _show_and_close(fig)

	    # --- Price per unit of area, per district ---
	    st.subheader(f'Giá bất động sản {selected_category} theo M² ở {selected_city}')
	    filtered_data['Price per Area'] = filtered_data['Price'] / filtered_data['Area']
	    fig = plt.figure(figsize=(6, 3))
	    sns.barplot(data=filtered_data, y='District', x='Price per Area')
	    plt.xticks(rotation=45)
	    plt.xlabel('Giá trung bình')
	    plt.ylabel('Quận/Huyện')
	    # Print full numbers on the price axis instead of scientific notation.
	    plt.ticklabel_format(style='plain', axis='x')
	    _show_and_close(fig)

	    # --- Share of each estate type ---
	    st.subheader(f'Loại bất động sản ở {selected_city}')
	    estate_type_counts = filtered_data['Estate type'].value_counts()
	    fig = px.pie(
	        values=estate_type_counts.values,
	        names=estate_type_counts.index,
	    )
	    st.plotly_chart(fig)

	    # --- Share of each certification status ---
	    # Missing or whitespace-only entries are shown as "Không xác định".
	    # Because every row gets a value here and the frame is non-empty, the
	    # chart is always drawn (the former "count == 0" branch could never run).
	    status = filtered_data['Certification status']
	    is_blank = status.isna() | status.astype(str).str.strip().eq('')
	    filtered_data['Certification status'] = status.mask(is_blank, "Không xác định")
	    st.subheader(f'Tình trạng pháp lý của bất động sản ở {selected_city}')
	    certification_counts = filtered_data['Certification status'].value_counts()
	    fig = px.pie(
	        values=certification_counts.values,
	        names=certification_counts.index,
	    )
	    st.plotly_chart(fig)

	    # --- Listings per facing direction (skipped when none is recorded) ---
	    if filtered_data['Direction'].notna().any():
	        st.subheader(f'Hướng bất động sản {selected_category} ở {selected_city}')
	        fig = plt.figure(figsize=(6, 3))
	        sns.set(style='whitegrid')
	        sns.countplot(data=filtered_data, x="Direction", palette="Spectral")
	        plt.xlabel('Hướng')
	        plt.ylabel('Số lượng')
	        _show_and_close(fig)
	    else:
	        st.write('')

	    # --- Listings with vs. without a parking-slot value ---
	    st.subheader(f'Tỷ lệ bất động sản có chỗ đậu xe ở {selected_city}')
	    parking = filtered_data['Parking slot']
	    # Series.notna() works for any dtype; np.isnan raises TypeError when the
	    # column holds strings (object dtype).
	    has_parking = parking.notna()
	    parking_slot_count = int(has_parking.sum())
	    non_parking_slot_count = int((~has_parking).sum())
	    fig_pie = px.pie(
	        names=['Có chỗ đậu xe', 'Không có chỗ đậu xe'],
	        values=[parking_slot_count, non_parking_slot_count],
	    )
	    st.plotly_chart(fig_pie)

	    # --- Distribution of the parking-slot values themselves ---
	    if parking_slot_count == 0:
	        st.write('')
	    else:
	        st.subheader(f'Số lượng chỗ đậu xe ở {selected_city}')
	        filtered_data2 = filtered_data[has_parking & (parking != ' ')]
	        # Keep a handle on the figure and pass it explicitly rather than
	        # relying on st.pyplot()'s deprecated global-figure behaviour.
	        fig = plt.figure(figsize=(6, 3))
	        sns.set(style="whitegrid")
	        sns.countplot(data=filtered_data2, x="Parking slot", palette="Spectral")
	        plt.xlabel('Số lượng chỗ đậu xe/bất động sản')
	        plt.ylabel('Số lượng')
	        _show_and_close(fig)

	    # --- Private sellers vs. agencies ---
	    st.subheader(f'Tỷ lệ người bán ở {selected_city}')
	    seller_type = filtered_data['Seller type']
	    personal_count = int((seller_type == 'Cá Nhân - Chính Chủ').sum())
	    non_personal_count = int((seller_type == 'Công Ty Nhà Đất - Môi Giới BĐS').sum())
	    fig_pie = px.pie(
	        names=['Cá Nhân - Chính Chủ', 'Công Ty Nhà Đất - Môi Giới BĐS'],
	        values=[personal_count, non_personal_count],
	    )
	    st.plotly_chart(fig_pie)