Spaces:
Sleeping
Sleeping
Jan
committed on
Commit
·
ca84f4d
1
Parent(s):
e7eff50
increment for populating the dataframe
Browse files
data_processing/aggregate_data.py
CHANGED
@@ -1,8 +1,14 @@
|
|
1 |
import csv
|
2 |
from age_distribution_by_id import get_age_distribution
|
3 |
from get_smallAreaInfo import get_smallAreas
|
|
|
|
|
|
|
4 |
|
5 |
|
|
|
|
|
|
|
6 |
# Small area id: id of the small area
|
7 |
# Density: current density of the small area
|
8 |
# Income distribution: the distribution of income in the small area per year (dictionary, keys: years, values: income distribution [buckets])
|
@@ -25,6 +31,45 @@ def open_file(filename):
|
|
25 |
return []
|
26 |
|
27 |
# get list of smsv, each represented as {"id": smsv_id, "geometry": [(long, lat), ...]}
|
28 |
-
smsv_id_geom = get_smallAreas()
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
|
|
|
|
|
|
|
1 |
import csv
|
2 |
from age_distribution_by_id import get_age_distribution
|
3 |
from get_smallAreaInfo import get_smallAreas
|
4 |
+
from get_density import get_density
|
5 |
+
import os
|
6 |
+
import pandas as pd
|
7 |
|
8 |
|
9 |
+
# Specify file paths here
|
10 |
+
file_ibuafjoldi = os.path.join('given_data', 'ibuafjoldi.csv')
|
11 |
+
|
12 |
# Small area id: id of the small area
|
13 |
# Density: current density of the small area
|
14 |
# Income distribution: the distribution of income in the small area per year (dictionary, keys: years, values: income distribution [buckets])
|
|
|
31 |
return []
|
32 |
|
33 |
# get list of smsv, each represented as {"id": smsv_id, "geometry": [(long, lat), ...]}
|
34 |
+
smsv_id_geom = get_smallAreas()[:2]
|
35 |
+
smsv_ids = [smsv["id"] for smsv in smsv_id_geom] # list of smsv ids
|
36 |
+
|
37 |
+
# for each smsv_id get the age distribution for several years if required
|
38 |
+
years = [2023, 2024] # Example years for age distribution
|
39 |
+
age_distribution = get_age_distribution(years, smsv_ids, file_ibuafjoldi) # Dict with age data
|
40 |
+
|
41 |
+
# Populate pandas dataframe
|
42 |
+
data = []
|
43 |
+
for smsv in smsv_id_geom:
|
44 |
+
smsv_id = smsv["id"]
|
45 |
+
geometry = smsv["geometry"]
|
46 |
+
|
47 |
+
# Calculate total population for density calculation
|
48 |
+
population = sum(age_distribution.get(smsv_id, {}).get(2024, {}).values())
|
49 |
+
|
50 |
+
# Calculate density
|
51 |
+
try:
|
52 |
+
density = get_density(geometry, population)
|
53 |
+
except ValueError as e:
|
54 |
+
print(f"Density calculation failed for {smsv_id}: {e}")
|
55 |
+
density = None
|
56 |
+
|
57 |
+
# Age distribution
|
58 |
+
age_dist = age_distribution.get(smsv_id, {})
|
59 |
+
|
60 |
+
# Add row to data
|
61 |
+
data.append({
|
62 |
+
"smallAreaId": smsv_id,
|
63 |
+
"density": density,
|
64 |
+
"income_distribution_per_year": {}, # Placeholder for now
|
65 |
+
"age_distribution": age_dist,
|
66 |
+
"geometry": geometry,
|
67 |
+
"projected_dwellings": None # Placeholder for now
|
68 |
+
})
|
69 |
+
|
70 |
+
# Convert to Pandas DataFrame
|
71 |
+
df = pd.DataFrame(data, columns=columns)
|
72 |
|
73 |
+
# Display or save the DataFrame
|
74 |
+
print(df.head())
|
75 |
+
df.to_csv('output.csv', index=False, encoding='utf-8') # Save to CSV
|