Jan committed on
Commit
ca84f4d
·
1 Parent(s): e7eff50

increment for populating the dataframe

Browse files
Files changed (1) hide show
  1. data_processing/aggregate_data.py +47 -2
data_processing/aggregate_data.py CHANGED
@@ -1,8 +1,14 @@
1
  import csv
2
  from age_distribution_by_id import get_age_distribution
3
  from get_smallAreaInfo import get_smallAreas
 
 
 
4
 
5
 
 
 
 
6
  # Small area id: id of the small area
7
  # Density: current density of the small area
8
  # Income distribution: the distribution of income in the small area per year (dictionary, keys: years, values: income distribution [buckets])
@@ -25,6 +31,45 @@ def open_file(filename):
25
  return []
26
 
27
  # get list of smsv, each represented as {"id": smsv_id, "geometry": [(long, lat), ...]}
28
- smsv_id_geom = get_smallAreas()
29
- # get_age_distribution([2024] , [smsv["id"] for smsv in smsv_id_geom], "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
 
 
 
 
1
  import csv
2
  from age_distribution_by_id import get_age_distribution
3
  from get_smallAreaInfo import get_smallAreas
4
+ from get_density import get_density
5
+ import os
6
+ import pandas as pd
7
 
8
 
9
+ # Specify file paths here
10
+ file_ibuafjoldi = os.path.join('given_data', 'ibuafjoldi.csv')
11
+
12
  # Small area id: id of the small area
13
  # Density: current density of the small area
14
  # Income distribution: the distribution of income in the small area per year (dictionary, keys: years, values: income distribution [buckets])
 
31
  return []
32
 
33
  # get list of smsv, each represented as {"id": smsv_id, "geometry": [(long, lat), ...]}
34
+ smsv_id_geom = get_smallAreas()[:2]
35
+ smsv_ids = [smsv["id"] for smsv in smsv_id_geom] # list of smsv ids
36
+
37
+ # for each smsv_id get the age distribution for several years if required
38
+ years = [2023, 2024] # Example years for age distribution
39
+ age_distribution = get_age_distribution(years, smsv_ids, file_ibuafjoldi) # Dict with age data
40
+
41
+ # Populate pandas dataframe
42
+ data = []
43
+ for smsv in smsv_id_geom:
44
+ smsv_id = smsv["id"]
45
+ geometry = smsv["geometry"]
46
+
47
+ # Calculate total population for density calculation
48
+ population = sum(age_distribution.get(smsv_id, {}).get(2024, {}).values())
49
+
50
+ # Calculate density
51
+ try:
52
+ density = get_density(geometry, population)
53
+ except ValueError as e:
54
+ print(f"Density calculation failed for {smsv_id}: {e}")
55
+ density = None
56
+
57
+ # Age distribution
58
+ age_dist = age_distribution.get(smsv_id, {})
59
+
60
+ # Add row to data
61
+ data.append({
62
+ "smallAreaId": smsv_id,
63
+ "density": density,
64
+ "income_distribution_per_year": {}, # Placeholder for now
65
+ "age_distribution": age_dist,
66
+ "geometry": geometry,
67
+ "projected_dwellings": None # Placeholder for now
68
+ })
69
+
70
+ # Convert to Pandas DataFrame
71
+ df = pd.DataFrame(data, columns=columns)
72
 
73
+ # Display or save the DataFrame
74
+ print(df.head())
75
+ df.to_csv('output.csv', index=False, encoding='utf-8') # Save to CSV