File size: 3,213 Bytes
037aeab
35f2f48
 
 
 
ca84f4d
 
a2ef59a
9d4d6fa
037aeab
99932ef
 
 
 
a2ef59a
99932ef
 
 
 
ca84f4d
99932ef
037aeab
99932ef
 
 
 
 
 
 
ca84f4d
99932ef
 
 
ca84f4d
99932ef
 
 
729d7b0
99932ef
 
 
ca84f4d
99932ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729d7b0
99932ef
 
ca84f4d
99932ef
 
 
 
 
 
 
 
 
 
 
037aeab
99932ef
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import csv
from data_processing.age_distribution_by_id import get_age_distribution
from data_processing.get_smallAreaInfo import get_smallAreas
from data_processing.get_density import get_density
from data_processing.income_decile_by_id import get_income_decile
import os
import pandas as pd
import geopandas as gpd


def get_feature_df(years=None):
    """Build a DataFrame holding one row of features per small area.

    The small areas (id + geometry) come from ``get_smallAreas()``. For the
    collected ids the age and income distributions are loaded from the CSV
    files under ``given_data``, and a density value is computed per area with
    ``get_density(geometry, population)``.

    Args:
        years: Optional iterable of years handed to both
            ``get_age_distribution`` and ``get_income_decile``. Defaults to
            ``[2023, 2024]``. The largest year is the one whose age
            distribution is summed to obtain the population used for the
            density calculation.

    Returns:
        pandas.DataFrame with the columns ``smallAreaId``, ``density``,
        ``income_distribution_per_year``, ``age_distribution``, ``geometry``
        and ``projected_dwellings`` (the last one is always ``None`` for now).
        ``density`` is ``None`` for areas where ``get_density`` raised
        ``ValueError``.

    Raises:
        ValueError: If ``years`` is supplied but empty.
    """
    years = [2023, 2024] if years is None else list(years)
    if not years:
        raise ValueError("years must contain at least one year")
    # The population feeding the density is taken from the latest requested
    # year, so it can never drift out of sync with the loaded years.
    population_year = max(years)

    # Input files
    csv_ibuafjoldi = os.path.join('given_data', 'ibuafjoldi.csv')
    csv_tekjutiundir = os.path.join('given_data', 'tekjutiundir.csv')

    # Small area id: id of the small area
    # Density: current density of the small area
    # Income distribution: the distribution of income in the small area per
    #   year (dictionary, keys: years, values: income distribution [buckets])
    # Age distribution: distribution of age in the small area
    #   (age buckets of 5 years starting at 0-4)
    # Geometry: the lat and long coordinates for the small area polygon
    # Projected dwellings: not computed yet (placeholder column)
    columns = [
        "smallAreaId",
        "density",
        "income_distribution_per_year",
        "age_distribution",
        "geometry",
        "projected_dwellings",
    ]

    # List of small areas, each represented as
    # {"id": smsv_id, "geometry": [(long, lat), ...]}.
    # The geometry used below comes from here; no separate GeoJSON read is
    # needed in this function.
    smsv_id_geom = get_smallAreas()
    smsv_ids = [smsv["id"] for smsv in smsv_id_geom]

    # Both lookups are indexed by small area id (see the .get() calls below).
    age_distribution = get_age_distribution(years, smsv_ids, csv_ibuafjoldi)
    income_distribution = get_income_decile(years, smsv_ids, csv_tekjutiundir)

    data = []
    for smsv in smsv_id_geom:
        smsv_id = smsv["id"]
        geometry = smsv["geometry"]

        # Missing ids fall back to an empty mapping instead of raising.
        age_dist = age_distribution.get(smsv_id, {})
        income_dist = income_distribution.get(smsv_id, {})

        # Total population = sum over all age buckets of the chosen year
        # (0 when the area or the year is missing).
        population = sum(age_dist.get(population_year, {}).values())

        try:
            density = get_density(geometry, population)
        except ValueError as e:
            # Best effort: keep the row, just without a density value.
            print(f"Density calculation failed for {smsv_id}: {e}")
            density = None

        data.append({
            "smallAreaId": smsv_id,
            "density": density,
            "income_distribution_per_year": income_dist,
            "age_distribution": age_dist,
            "geometry": geometry,
            "projected_dwellings": None,  # Placeholder for now
        })

    return pd.DataFrame(data, columns=columns)