Spaces:

MaxAtoms
/

Datathon-2024

Sleeping

ElvarThorS committed on Nov 23, 2024

Commit

037aeab

1 Parent(s): a37ec6c

Update age_distribution and aggregate_data

Files changed (2) hide show

data_processing/age_distribution_by_id.py CHANGED Viewed

@@ -13,8 +13,8 @@ def open_file(filename):
         print(f"An error occurred: {e}")
         return []
-def get_age_distribution(year, smsv_ids, filename):
-    age_distributions = {smsv_id: {} for smsv_id in smsv_ids} # Initialize output dict
     csv_data = open_file(filename)
     if csv_data: # Check if data is not empty
@@ -27,7 +27,7 @@ def get_age_distribution(year, smsv_ids, filename):
         for row in csv_data[1:]: # Skip header
             row_smsv_id = row[smsv_id_index]
             row_year = int(row[year_index])
-            if row_smsv_id in smsv_ids and row_year == year:
                 age_group = row[agegroup_index]
                 population = int(row[population_index])
@@ -46,7 +46,7 @@ if __name__ == '__main__':
     # Example usage:
     filename = './given_data/ibuafjoldi.csv'
     smsv_ids_to_find = ['0103', '2903', '4002']  # List of desired smsv_ids
-    age_dist = get_age_distribution(2017,smsv_ids_to_find, filename)
     for smsv_id, distributions in age_dist.items():
         print(f"Age Distribution for SMSV ID: {smsv_id}")

         print(f"An error occurred: {e}")
         return []
+def get_age_distribution(years, smsv_ids, filename):
+    age_distributions = {smsv_id: {year: {} for year in years} for smsv_id in smsv_ids} # Initialize output dict
     csv_data = open_file(filename)
     if csv_data: # Check if data is not empty
         for row in csv_data[1:]: # Skip header
             row_smsv_id = row[smsv_id_index]
             row_year = int(row[year_index])
+            if row_smsv_id in smsv_ids and row_year in years:
                 age_group = row[agegroup_index]
                 population = int(row[population_index])
     # Example usage:
     filename = './given_data/ibuafjoldi.csv'
     smsv_ids_to_find = ['0103', '2903', '4002']  # List of desired smsv_ids
+    age_dist = get_age_distribution([2017, 2024],smsv_ids_to_find, filename)
     for smsv_id, distributions in age_dist.items():
         print(f"Age Distribution for SMSV ID: {smsv_id}")

data_processing/aggregate_data.py CHANGED Viewed

@@ -1,7 +1,25 @@
 # Small area id: id of the small area
 # Density: current density of the small area
 # Income distribution: the distribution of income in the small area per year (dictionary, keys: years, values: income distribution [buckets])
 # Age distribution: distribution of age in the small area (age buckets of 5 years starting at 0-4)
 # Geometry: the lat and long coordinates for the small area polygon
 # Projected dwellings:
-columns = ["smallAreaId", "density", "income_distribution_per_year", "age_distribution", "geometry", "projected_dwellings"]

+import csv
+from age_distribution_by_id import get_age_distribution
 # Small area id: id of the small area
 # Density: current density of the small area
 # Income distribution: the distribution of income in the small area per year (dictionary, keys: years, values: income distribution [buckets])
 # Age distribution: distribution of age in the small area (age buckets of 5 years starting at 0-4)
 # Geometry: the lat and long coordinates for the small area polygon
 # Projected dwellings:
+columns = ["smallAreaId", "density", "income_distribution_per_year", "age_distribution", "geometry", "projected_dwellings"]
+def open_file(filename):  # Read the CSV file at `filename` and return all of its rows as a list (each row is a list of strings); returns [] on any failure.
+    try:
+        with open(filename, 'r', newline='', encoding='utf-8') as file:  # newline='' leaves line-ending handling to the csv module
+            csv_reader = csv.reader(file)
+            data = list(csv_reader)  # read every row (first row included) while the file is still open
+            return data
+    except FileNotFoundError:
+        print(f"File {filename} not found.")  # a missing file is reported on stdout rather than raised
+        return []
+    except Exception as e:  # any other error is likewise printed and swallowed, so callers only ever see a list
+        print(f"An error occurred: {e}")
+        return []