ElvarThorS committed on
Commit
6c1192a
·
1 Parent(s): 856da7c

age distribution fetching, and smasvaedi_2021.json formatted

Browse files
data_processing/age_distribution_by_id.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+
3
def open_file(filename):
    """Read a CSV file and return its rows as a list of lists of strings.

    The file is decoded as UTF-8. A leading byte-order mark (U+FEFF, written
    by some spreadsheet exports) is stripped so that the first header cell
    compares equal to its plain column name.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        Every row of the file, header included, each as a list of strings.
        An empty list is returned when the file does not exist or cannot be
        read; a message describing the problem is printed in that case.
    """
    try:
        # 'utf-8-sig' reads BOM-less UTF-8 unchanged and drops a BOM if present.
        with open(filename, 'r', newline='', encoding='utf-8-sig') as file:
            return list(csv.reader(file))
    except FileNotFoundError:
        print(f"File {filename} not found.")
        return []
    except Exception as e:
        # Deliberately best-effort: report the problem and fall back to no data.
        print(f"An error occurred: {e}")
        return []
15
+
16
def get_age_distribution(year, smsv_ids, filename):
    """Sum the population per age group for selected ids in a single year.

    Reads the CSV at ``filename`` through ``open_file``. The header row must
    contain the columns ``smasvaedi``, ``aldursflokkur``, ``fjoldi`` and
    ``ar``. For every data row whose ``smasvaedi`` value is one of
    ``smsv_ids`` and whose ``ar`` value equals ``year``, the ``fjoldi`` value
    is added to the running total of that row's ``aldursflokkur``.

    Args:
        year: Year to select; compared with ``int()`` of the ``ar`` column.
        smsv_ids: Iterable of ``smasvaedi`` values to include, spelled
            exactly as they appear in the file (strings).
        filename: Path of the CSV file.

    Returns:
        A dict shaped ``{smsv_id: {year: {age_group: population}}}``. Every
        requested id is present as a key; an id with no matching rows (or a
        file that could not be read) maps to an empty dict.

    Raises:
        ValueError: If a required column is missing from the header, or if a
            row for a requested id has a non-integer ``ar`` or ``fjoldi``.
    """
    age_distributions = {smsv_id: {} for smsv_id in smsv_ids}  # Initialize output dict
    csv_data = open_file(filename)
    if not csv_data:
        return age_distributions

    header = csv_data[0]
    required = ('smasvaedi', 'aldursflokkur', 'fjoldi', 'ar')
    missing = [name for name in required if name not in header]
    if missing:
        raise ValueError(
            f"Missing column(s) {', '.join(missing)} in header of {filename}"
        )
    smsv_id_index = header.index('smasvaedi')
    agegroup_index = header.index('aldursflokkur')
    population_index = header.index('fjoldi')
    year_index = header.index('ar')
    min_width = max(smsv_id_index, agegroup_index, population_index, year_index) + 1

    for row in csv_data[1:]:  # Skip header
        # csv.reader yields [] for blank lines; ignore rows too short to index.
        if len(row) < min_width:
            continue

        # Filter on the id first (O(1) dict lookup) so values in rows that
        # were never requested are not parsed at all.
        row_smsv_id = row[smsv_id_index]
        if row_smsv_id not in age_distributions:
            continue

        row_year = int(row[year_index])
        if row_year != year:
            continue

        age_group = row[agegroup_index]
        population = int(row[population_index])
        by_age_group = age_distributions[row_smsv_id].setdefault(row_year, {})
        by_age_group[age_group] = by_age_group.get(age_group, 0) + population

    return age_distributions
44
+
45
if __name__ == '__main__':
    # Example usage: print the 2017 age distribution for three ids.
    filename = './given_data/ibuafjoldi.csv'
    smsv_ids_to_find = ['0103', '2903', '4002']  # List of desired smsv_ids
    age_dist = get_age_distribution(2017, smsv_ids_to_find, filename)

    # get_age_distribution returns {smsv_id: {year: {age_group: population}}},
    # so the year level has to be walked before reaching the age groups.
    for smsv_id, by_year in age_dist.items():
        print(f"Age Distribution for SMSV ID: {smsv_id}")
        for age_groups in by_year.values():
            for age_group, population in age_groups.items():
                print(f" - {age_group}: {population}")
        print()  # Empty line for better readability
given_data/smasvaedi_2021.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f24c46878e5c68b4a7f4908e0f69cfd98e79f079fd199fd6b90d8d2f447202b6
3
- size 7398089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e475d876ebd7c95293cd9fea7195bd2285d6b31b89338763e37e96db0039f46
3
+ size 12017971