Spaces:
Sleeping
Sleeping
ElvarThorS
committed on
Commit
·
6c1192a
1
Parent(s):
856da7c
age distribution fetching, and smasvaedi_2021.json formatted
Browse files
data_processing/age_distribution_by_id.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
|
3 |
+
def open_file(filename):
    """Read a CSV file and return its records as lists of strings.

    The file is decoded as UTF-8. A leading byte-order mark, if present, is
    dropped so it does not end up attached to the first header cell (which
    would make a later lookup of that column name fail).

    Args:
        filename: Path of the CSV file to read.

    Returns:
        One list of cell strings per CSV record, header row included. An
        empty list is returned, after printing a message, when the file is
        missing, cannot be read, is not valid UTF-8, or is not parseable
        as CSV.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged and strips a BOM if present.
        with open(filename, 'r', newline='', encoding='utf-8-sig') as file:
            return list(csv.reader(file))
    except FileNotFoundError:
        print(f"File {filename} not found.")
        return []
    except (OSError, UnicodeError, csv.Error) as e:
        # Only expected I/O, decoding and CSV failures are mapped to
        # "no data"; unrelated programming errors are left to propagate.
        print(f"An error occurred: {e}")
        return []
|
15 |
+
|
16 |
+
def get_age_distribution(year, smsv_ids, filename):
    """Sum population per age group for the requested areas in one year.

    Loads the CSV at ``filename`` through ``open_file`` and, for every row
    whose ``smasvaedi`` cell is one of ``smsv_ids`` and whose ``ar`` cell
    equals ``year``, adds the ``fjoldi`` cell to the running total of that
    row's ``aldursflokkur`` value.

    Args:
        year: Year to select; compared against ``int()`` of the ``ar`` cell.
        smsv_ids: Iterable of ``smasvaedi`` identifiers, as the strings that
            appear in the CSV.
        filename: Path of the CSV file.

    Returns:
        A dict shaped ``{smsv_id: {year: {age_group: population}}}``. Every
        requested id is present; an id without matching rows (or any id when
        the file yields no data) maps to an empty dict.

    Raises:
        ValueError: If one of the four required columns is missing from the
            header, or a row for a requested id has a non-integer ``ar`` or
            ``fjoldi`` cell.
        IndexError: If a non-blank row is shorter than the header requires.
    """
    # The result dict doubles as the O(1) membership test for requested ids,
    # which also keeps the caller's id order and works for one-shot iterables.
    age_distributions = {smsv_id: {} for smsv_id in smsv_ids}

    csv_data = open_file(filename)
    if not csv_data:
        return age_distributions

    header = csv_data[0]
    smsv_id_index = header.index('smasvaedi')
    agegroup_index = header.index('aldursflokkur')
    population_index = header.index('fjoldi')
    year_index = header.index('ar')

    for row in csv_data[1:]:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue

        row_smsv_id = row[smsv_id_index]
        if row_smsv_id not in age_distributions:
            continue

        # Parse the year only for rows we care about, so unrelated rows
        # with odd values cannot abort the scan.
        row_year = int(row[year_index])
        if row_year != year:
            continue

        age_group = row[agegroup_index]
        per_year = age_distributions[row_smsv_id].setdefault(row_year, {})
        per_year[age_group] = per_year.get(age_group, 0) + int(row[population_index])

    return age_distributions
|
44 |
+
|
45 |
+
if __name__ == '__main__':
    # Example usage: print the 2017 age distribution of three areas.
    filename = './given_data/ibuafjoldi.csv'
    smsv_ids_to_find = ['0103', '2903', '4002']  # List of desired smsv_ids
    age_dist = get_age_distribution(2017, smsv_ids_to_find, filename)

    for smsv_id, distributions in age_dist.items():
        print(f"Age Distribution for SMSV ID: {smsv_id}")
        # get_age_distribution nests its result as {year: {age_group: count}},
        # so descend through the year level before printing age groups.
        for age_groups in distributions.values():
            for age_group, population in age_groups.items():
                print(f" - {age_group}: {population}")
        print()  # Empty line for better readability
|
given_data/smasvaedi_2021.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e475d876ebd7c95293cd9fea7195bd2285d6b31b89338763e37e96db0039f46
|
3 |
+
size 12017971
|