danieldux committed on
Commit
ad04d80
1 Parent(s): 2053ce2

Refactored create_hierarchy_dict function to include weights for parent nodes in ISCO-08 structure

Browse files
Files changed (1) hide show
  1. isco.py +9 -5
isco.py CHANGED
@@ -6,15 +6,14 @@ import requests
6
 
7
  def create_hierarchy_dict(file: str) -> dict:
8
  """
9
- Creates a dictionary where keys are nodes and values are sets of parent nodes representing the group level hierarchy of the ISCO-08 structure.
10
- The function assumes that the input CSV file has a column named 'unit' with the 4-digit ISCO-08 codes.
11
- A csv file with the ISCO-08 structure can be downloaded from the International Labour Organization (ILO) at [https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08 EN.csv](https://www.ilo.org/ilostat-files/ISCO/newdocs-08-2021/ISCO-08/ISCO-08%20EN.csv)
12
 
13
  Args:
14
  - file: A string representing the path to the CSV file containing the 4-digit ISCO-08 codes. It can be a local path or a web URL.
15
 
16
  Returns:
17
- - A dictionary where keys are ISCO-08 unit codes and values are sets of their parent codes.
18
  """
19
  isco_hierarchy = {}
20
 
@@ -31,7 +30,12 @@ def create_hierarchy_dict(file: str) -> dict:
31
  minor_code = unit_code[0:3]
32
  sub_major_code = unit_code[0:2]
33
  major_code = unit_code[0]
34
- isco_hierarchy[unit_code] = {minor_code, major_code, sub_major_code}
 
 
 
 
 
35
 
36
  return isco_hierarchy
37
 
 
6
 
7
  def create_hierarchy_dict(file: str) -> dict:
8
  """
9
+ Creates a dictionary where keys are nodes and values are dictionaries mapping each parent node to a closeness weight (higher for closer ancestors),
10
+ representing the group level hierarchy of the ISCO-08 structure.
 
11
 
12
  Args:
13
  - file: A string representing the path to the CSV file containing the 4-digit ISCO-08 codes. It can be a local path or a web URL.
14
 
15
  Returns:
16
+ - A dictionary where keys are ISCO-08 unit codes and values are dictionaries mapping each parent code to its weight (0.75 for the minor group, 0.5 for the sub-major group, 0.25 for the major group).
17
  """
18
  isco_hierarchy = {}
19
 
 
30
  minor_code = unit_code[0:3]
31
  sub_major_code = unit_code[0:2]
32
  major_code = unit_code[0]
33
+
34
+ # Assign weights, higher for closer ancestors
35
+ weights = {minor_code: 0.75, sub_major_code: 0.5, major_code: 0.25}
36
+
37
+ # Store ancestors with their weights
38
+ isco_hierarchy[unit_code] = weights
39
 
40
  return isco_hierarchy
41