File size: 1,504 Bytes
10e83ef
 
7375cc5
59e2b2d
7375cc5
 
10e83ef
7375cc5
ad04d80
 
7375cc5
 
59e2b2d
7375cc5
 
ad04d80
7375cc5
 
 
59e2b2d
 
 
 
 
 
 
 
 
 
 
 
 
ad04d80
 
 
 
 
 
7375cc5
 
 
 
 
10e83ef
7375cc5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""This module provides functionality for creating a hierarchy tree and a mapping from ISCO code to node name."""

import csv
import requests


def create_hierarchy_dict(file: str, *, timeout: float = 30.0) -> dict:
    """
    Builds a mapping from each ISCO-08 unit code to its weighted ancestor codes.

    Every row of the CSV contributes one entry keyed by the zero-padded 4-digit
    unit code. The value is a dictionary of the code's three ancestors, where a
    closer ancestor gets a higher weight: the minor group (first 3 digits) 0.75,
    the sub-major group (first 2 digits) 0.5 and the major group (first digit) 0.25.
    Rows whose "unit" cell is missing or blank are skipped. If a unit code
    occurs more than once, the last occurrence wins.

    Args:
    - file: A string representing the path to the CSV file containing the 4-digit ISCO-08 codes. It can be a local path or a web URL.
      The CSV must have a header row with a "unit" column.
    - timeout: Seconds to wait for the server when `file` is a URL (keyword-only). Ignored for local files.

    Returns:
    - A dictionary where keys are ISCO-08 unit codes and values are dictionaries of their parent codes with weights.

    Raises:
    - KeyError: If the CSV has data rows but no "unit" column.
    - requests.RequestException: If the download fails, times out, or the server answers with an HTTP error status.
    - OSError: If the local file cannot be opened.
    """
    if file.startswith(("http://", "https://")):
        # A timeout keeps a stalled server from hanging the caller forever, and
        # raise_for_status() stops an HTTP error page from being parsed as CSV.
        response = requests.get(file, timeout=timeout)
        response.raise_for_status()
        # Keep line endings so quoted fields spanning several lines survive,
        # matching what readlines() yields for local files.
        lines = response.text.splitlines(keepends=True)
    else:
        # newline="" leaves line-ending handling to the csv module.
        with open(file, newline="") as csvfile:
            lines = csvfile.readlines()

    isco_hierarchy = {}
    for row in csv.DictReader(lines):
        # DictReader fills absent trailing cells with None; treat that like a blank cell.
        raw_unit = (row["unit"] or "").strip()
        if not raw_unit:
            # Without this guard a blank cell would be padded into a bogus "0000" code.
            continue

        # Restore leading zeros that are lost when codes were stored as numbers (e.g. 110 -> 0110).
        unit_code = raw_unit.zfill(4)
        minor_code = unit_code[:3]
        sub_major_code = unit_code[:2]
        major_code = unit_code[:1]

        # Store ancestors with their weights, higher for closer ancestors
        isco_hierarchy[unit_code] = {
            minor_code: 0.75,
            sub_major_code: 0.5,
            major_code: 0.25,
        }

    return isco_hierarchy


# Example usage:
# hierarchy_dict = create_hierarchy_dict("ISCO_structure.csv")
# print(hierarchy_dict)