File size: 1,982 Bytes
2f044c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import json
import numpy as np
from typing import Iterable


class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to built-in Python types.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        The json module calls this hook only for objects it cannot
        serialize on its own.

        Args:
            obj: The object the standard encoder could not handle.

        Returns:
            An ``int`` for NumPy integers, a ``float`` for NumPy floats, a
            ``bool`` for NumPy booleans, or a (nested) ``list`` for arrays.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # np.bool_ is neither an np.integer nor a Python bool, so it needs
        # its own conversion.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


class RelikReaderSample:
    """Attribute-style wrapper around the fields of a single sample.

    The keyword arguments given to the constructor are kept in the ``_d``
    dictionary and exposed as attributes. Reading a name that is neither a
    regular instance attribute nor a key of ``_d`` yields ``None`` instead
    of raising ``AttributeError``.
    """

    def __init__(self, **kwargs):
        # Go through object.__setattr__: the custom __setattr__ below reads
        # self._d, which does not exist yet at this point.
        super().__setattr__("_d", kwargs)

    def __getattr__(self, item: str):
        """Resolve a name that normal attribute lookup did not find.

        Args:
            item: Name of the requested attribute.

        Returns:
            The value stored under ``item`` in ``_d``, or ``None`` when
            there is no such key.

        Raises:
            AttributeError: For dunder names and for ``_d`` itself.
        """
        # Dunder names are most likely protocol probes from Python or its
        # libraries (such as __deepcopy__ for copy), so follow the standard
        # behavior for them. "_d" only reaches this method when the instance
        # has no field dict yet (e.g. it was created with __new__); reading
        # self._d below would then call this method again and recurse
        # without end.
        if item == "_d" or (item.startswith("__") and item.endswith("__")):
            raise AttributeError(item)
        return self._d.get(item)

    def __setattr__(self, key: str, value) -> None:
        """Assign ``value`` to the field or attribute named ``key``.

        A name that is already a key of ``_d`` is updated there. Any other
        name becomes a regular instance attribute and is therefore not part
        of what ``to_jsons`` serializes.
        """
        if key in self._d:
            self._d[key] = value
        else:
            super().__setattr__(key, value)

    def to_jsons(self) -> str:
        """Serialize the sample's fields to a JSON string.

        When the sample has a ``predicted_window_labels`` field, that field
        is rebuilt from ``predicted_window_labels_chars`` (each entry must
        unpack into exactly three values) and ``span_title_probabilities``
        is left out. Otherwise all fields are dumped unchanged. NumPy values
        are converted by ``NpEncoder`` in both cases.

        Returns:
            The JSON text.

        Raises:
            TypeError: If ``predicted_window_labels`` is present but
                ``predicted_window_labels_chars`` is not; the missing field
                reads as ``None``, which cannot be iterated.
        """
        if "predicted_window_labels" not in self._d:
            return json.dumps(self._d, cls=NpEncoder)

        skipped = ("predicted_window_labels", "span_title_probabilities")
        new_obj = {k: v for k, v in self._d.items() if k not in skipped}
        new_obj["predicted_window_labels"] = [
            [ss, se, pred_title]
            for (ss, se, pred_title) in self.predicted_window_labels_chars
        ]
        # Serialize here as well, so that both branches return the string
        # promised by the signature rather than a raw dict.
        return json.dumps(new_obj, cls=NpEncoder)


def load_relik_reader_samples(path: str) -> Iterable[RelikReaderSample]:
    """Lazily read a JSON Lines file, yielding one sample per record.

    Args:
        path: Location of a file holding one JSON object per line.

    Yields:
        A ``RelikReaderSample`` built from the keys of each JSON object.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        TypeError: If a line decodes to something other than an object,
            since its content is expanded as keyword arguments.
    """
    # Read as UTF-8 explicitly so that decoding does not depend on the
    # platform's default locale encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing in json.loads.
                continue
            yield RelikReaderSample(**json.loads(line))