from dataclasses import dataclass
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np

from inference.core.entities.types import DatasetID, WorkspaceID
from inference.core.exceptions import ActiveLearningConfigurationDecodingError

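# Aliases that document the domain meaning of the plain built-in types used below.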
LocalImageIdentifier = str
PredictionType = str
Prediction = dict
SerialisedPrediction = str
PredictionFileType = str


@dataclass(frozen=True)
class ImageDimensions:
    height: int
    width: int

    def to_hw(self) -> Tuple[int, int]:
        return self.height, self.width

    def to_wh(self) -> Tuple[int, int]:
        return self.width, self.height


@dataclass(frozen=True)
class SamplingMethod:
    name: str
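    # Predicate applied to (image, prediction, prediction type); returning True is
    # understood as "select this datapoint for registration".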
    sample: Callable[[np.ndarray, Prediction, PredictionType], bool]


class BatchReCreationInterval(Enum):
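    """How often a fresh batch is to be created for newly sampled images."""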
    NEVER = "never"
    DAILY = "daily"
    WEEKLY = "weekly"
    MONTHLY = "monthly"


class StrategyLimitType(Enum):
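    """Time window over which a sampling strategy's limit is counted."""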
    MINUTELY = "minutely"
    HOURLY = "hourly"
    DAILY = "daily"


@dataclass(frozen=True)
class StrategyLimit:
    limit_type: StrategyLimitType
    value: int

    @classmethod
    def from_dict(cls, specification: dict) -> "StrategyLimit":
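        """Parse a limit specification such as {"type": "daily", "value": 100}."""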
        return cls(
            limit_type=StrategyLimitType(specification["type"]),
            value=specification["value"],
        )


@dataclass(frozen=True)
class ActiveLearningConfiguration:
    max_image_size: Optional[ImageDimensions]
    jpeg_compression_level: int
    persist_predictions: bool
    sampling_methods: List[SamplingMethod]
    batches_name_prefix: str
    batch_recreation_interval: BatchReCreationInterval
    max_batch_images: Optional[int]
    workspace_id: WorkspaceID
    dataset_id: DatasetID
    model_id: str
    strategies_limits: Dict[str, List[StrategyLimit]]
    tags: List[str]
    strategies_tags: Dict[str, List[str]]

    @classmethod
    def init(
        cls,
        roboflow_api_configuration: Dict[str, Any],
        sampling_methods: List[SamplingMethod],
        workspace_id: WorkspaceID,
        dataset_id: DatasetID,
        model_id: str,
    ) -> "ActiveLearningConfiguration":
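        """Build a configuration from the raw dict served by the Roboflow API.

        The expected shape, inferred from the parsing below, is roughly:

            {
                "max_image_size": [height, width],   # optional
                "jpeg_compression_level": 95,        # optional, defaults to 95
                "persist_predictions": True,
                "sampling_strategies": [
                    {
                        "name": "...",
                        "limits": [{"type": "minutely" | "hourly" | "daily", "value": ...}],
                        "tags": ["..."],             # optional
                    },
                ],
                "batching_strategy": {
                    "batches_name_prefix": "...",
                    "recreation_interval": "never" | "daily" | "weekly" | "monthly",
                    "max_batch_images": ...,         # optional
                },
                "tags": ["..."],                     # optional
            }

        Raises ActiveLearningConfigurationDecodingError if required keys are
        missing or enum values cannot be parsed.
        """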
        try:
            max_image_size = roboflow_api_configuration.get("max_image_size")
            if max_image_size is not None:
                max_image_size = ImageDimensions(
                    height=roboflow_api_configuration["max_image_size"][0],
                    width=roboflow_api_configuration["max_image_size"][1],
                )
            strategies_limits = {
                strategy["name"]: [
                    StrategyLimit.from_dict(specification=specification)
                    for specification in strategy.get("limits", [])
                ]
                for strategy in roboflow_api_configuration["sampling_strategies"]
            }
            strategies_tags = {
                strategy["name"]: strategy.get("tags", [])
                for strategy in roboflow_api_configuration["sampling_strategies"]
            }
            return cls(
                max_image_size=max_image_size,
                jpeg_compression_level=roboflow_api_configuration.get(
                    "jpeg_compression_level", 95
                ),
                persist_predictions=roboflow_api_configuration["persist_predictions"],
                sampling_methods=sampling_methods,
                batches_name_prefix=roboflow_api_configuration["batching_strategy"][
                    "batches_name_prefix"
                ],
                batch_recreation_interval=BatchReCreationInterval(
                    roboflow_api_configuration["batching_strategy"][
                        "recreation_interval"
                    ]
                ),
                max_batch_images=roboflow_api_configuration["batching_strategy"].get(
                    "max_batch_images"
                ),
                workspace_id=workspace_id,
                dataset_id=dataset_id,
                model_id=model_id,
                strategies_limits=strategies_limits,
                tags=roboflow_api_configuration.get("tags", []),
                strategies_tags=strategies_tags,
            )
        except (KeyError, ValueError) as e:
            raise ActiveLearningConfigurationDecodingError(
                f"Failed to initialise Active Learning configuration. Cause: {str(e)}"
            ) from e


@dataclass(frozen=True)
class RoboflowProjectMetadata:
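    """Identifiers of a Roboflow project together with its raw Active Learning config."""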
    dataset_id: DatasetID
    version_id: str
    workspace_id: WorkspaceID
    dataset_type: str
    active_learning_configuration: dict