Upload 422 files
This view is limited to 50 files because it contains too many changes. See raw diff.
- inference/__init__.py +3 -0
- inference/__pycache__/__init__.cpython-310.pyc +0 -0
- inference/core/__init__.py +52 -0
- inference/core/__pycache__/__init__.cpython-310.pyc +0 -0
- inference/core/__pycache__/constants.cpython-310.pyc +0 -0
- inference/core/__pycache__/env.cpython-310.pyc +0 -0
- inference/core/__pycache__/exceptions.cpython-310.pyc +0 -0
- inference/core/__pycache__/logger.cpython-310.pyc +0 -0
- inference/core/__pycache__/nms.cpython-310.pyc +0 -0
- inference/core/__pycache__/roboflow_api.cpython-310.pyc +0 -0
- inference/core/__pycache__/usage.cpython-310.pyc +0 -0
- inference/core/__pycache__/version.cpython-310.pyc +0 -0
- inference/core/active_learning/__init__.py +0 -0
- inference/core/active_learning/__pycache__/__init__.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/accounting.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/batching.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/cache_operations.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/configuration.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/core.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/entities.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/middlewares.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/post_processing.cpython-310.pyc +0 -0
- inference/core/active_learning/__pycache__/utils.cpython-310.pyc +0 -0
- inference/core/active_learning/accounting.py +96 -0
- inference/core/active_learning/batching.py +26 -0
- inference/core/active_learning/cache_operations.py +293 -0
- inference/core/active_learning/configuration.py +203 -0
- inference/core/active_learning/core.py +219 -0
- inference/core/active_learning/entities.py +141 -0
- inference/core/active_learning/middlewares.py +307 -0
- inference/core/active_learning/post_processing.py +128 -0
- inference/core/active_learning/samplers/__init__.py +0 -0
- inference/core/active_learning/samplers/__pycache__/__init__.cpython-310.pyc +0 -0
- inference/core/active_learning/samplers/__pycache__/close_to_threshold.cpython-310.pyc +0 -0
- inference/core/active_learning/samplers/__pycache__/contains_classes.cpython-310.pyc +0 -0
- inference/core/active_learning/samplers/__pycache__/number_of_detections.cpython-310.pyc +0 -0
- inference/core/active_learning/samplers/__pycache__/random.cpython-310.pyc +0 -0
- inference/core/active_learning/samplers/close_to_threshold.py +227 -0
- inference/core/active_learning/samplers/contains_classes.py +58 -0
- inference/core/active_learning/samplers/number_of_detections.py +107 -0
- inference/core/active_learning/samplers/random.py +37 -0
- inference/core/active_learning/utils.py +16 -0
- inference/core/cache/__init__.py +22 -0
- inference/core/cache/__pycache__/__init__.cpython-310.pyc +0 -0
- inference/core/cache/__pycache__/base.cpython-310.pyc +0 -0
- inference/core/cache/__pycache__/memory.cpython-310.pyc +0 -0
- inference/core/cache/__pycache__/model_artifacts.cpython-310.pyc +0 -0
- inference/core/cache/__pycache__/redis.cpython-310.pyc +0 -0
- inference/core/cache/__pycache__/serializers.cpython-310.pyc +0 -0
- inference/core/cache/base.py +130 -0
inference/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from inference.core.interfaces.stream.stream import Stream  # isort:skip
+from inference.core.interfaces.stream.inference_pipeline import InferencePipeline
+from inference.models.utils import get_roboflow_model
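Usage note (not part of the diff): the three exports above are the package's public entry points. A minimal sketch of loading a model through get_roboflow_model, assuming it accepts a model id and an API key (both values below are placeholders):

from inference.models.utils import get_roboflow_model

# Hypothetical identifiers - substitute a real Roboflow model id and API key.
model = get_roboflow_model(model_id="my-dataset/1", api_key="<ROBOFLOW_API_KEY>")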
inference/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (399 Bytes).
inference/core/__init__.py
ADDED
@@ -0,0 +1,52 @@
+import threading
+import time
+
+import requests
+
+from inference.core.env import DISABLE_VERSION_CHECK, VERSION_CHECK_MODE
+from inference.core.logger import logger
+from inference.core.version import __version__
+
+latest_release = None
+last_checked = 0
+cache_duration = 86400  # 24 hours
+log_frequency = 300  # 5 minutes
+
+
+def get_latest_release_version():
+    global latest_release, last_checked
+    now = time.time()
+    if latest_release is None or now - last_checked > cache_duration:
+        try:
+            logger.debug("Checking for latest inference release version...")
+            response = requests.get(
+                "https://api.github.com/repos/roboflow/inference/releases/latest"
+            )
+            response.raise_for_status()
+            latest_release = response.json()["tag_name"].lstrip("v")
+            last_checked = now
+        except requests.exceptions.RequestException:
+            pass
+
+
+def check_latest_release_against_current():
+    get_latest_release_version()
+    if latest_release is not None and latest_release != __version__:
+        logger.warning(
+            f"Your inference package version {__version__} is out of date! Please upgrade to version {latest_release} of inference for the latest features and bug fixes by running `pip install --upgrade inference`."
+        )
+
+
+def check_latest_release_against_current_continuous():
+    while True:
+        check_latest_release_against_current()
+        time.sleep(log_frequency)
+
+
+if not DISABLE_VERSION_CHECK:
+    if VERSION_CHECK_MODE == "continuous":
+        t = threading.Thread(target=check_latest_release_against_current_continuous)
+        t.daemon = True
+        t.start()
+    else:
+        check_latest_release_against_current()
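Usage note (not part of the diff): the module above queries GitHub at most once per cache_duration (24 hours) and, in continuous mode, re-checks from a daemon thread every log_frequency seconds. A minimal sketch of invoking the check directly:

from inference.core import check_latest_release_against_current

# Performs at most one GitHub API request per 24-hour cache window and
# logs a warning when the installed version lags the latest release.
check_latest_release_against_current()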
inference/core/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (1.73 kB).

inference/core/__pycache__/constants.cpython-310.pyc
ADDED
Binary file (371 Bytes).

inference/core/__pycache__/env.cpython-310.pyc
ADDED
Binary file (6.87 kB).

inference/core/__pycache__/exceptions.cpython-310.pyc
ADDED
Binary file (6.17 kB).

inference/core/__pycache__/logger.cpython-310.pyc
ADDED
Binary file (551 Bytes).

inference/core/__pycache__/nms.cpython-310.pyc
ADDED
Binary file (4.74 kB).

inference/core/__pycache__/roboflow_api.cpython-310.pyc
ADDED
Binary file (10.1 kB).

inference/core/__pycache__/usage.cpython-310.pyc
ADDED
Binary file (1.85 kB).

inference/core/__pycache__/version.cpython-310.pyc
ADDED
Binary file (250 Bytes).

inference/core/active_learning/__init__.py
ADDED
File without changes.

inference/core/active_learning/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (192 Bytes).

inference/core/active_learning/__pycache__/accounting.cpython-310.pyc
ADDED
Binary file (2.76 kB).

inference/core/active_learning/__pycache__/batching.cpython-310.pyc
ADDED
Binary file (921 Bytes).

inference/core/active_learning/__pycache__/cache_operations.cpython-310.pyc
ADDED
Binary file (5.9 kB).

inference/core/active_learning/__pycache__/configuration.cpython-310.pyc
ADDED
Binary file (5.3 kB).

inference/core/active_learning/__pycache__/core.cpython-310.pyc
ADDED
Binary file (5.2 kB).

inference/core/active_learning/__pycache__/entities.cpython-310.pyc
ADDED
Binary file (4.72 kB).

inference/core/active_learning/__pycache__/middlewares.cpython-310.pyc
ADDED
Binary file (8.68 kB).

inference/core/active_learning/__pycache__/post_processing.cpython-310.pyc
ADDED
Binary file (2.94 kB).

inference/core/active_learning/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (852 Bytes).
inference/core/active_learning/accounting.py
ADDED
@@ -0,0 +1,96 @@
+from typing import List, Optional
+
+from inference.core.entities.types import DatasetID, WorkspaceID
+from inference.core.roboflow_api import (
+    get_roboflow_labeling_batches,
+    get_roboflow_labeling_jobs,
+)
+
+
+def image_can_be_submitted_to_batch(
+    batch_name: str,
+    workspace_id: WorkspaceID,
+    dataset_id: DatasetID,
+    max_batch_images: Optional[int],
+    api_key: str,
+) -> bool:
+    """Check if an image can be submitted to a batch.
+
+    Args:
+        batch_name: Name of the batch.
+        workspace_id: ID of the workspace.
+        dataset_id: ID of the dataset.
+        max_batch_images: Maximum number of images allowed in the batch.
+        api_key: API key to use for the request.
+
+    Returns:
+        True if the image can be submitted to the batch, False otherwise.
+    """
+    if max_batch_images is None:
+        return True
+    labeling_batches = get_roboflow_labeling_batches(
+        api_key=api_key,
+        workspace_id=workspace_id,
+        dataset_id=dataset_id,
+    )
+    matching_labeling_batch = get_matching_labeling_batch(
+        all_labeling_batches=labeling_batches["batches"],
+        batch_name=batch_name,
+    )
+    if matching_labeling_batch is None:
+        return max_batch_images > 0
+    batch_images_under_labeling = 0
+    if matching_labeling_batch["numJobs"] > 0:
+        labeling_jobs = get_roboflow_labeling_jobs(
+            api_key=api_key, workspace_id=workspace_id, dataset_id=dataset_id
+        )
+        batch_images_under_labeling = get_images_in_labeling_jobs_of_specific_batch(
+            all_labeling_jobs=labeling_jobs["jobs"],
+            batch_id=matching_labeling_batch["id"],
+        )
+    total_batch_images = matching_labeling_batch["images"] + batch_images_under_labeling
+    return max_batch_images > total_batch_images
+
+
+def get_matching_labeling_batch(
+    all_labeling_batches: List[dict],
+    batch_name: str,
+) -> Optional[dict]:
+    """Get the matching labeling batch.
+
+    Args:
+        all_labeling_batches: All labeling batches.
+        batch_name: Name of the batch.
+
+    Returns:
+        The matching labeling batch if found, None otherwise.
+
+    """
+    matching_batch = None
+    for labeling_batch in all_labeling_batches:
+        if labeling_batch["name"] == batch_name:
+            matching_batch = labeling_batch
+            break
+    return matching_batch
+
+
+def get_images_in_labeling_jobs_of_specific_batch(
+    all_labeling_jobs: List[dict],
+    batch_id: str,
+) -> int:
+    """Get the number of images in labeling jobs of a specific batch.
+
+    Args:
+        all_labeling_jobs: All labeling jobs.
+        batch_id: ID of the batch.
+
+    Returns:
+        The number of images in labeling jobs of the batch.
+
+    """
+
+    matching_jobs = []
+    for labeling_job in all_labeling_jobs:
+        if batch_id in labeling_job["sourceBatch"]:
+            matching_jobs.append(labeling_job)
+    return sum(job["numImages"] for job in matching_jobs)
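Usage note (not part of the diff): the two helper functions are pure, so their behavior can be shown with toy payloads; the dictionary fields below mirror the ones the diff accesses:

from inference.core.active_learning.accounting import (
    get_images_in_labeling_jobs_of_specific_batch,
    get_matching_labeling_batch,
)

batches = [{"name": "al_batch", "id": "batch-1", "images": 10, "numJobs": 1}]
jobs = [{"sourceBatch": ["batch-1"], "numImages": 5}]

matching = get_matching_labeling_batch(all_labeling_batches=batches, batch_name="al_batch")
under_labeling = get_images_in_labeling_jobs_of_specific_batch(
    all_labeling_jobs=jobs, batch_id="batch-1"
)
# 10 images already in the batch plus 5 under labeling are compared against max_batch_images.
print(matching["images"] + under_labeling)  # 15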
inference/core/active_learning/batching.py
ADDED
@@ -0,0 +1,26 @@
+from inference.core.active_learning.entities import (
+    ActiveLearningConfiguration,
+    BatchReCreationInterval,
+)
+from inference.core.active_learning.utils import (
+    generate_start_timestamp_for_this_month,
+    generate_start_timestamp_for_this_week,
+    generate_today_timestamp,
+)
+
+RECREATION_INTERVAL2TIMESTAMP_GENERATOR = {
+    BatchReCreationInterval.DAILY: generate_today_timestamp,
+    BatchReCreationInterval.WEEKLY: generate_start_timestamp_for_this_week,
+    BatchReCreationInterval.MONTHLY: generate_start_timestamp_for_this_month,
+}
+
+
+def generate_batch_name(configuration: ActiveLearningConfiguration) -> str:
+    batch_name = configuration.batches_name_prefix
+    if configuration.batch_recreation_interval is BatchReCreationInterval.NEVER:
+        return batch_name
+    timestamp_generator = RECREATION_INTERVAL2TIMESTAMP_GENERATOR[
+        configuration.batch_recreation_interval
+    ]
+    timestamp = timestamp_generator()
+    return f"{batch_name}_{timestamp}"
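Usage note (not part of the diff): generate_batch_name only reads batches_name_prefix and batch_recreation_interval, so a duck-typed stand-in is enough to illustrate it; SimpleNamespace here is a hypothetical substitute for a full ActiveLearningConfiguration:

from types import SimpleNamespace

from inference.core.active_learning.batching import generate_batch_name
from inference.core.active_learning.entities import BatchReCreationInterval

# Stand-in exposing only the two attributes generate_batch_name reads.
config = SimpleNamespace(
    batches_name_prefix="active_learning",
    batch_recreation_interval=BatchReCreationInterval.DAILY,
)
print(generate_batch_name(configuration=config))  # e.g. "active_learning_<today's timestamp>"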
inference/core/active_learning/cache_operations.py
ADDED
@@ -0,0 +1,293 @@
+import threading
+from contextlib import contextmanager
+from datetime import datetime
+from typing import Generator, List, Optional, OrderedDict, Union
+
+import redis.lock
+
+from inference.core import logger
+from inference.core.active_learning.entities import StrategyLimit, StrategyLimitType
+from inference.core.active_learning.utils import TIMESTAMP_FORMAT
+from inference.core.cache.base import BaseCache
+
+MAX_LOCK_TIME = 5
+SECONDS_IN_HOUR = 60 * 60
+USAGE_KEY = "usage"
+
+LIMIT_TYPE2KEY_INFIX_GENERATOR = {
+    StrategyLimitType.MINUTELY: lambda: f"minute_{datetime.utcnow().minute}",
+    StrategyLimitType.HOURLY: lambda: f"hour_{datetime.utcnow().hour}",
+    StrategyLimitType.DAILY: lambda: f"day_{datetime.utcnow().strftime(TIMESTAMP_FORMAT)}",
+}
+LIMIT_TYPE2KEY_EXPIRATION = {
+    StrategyLimitType.MINUTELY: 120,
+    StrategyLimitType.HOURLY: 2 * SECONDS_IN_HOUR,
+    StrategyLimitType.DAILY: 25 * SECONDS_IN_HOUR,
+}
+
+
+def use_credit_of_matching_strategy(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    matching_strategies_limits: OrderedDict[str, List[StrategyLimit]],
+) -> Optional[str]:
+    # In scope of this function, cache keys updates regarding usage limits for
+    # specific :workspace and :project are locked - to ensure increment to be done atomically
+    # Limits are accounted at the moment of registration - which may introduce inaccuracy
+    # given that registration is postponed from prediction
+    # Returns: strategy with spare credit if found - else None
+    with lock_limits(cache=cache, workspace=workspace, project=project):
+        strategy_with_spare_credit = find_strategy_with_spare_usage_credit(
+            cache=cache,
+            workspace=workspace,
+            project=project,
+            matching_strategies_limits=matching_strategies_limits,
+        )
+        if strategy_with_spare_credit is None:
+            return None
+        consume_strategy_limits_usage_credit(
+            cache=cache,
+            workspace=workspace,
+            project=project,
+            strategy_name=strategy_with_spare_credit,
+        )
+        return strategy_with_spare_credit
+
+
+def return_strategy_credit(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+) -> None:
+    # In scope of this function, cache keys updates regarding usage limits for
+    # specific :workspace and :project are locked - to ensure decrement to be done atomically
+    # Returning strategy is a bit naive (we may add to a pool of credits from the next period - but only
+    # if we have previously taken from the previous one and some credits are used in the new pool) -
+    # in favour of easier implementation.
+    with lock_limits(cache=cache, workspace=workspace, project=project):
+        return_strategy_limits_usage_credit(
+            cache=cache,
+            workspace=workspace,
+            project=project,
+            strategy_name=strategy_name,
+        )
+
+
+@contextmanager
+def lock_limits(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+) -> Generator[Union[threading.Lock, redis.lock.Lock], None, None]:
+    limits_lock_key = generate_cache_key_for_active_learning_usage_lock(
+        workspace=workspace,
+        project=project,
+    )
+    with cache.lock(key=limits_lock_key, expire=MAX_LOCK_TIME) as lock:
+        yield lock
+
+
+def find_strategy_with_spare_usage_credit(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    matching_strategies_limits: OrderedDict[str, List[StrategyLimit]],
+) -> Optional[str]:
+    for strategy_name, strategy_limits in matching_strategies_limits.items():
+        rejected_by_strategy = (
+            datapoint_should_be_rejected_based_on_strategy_usage_limits(
+                cache=cache,
+                workspace=workspace,
+                project=project,
+                strategy_name=strategy_name,
+                strategy_limits=strategy_limits,
+            )
+        )
+        if not rejected_by_strategy:
+            return strategy_name
+    return None
+
+
+def datapoint_should_be_rejected_based_on_strategy_usage_limits(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+    strategy_limits: List[StrategyLimit],
+) -> bool:
+    for strategy_limit in strategy_limits:
+        limit_reached = datapoint_should_be_rejected_based_on_limit_usage(
+            cache=cache,
+            workspace=workspace,
+            project=project,
+            strategy_name=strategy_name,
+            strategy_limit=strategy_limit,
+        )
+        if limit_reached:
+            logger.debug(
+                f"Violated Active Learning strategy limit: {strategy_limit.limit_type.name} "
+                f"with value {strategy_limit.value} for sampling strategy: {strategy_name}."
+            )
+            return True
+    return False
+
+
+def datapoint_should_be_rejected_based_on_limit_usage(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+    strategy_limit: StrategyLimit,
+) -> bool:
+    current_usage = get_current_strategy_limit_usage(
+        cache=cache,
+        workspace=workspace,
+        project=project,
+        strategy_name=strategy_name,
+        limit_type=strategy_limit.limit_type,
+    )
+    if current_usage is None:
+        current_usage = 0
+    return current_usage >= strategy_limit.value
+
+
+def consume_strategy_limits_usage_credit(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+) -> None:
+    for limit_type in StrategyLimitType:
+        consume_strategy_limit_usage_credit(
+            cache=cache,
+            workspace=workspace,
+            project=project,
+            strategy_name=strategy_name,
+            limit_type=limit_type,
+        )
+
+
+def consume_strategy_limit_usage_credit(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+    limit_type: StrategyLimitType,
+) -> None:
+    current_value = get_current_strategy_limit_usage(
+        cache=cache,
+        limit_type=limit_type,
+        workspace=workspace,
+        project=project,
+        strategy_name=strategy_name,
+    )
+    if current_value is None:
+        current_value = 0
+    current_value += 1
+    set_current_strategy_limit_usage(
+        current_value=current_value,
+        cache=cache,
+        limit_type=limit_type,
+        workspace=workspace,
+        project=project,
+        strategy_name=strategy_name,
+    )
+
+
+def return_strategy_limits_usage_credit(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+) -> None:
+    for limit_type in StrategyLimitType:
+        return_strategy_limit_usage_credit(
+            cache=cache,
+            workspace=workspace,
+            project=project,
+            strategy_name=strategy_name,
+            limit_type=limit_type,
+        )
+
+
+def return_strategy_limit_usage_credit(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+    limit_type: StrategyLimitType,
+) -> None:
+    current_value = get_current_strategy_limit_usage(
+        cache=cache,
+        limit_type=limit_type,
+        workspace=workspace,
+        project=project,
+        strategy_name=strategy_name,
+    )
+    if current_value is None:
+        return None
+    current_value = max(current_value - 1, 0)
+    set_current_strategy_limit_usage(
+        current_value=current_value,
+        cache=cache,
+        limit_type=limit_type,
+        workspace=workspace,
+        project=project,
+        strategy_name=strategy_name,
+    )
+
+
+def get_current_strategy_limit_usage(
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+    limit_type: StrategyLimitType,
+) -> Optional[int]:
+    usage_key = generate_cache_key_for_active_learning_usage(
+        limit_type=limit_type,
+        workspace=workspace,
+        project=project,
+        strategy_name=strategy_name,
+    )
+    value = cache.get(usage_key)
+    if value is None:
+        return value
+    return value[USAGE_KEY]
+
+
+def set_current_strategy_limit_usage(
+    current_value: int,
+    cache: BaseCache,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+    limit_type: StrategyLimitType,
+) -> None:
+    usage_key = generate_cache_key_for_active_learning_usage(
+        limit_type=limit_type,
+        workspace=workspace,
+        project=project,
+        strategy_name=strategy_name,
+    )
+    expire = LIMIT_TYPE2KEY_EXPIRATION[limit_type]
+    cache.set(key=usage_key, value={USAGE_KEY: current_value}, expire=expire)  # type: ignore
+
+
+def generate_cache_key_for_active_learning_usage_lock(
+    workspace: str,
+    project: str,
+) -> str:
+    return f"active_learning:usage:{workspace}:{project}:usage:lock"
+
+
+def generate_cache_key_for_active_learning_usage(
+    limit_type: StrategyLimitType,
+    workspace: str,
+    project: str,
+    strategy_name: str,
+) -> str:
+    time_infix = LIMIT_TYPE2KEY_INFIX_GENERATOR[limit_type]()
+    return f"active_learning:usage:{workspace}:{project}:{strategy_name}:{time_infix}"
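Usage note (not part of the diff): the two key-generation functions at the bottom of the module are pure string formatting, so the cache key layout can be shown directly:

from inference.core.active_learning.cache_operations import (
    generate_cache_key_for_active_learning_usage,
    generate_cache_key_for_active_learning_usage_lock,
)
from inference.core.active_learning.entities import StrategyLimitType

lock_key = generate_cache_key_for_active_learning_usage_lock(
    workspace="my-workspace", project="my-project"
)
usage_key = generate_cache_key_for_active_learning_usage(
    limit_type=StrategyLimitType.HOURLY,
    workspace="my-workspace",
    project="my-project",
    strategy_name="default_strategy",
)
# lock_key  == "active_learning:usage:my-workspace:my-project:usage:lock"
# usage_key == "active_learning:usage:my-workspace:my-project:default_strategy:hour_<current UTC hour>"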
inference/core/active_learning/configuration.py
ADDED
@@ -0,0 +1,203 @@
+import hashlib
+from dataclasses import asdict
+from typing import Any, Dict, List, Optional
+
+from inference.core import logger
+from inference.core.active_learning.entities import (
+    ActiveLearningConfiguration,
+    RoboflowProjectMetadata,
+    SamplingMethod,
+)
+from inference.core.active_learning.samplers.close_to_threshold import (
+    initialize_close_to_threshold_sampling,
+)
+from inference.core.active_learning.samplers.contains_classes import (
+    initialize_classes_based_sampling,
+)
+from inference.core.active_learning.samplers.number_of_detections import (
+    initialize_detections_number_based_sampling,
+)
+from inference.core.active_learning.samplers.random import initialize_random_sampling
+from inference.core.cache.base import BaseCache
+from inference.core.exceptions import (
+    ActiveLearningConfigurationDecodingError,
+    ActiveLearningConfigurationError,
+    RoboflowAPINotAuthorizedError,
+    RoboflowAPINotNotFoundError,
+)
+from inference.core.roboflow_api import (
+    get_roboflow_active_learning_configuration,
+    get_roboflow_dataset_type,
+    get_roboflow_workspace,
+)
+from inference.core.utils.roboflow import get_model_id_chunks
+
+TYPE2SAMPLING_INITIALIZERS = {
+    "random": initialize_random_sampling,
+    "close_to_threshold": initialize_close_to_threshold_sampling,
+    "classes_based": initialize_classes_based_sampling,
+    "detections_number_based": initialize_detections_number_based_sampling,
+}
+ACTIVE_LEARNING_CONFIG_CACHE_EXPIRE = 900  # 15 min
+
+
+def prepare_active_learning_configuration(
+    api_key: str,
+    model_id: str,
+    cache: BaseCache,
+) -> Optional[ActiveLearningConfiguration]:
+    project_metadata = get_roboflow_project_metadata(
+        api_key=api_key,
+        model_id=model_id,
+        cache=cache,
+    )
+    if not project_metadata.active_learning_configuration.get("enabled", False):
+        return None
+    logger.info(
+        f"Configuring active learning for workspace: {project_metadata.workspace_id}, "
+        f"project: {project_metadata.dataset_id} of type: {project_metadata.dataset_type}. "
+        f"AL configuration: {project_metadata.active_learning_configuration}"
+    )
+    return initialise_active_learning_configuration(
+        project_metadata=project_metadata,
+    )
+
+
+def prepare_active_learning_configuration_inplace(
+    api_key: str,
+    model_id: str,
+    active_learning_configuration: Optional[dict],
+) -> Optional[ActiveLearningConfiguration]:
+    if (
+        active_learning_configuration is None
+        or active_learning_configuration.get("enabled", False) is False
+    ):
+        return None
+    dataset_id, version_id = get_model_id_chunks(model_id=model_id)
+    workspace_id = get_roboflow_workspace(api_key=api_key)
+    dataset_type = get_roboflow_dataset_type(
+        api_key=api_key,
+        workspace_id=workspace_id,
+        dataset_id=dataset_id,
+    )
+    project_metadata = RoboflowProjectMetadata(
+        dataset_id=dataset_id,
+        version_id=version_id,
+        workspace_id=workspace_id,
+        dataset_type=dataset_type,
+        active_learning_configuration=active_learning_configuration,
+    )
+    return initialise_active_learning_configuration(
+        project_metadata=project_metadata,
+    )
+
+
+def get_roboflow_project_metadata(
+    api_key: str,
+    model_id: str,
+    cache: BaseCache,
+) -> RoboflowProjectMetadata:
+    logger.info(f"Fetching active learning configuration.")
+    config_cache_key = construct_cache_key_for_active_learning_config(
+        api_key=api_key, model_id=model_id
+    )
+    cached_config = cache.get(config_cache_key)
+    if cached_config is not None:
+        logger.info("Found Active Learning configuration in cache.")
+        return parse_cached_roboflow_project_metadata(cached_config=cached_config)
+    dataset_id, version_id = get_model_id_chunks(model_id=model_id)
+    workspace_id = get_roboflow_workspace(api_key=api_key)
+    dataset_type = get_roboflow_dataset_type(
+        api_key=api_key,
+        workspace_id=workspace_id,
+        dataset_id=dataset_id,
+    )
+    try:
+        roboflow_api_configuration = get_roboflow_active_learning_configuration(
+            api_key=api_key, workspace_id=workspace_id, dataset_id=dataset_id
+        )
+    except (RoboflowAPINotAuthorizedError, RoboflowAPINotNotFoundError):
+        # currently backend returns HTTP 404 if dataset does not exist
+        # or workspace_id from api_key indicate that the owner is different,
+        # so in the situation when we query for Universe dataset.
+        # We want the owner of public dataset to be able to set AL configs
+        # and use them, but not other people. At this point it's known
+        # that HTTP 404 means not authorised (which will probably change
+        # in future iteration of backend) - so on both NotAuth and NotFound
+        # errors we assume that we simply cannot use AL with this model and
+        # this api_key.
+        roboflow_api_configuration = {"enabled": False}
+    configuration = RoboflowProjectMetadata(
+        dataset_id=dataset_id,
+        version_id=version_id,
+        workspace_id=workspace_id,
+        dataset_type=dataset_type,
+        active_learning_configuration=roboflow_api_configuration,
+    )
+    cache.set(
+        key=config_cache_key,
+        value=asdict(configuration),
+        expire=ACTIVE_LEARNING_CONFIG_CACHE_EXPIRE,
+    )
+    return configuration
+
+
+def construct_cache_key_for_active_learning_config(api_key: str, model_id: str) -> str:
+    dataset_id = model_id.split("/")[0]
+    api_key_hash = hashlib.md5(api_key.encode("utf-8")).hexdigest()
+    return f"active_learning:configurations:{api_key_hash}:{dataset_id}"
+
+
+def parse_cached_roboflow_project_metadata(
+    cached_config: dict,
+) -> RoboflowProjectMetadata:
+    try:
+        return RoboflowProjectMetadata(**cached_config)
+    except Exception as error:
+        raise ActiveLearningConfigurationDecodingError(
+            f"Failed to initialise Active Learning configuration. Cause: {str(error)}"
+        ) from error
+
+
+def initialise_active_learning_configuration(
+    project_metadata: RoboflowProjectMetadata,
+) -> ActiveLearningConfiguration:
+    sampling_methods = initialize_sampling_methods(
+        sampling_strategies_configs=project_metadata.active_learning_configuration[
+            "sampling_strategies"
+        ],
+    )
+    target_workspace_id = project_metadata.active_learning_configuration.get(
+        "target_workspace", project_metadata.workspace_id
+    )
+    target_dataset_id = project_metadata.active_learning_configuration.get(
+        "target_project", project_metadata.dataset_id
+    )
+    return ActiveLearningConfiguration.init(
+        roboflow_api_configuration=project_metadata.active_learning_configuration,
+        sampling_methods=sampling_methods,
+        workspace_id=target_workspace_id,
+        dataset_id=target_dataset_id,
+        model_id=f"{project_metadata.dataset_id}/{project_metadata.version_id}",
+    )
+
+
+def initialize_sampling_methods(
+    sampling_strategies_configs: List[Dict[str, Any]]
+) -> List[SamplingMethod]:
+    result = []
+    for sampling_strategy_config in sampling_strategies_configs:
+        sampling_type = sampling_strategy_config["type"]
+        if sampling_type not in TYPE2SAMPLING_INITIALIZERS:
+            logger.warn(
+                f"Could not identify sampling method `{sampling_type}` - skipping initialisation."
+            )
+            continue
+        initializer = TYPE2SAMPLING_INITIALIZERS[sampling_type]
+        result.append(initializer(sampling_strategy_config))
+    names = set(m.name for m in result)
+    if len(names) != len(result):
+        raise ActiveLearningConfigurationError(
+            "Detected duplication of Active Learning strategies names."
+        )
+    return result
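Usage note (not part of the diff): configurations are cached per API key and dataset; the key builder hashes the API key with MD5 so the raw secret never appears in cache keys:

from inference.core.active_learning.configuration import (
    construct_cache_key_for_active_learning_config,
)

key = construct_cache_key_for_active_learning_config(
    api_key="dummy-key", model_id="my-dataset/3"
)
# "active_learning:configurations:<md5 hex digest of the api key>:my-dataset"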
inference/core/active_learning/core.py
ADDED
@@ -0,0 +1,219 @@
+from collections import OrderedDict
+from typing import List, Optional, Tuple
+from uuid import uuid4
+
+import numpy as np
+
+from inference.core import logger
+from inference.core.active_learning.cache_operations import (
+    return_strategy_credit,
+    use_credit_of_matching_strategy,
+)
+from inference.core.active_learning.entities import (
+    ActiveLearningConfiguration,
+    ImageDimensions,
+    Prediction,
+    PredictionType,
+    SamplingMethod,
+)
+from inference.core.active_learning.post_processing import (
+    adjust_prediction_to_client_scaling_factor,
+    encode_prediction,
+)
+from inference.core.cache.base import BaseCache
+from inference.core.env import ACTIVE_LEARNING_TAGS
+from inference.core.roboflow_api import (
+    annotate_image_at_roboflow,
+    register_image_at_roboflow,
+)
+from inference.core.utils.image_utils import encode_image_to_jpeg_bytes
+from inference.core.utils.preprocess import downscale_image_keeping_aspect_ratio
+
+
+def execute_sampling(
+    image: np.ndarray,
+    prediction: Prediction,
+    prediction_type: PredictionType,
+    sampling_methods: List[SamplingMethod],
+) -> List[str]:
+    matching_strategies = []
+    for method in sampling_methods:
+        sampling_result = method.sample(image, prediction, prediction_type)
+        if sampling_result:
+            matching_strategies.append(method.name)
+    return matching_strategies
+
+
+def execute_datapoint_registration(
+    cache: BaseCache,
+    matching_strategies: List[str],
+    image: np.ndarray,
+    prediction: Prediction,
+    prediction_type: PredictionType,
+    configuration: ActiveLearningConfiguration,
+    api_key: str,
+    batch_name: str,
+) -> None:
+    local_image_id = str(uuid4())
+    encoded_image, scaling_factor = prepare_image_to_registration(
+        image=image,
+        desired_size=configuration.max_image_size,
+        jpeg_compression_level=configuration.jpeg_compression_level,
+    )
+    prediction = adjust_prediction_to_client_scaling_factor(
+        prediction=prediction,
+        scaling_factor=scaling_factor,
+        prediction_type=prediction_type,
+    )
+    matching_strategies_limits = OrderedDict(
+        (strategy_name, configuration.strategies_limits[strategy_name])
+        for strategy_name in matching_strategies
+    )
+    strategy_with_spare_credit = use_credit_of_matching_strategy(
+        cache=cache,
+        workspace=configuration.workspace_id,
+        project=configuration.dataset_id,
+        matching_strategies_limits=matching_strategies_limits,
+    )
+    if strategy_with_spare_credit is None:
+        logger.debug(f"Limit on Active Learning strategy reached.")
+        return None
+    register_datapoint_at_roboflow(
+        cache=cache,
+        strategy_with_spare_credit=strategy_with_spare_credit,
+        encoded_image=encoded_image,
+        local_image_id=local_image_id,
+        prediction=prediction,
+        prediction_type=prediction_type,
+        configuration=configuration,
+        api_key=api_key,
+        batch_name=batch_name,
+    )
+
+
+def prepare_image_to_registration(
+    image: np.ndarray,
+    desired_size: Optional[ImageDimensions],
+    jpeg_compression_level: int,
+) -> Tuple[bytes, float]:
+    scaling_factor = 1.0
+    if desired_size is not None:
+        height_before_scale = image.shape[0]
+        image = downscale_image_keeping_aspect_ratio(
+            image=image,
+            desired_size=desired_size.to_wh(),
+        )
+        scaling_factor = image.shape[0] / height_before_scale
+    return (
+        encode_image_to_jpeg_bytes(image=image, jpeg_quality=jpeg_compression_level),
+        scaling_factor,
+    )
+
+
+def register_datapoint_at_roboflow(
+    cache: BaseCache,
+    strategy_with_spare_credit: str,
+    encoded_image: bytes,
+    local_image_id: str,
+    prediction: Prediction,
+    prediction_type: PredictionType,
+    configuration: ActiveLearningConfiguration,
+    api_key: str,
+    batch_name: str,
+) -> None:
+    tags = collect_tags(
+        configuration=configuration,
+        sampling_strategy=strategy_with_spare_credit,
+    )
+    roboflow_image_id = safe_register_image_at_roboflow(
+        cache=cache,
+        strategy_with_spare_credit=strategy_with_spare_credit,
+        encoded_image=encoded_image,
+        local_image_id=local_image_id,
+        configuration=configuration,
+        api_key=api_key,
+        batch_name=batch_name,
+        tags=tags,
+    )
+    if is_prediction_registration_forbidden(
+        prediction=prediction,
+        persist_predictions=configuration.persist_predictions,
+        roboflow_image_id=roboflow_image_id,
+    ):
+        return None
+    encoded_prediction, prediction_file_type = encode_prediction(
+        prediction=prediction, prediction_type=prediction_type
+    )
+    _ = annotate_image_at_roboflow(
+        api_key=api_key,
+        dataset_id=configuration.dataset_id,
+        local_image_id=local_image_id,
+        roboflow_image_id=roboflow_image_id,
+        annotation_content=encoded_prediction,
+        annotation_file_type=prediction_file_type,
+        is_prediction=True,
+    )
+
+
+def collect_tags(
+    configuration: ActiveLearningConfiguration, sampling_strategy: str
+) -> List[str]:
+    tags = ACTIVE_LEARNING_TAGS if ACTIVE_LEARNING_TAGS is not None else []
+    tags.extend(configuration.tags)
+    tags.extend(configuration.strategies_tags[sampling_strategy])
+    if configuration.persist_predictions:
+        # this replacement is needed due to backend input validation
+        tags.append(configuration.model_id.replace("/", "-"))
+    return tags
+
+
+def safe_register_image_at_roboflow(
+    cache: BaseCache,
+    strategy_with_spare_credit: str,
+    encoded_image: bytes,
+    local_image_id: str,
+    configuration: ActiveLearningConfiguration,
+    api_key: str,
+    batch_name: str,
+    tags: List[str],
+) -> Optional[str]:
+    credit_to_be_returned = False
+    try:
+        registration_response = register_image_at_roboflow(
+            api_key=api_key,
+            dataset_id=configuration.dataset_id,
+            local_image_id=local_image_id,
+            image_bytes=encoded_image,
+            batch_name=batch_name,
+            tags=tags,
+        )
+        image_duplicated = registration_response.get("duplicate", False)
+        if image_duplicated:
+            credit_to_be_returned = True
+            logger.warning(f"Image duplication detected: {registration_response}.")
+            return None
+        return registration_response["id"]
+    except Exception as error:
+        credit_to_be_returned = True
+        raise error
+    finally:
+        if credit_to_be_returned:
+            return_strategy_credit(
+                cache=cache,
+                workspace=configuration.workspace_id,
+                project=configuration.dataset_id,
+                strategy_name=strategy_with_spare_credit,
+            )
+
+
+def is_prediction_registration_forbidden(
+    prediction: Prediction,
+    persist_predictions: bool,
+    roboflow_image_id: Optional[str],
+) -> bool:
+    return (
+        roboflow_image_id is None
+        or persist_predictions is False
+        or prediction.get("is_stub", False) is True
+        or (len(prediction.get("predictions", [])) == 0 and "top" not in prediction)
+    )
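Usage note (not part of the diff): is_prediction_registration_forbidden is a pure predicate, so the skip conditions can be demonstrated with toy predictions:

from inference.core.active_learning.core import is_prediction_registration_forbidden

# Stub predictions and empty detection lists (with no classification "top") are skipped.
print(
    is_prediction_registration_forbidden(
        prediction={"is_stub": True},
        persist_predictions=True,
        roboflow_image_id="img-1",
    )
)  # True
print(
    is_prediction_registration_forbidden(
        prediction={"predictions": [{"class": "cat", "confidence": 0.9}]},
        persist_predictions=True,
        roboflow_image_id="img-1",
    )
)  # False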
inference/core/active_learning/entities.py
ADDED
@@ -0,0 +1,141 @@
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+import numpy as np
+
+from inference.core.entities.types import DatasetID, WorkspaceID
+from inference.core.exceptions import ActiveLearningConfigurationDecodingError
+
+LocalImageIdentifier = str
+PredictionType = str
+Prediction = dict
+SerialisedPrediction = str
+PredictionFileType = str
+
+
+@dataclass(frozen=True)
+class ImageDimensions:
+    height: int
+    width: int
+
+    def to_hw(self) -> Tuple[int, int]:
+        return self.height, self.width
+
+    def to_wh(self) -> Tuple[int, int]:
+        return self.width, self.height
+
+
+@dataclass(frozen=True)
+class SamplingMethod:
+    name: str
+    sample: Callable[[np.ndarray, Prediction, PredictionType], bool]
+
+
+class BatchReCreationInterval(Enum):
+    NEVER = "never"
+    DAILY = "daily"
+    WEEKLY = "weekly"
+    MONTHLY = "monthly"
+
+
+class StrategyLimitType(Enum):
+    MINUTELY = "minutely"
+    HOURLY = "hourly"
+    DAILY = "daily"
+
+
+@dataclass(frozen=True)
+class StrategyLimit:
+    limit_type: StrategyLimitType
+    value: int
+
+    @classmethod
+    def from_dict(cls, specification: dict) -> "StrategyLimit":
+        return cls(
+            limit_type=StrategyLimitType(specification["type"]),
+            value=specification["value"],
+        )
+
+
+@dataclass(frozen=True)
+class ActiveLearningConfiguration:
+    max_image_size: Optional[ImageDimensions]
+    jpeg_compression_level: int
+    persist_predictions: bool
+    sampling_methods: List[SamplingMethod]
+    batches_name_prefix: str
+    batch_recreation_interval: BatchReCreationInterval
+    max_batch_images: Optional[int]
+    workspace_id: WorkspaceID
+    dataset_id: DatasetID
+    model_id: str
+    strategies_limits: Dict[str, List[StrategyLimit]]
+    tags: List[str]
+    strategies_tags: Dict[str, List[str]]
+
+    @classmethod
+    def init(
+        cls,
+        roboflow_api_configuration: Dict[str, Any],
+        sampling_methods: List[SamplingMethod],
+        workspace_id: WorkspaceID,
+        dataset_id: DatasetID,
+        model_id: str,
+    ) -> "ActiveLearningConfiguration":
+        try:
+            max_image_size = roboflow_api_configuration.get("max_image_size")
+            if max_image_size is not None:
+                max_image_size = ImageDimensions(
+                    height=roboflow_api_configuration["max_image_size"][0],
+                    width=roboflow_api_configuration["max_image_size"][1],
+                )
+            strategies_limits = {
+                strategy["name"]: [
+                    StrategyLimit.from_dict(specification=specification)
+                    for specification in strategy.get("limits", [])
+                ]
+                for strategy in roboflow_api_configuration["sampling_strategies"]
+            }
+            strategies_tags = {
+                strategy["name"]: strategy.get("tags", [])
+                for strategy in roboflow_api_configuration["sampling_strategies"]
+            }
+            return cls(
+                max_image_size=max_image_size,
+                jpeg_compression_level=roboflow_api_configuration.get(
+                    "jpeg_compression_level", 95
+                ),
+                persist_predictions=roboflow_api_configuration["persist_predictions"],
+                sampling_methods=sampling_methods,
+                batches_name_prefix=roboflow_api_configuration["batching_strategy"][
+                    "batches_name_prefix"
+                ],
+                batch_recreation_interval=BatchReCreationInterval(
+                    roboflow_api_configuration["batching_strategy"][
+                        "recreation_interval"
+                    ]
+                ),
+                max_batch_images=roboflow_api_configuration["batching_strategy"].get(
+                    "max_batch_images"
+                ),
+                workspace_id=workspace_id,
+                dataset_id=dataset_id,
+                model_id=model_id,
+                strategies_limits=strategies_limits,
+                tags=roboflow_api_configuration.get("tags", []),
+                strategies_tags=strategies_tags,
+            )
+        except (KeyError, ValueError) as e:
+            raise ActiveLearningConfigurationDecodingError(
+                f"Failed to initialise Active Learning configuration. Cause: {str(e)}"
+            ) from e
+
+
+@dataclass(frozen=True)
+class RoboflowProjectMetadata:
+    dataset_id: DatasetID
+    version_id: str
+    workspace_id: WorkspaceID
+    dataset_type: str
+    active_learning_configuration: dict
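Usage note (not part of the diff): the small value objects in this module behave as shown below; to_wh matches the (width, height) order expected by the downscaling utility in core.py:

from inference.core.active_learning.entities import (
    ImageDimensions,
    StrategyLimit,
    StrategyLimitType,
)

dims = ImageDimensions(height=720, width=1280)
print(dims.to_wh())  # (1280, 720)

limit = StrategyLimit.from_dict({"type": "daily", "value": 100})
print(limit.limit_type is StrategyLimitType.DAILY, limit.value)  # True 100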
inference/core/active_learning/middlewares.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import queue
from queue import Queue
from threading import Thread
from typing import Any, List, Optional

from inference.core import logger
from inference.core.active_learning.accounting import image_can_be_submitted_to_batch
from inference.core.active_learning.batching import generate_batch_name
from inference.core.active_learning.configuration import (
    prepare_active_learning_configuration,
    prepare_active_learning_configuration_inplace,
)
from inference.core.active_learning.core import (
    execute_datapoint_registration,
    execute_sampling,
)
from inference.core.active_learning.entities import (
    ActiveLearningConfiguration,
    Prediction,
    PredictionType,
)
from inference.core.cache.base import BaseCache
from inference.core.utils.image_utils import load_image

MAX_REGISTRATION_QUEUE_SIZE = 512


class NullActiveLearningMiddleware:
    def register_batch(
        self,
        inference_inputs: List[Any],
        predictions: List[Prediction],
        prediction_type: PredictionType,
        disable_preproc_auto_orient: bool = False,
    ) -> None:
        pass

    def register(
        self,
        inference_input: Any,
        prediction: dict,
        prediction_type: PredictionType,
        disable_preproc_auto_orient: bool = False,
    ) -> None:
        pass

    def start_registration_thread(self) -> None:
        pass

    def stop_registration_thread(self) -> None:
        pass

    def __enter__(self) -> "NullActiveLearningMiddleware":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        pass


class ActiveLearningMiddleware:
    @classmethod
    def init(
        cls, api_key: str, model_id: str, cache: BaseCache
    ) -> "ActiveLearningMiddleware":
        configuration = prepare_active_learning_configuration(
            api_key=api_key,
            model_id=model_id,
            cache=cache,
        )
        return cls(
            api_key=api_key,
            configuration=configuration,
            cache=cache,
        )

    @classmethod
    def init_from_config(
        cls, api_key: str, model_id: str, cache: BaseCache, config: Optional[dict]
    ) -> "ActiveLearningMiddleware":
        configuration = prepare_active_learning_configuration_inplace(
            api_key=api_key,
            model_id=model_id,
            active_learning_configuration=config,
        )
        return cls(
            api_key=api_key,
            configuration=configuration,
            cache=cache,
        )

    def __init__(
        self,
        api_key: str,
        configuration: Optional[ActiveLearningConfiguration],
        cache: BaseCache,
    ):
        self._api_key = api_key
        self._configuration = configuration
        self._cache = cache

    def register_batch(
        self,
        inference_inputs: List[Any],
        predictions: List[Prediction],
        prediction_type: PredictionType,
        disable_preproc_auto_orient: bool = False,
    ) -> None:
        for inference_input, prediction in zip(inference_inputs, predictions):
            self.register(
                inference_input=inference_input,
                prediction=prediction,
                prediction_type=prediction_type,
                disable_preproc_auto_orient=disable_preproc_auto_orient,
            )

    def register(
        self,
        inference_input: Any,
        prediction: dict,
        prediction_type: PredictionType,
        disable_preproc_auto_orient: bool = False,
    ) -> None:
        self._execute_registration(
            inference_input=inference_input,
            prediction=prediction,
            prediction_type=prediction_type,
            disable_preproc_auto_orient=disable_preproc_auto_orient,
        )

    def _execute_registration(
        self,
        inference_input: Any,
        prediction: dict,
        prediction_type: PredictionType,
        disable_preproc_auto_orient: bool = False,
    ) -> None:
        if self._configuration is None:
            return None
        image, is_bgr = load_image(
            value=inference_input,
            disable_preproc_auto_orient=disable_preproc_auto_orient,
        )
        if not is_bgr:
            image = image[:, :, ::-1]
        matching_strategies = execute_sampling(
            image=image,
            prediction=prediction,
            prediction_type=prediction_type,
            sampling_methods=self._configuration.sampling_methods,
        )
        if len(matching_strategies) == 0:
            return None
        batch_name = generate_batch_name(configuration=self._configuration)
        if not image_can_be_submitted_to_batch(
            batch_name=batch_name,
            workspace_id=self._configuration.workspace_id,
            dataset_id=self._configuration.dataset_id,
            max_batch_images=self._configuration.max_batch_images,
            api_key=self._api_key,
        ):
            logger.debug("Limit on Active Learning batch size reached.")
            return None
        execute_datapoint_registration(
            cache=self._cache,
            matching_strategies=matching_strategies,
            image=image,
            prediction=prediction,
            prediction_type=prediction_type,
            configuration=self._configuration,
            api_key=self._api_key,
            batch_name=batch_name,
        )


class ThreadingActiveLearningMiddleware(ActiveLearningMiddleware):
    @classmethod
    def init(
        cls,
        api_key: str,
        model_id: str,
        cache: BaseCache,
        max_queue_size: int = MAX_REGISTRATION_QUEUE_SIZE,
    ) -> "ThreadingActiveLearningMiddleware":
        configuration = prepare_active_learning_configuration(
            api_key=api_key,
            model_id=model_id,
            cache=cache,
        )
        task_queue = Queue(max_queue_size)
        return cls(
            api_key=api_key,
            configuration=configuration,
            cache=cache,
            task_queue=task_queue,
        )

    @classmethod
    def init_from_config(
        cls,
        api_key: str,
        model_id: str,
        cache: BaseCache,
        config: Optional[dict],
        max_queue_size: int = MAX_REGISTRATION_QUEUE_SIZE,
    ) -> "ThreadingActiveLearningMiddleware":
        configuration = prepare_active_learning_configuration_inplace(
            api_key=api_key,
            model_id=model_id,
            active_learning_configuration=config,
        )
        task_queue = Queue(max_queue_size)
        return cls(
            api_key=api_key,
            configuration=configuration,
            cache=cache,
            task_queue=task_queue,
        )

    def __init__(
        self,
        api_key: str,
        configuration: ActiveLearningConfiguration,
        cache: BaseCache,
        task_queue: Queue,
    ):
        super().__init__(api_key=api_key, configuration=configuration, cache=cache)
        self._task_queue = task_queue
        self._registration_thread: Optional[Thread] = None

    def register(
        self,
        inference_input: Any,
        prediction: dict,
        prediction_type: PredictionType,
        disable_preproc_auto_orient: bool = False,
    ) -> None:
        logger.debug("Putting registration task into queue")
        try:
            self._task_queue.put_nowait(
                (
                    inference_input,
                    prediction,
                    prediction_type,
                    disable_preproc_auto_orient,
                )
            )
        except queue.Full:
            logger.warning(
                "Dropping datapoint registered in Active Learning due to insufficient processing "
                "capabilities."
            )

    def start_registration_thread(self) -> None:
        if self._registration_thread is not None:
            logger.warning("Registration thread already started.")
            return None
        logger.debug("Starting registration thread")
        self._registration_thread = Thread(target=self._consume_queue)
        self._registration_thread.start()

    def stop_registration_thread(self) -> None:
        if self._registration_thread is None:
            logger.warning("Registration thread is already stopped.")
            return None
        logger.debug("Stopping registration thread")
        self._task_queue.put(None)  # None is the sentinel that terminates the consumer loop
        self._registration_thread.join()
        if self._registration_thread.is_alive():
            logger.warning("Registration thread stopping was unsuccessful.")
        self._registration_thread = None

    def _consume_queue(self) -> None:
        queue_closed = False
        while not queue_closed:
            queue_closed = self._consume_queue_task()

    def _consume_queue_task(self) -> bool:
        logger.debug("Consuming registration task")
        task = self._task_queue.get()
        logger.debug("Received registration task")
        if task is None:
            logger.debug("Terminating registration thread")
            self._task_queue.task_done()
            return True
        inference_input, prediction, prediction_type, disable_preproc_auto_orient = task
        try:
            self._execute_registration(
                inference_input=inference_input,
                prediction=prediction,
                prediction_type=prediction_type,
                disable_preproc_auto_orient=disable_preproc_auto_orient,
            )
        except Exception as error:
            # Error handling to be decided
            logger.warning(
                f"Error in datapoint registration for Active Learning. Details: {error}. "
                f"Error is suppressed in favour of normal operations of registration thread."
            )
        self._task_queue.task_done()
        return False

    def __enter__(self) -> "ThreadingActiveLearningMiddleware":
        self.start_registration_thread()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.stop_registration_thread()
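
A minimal usage sketch of the threaded middleware above, run as a context manager so the registration thread is started and stopped automatically. The API key, model ID and image path are placeholders, not values from this upload; the in-memory cache is simply the most lightweight BaseCache implementation shipped here:

from inference.core.active_learning.middlewares import ThreadingActiveLearningMiddleware
from inference.core.cache.memory import MemoryCache
from inference.core.constants import OBJECT_DETECTION_TASK

middleware = ThreadingActiveLearningMiddleware.init(
    api_key="YOUR_API_KEY",     # placeholder
    model_id="your-project/1",  # placeholder
    cache=MemoryCache(),
)
with middleware:  # __enter__ starts the registration thread, __exit__ stops it
    middleware.register(
        inference_input="path/to/image.jpg",  # anything load_image accepts
        prediction={"predictions": []},       # a model's raw prediction dict
        prediction_type=OBJECT_DETECTION_TASK,
    )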
inference/core/active_learning/post_processing.py
ADDED
@@ -0,0 +1,128 @@
import json
from typing import List, Tuple

from inference.core.active_learning.entities import (
    Prediction,
    PredictionFileType,
    PredictionType,
    SerialisedPrediction,
)
from inference.core.constants import (
    CLASSIFICATION_TASK,
    INSTANCE_SEGMENTATION_TASK,
    OBJECT_DETECTION_TASK,
)
from inference.core.exceptions import PredictionFormatNotSupported


def adjust_prediction_to_client_scaling_factor(
    prediction: dict, scaling_factor: float, prediction_type: PredictionType
) -> dict:
    if abs(scaling_factor - 1.0) < 1e-5:
        return prediction
    if "image" in prediction:
        prediction["image"] = {
            "width": round(prediction["image"]["width"] / scaling_factor),
            "height": round(prediction["image"]["height"] / scaling_factor),
        }
    if predictions_should_not_be_post_processed(
        prediction=prediction, prediction_type=prediction_type
    ):
        return prediction
    if prediction_type == INSTANCE_SEGMENTATION_TASK:
        prediction["predictions"] = (
            adjust_prediction_with_bbox_and_points_to_client_scaling_factor(
                predictions=prediction["predictions"],
                scaling_factor=scaling_factor,
                points_key="points",
            )
        )
    if prediction_type == OBJECT_DETECTION_TASK:
        prediction["predictions"] = (
            adjust_object_detection_predictions_to_client_scaling_factor(
                predictions=prediction["predictions"],
                scaling_factor=scaling_factor,
            )
        )
    return prediction


def predictions_should_not_be_post_processed(
    prediction: dict, prediction_type: PredictionType
) -> bool:
    # excluding from post-processing: classification output, stub output and empty predictions
    return (
        "is_stub" in prediction
        or "predictions" not in prediction
        or CLASSIFICATION_TASK in prediction_type
        or len(prediction["predictions"]) == 0
    )


def adjust_object_detection_predictions_to_client_scaling_factor(
    predictions: List[dict],
    scaling_factor: float,
) -> List[dict]:
    result = []
    for prediction in predictions:
        prediction = adjust_bbox_coordinates_to_client_scaling_factor(
            bbox=prediction,
            scaling_factor=scaling_factor,
        )
        result.append(prediction)
    return result


def adjust_prediction_with_bbox_and_points_to_client_scaling_factor(
    predictions: List[dict],
    scaling_factor: float,
    points_key: str,
) -> List[dict]:
    result = []
    for prediction in predictions:
        prediction = adjust_bbox_coordinates_to_client_scaling_factor(
            bbox=prediction,
            scaling_factor=scaling_factor,
        )
        prediction[points_key] = adjust_points_coordinates_to_client_scaling_factor(
            points=prediction[points_key],
            scaling_factor=scaling_factor,
        )
        result.append(prediction)
    return result


def adjust_bbox_coordinates_to_client_scaling_factor(
    bbox: dict,
    scaling_factor: float,
) -> dict:
    bbox["x"] = bbox["x"] / scaling_factor
    bbox["y"] = bbox["y"] / scaling_factor
    bbox["width"] = bbox["width"] / scaling_factor
    bbox["height"] = bbox["height"] / scaling_factor
    return bbox


def adjust_points_coordinates_to_client_scaling_factor(
    points: List[dict],
    scaling_factor: float,
) -> List[dict]:
    result = []
    for point in points:
        point["x"] = point["x"] / scaling_factor
        point["y"] = point["y"] / scaling_factor
        result.append(point)
    return result


def encode_prediction(
    prediction: Prediction,
    prediction_type: PredictionType,
) -> Tuple[SerialisedPrediction, PredictionFileType]:
    if CLASSIFICATION_TASK not in prediction_type:
        return json.dumps(prediction), "json"
    if "top" in prediction:
        return prediction["top"], "txt"
    raise PredictionFormatNotSupported(
        "Prediction type or prediction format not supported."
    )
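
As an illustration of the rescaling above (toy numbers, not taken from this upload): with scaling_factor=2.0, image dimensions and box coordinates are divided by the factor to map the prediction back to the client's original resolution:

from inference.core.active_learning.post_processing import (
    adjust_prediction_to_client_scaling_factor,
)
from inference.core.constants import OBJECT_DETECTION_TASK

prediction = {
    "image": {"width": 640, "height": 480},
    "predictions": [
        {"x": 100.0, "y": 80.0, "width": 50.0, "height": 40.0,
         "confidence": 0.9, "class": "dog"},  # toy detection
    ],
}
adjusted = adjust_prediction_to_client_scaling_factor(
    prediction=prediction,
    scaling_factor=2.0,  # inference ran on an image the client upscaled 2x
    prediction_type=OBJECT_DETECTION_TASK,
)
# adjusted["image"] == {"width": 320, "height": 240}
# the box becomes x=50.0, y=40.0, width=25.0, height=20.0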
inference/core/active_learning/samplers/__init__.py
ADDED
File without changes

inference/core/active_learning/samplers/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (201 Bytes)

inference/core/active_learning/samplers/__pycache__/close_to_threshold.cpython-310.pyc
ADDED
Binary file (4.68 kB)

inference/core/active_learning/samplers/__pycache__/contains_classes.cpython-310.pyc
ADDED
Binary file (1.71 kB)

inference/core/active_learning/samplers/__pycache__/number_of_detections.cpython-310.pyc
ADDED
Binary file (2.74 kB)

inference/core/active_learning/samplers/__pycache__/random.cpython-310.pyc
ADDED
Binary file (1.22 kB)
inference/core/active_learning/samplers/close_to_threshold.py
ADDED
@@ -0,0 +1,227 @@
import random
from functools import partial
from typing import Any, Dict, Optional, Set

import numpy as np

from inference.core.active_learning.entities import (
    Prediction,
    PredictionType,
    SamplingMethod,
)
from inference.core.constants import (
    CLASSIFICATION_TASK,
    INSTANCE_SEGMENTATION_TASK,
    KEYPOINTS_DETECTION_TASK,
    OBJECT_DETECTION_TASK,
)
from inference.core.exceptions import ActiveLearningConfigurationError

ELIGIBLE_PREDICTION_TYPES = {
    CLASSIFICATION_TASK,
    INSTANCE_SEGMENTATION_TASK,
    KEYPOINTS_DETECTION_TASK,
    OBJECT_DETECTION_TASK,
}


def initialize_close_to_threshold_sampling(
    strategy_config: Dict[str, Any]
) -> SamplingMethod:
    try:
        selected_class_names = strategy_config.get("selected_class_names")
        if selected_class_names is not None:
            selected_class_names = set(selected_class_names)
        sample_function = partial(
            sample_close_to_threshold,
            selected_class_names=selected_class_names,
            threshold=strategy_config["threshold"],
            epsilon=strategy_config["epsilon"],
            only_top_classes=strategy_config.get("only_top_classes", True),
            minimum_objects_close_to_threshold=strategy_config.get(
                "minimum_objects_close_to_threshold",
                1,
            ),
            probability=strategy_config["probability"],
        )
        return SamplingMethod(
            name=strategy_config["name"],
            sample=sample_function,
        )
    except KeyError as error:
        raise ActiveLearningConfigurationError(
            f"In configuration of `close_to_threshold_sampling` missing key detected: {error}."
        ) from error


def sample_close_to_threshold(
    image: np.ndarray,
    prediction: Prediction,
    prediction_type: PredictionType,
    selected_class_names: Optional[Set[str]],
    threshold: float,
    epsilon: float,
    only_top_classes: bool,
    minimum_objects_close_to_threshold: int,
    probability: float,
) -> bool:
    if is_prediction_a_stub(prediction=prediction):
        return False
    if prediction_type not in ELIGIBLE_PREDICTION_TYPES:
        return False
    close_to_threshold = prediction_is_close_to_threshold(
        prediction=prediction,
        prediction_type=prediction_type,
        selected_class_names=selected_class_names,
        threshold=threshold,
        epsilon=epsilon,
        only_top_classes=only_top_classes,
        minimum_objects_close_to_threshold=minimum_objects_close_to_threshold,
    )
    if not close_to_threshold:
        return False
    return random.random() < probability


def is_prediction_a_stub(prediction: Prediction) -> bool:
    return prediction.get("is_stub", False)


def prediction_is_close_to_threshold(
    prediction: Prediction,
    prediction_type: PredictionType,
    selected_class_names: Optional[Set[str]],
    threshold: float,
    epsilon: float,
    only_top_classes: bool,
    minimum_objects_close_to_threshold: int,
) -> bool:
    if CLASSIFICATION_TASK not in prediction_type:
        return detections_are_close_to_threshold(
            prediction=prediction,
            selected_class_names=selected_class_names,
            threshold=threshold,
            epsilon=epsilon,
            minimum_objects_close_to_threshold=minimum_objects_close_to_threshold,
        )
    checker = multi_label_classification_prediction_is_close_to_threshold
    if "top" in prediction:
        checker = multi_class_classification_prediction_is_close_to_threshold
    return checker(
        prediction=prediction,
        selected_class_names=selected_class_names,
        threshold=threshold,
        epsilon=epsilon,
        only_top_classes=only_top_classes,
    )


def multi_class_classification_prediction_is_close_to_threshold(
    prediction: Prediction,
    selected_class_names: Optional[Set[str]],
    threshold: float,
    epsilon: float,
    only_top_classes: bool,
) -> bool:
    if only_top_classes:
        return (
            multi_class_classification_prediction_is_close_to_threshold_for_top_class(
                prediction=prediction,
                selected_class_names=selected_class_names,
                threshold=threshold,
                epsilon=epsilon,
            )
        )
    for prediction_details in prediction["predictions"]:
        if class_to_be_excluded(
            class_name=prediction_details["class"],
            selected_class_names=selected_class_names,
        ):
            continue
        if is_close_to_threshold(
            value=prediction_details["confidence"], threshold=threshold, epsilon=epsilon
        ):
            return True
    return False


def multi_class_classification_prediction_is_close_to_threshold_for_top_class(
    prediction: Prediction,
    selected_class_names: Optional[Set[str]],
    threshold: float,
    epsilon: float,
) -> bool:
    if (
        selected_class_names is not None
        and prediction["top"] not in selected_class_names
    ):
        return False
    return abs(prediction["confidence"] - threshold) < epsilon


def multi_label_classification_prediction_is_close_to_threshold(
    prediction: Prediction,
    selected_class_names: Optional[Set[str]],
    threshold: float,
    epsilon: float,
    only_top_classes: bool,
) -> bool:
    predicted_classes = set(prediction["predicted_classes"])
    for class_name, prediction_details in prediction["predictions"].items():
        if only_top_classes and class_name not in predicted_classes:
            continue
        if class_to_be_excluded(
            class_name=class_name, selected_class_names=selected_class_names
        ):
            continue
        if is_close_to_threshold(
            value=prediction_details["confidence"], threshold=threshold, epsilon=epsilon
        ):
            return True
    return False


def detections_are_close_to_threshold(
    prediction: Prediction,
    selected_class_names: Optional[Set[str]],
    threshold: float,
    epsilon: float,
    minimum_objects_close_to_threshold: int,
) -> bool:
    detections_close_to_threshold = count_detections_close_to_threshold(
        prediction=prediction,
        selected_class_names=selected_class_names,
        threshold=threshold,
        epsilon=epsilon,
    )
    return detections_close_to_threshold >= minimum_objects_close_to_threshold


def count_detections_close_to_threshold(
    prediction: Prediction,
    selected_class_names: Optional[Set[str]],
    threshold: float,
    epsilon: float,
) -> int:
    counter = 0
    for prediction_details in prediction["predictions"]:
        if class_to_be_excluded(
            class_name=prediction_details["class"],
            selected_class_names=selected_class_names,
        ):
            continue
        if is_close_to_threshold(
            value=prediction_details["confidence"], threshold=threshold, epsilon=epsilon
        ):
            counter += 1
    return counter


def class_to_be_excluded(
    class_name: str, selected_class_names: Optional[Set[str]]
) -> bool:
    return selected_class_names is not None and class_name not in selected_class_names


def is_close_to_threshold(value: float, threshold: float, epsilon: float) -> bool:
    return abs(value - threshold) < epsilon
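
A sketch of a strategy_config accepted by initialize_close_to_threshold_sampling, based only on the keys read above; the name and numeric values are illustrative, not from this upload:

strategy_config = {
    "name": "uncertain_detections",           # illustrative strategy name
    "selected_class_names": ["dog", "cat"],   # optional; omit to consider all classes
    "threshold": 0.5,
    "epsilon": 0.25,                          # accepts confidences in (0.25, 0.75)
    "only_top_classes": True,                 # optional, defaults to True
    "minimum_objects_close_to_threshold": 1,  # optional, defaults to 1
    "probability": 0.2,                       # of eligible datapoints, sample ~20% at random
}
method = initialize_close_to_threshold_sampling(strategy_config)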
inference/core/active_learning/samplers/contains_classes.py
ADDED
@@ -0,0 +1,58 @@
from functools import partial
from typing import Any, Dict, Set

import numpy as np

from inference.core.active_learning.entities import (
    Prediction,
    PredictionType,
    SamplingMethod,
)
from inference.core.active_learning.samplers.close_to_threshold import (
    sample_close_to_threshold,
)
from inference.core.constants import CLASSIFICATION_TASK
from inference.core.exceptions import ActiveLearningConfigurationError

ELIGIBLE_PREDICTION_TYPES = {CLASSIFICATION_TASK}


def initialize_classes_based_sampling(
    strategy_config: Dict[str, Any]
) -> SamplingMethod:
    try:
        sample_function = partial(
            sample_based_on_classes,
            selected_class_names=set(strategy_config["selected_class_names"]),
            probability=strategy_config["probability"],
        )
        return SamplingMethod(
            name=strategy_config["name"],
            sample=sample_function,
        )
    except KeyError as error:
        raise ActiveLearningConfigurationError(
            f"In configuration of `classes_based_sampling` missing key detected: {error}."
        ) from error


def sample_based_on_classes(
    image: np.ndarray,
    prediction: Prediction,
    prediction_type: PredictionType,
    selected_class_names: Set[str],
    probability: float,
) -> bool:
    if prediction_type not in ELIGIBLE_PREDICTION_TYPES:
        return False
    return sample_close_to_threshold(
        image=image,
        prediction=prediction,
        prediction_type=prediction_type,
        selected_class_names=selected_class_names,
        threshold=0.5,
        epsilon=1.0,
        only_top_classes=True,
        minimum_objects_close_to_threshold=1,
        probability=probability,
    )
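
Note how the sampler above reuses sample_close_to_threshold with threshold=0.5 and epsilon=1.0: every confidence in [0, 1] then counts as "close to threshold", so the check effectively reduces to "the top class is one of the selected classes". An illustrative config (values invented):

strategy_config = {
    "name": "contains_selected_classes",     # illustrative
    "selected_class_names": ["cat", "dog"],  # required for this strategy
    "probability": 0.1,
}
method = initialize_classes_based_sampling(strategy_config)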
inference/core/active_learning/samplers/number_of_detections.py
ADDED
@@ -0,0 +1,107 @@
import random
from functools import partial
from typing import Any, Dict, Optional, Set

import numpy as np

from inference.core.active_learning.entities import (
    Prediction,
    PredictionType,
    SamplingMethod,
)
from inference.core.active_learning.samplers.close_to_threshold import (
    count_detections_close_to_threshold,
    is_prediction_a_stub,
)
from inference.core.constants import (
    INSTANCE_SEGMENTATION_TASK,
    KEYPOINTS_DETECTION_TASK,
    OBJECT_DETECTION_TASK,
)
from inference.core.exceptions import ActiveLearningConfigurationError

ELIGIBLE_PREDICTION_TYPES = {
    INSTANCE_SEGMENTATION_TASK,
    KEYPOINTS_DETECTION_TASK,
    OBJECT_DETECTION_TASK,
}


def initialize_detections_number_based_sampling(
    strategy_config: Dict[str, Any]
) -> SamplingMethod:
    try:
        more_than = strategy_config.get("more_than")
        less_than = strategy_config.get("less_than")
        ensure_range_configuration_is_valid(more_than=more_than, less_than=less_than)
        selected_class_names = strategy_config.get("selected_class_names")
        if selected_class_names is not None:
            selected_class_names = set(selected_class_names)
        sample_function = partial(
            sample_based_on_detections_number,
            less_than=less_than,
            more_than=more_than,
            selected_class_names=selected_class_names,
            probability=strategy_config["probability"],
        )
        return SamplingMethod(
            name=strategy_config["name"],
            sample=sample_function,
        )
    except KeyError as error:
        raise ActiveLearningConfigurationError(
            f"In configuration of `detections_number_based_sampling` missing key detected: {error}."
        ) from error


def sample_based_on_detections_number(
    image: np.ndarray,
    prediction: Prediction,
    prediction_type: PredictionType,
    more_than: Optional[int],
    less_than: Optional[int],
    selected_class_names: Optional[Set[str]],
    probability: float,
) -> bool:
    if is_prediction_a_stub(prediction=prediction):
        return False
    if prediction_type not in ELIGIBLE_PREDICTION_TYPES:
        return False
    detections_close_to_threshold = count_detections_close_to_threshold(
        prediction=prediction,
        selected_class_names=selected_class_names,
        threshold=0.5,
        epsilon=1.0,
    )
    if is_in_range(
        value=detections_close_to_threshold, less_than=less_than, more_than=more_than
    ):
        return random.random() < probability
    return False


def is_in_range(
    value: int,
    more_than: Optional[int],
    less_than: Optional[int],
) -> bool:
    # checks more_than < value < less_than, where either bound may be omitted (None)
    less_than_satisfied, more_than_satisfied = less_than is None, more_than is None
    if less_than is not None and value < less_than:
        less_than_satisfied = True
    if more_than is not None and value > more_than:
        more_than_satisfied = True
    return less_than_satisfied and more_than_satisfied


def ensure_range_configuration_is_valid(
    more_than: Optional[int],
    less_than: Optional[int],
) -> None:
    if more_than is None or less_than is None:
        return None
    if more_than >= less_than:
        raise ActiveLearningConfigurationError(
            f"Misconfiguration of detections number sampling: "
            f"`more_than` parameter ({more_than}) >= `less_than` ({less_than})."
        )
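
An illustrative config for the detections-count strategy above; both bounds are strict and optional, as implemented in is_in_range (values invented):

strategy_config = {
    "name": "crowded_scenes",      # illustrative
    "more_than": 3,                # optional; strict, so 4+ detections qualify
    "less_than": 20,               # optional; strict upper bound
    "selected_class_names": None,  # optional; None counts detections of any class
    "probability": 0.05,
}
method = initialize_detections_number_based_sampling(strategy_config)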
inference/core/active_learning/samplers/random.py
ADDED
@@ -0,0 +1,37 @@
import random
from functools import partial
from typing import Any, Dict

import numpy as np

from inference.core.active_learning.entities import (
    Prediction,
    PredictionType,
    SamplingMethod,
)
from inference.core.exceptions import ActiveLearningConfigurationError


def initialize_random_sampling(strategy_config: Dict[str, Any]) -> SamplingMethod:
    try:
        sample_function = partial(
            sample_randomly,
            traffic_percentage=strategy_config["traffic_percentage"],
        )
        return SamplingMethod(
            name=strategy_config["name"],
            sample=sample_function,
        )
    except KeyError as error:
        raise ActiveLearningConfigurationError(
            f"In configuration of `random_sampling` missing key detected: {error}."
        ) from error


def sample_randomly(
    image: np.ndarray,
    prediction: Prediction,
    prediction_type: PredictionType,
    traffic_percentage: float,
) -> bool:
    return random.random() < traffic_percentage
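
An illustrative config for the random sampler; traffic_percentage is compared directly against random.random(), so 0.01 samples roughly 1% of predictions (values invented):

strategy_config = {
    "name": "baseline_random",  # illustrative
    "traffic_percentage": 0.01,
}
method = initialize_random_sampling(strategy_config)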
inference/core/active_learning/utils.py
ADDED
@@ -0,0 +1,16 @@
from datetime import datetime, timedelta

TIMESTAMP_FORMAT = "%Y_%m_%d"


def generate_today_timestamp() -> str:
    return datetime.today().strftime(TIMESTAMP_FORMAT)


def generate_start_timestamp_for_this_week() -> str:
    today = datetime.today()
    return (today - timedelta(days=today.weekday())).strftime(TIMESTAMP_FORMAT)


def generate_start_timestamp_for_this_month() -> str:
    return datetime.today().replace(day=1).strftime(TIMESTAMP_FORMAT)
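
For example, if today were Wednesday 2024-01-17, the helpers above would return:

generate_today_timestamp()                 # "2024_01_17"
generate_start_timestamp_for_this_week()   # "2024_01_15" (the Monday of that week)
generate_start_timestamp_for_this_month()  # "2024_01_01"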
inference/core/cache/__init__.py
ADDED
@@ -0,0 +1,22 @@
from redis.exceptions import ConnectionError, TimeoutError

from inference.core import logger
from inference.core.cache.memory import MemoryCache
from inference.core.cache.redis import RedisCache
from inference.core.env import REDIS_HOST, REDIS_PORT, REDIS_SSL, REDIS_TIMEOUT

if REDIS_HOST is not None:
    try:
        cache = RedisCache(
            host=REDIS_HOST, port=REDIS_PORT, ssl=REDIS_SSL, timeout=REDIS_TIMEOUT
        )
        logger.info("Redis Cache initialised")
    except (TimeoutError, ConnectionError):
        logger.error(
            f"Could not connect to Redis under {REDIS_HOST}:{REDIS_PORT}. MemoryCache to be used."
        )
        cache = MemoryCache()
        logger.info("Memory Cache initialised")
else:
    cache = MemoryCache()
    logger.info("Memory Cache initialised")
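
The module-level cache singleton can then be imported anywhere in the package; a minimal sketch, assuming the MemoryCache fallback implements the BaseCache get/set interface below (key and value are placeholders):

from inference.core.cache import cache  # RedisCache if REDIS_HOST is set, else MemoryCache

cache.set("example:key", "example-value", expire=60.0)  # placeholder key/value
value = cache.get("example:key")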
inference/core/cache/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (864 Bytes)

inference/core/cache/__pycache__/base.cpython-310.pyc
ADDED
Binary file (4.93 kB)

inference/core/cache/__pycache__/memory.cpython-310.pyc
ADDED
Binary file (6.56 kB)

inference/core/cache/__pycache__/model_artifacts.cpython-310.pyc
ADDED
Binary file (3.17 kB)

inference/core/cache/__pycache__/redis.cpython-310.pyc
ADDED
Binary file (7.3 kB)

inference/core/cache/__pycache__/serializers.cpython-310.pyc
ADDED
Binary file (1.91 kB)
inference/core/cache/base.py
ADDED
@@ -0,0 +1,130 @@
from contextlib import contextmanager
from typing import Any, Optional

from inference.core import logger


class BaseCache:
    """
    BaseCache is an abstract base class that defines the interface for a cache.
    """

    def get(self, key: str):
        """
        Gets the value associated with the given key.

        Args:
            key (str): The key to retrieve the value.

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError()

    def set(self, key: str, value: str, expire: float = None):
        """
        Sets a value for a given key with an optional expire time.

        Args:
            key (str): The key to store the value.
            value (str): The value to store.
            expire (float, optional): The time, in seconds, after which the key will expire. Defaults to None.

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError()

    def zadd(self, key: str, value: str, score: float, expire: float = None):
        """
        Adds a member with the specified score to the sorted set stored at key.

        Args:
            key (str): The key of the sorted set.
            value (str): The value to add to the sorted set.
            score (float): The score associated with the value.
            expire (float, optional): The time, in seconds, after which the key will expire. Defaults to None.

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError()

    def zrangebyscore(
        self,
        key: str,
        min: Optional[float] = -1,
        max: Optional[float] = float("inf"),
        withscores: bool = False,
    ):
        """
        Retrieves a range of members from a sorted set.

        Args:
            key (str): The key of the sorted set.
            min (float, optional): The minimum score of the range. Defaults to -1.
            max (float, optional): The maximum score of the range. Defaults to float("inf").
            withscores (bool, optional): Whether to return the scores along with the values. Defaults to False.

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError()

    def zremrangebyscore(
        self,
        key: str,
        start: Optional[int] = -1,
        stop: Optional[int] = float("inf"),
    ):
        """
        Removes all members in a sorted set within the given scores.

        Args:
            key (str): The key of the sorted set.
            start (int, optional): The minimum score of the range. Defaults to -1.
            stop (int, optional): The maximum score of the range. Defaults to float("inf").

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError()

    def acquire_lock(self, key: str, expire: float = None) -> Any:
        raise NotImplementedError()

    @contextmanager
    def lock(self, key: str, expire: float = None) -> Any:
        logger.debug(f"Acquiring lock at cache key: {key}")
        l = self.acquire_lock(key, expire=expire)
        try:
            yield l
        finally:
            logger.debug(f"Releasing lock at cache key: {key}")
            l.release()

    def set_numpy(self, key: str, value: Any, expire: float = None):
        """
        Caches a numpy array.

        Args:
            key (str): The key to store the value.
            value (Any): The value to store.
            expire (float, optional): The time, in seconds, after which the key will expire. Defaults to None.

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError()

    def get_numpy(self, key: str) -> Any:
        """
        Retrieves a numpy array from the cache.

        Args:
            key (str): The key of the value to retrieve.

        Raises:
            NotImplementedError: This method must be implemented by subclasses.
        """
        raise NotImplementedError()
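
A toy subclass sketch showing how the interface above is meant to be filled in; this dict-backed cache is illustrative only (it implements just get/set with lazy expiry and is not part of this upload):

import time

from inference.core.cache.base import BaseCache


class DictCache(BaseCache):
    """Illustrative BaseCache subclass backed by a plain dict."""

    def __init__(self):
        self._store = {}  # key -> (value, expiry unix timestamp or None)

    def set(self, key: str, value: str, expire: float = None):
        deadline = time.time() + expire if expire is not None else None
        self._store[key] = (value, deadline)

    def get(self, key: str):
        entry = self._store.get(key)
        if entry is None:
            return None
        value, deadline = entry
        if deadline is not None and time.time() > deadline:
            del self._store[key]  # lazily evict expired entries
            return None
        return value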