Spaces:
Sleeping
Sleeping
| # ------------------------------------------------------------------------ | |
| # RF-DETR | |
| # Copyright (c) 2025 Roboflow. All Rights Reserved. | |
| # Licensed under the Apache License, Version 2.0 [see LICENSE for details] | |
| # ------------------------------------------------------------------------ | |
| from pydantic import BaseModel | |
| from typing import List, Optional, Literal, Type | |
| import torch | |
# Choose the default torch device once, at import time. The backends are
# probed in a fixed order: CUDA first, then MPS, and CPU when neither one
# reports itself as available.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"
class ModelConfig(BaseModel):
    """
    Base pydantic schema for the settings of an RF-DETR model.

    Fields declared without a default (for example ``encoder`` or
    ``resolution``) are required: either a subclass gives them a default or
    the caller passes a value when constructing the config. The meaning of
    each setting is defined by the code that consumes this config, not here.
    """
    # Only the two encoder identifiers listed in the Literal are accepted.
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"]
    out_feature_indexes: List[int]
    dec_layers: int
    two_stage: bool = True
    # Every entry must be one of "P3", "P4" or "P5".
    projector_scale: List[Literal["P3", "P4", "P5"]]
    hidden_dim: int
    patch_size: int
    num_windows: int
    # presumably the head counts for self-attention (sa) and cross-attention
    # (ca) -- TODO confirm against the consumer of this config
    sa_nheads: int
    ca_nheads: int
    dec_n_points: int
    bbox_reparam: bool = True
    lite_refpoint_refine: bool = True
    layer_norm: bool = True
    amp: bool = True
    num_classes: int = 90
    # Optional; stays None unless a subclass or the caller sets it.
    pretrain_weights: Optional[str] = None
    # Defaults to the module-level DEVICE constant computed at import time.
    device: Literal["cpu", "cuda", "mps"] = DEVICE
    resolution: int
    group_detr: int = 13
    gradient_checkpointing: bool = False
    positional_encoding_size: int
class RFDETRBaseConfig(ModelConfig):
    """
    The configuration for an RF-DETR Base model.

    Supplies a default for every field that ModelConfig declares without
    one, so this class can be instantiated with no arguments. It also
    declares two fields that ModelConfig does not have: ``num_queries`` and
    ``num_select``.
    """
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_small"
    hidden_dim: int = 256
    patch_size: int = 14
    num_windows: int = 4
    dec_layers: int = 3
    sa_nheads: int = 8
    ca_nheads: int = 16
    dec_n_points: int = 2
    # Declared here for the first time; not part of ModelConfig.
    num_queries: int = 300
    num_select: int = 300
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P4"]
    out_feature_indexes: List[int] = [2, 5, 8, 11]
    pretrain_weights: Optional[str] = "rf-detr-base.pth"
    resolution: int = 560
    # NOTE(review): 37 is not resolution // patch_size (560 // 14 == 40),
    # whereas the Nano/Small/Medium configs in this file use values where the
    # two agree -- confirm this is intentional.
    positional_encoding_size: int = 37
class RFDETRLargeConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Large model.

    Overrides ``encoder``, ``hidden_dim``, ``sa_nheads``, ``ca_nheads``,
    ``dec_n_points``, ``projector_scale`` and ``pretrain_weights``; every
    other field keeps the default inherited from RFDETRBaseConfig.
    """
    encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_base"
    hidden_dim: int = 384
    sa_nheads: int = 12
    ca_nheads: int = 24
    dec_n_points: int = 4
    # Two scales here, versus the single "P4" default in RFDETRBaseConfig.
    projector_scale: List[Literal["P3", "P4", "P5"]] = ["P3", "P5"]
    pretrain_weights: Optional[str] = "rf-detr-large.pth"
class RFDETRNanoConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Nano model.

    Overrides ``out_feature_indexes``, ``num_windows``, ``dec_layers``,
    ``patch_size``, ``resolution``, ``positional_encoding_size`` and
    ``pretrain_weights``; every other field keeps the default inherited
    from RFDETRBaseConfig.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 2
    patch_size: int = 16
    resolution: int = 384
    # Equals resolution // patch_size (384 // 16 == 24); presumably the two
    # must be changed together -- TODO confirm.
    positional_encoding_size: int = 24
    pretrain_weights: Optional[str] = "rf-detr-nano.pth"
class RFDETRSmallConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Small model.

    Redeclares ``out_feature_indexes``, ``num_windows``, ``dec_layers``,
    ``patch_size``, ``resolution``, ``positional_encoding_size`` and
    ``pretrain_weights``; every other field keeps the default inherited
    from RFDETRBaseConfig.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    # Same value as the inherited RFDETRBaseConfig default, restated here.
    dec_layers: int = 3
    patch_size: int = 16
    resolution: int = 512
    # Equals resolution // patch_size (512 // 16 == 32); presumably the two
    # must be changed together -- TODO confirm.
    positional_encoding_size: int = 32
    pretrain_weights: Optional[str] = "rf-detr-small.pth"
class RFDETRMediumConfig(RFDETRBaseConfig):
    """
    The configuration for an RF-DETR Medium model.

    Overrides ``out_feature_indexes``, ``num_windows``, ``dec_layers``,
    ``patch_size``, ``resolution``, ``positional_encoding_size`` and
    ``pretrain_weights``; every other field keeps the default inherited
    from RFDETRBaseConfig.
    """
    out_feature_indexes: List[int] = [3, 6, 9, 12]
    num_windows: int = 2
    dec_layers: int = 4
    patch_size: int = 16
    resolution: int = 576
    # Equals resolution // patch_size (576 // 16 == 36); presumably the two
    # must be changed together -- TODO confirm.
    positional_encoding_size: int = 36
    pretrain_weights: Optional[str] = "rf-detr-medium.pth"
class TrainConfig(BaseModel):
    """
    Pydantic schema for the settings of a training run.

    ``dataset_dir`` is the only field without a default, so it must always
    be passed by the caller; every other field can be omitted. The meaning
    of each setting is defined by the training code that consumes this
    config, not here.
    """
    # Optimisation settings.
    lr: float = 1e-4
    lr_encoder: float = 1.5e-4
    batch_size: int = 4
    grad_accum_steps: int = 4
    epochs: int = 100
    ema_decay: float = 0.993
    ema_tau: int = 100
    lr_drop: int = 100
    checkpoint_interval: int = 10
    warmup_epochs: int = 0
    lr_vit_layer_decay: float = 0.8
    lr_component_decay: float = 0.7
    drop_path: float = 0.0
    # Same name and default as the field on ModelConfig.
    group_detr: int = 13
    ia_bce_loss: bool = True
    cls_loss_coef: float = 1.0
    num_select: int = 300
    # Data settings. Only the three dataset identifiers below are accepted.
    dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
    square_resize_div_64: bool = True
    # Required: no default is provided.
    dataset_dir: str
    output_dir: str = "output"
    multi_scale: bool = True
    expanded_scales: bool = True
    do_random_resize_via_padding: bool = False
    use_ema: bool = True
    num_workers: int = 2
    weight_decay: float = 1e-4
    # Early-stopping settings.
    early_stopping: bool = False
    early_stopping_patience: int = 10
    early_stopping_min_delta: float = 0.001
    early_stopping_use_ema: bool = False
    # Logging settings.
    tensorboard: bool = True
    wandb: bool = False
    project: Optional[str] = None
    run: Optional[str] = None
    # Annotated as Optional because the default is None. With the previous
    # plain List[str] annotation, explicitly passing class_names=None (or
    # re-validating a dumped config) is rejected by pydantic v2, even though
    # None is the declared default.
    class_names: Optional[List[str]] = None
    run_test: bool = True