| """ | |
| Optimized configuration for HF Spaces with intelligent handling of large models. | |
| This file contains recommended settings based on available hardware. | |
| """ | |
| import os | |
| from typing import Dict, Any | |


class HFSpacesConfig:
    """Optimized configuration for different HF Spaces tiers."""

    # Timeouts (in seconds)
    TIMEOUT_SMALL_MODEL = 120   # Models <2B parameters
    TIMEOUT_MEDIUM_MODEL = 300  # Models 2-8B parameters
    TIMEOUT_LARGE_MODEL = 600   # Models >8B parameters
    TIMEOUT_PING = 5            # Health checks

    # Recommended memory limits (GB) per HF Spaces tier
    MEMORY_LIMITS = {
        "free": 16,        # Free HF Spaces
        "pro": 32,         # HF Spaces PRO
        "enterprise": 64,  # HF Spaces Enterprise
    }

    # Recommended models per tier
    RECOMMENDED_MODELS = {
        "free": [
            "meta-llama/Llama-3.2-1B",
            "oopere/pruned40-llama-3.2-1B",
            "oopere/Fair-Llama-3.2-1B",
            "google/gemma-3-1b-pt",
            "Qwen/Qwen3-1.7B",
        ],
        "pro": [
            "meta-llama/Llama-3.2-3B",
            "meta-llama/Llama-3-8B",
        ],
        "enterprise": [
            "meta-llama/Llama-3-70B",
        ],
    }

    # Model loading configuration
    MODEL_LOAD_CONFIG = {
        "small": {  # <2B params
            "low_cpu_mem_usage": True,
            "torch_dtype": "auto",
            "device_map": "auto",
            "timeout": TIMEOUT_SMALL_MODEL,
        },
        "medium": {  # 2-8B params
            "low_cpu_mem_usage": True,
            "torch_dtype": "float16",  # Reduces memory
            "device_map": "auto",
            "timeout": TIMEOUT_MEDIUM_MODEL,
        },
        "large": {  # >8B params
            "low_cpu_mem_usage": True,
            "torch_dtype": "float16",
            "device_map": "auto",
            "load_in_8bit": True,  # int8 quantization
            "timeout": TIMEOUT_LARGE_MODEL,
        },
    }

    @classmethod
    def get_model_size_category(cls, model_name: str) -> str:
        """
        Determines the model size category based on the name.

        Returns:
            "small", "medium", or "large"
        """
        model_lower = model_name.lower()
        # Detect by the parameter count embedded in the name. Check the
        # largest sizes first: substring matching would otherwise classify
        # e.g. "13b" as medium, because "13b" contains "3b".
        if any(size in model_lower for size in ["13b", "30b", "70b"]):
            return "large"
        elif any(size in model_lower for size in ["3b", "7b", "8b"]):
            return "medium"
        elif any(size in model_lower for size in ["1b", "1.5b", "1.7b"]):
            return "small"
        # Default: small (assume the safest case)
        return "small"

    @classmethod
    def get_timeout_for_model(cls, model_name: str) -> int:
        """Gets the recommended timeout for a model."""
        size = cls.get_model_size_category(model_name)
        return cls.MODEL_LOAD_CONFIG[size]["timeout"]

    @classmethod
    def get_load_config(cls, model_name: str) -> Dict[str, Any]:
        """Gets the optimized loading configuration for a model."""
        size = cls.get_model_size_category(model_name)
        return cls.MODEL_LOAD_CONFIG[size].copy()

    @classmethod
    def is_model_recommended(cls, model_name: str, tier: str = "free") -> bool:
        """Checks whether a model is recommended for the given tier."""
        return model_name in cls.RECOMMENDED_MODELS.get(tier, [])

    @classmethod
    def get_memory_warning(cls, model_name: str, tier: str = "free") -> str:
        """
        Generates a warning if the model may exceed memory limits.

        Returns:
            A warning string, or an empty string if there is no problem.
        """
        if cls.is_model_recommended(model_name, tier):
            return ""
        size = cls.get_model_size_category(model_name)
        if size == "medium" and tier == "free":
            return (
                "⚠️ **Warning**: This model may be too large for free HF Spaces. "
                "Consider upgrading to HF Spaces PRO or using a smaller model."
            )
        elif size == "large" and tier in ["free", "pro"]:
            return (
                "❌ **Error**: This model is too large for your HF Spaces tier. "
                "Use a smaller model or upgrade to Enterprise."
            )
        return ""


# Usage example:
def get_optimized_request_config(model_name: str) -> dict:
    """
    Gets an optimized configuration for HTTP requests based on the model.

    Usage:
        config = get_optimized_request_config("meta-llama/Llama-3.2-1B")
        response = requests.post(url, json=payload, **config)
    """
    return {
        "timeout": HFSpacesConfig.get_timeout_for_model(model_name),
    }


# Default configuration for general use
DEFAULT_CONFIG = {
    "timeout": HFSpacesConfig.TIMEOUT_MEDIUM_MODEL,
    "max_retries": 2,
    "retry_delay": 5,  # seconds between retries
}
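

# A minimal retry-loop sketch showing how DEFAULT_CONFIG's "max_retries" and
# "retry_delay" could drive a request to a Space. It assumes the `requests`
# library; `url` and `payload` are caller-supplied placeholders and the
# helper name is illustrative.
def post_with_retries(url: str, payload: dict):
    import time

    import requests

    last_error = None
    for attempt in range(DEFAULT_CONFIG["max_retries"] + 1):
        try:
            return requests.post(url, json=payload, timeout=DEFAULT_CONFIG["timeout"])
        except requests.RequestException as exc:
            last_error = exc
            if attempt < DEFAULT_CONFIG["max_retries"]:
                time.sleep(DEFAULT_CONFIG["retry_delay"])
    raise last_error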


if __name__ == "__main__":
    # Usage examples
    print("🔧 Optimized configuration for HF Spaces\n")
    test_models = [
        "meta-llama/Llama-3.2-1B",
        "meta-llama/Llama-3.2-3B",
        "meta-llama/Llama-3-8B",
    ]
    for model in test_models:
        print(f"📦 Model: {model}")
        print(f"   Category: {HFSpacesConfig.get_model_size_category(model)}")
        print(f"   Timeout: {HFSpacesConfig.get_timeout_for_model(model)}s")
        print(f"   Recommended (free): {HFSpacesConfig.is_model_recommended(model, 'free')}")
        warning = HFSpacesConfig.get_memory_warning(model, "free")
        if warning:
            print(f"   {warning}")
        print()