# # model.py - Optimized version
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch
# from functools import lru_cache
# import os
# import asyncio
# from concurrent.futures import ThreadPoolExecutor
# import logging
#
# logger = logging.getLogger(__name__)
#
# # Global variables to store loaded model
# _tokenizer = None
# _model = None
# _model_loading = False
# _model_loaded = False
#
# @lru_cache(maxsize=1)
# def get_model_config():
#     """Cache model configuration"""
#     return {
#         "model_id": "deepseek-ai/deepseek-coder-1.3b-instruct",
#         "torch_dtype": torch.bfloat16,
#         "device_map": "auto",
#         "trust_remote_code": True,
#         # Add these optimizations
#         "low_cpu_mem_usage": True,
#         "use_cache": True,
#     }
#
# def load_model_sync():
#     """Synchronous model loading with optimizations"""
#     global _tokenizer, _model, _model_loaded
#
#     if _model_loaded:
#         return _tokenizer, _model
#
#     config = get_model_config()
#     model_id = config["model_id"]
#     logger.info(f"Loading model {model_id}...")
#
#     try:
#         # Set cache directory to avoid re-downloading
#         cache_dir = os.environ.get("TRANSFORMERS_CACHE", "./model_cache")
#         os.makedirs(cache_dir, exist_ok=True)
#
#         # Load tokenizer first (faster)
#         logger.info("Loading tokenizer...")
#         _tokenizer = AutoTokenizer.from_pretrained(
#             model_id,
#             trust_remote_code=config["trust_remote_code"],
#             cache_dir=cache_dir,
#             use_fast=True,  # Use fast tokenizer if available
#         )
#
#         # Load model with optimizations
#         logger.info("Loading model...")
#         _model = AutoModelForCausalLM.from_pretrained(
#             model_id,
#             trust_remote_code=config["trust_remote_code"],
#             torch_dtype=config["torch_dtype"],
#             device_map=config["device_map"],
#             low_cpu_mem_usage=config["low_cpu_mem_usage"],
#             cache_dir=cache_dir,
#             offload_folder="offload",
#             offload_state_dict=True,
#         )
#
#         # Set to evaluation mode
#         _model.eval()
#         _model_loaded = True
#         logger.info("Model loaded successfully!")
#         return _tokenizer, _model
#     except Exception as e:
#         logger.error(f"Failed to load model: {e}")
#         raise
#
# async def load_model_async():
#     """Asynchronous model loading"""
#     global _model_loading
#
#     if _model_loaded:
#         return _tokenizer, _model
#
#     if _model_loading:
#         # Wait for ongoing loading to complete
#         while _model_loading and not _model_loaded:
#             await asyncio.sleep(0.1)
#         return _tokenizer, _model
#
#     _model_loading = True
#     try:
#         # Run model loading in thread pool to avoid blocking
#         loop = asyncio.get_event_loop()
#         with ThreadPoolExecutor(max_workers=1) as executor:
#             tokenizer, model = await loop.run_in_executor(
#                 executor, load_model_sync
#             )
#         return tokenizer, model
#     finally:
#         _model_loading = False
#
# def get_model():
#     """Get the loaded model (for synchronous access)"""
#     if not _model_loaded:
#         return load_model_sync()
#     return _tokenizer, _model
#
# def is_model_loaded():
#     """Check if model is loaded"""
#     return _model_loaded
#
# def get_model_info():
#     """Get model information without loading"""
#     config = get_model_config()
#     return {
#         "model_id": config["model_id"],
#         "loaded": _model_loaded,
#         "loading": _model_loading,
#     }
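#
# # NOTE: a hedged usage sketch for the commented-out async loader above. It
# # assumes an ASGI app (e.g. FastAPI) where the model is warmed up once at
# # startup so the first request does not block; the `app` object and the hook
# # name are illustrative, not part of this Space's actual code.
# #
# # @app.on_event("startup")
# # async def warm_up_model():
# #     await load_model_async()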
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from functools import lru_cache
import logging

logger = logging.getLogger(__name__)

# Global state for the lazily loaded model
_model_loaded = False
_tokenizer = None
_model = None


@lru_cache(maxsize=1)
def get_model_config():
    """Cache model configuration"""
    return {
        "model_id": "Salesforce/codet5p-220m",
        "trust_remote_code": True,
    }


def load_model_sync():
    """Load the tokenizer and model once; reuse them on subsequent calls."""
    global _tokenizer, _model, _model_loaded

    if _model_loaded:
        return _tokenizer, _model

    config = get_model_config()
    model_id = config["model_id"]

    try:
        _tokenizer = AutoTokenizer.from_pretrained(
            model_id, trust_remote_code=config["trust_remote_code"]
        )
        _model = AutoModelForSeq2SeqLM.from_pretrained(
            model_id, trust_remote_code=config["trust_remote_code"]
        )
        _model.eval()  # inference only: disable dropout
        _model_loaded = True
        return _tokenizer, _model
    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        raise
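
# A minimal smoke-test sketch of how this module might be used. It assumes
# CodeT5+ works with the standard Hugging Face seq2seq generate() API; the
# prompt and generation settings are illustrative only, not part of the app.
if __name__ == "__main__":
    import torch

    tokenizer, model = load_model_sync()
    prompt = "def add(a, b):"
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        # Greedy decoding keeps the example deterministic and fast on CPU.
        output_ids = model.generate(**inputs, max_new_tokens=32)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))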