import os

# Set cache directories before any library imports so downloads land in
# paths that persist into the final image (must match the Dockerfile env vars).
os.environ['HOME'] = '/app'
os.environ['HF_HOME'] = '/app/.hf_cache'
os.environ['LANGTOOL_HOME'] = '/app/.ltool_cache'
os.environ['XDG_CACHE_HOME'] = '/app/.cache'

import language_tool_python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


def pre_cache_models():
    """
    Downloads and caches all required models and dependencies.
    This script is run during the Docker build process.
    """
    print("Caching LanguageTool model...")
    try:
        # Instantiating LanguageTool downloads and caches the server files.
        language_tool_python.LanguageTool('en-US')
        print("LanguageTool model cached successfully.")
    except Exception as e:
        print(f"Failed to cache LanguageTool: {e}")

    print("\nCaching Hugging Face models...")
    models_to_cache = [
        "vennify/t5-base-grammar-correction",
        "humarin/chatgpt_paraphraser_on_T5_base",
    ]
    for model_name in models_to_cache:
        try:
            print(f"Caching {model_name}...")
            # Cache both the tokenizer files and the model weights.
            AutoTokenizer.from_pretrained(model_name)
            AutoModelForSeq2SeqLM.from_pretrained(model_name)
            print(f"{model_name} cached successfully.")
        except Exception as e:
            print(f"Failed to cache {model_name}: {e}")

    print("\nAll models have been cached.")


if __name__ == "__main__":
    pre_cache_models()
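
# ---------------------------------------------------------------------------
# Optional runtime sanity check (a sketch, not part of this build script).
# In the running container you can confirm the build-time cache is actually
# being used by loading the same models with local_files_only=True, which
# makes transformers raise an error instead of silently re-downloading.
# Run it as a separate script with the same env vars as above, e.g.:
#
#     from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
#
#     for model_name in [
#         "vennify/t5-base-grammar-correction",
#         "humarin/chatgpt_paraphraser_on_T5_base",
#     ]:
#         AutoTokenizer.from_pretrained(model_name, local_files_only=True)
#         AutoModelForSeq2SeqLM.from_pretrained(model_name, local_files_only=True)
#         print(f"{model_name}: loaded from local cache")
# ---------------------------------------------------------------------------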