"""Build the list of Hugging Face model ids and fetch them ahead of time.

Importing this module reads the ``hf_read`` environment variable and
assembles ``models`` (one entry each for manifesto, sentiment and emotion,
plus one per CAP language/domain pair). Nothing is loaded until
``download_hf_models()`` is called.
"""

import os

from transformers import AutoTokenizer, AutoModelForSequenceClassification

from interfaces.cap import languages as languages_cap
from interfaces.cap import domains as domains_cap
from interfaces.cap import build_huggingface_path as hf_cap_path
from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
from interfaces.emotion import build_huggingface_path as hf_emotion_path

# Access token forwarded to ``from_pretrained`` for the model loads below.
# Raises KeyError at import time if the variable is not set.
HF_TOKEN = os.environ["hf_read"]

# should be a temporary solution
# NOTE(review): the builders are called with an empty string here; what that
# argument selects is defined in the ``interfaces`` package -- confirm there.
models = [hf_manifesto_path(""), hf_sentiment_path(""), hf_emotion_path("")]

# Rebinds the imported mapping to a plain list of its values; from here on
# ``domains_cap`` is that list, not the original mapping.
domains_cap = list(domains_cap.values())

# One CAP model id per (language, domain) combination, language-major order.
for language in languages_cap:
    for domain in domains_cap:
        models.append(hf_cap_path(language, domain))

tokenizers = ["xlm-roberta-large"]


def download_hf_models() -> None:
    """Load every id in ``models`` and ``tokenizers`` via ``from_pretrained``.

    The returned model and tokenizer objects are discarded; the calls are
    made for their side effect (presumably populating the local Hugging Face
    cache -- confirm against the deployment setup). Models are requested with
    ``HF_TOKEN``; tokenizers are requested without a token.

    Any exception raised by ``from_pretrained`` propagates to the caller.
    """
    for model_id in models:
        AutoModelForSequenceClassification.from_pretrained(
            model_id,
            low_cpu_mem_usage=True,
            device_map="auto",
            offload_folder="offload",
            token=HF_TOKEN,
        )

    for tokenizer_id in tokenizers:
        AutoTokenizer.from_pretrained(tokenizer_id)