vickeee465 committed on
Commit
90425de
1 Parent(s): 8dc5af0

cache models during build

Browse files
Files changed (2) hide show
  1. app.py +9 -1
  2. utils.py +28 -0
app.py CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
  from interfaces.cap import demo as cap_demo
@@ -6,7 +13,7 @@ from interfaces.sentiment import demo as sentiment_demo
6
  from interfaces.emotion import demo as emotion_demo
7
  from interfaces.ner import demo as ner_demo
8
  from interfaces.ner import download_models as download_spacy_models
9
-
10
 
11
  with gr.Blocks() as demo:
12
  gr.Markdown(
@@ -29,6 +36,7 @@ with gr.Blocks() as demo:
29
  )
30
 
31
  if __name__ == "__main__":
 
32
  download_spacy_models()
33
  demo.launch()
34
 
 
1
+ import os
2
+ PATH = '/data/' # at least 150GB storage needs to be attached
3
+ os.environ['TRANSFORMERS_CACHE'] = PATH
4
+ os.environ['HF_HOME'] = PATH
5
+ os.environ['HF_DATASETS_CACHE'] = PATH
6
+ os.environ['TORCH_HOME'] = PATH
7
+
8
  import gradio as gr
9
 
10
  from interfaces.cap import demo as cap_demo
 
13
  from interfaces.emotion import demo as emotion_demo
14
  from interfaces.ner import demo as ner_demo
15
  from interfaces.ner import download_models as download_spacy_models
16
+ from utils import download_hf_models
17
 
18
  with gr.Blocks() as demo:
19
  gr.Markdown(
 
36
  )
37
 
38
  if __name__ == "__main__":
39
+ download_hf_models()
40
  download_spacy_models()
41
  demo.launch()
42
 
utils.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+
4
+ from interfaces.cap import languages as languages_cap
5
+ from interfaces.cap import domains as domains_cap
6
+
7
+ from interfaces.cap import build_huggingface_path as hf_cap_path
8
+ from interfaces.manifesto import build_huggingface_path as hf_manifesto_path
9
+ from interfaces.sentiment import build_huggingface_path as hf_sentiment_path
10
+ from interfaces.emotion import build_huggingface_path as hf_emotion_path
11
+
12
+ HF_TOKEN = os.environ["hf_read"]
13
+
14
+ # should be a temporary solution
15
+ models = [hf_manifesto_path(""), hf_sentiment_path(""), hf_emotion_path("")]
16
+ for language in languages_cap:
17
+ for domain in domains_cap:
18
+ models.append(hf_cap_path(language, domain))
19
+
20
+ tokenizers = ["xlm-roberta-large"]
21
+
22
+ def download_hf_models():
23
+ for model_id in models:
24
+ AutoModelForSequenceClassification.from_pretrained(model_id, low_cpu_mem_usage=True, device_map="auto",
25
+ token=HF_TOKEN)
26
+ for tokenizer_id in tokenizers:
27
+ AutoTokenizer.from_pretrained(tokenizer_id)
28
+