celise88 committed on
Commit
dfd490b
1 Parent(s): 88a5ae5

add model shards to reduce memory consumption

Browse files
Files changed (2) hide show
  1. main.py +4 -2
  2. requirements.txt +2 -1
main.py CHANGED
@@ -14,7 +14,7 @@ import numpy as np
14
  from numpy.linalg import norm
15
  from nltk.tokenize import SpaceTokenizer
16
  import nltk
17
- from transformers import pipeline
18
  from dotenv import load_dotenv
19
  load_dotenv()
20
 
@@ -25,7 +25,9 @@ templates = Jinja2Templates(directory="templates/")
25
  onet = pd.read_csv('static/ONET_JobTitles.csv')
26
  simdat = pd.read_csv('static/cohere_embeddings.csv')
27
 
28
- classifier = pipeline('text-classification', model = 'static/model_shards', tokenizer = 'static/tokenizer_shards')
 
 
29
 
30
  ### job information center ###
31
  # get
 
14
  from numpy.linalg import norm
15
  from nltk.tokenize import SpaceTokenizer
16
  import nltk
17
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
18
  from dotenv import load_dotenv
19
  load_dotenv()
20
 
 
25
  onet = pd.read_csv('static/ONET_JobTitles.csv')
26
  simdat = pd.read_csv('static/cohere_embeddings.csv')
27
 
28
+ model = AutoModelForSequenceClassification.from_pretrained('static/model_shards', low_cpu_mem_usage=True)
29
+ tokenizer = AutoTokenizer.from_pretrained('static/tokenizer_shards', low_cpu_mem_usage=True)
30
+ classifier = pipeline('text-classification', model = model, tokenizer = tokenizer)
31
 
32
  ### job information center ###
33
  # get
requirements.txt CHANGED
@@ -14,4 +14,5 @@ unidecode==1.3.6
14
  cohere==3.1.5
15
  python-dotenv==0.21.1
16
  transformers==4.25.1
17
- torch==1.13.1
 
 
14
  cohere==3.1.5
15
  python-dotenv==0.21.1
16
  transformers==4.25.1
17
+ torch==1.13.1
18
+ accelerate==0.16.0