Yash Sachdeva committed on
Commit 21e7dd1 · 1 Parent(s): 5ed2b9f
Files changed (1)
  1. question_paper.py +20 -6
question_paper.py CHANGED
@@ -6,13 +6,27 @@ from fastapi import FastAPI
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 app = FastAPI()
+
+MODEL = None
+TOKENIZER = None
+
+
 @app.get("/")
 def llama():
-    tokenizer = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-v1.0")
-    model = AutoModelForCausalLM.from_pretrained("Upstage/SOLAR-10.7B-v1.0", device_map="auto", torch_dtype=torch.float16,)
     text = "Hi, my name is "
-    inputs = tokenizer(text, return_tensors="pt")
-    outputs = model.generate(**inputs, max_new_tokens=64)
-    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+    inputs = TOKENIZER(text, return_tensors="pt")
+    outputs = MODEL.generate(**inputs, max_new_tokens=64)
+    tresponse = TOKENIZER.decode(outputs[0], skip_special_tokens=True)
+    print(tresponse)
+
+    return tresponse
+
 
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+@app.on_event("startup")
+def init_model():
+    global MODEL, TOKENIZER
+    if not MODEL:
+        print("loading model")
+        TOKENIZER = AutoTokenizer.from_pretrained("Upstage/SOLAR-10.7B-v1.0")
+        MODEL = AutoModelForCausalLM.from_pretrained("Upstage/SOLAR-10.7B-v1.0", device_map="auto", torch_dtype=torch.float16,)
+        print("loaded model")