ncoop57 committed
Commit fb2b996 · 1 Parent(s): 250f5b6

Attempting with hf model

Files changed (2)
  1. app.py +11 -1
  2. utils/hf_model.py +27 -0
app.py CHANGED
@@ -8,11 +8,13 @@ from sse_starlette.sse import EventSourceResponse
 
 from config.log_config import uvicorn_logger
 from models import OpenAIinput
-from utils.codegen import CodeGenProxy
+from utils.hf_model import HFModel
 from utils.errors import FauxPilotException
 
 logging.config.dictConfig(uvicorn_logger)
 
+model = HFModel("bigcode/santacoder")
+
 codegen = CodeGenProxy(
     host=os.environ.get("TRITON_HOST", "triton"),
     port=os.environ.get("TRITON_PORT", 8001),
@@ -63,3 +65,11 @@ async def completions(data: OpenAIinput):
 
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=5000)
+
+
+# curl requests to test the API
+# curl -X POST "http://localhost:5000/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
+
+# curl -X POST "https://huggingface.co/spaces/ncoop57/santacoder-openai/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
+
+# curl -X POST "https://ncoop57-santacoder-openai.hf.space/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
utils/hf_model.py ADDED
@@ -0,0 +1,27 @@
+import time
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+class HFModel:
+    def __init__(self, model_name):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name)
+
+    def __call__(self, request):
+        prompt = request.get("prompt")
+        input_ids = self.tokenizer.encode(prompt, return_tensors='pt')
+        choices = self.model.generate(input_ids, max_length=50, do_sample=True)
+        choices = self.tokenizer.batch_decode(choices, skip_special_tokens=True)  # decode generated ids to text
+        completion = {
+            'id': None,  # fill in
+            'model': 'codegen',
+            'object': 'text_completion',
+            'created': int(time.time()),
+            'choices': None,  # fill in
+            'usage': {
+                'completion_tokens': int(sum(len(c.split()) for c in choices)),
+                'prompt_tokens': int(len(prompt.split())),
+                'total_tokens': int(sum(len(c.split()) for c in choices) + len(prompt.split())),
+            }
+        }
+        return completion, choices
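A quick usage sketch of the new class, assuming it is called directly with a request dict shaped like the OpenAI payload (note that depending on the transformers version, loading bigcode/santacoder may also require trust_remote_code=True, which this commit does not pass):

from utils.hf_model import HFModel

# Load the model once (the first call downloads the santacoder weights).
model = HFModel("bigcode/santacoder")

# __call__ takes a dict with a "prompt" key and returns the OpenAI-style
# completion payload plus the decoded generations.
completion, choices = model({"prompt": "import numpy as np"})
print(choices[0])
print(completion["usage"])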