Spaces: ncoop57/santacoder-openai · Runtime error
ncoop57 committed
Commit · fb2b996 · 1 parent: 250f5b6

Attempting with hf model

Files changed:
- app.py (+11, -1)
- utils/hf_model.py (+27, -0)
app.py CHANGED
@@ -8,11 +8,13 @@ from sse_starlette.sse import EventSourceResponse
 
 from config.log_config import uvicorn_logger
 from models import OpenAIinput
-from utils.
+from utils.hf_model import HFModel
 from utils.errors import FauxPilotException
 
 logging.config.dictConfig(uvicorn_logger)
 
+model = HFModel("bigcode/santacoder")
+
 codegen = CodeGenProxy(
     host=os.environ.get("TRITON_HOST", "triton"),
     port=os.environ.get("TRITON_PORT", 8001),
@@ -63,3 +65,11 @@ async def completions(data: OpenAIinput):
 
 if __name__ == "__main__":
     uvicorn.run("app:app", host="0.0.0.0", port=5000)
+
+
+# curl request to test the API
+# curl -X POST "http://localhost:5000/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
+
+# curl -X POST "https://huggingface.co/spaces/ncoop57/santacoder-openai/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
+
+# curl -X POST "https://ncoop57-santacoder-openai.hf.space/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
utils/hf_model.py ADDED
@@ -0,0 +1,27 @@
+import time
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+class HFModel:
+    def __init__(self, model_name):
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name)
+
+    def __call__(self, request):
+        prompt = request.get("prompt")
+        input_ids = self.tokenizer.encode(prompt, return_tensors='pt')
+        choices = self.model.generate(input_ids, max_length=50, do_sample=True)
+        choices = self.tokenizer.batch_decode(choices, skip_special_tokens=True)
+        completion = {
+            'id': None,  # fill in
+            'model': 'codegen',
+            'object': 'text_completion',
+            'created': int(time.time()),
+            'choices': None,  # fill in
+            'usage': {
+                'completion_tokens': int(sum([len(c.split()) for c in choices])),
+                'prompt_tokens': int(len(prompt.split())),
+                'total_tokens': int(sum([len(c.split()) for c in choices]) + len(prompt.split())),
+            }
+        }
+        return completion, choices