Spaces:
Runtime error
Runtime error
import logging | |
import os | |
import torch | |
import json | |
import torch | |
import time | |
import uvicorn | |
from fastapi import FastAPI, Request, Response | |
from fastapi.responses import JSONResponse | |
from sse_starlette.sse import EventSourceResponse | |
from config.log_config import uvicorn_logger | |
from models import OpenAIinput | |
from utils.codegen import CodeGenProxy | |
from utils.errors import FauxPilotException | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
# `import logging` alone does not guarantee that the `logging.config`
# submodule is loaded; import it explicitly rather than relying on another
# package having imported it as a side effect.
import logging.config

# Apply the project's logging configuration (passed to dictConfig as-is).
logging.config.dictConfig(uvicorn_logger)
# token = os.environ.get("HUB_TOKEN", None) | |
# device = "cuda:0" if torch.cuda.is_available() else "cpu" | |
# tokenizer = AutoTokenizer.from_pretrained("bigcode/christmas-models", use_auth_token=token) | |
# model = AutoModelForCausalLM.from_pretrained("bigcode/christmas-models", trust_remote_code=True, use_auth_token=token).to(device) | |
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device) | |
# Environment variables are always strings, so normalise them before handing
# them to the proxy:
#   * TRITON_PORT      -> int, matching the type of the 8001 default.
#   * TRITON_VERBOSITY -> bool; unset/empty and the usual "off" spellings are
#     False, anything else is True. Previously the raw string was passed, so
#     values such as "False" or "0" were truthy.
_FALSY_ENV_VALUES = {"", "0", "false", "no", "off"}

codegen = CodeGenProxy(
    host=os.environ.get("TRITON_HOST", "triton"),
    port=int(os.environ.get("TRITON_PORT", 8001)),
    verbose=(
        os.environ.get("TRITON_VERBOSITY", "").strip().lower()
        not in _FALSY_ENV_VALUES
    ),
)
# The ASGI application. Interactive API docs are served at the root path, and
# the schema/model section of the Swagger UI is hidden
# (defaultModelsExpandDepth = -1).
app = FastAPI(
    title="FauxPilot",
    # The two literals are joined by implicit concatenation; the trailing
    # space on the first one keeps "CodeGen" and "models" from running
    # together.
    description="This is an attempt to build a locally hosted version of GitHub Copilot. It uses the SalesForce CodeGen "
                "models inside of NVIDIA's Triton Inference Server with the FasterTransformer backend.",
    docs_url="/",
    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
)
# Register the handler with the app; without this the function is never
# invoked and a FauxPilotException would surface as a generic 500.
@app.exception_handler(FauxPilotException)
async def fauxpilot_handler(request: Request, exc: FauxPilotException):
    """Turn a FauxPilotException into an HTTP 400 JSON response.

    Args:
        request: The request being processed when the exception was raised
            (unused; part of the exception-handler signature).
        exc: The raised exception; its ``json()`` result becomes the body.

    Returns:
        A ``JSONResponse`` with status code 400.
    """
    return JSONResponse(
        status_code=400,
        content=exc.json(),
    )
# Route path taken from the curl examples at the bottom of this file.
@app.post("/v1/engines/codegen/completions")
async def completions(data: OpenAIinput):
    """Generate a completion for the request and return it to the client.

    The request model is converted to a plain dict and passed to ``codegen``.
    The result is returned as a server-sent event stream when the request's
    ``stream`` field is truthy, otherwise as a single JSON response.

    Args:
        data: The parsed request body.

    Returns:
        ``EventSourceResponse`` (``text/event-stream``) for streaming
        requests, else ``Response`` (``application/json``).

    Raises:
        FauxPilotException: If ``codegen`` raises anything; the message is the
            string form of the original error, which is chained as the cause.
    """
    payload = data.dict()
    try:
        content = codegen(data=payload)
        # Disabled alternative that generates with a local transformers
        # pipeline (`pipe`) instead of the codegen proxy:
        # prompt = data.get("prompt")
        # choices = [pipe(prompt, do_sample=True, top_p=0.95, max_new_tokens=50)[0]['generated_text']]
        # completion = {
        #     'id': None,  # fill in
        #     'model': 'codegen',
        #     'object': 'text_completion',
        #     'created': int(time.time()),
        #     'choices': None,  # fill in
        #     'usage': {
        #         'completion_tokens': int(sum([len(c.split()) for c in choices])),
        #         'prompt_tokens': int(len(prompt.split())),
        #         'total_tokens': int(sum([len(c.split()) for c in choices]) + len(prompt.split())),
        #     }
        # }
        # completion['id'] = 10
        # completion['choices'] = choices
        # content = json.dumps(completion)
    except Exception as E:
        # Boundary handler: report every backend failure in the API's error
        # format, keeping the original exception as the cause for debugging.
        raise FauxPilotException(
            message=str(E),
            type="invalid_request_error",
            param=None,
            code=None,
        ) from E

    # Truthiness, not `is not None`: an explicit `"stream": false` must get a
    # regular JSON response rather than an event stream.
    if payload.get("stream"):
        return EventSourceResponse(
            content=content,
            status_code=200,
            media_type="text/event-stream",
        )
    return Response(
        status_code=200,
        content=content,
        media_type="application/json",
    )
if __name__ == "__main__":
    # Script entry point: serve the `app` object from module `app` on all
    # interfaces, port 5000.
    uvicorn.run(
        "app:app",
        port=5000,
        host="0.0.0.0",
    )
# curl request to test the API | |
# curl -X POST "http://localhost:5000/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}" | |
# curl -X POST "https://huggingface.co/spaces/ncoop57/santacoder-openai/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}" | |
# curl -X POST "https://ncoop57-santacoder-openai.hf.space/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}" |