# NOTE: Hugging Face Spaces page-extraction metadata (status, file size, commit
# hashes, line-number gutter) previously sat here as raw text and made this
# file invalid Python; it is preserved below as a comment.
#   Spaces: Runtime error | File size: 3,834 Bytes
#   Commits seen: b97f6e6 bab8078 fb2b996 caf7eff
import json
import logging
# dictConfig lives in the logging.config submodule; importing `logging` alone
# does not guarantee it is loaded, so import it explicitly.
import logging.config
import os
import time

import torch
import uvicorn
from fastapi import FastAPI, Request, Response
from fastapi.responses import JSONResponse
from sse_starlette.sse import EventSourceResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from config.log_config import uvicorn_logger
from models import OpenAIinput
from utils.codegen import CodeGenProxy
from utils.errors import FauxPilotException

# Route uvicorn's log records through the project's logging configuration.
logging.config.dictConfig(uvicorn_logger)
# Triton-backed CodeGen proxy. Environment variables (all optional):
#   TRITON_HOST      hostname of the Triton Inference Server (default "triton")
#   TRITON_PORT      gRPC port of the server (default 8001)
#   TRITON_VERBOSITY truthy string ("1", "true", "yes") for verbose client logs
codegen = CodeGenProxy(
    host=os.environ.get("TRITON_HOST", "triton"),
    # os.environ values are strings; cast so the client always receives an int
    # instead of sometimes getting the int default and sometimes a str.
    port=int(os.environ.get("TRITON_PORT", 8001)),
    # bool("False") is True — any non-empty env string was treated as verbose.
    # Parse the string explicitly; unset/empty still means quiet.
    verbose=os.environ.get("TRITON_VERBOSITY", "").lower() in ("1", "true", "yes"),
)
# OpenAI-compatible HTTP API; docs served at the root path.
app = FastAPI(
    title="FauxPilot",
    # BUG FIX: the implicit string concatenation was missing a space and
    # rendered as "...SalesForce CodeGenmodels inside..." in the docs.
    description="This is an attempt to build a locally hosted version of GitHub Copilot. It uses the SalesForce CodeGen "
                "models inside of NVIDIA's Triton Inference Server with the FasterTransformer backend.",
    docs_url="/",
    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
)
@app.exception_handler(FauxPilotException)
async def fauxpilot_handler(request: Request, exc: FauxPilotException):
    """Render a FauxPilotException as an HTTP 400 JSON error response."""
    error_body = exc.json()
    return JSONResponse(status_code=400, content=error_body)
@app.post("/v1/engines/codegen/completions")
@app.post("/v1/completions")
async def completions(data: OpenAIinput):
    """OpenAI-compatible completion endpoint backed by the Triton CodeGen proxy.

    Returns a server-sent-event stream when the request sets ``stream`` to a
    truthy value, otherwise a single JSON completion payload. Backend errors
    are re-raised as FauxPilotException, which the registered exception
    handler converts into an OpenAI-style 400 JSON error.
    """
    data = data.dict()
    try:
        content = codegen(data=data)
    except Exception as exc:
        # Surface any backend failure in the OpenAI error format, chaining
        # the original exception for debuggability.
        raise FauxPilotException(
            message=str(exc),
            type="invalid_request_error",
            param=None,
            code=None,
        ) from exc

    # BUG FIX: the previous check (`is not None`) would stream even when the
    # client explicitly sent `"stream": false`; test truthiness instead.
    # NOTE(review): assumes OpenAIinput leaves `stream` unset/None by default —
    # confirm against the model definition.
    if data.get("stream"):
        return EventSourceResponse(
            content=content,
            status_code=200,
            media_type="text/event-stream",
        )
    return Response(
        status_code=200,
        content=content,
        media_type="application/json",
    )
# Allow running the API directly (e.g. `python app.py`) without an external
# ASGI launcher; binds on all interfaces at port 5000.
if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=5000)
# curl request to test the API
# curl -X POST "http://localhost:5000/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
# curl -X POST "https://huggingface.co/spaces/ncoop57/santacoder-openai/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"
# curl -X POST "https://ncoop57-santacoder-openai.hf.space/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"