File size: 3,834 Bytes
b97f6e6
 
bab8078
 
 
 
b97f6e6
 
 
 
 
 
 
bab8078
b97f6e6
bab8078
b97f6e6
 
 
bab8078
 
 
 
 
 
fb2b996
b97f6e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bab8078
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b97f6e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb2b996
 
 
caf7eff
fb2b996
caf7eff
fb2b996
caf7eff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import logging
import logging.config
import os
import time

import torch
import uvicorn
from fastapi import FastAPI, Request, Response
from fastapi.responses import JSONResponse
from sse_starlette.sse import EventSourceResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from config.log_config import uvicorn_logger
from models import OpenAIinput
from utils.codegen import CodeGenProxy
from utils.errors import FauxPilotException

# Apply the project's logging configuration. ``logging.config`` is a
# submodule: it has to be imported explicitly, a bare ``import logging`` does
# not guarantee that it is loaded.
logging.config.dictConfig(uvicorn_logger)

# Disabled alternative that loads a Hugging Face model in-process instead of
# going through the CodeGen proxy; kept for reference.
# token = os.environ.get("HUB_TOKEN", None)
# device = "cuda:0" if torch.cuda.is_available() else "cpu"

# tokenizer = AutoTokenizer.from_pretrained("bigcode/christmas-models", use_auth_token=token)
# model = AutoModelForCausalLM.from_pretrained("bigcode/christmas-models", trust_remote_code=True, use_auth_token=token).to(device)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

# Environment values are always strings, so a raw TRITON_VERBOSITY of "0" or
# "false" would be truthy. These spellings (and an unset/empty variable) are
# treated as "off"; every other value turns verbosity on.
_FALSY_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})

# Proxy object used by the completion endpoint to produce completions.
codegen = CodeGenProxy(
    host=os.environ.get("TRITON_HOST", "triton"),
    # NOTE(review): this is a str when read from the environment and an int
    # when defaulted -- confirm CodeGenProxy accepts both.
    port=os.environ.get("TRITON_PORT", 8001),
    verbose=os.environ.get("TRITON_VERBOSITY", "").strip().lower() not in _FALSY_ENV_VALUES,
)

# ASGI application object; the interactive API documentation is served at "/".
app = FastAPI(
    title="FauxPilot",
    # Adjacent string literals are joined with no separator, so the first
    # fragment must end with a space to keep "CodeGen models" as two words.
    description=(
        "This is an attempt to build a locally hosted version of GitHub Copilot. It uses the SalesForce CodeGen "
        "models inside of NVIDIA's Triton Inference Server with the FasterTransformer backend."
    ),
    docs_url="/",
    # NOTE(review): -1 presumably hides the models/schemas section of the
    # Swagger UI page -- confirm against the Swagger UI configuration docs.
    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
)

@app.exception_handler(FauxPilotException)
async def fauxpilot_handler(request: Request, exc: FauxPilotException):
    """Turn a ``FauxPilotException`` into an HTTP 400 JSON response.

    The response body is whatever ``exc.json()`` returns. ``request`` is part
    of the handler signature but is not used here.
    """
    error_payload = exc.json()
    return JSONResponse(content=error_payload, status_code=400)

@app.post("/v1/engines/codegen/completions")
@app.post("/v1/completions")
async def completions(data: OpenAIinput):
    """Generate a completion for the submitted request.

    The validated request is converted to a plain dict and handed to the
    module-level ``codegen`` proxy. Any exception raised there is re-raised as
    a ``FauxPilotException`` of type ``invalid_request_error`` carrying the
    original message, with the original exception chained as its cause.

    The result is returned as a server-sent event stream when the request has
    a non-null ``stream`` field, and as a JSON response otherwise.
    """
    data = data.dict()
    try:
        content = codegen(data=data)
        # prompt = data.get("prompt")
        # choices = [pipe(prompt, do_sample=True, top_p=0.95, max_new_tokens=50)[0]['generated_text']]
        # completion = {
        #     'id': None,  # fill in
        #     'model': 'codegen',
        #     'object': 'text_completion',
        #     'created': int(time.time()),
        #     'choices': None,  # fill in
        #     'usage': {
        #         'completion_tokens': int(sum([len(c.split()) for c in choices])),
        #         'prompt_tokens': int(len(prompt.split())),
        #         'total_tokens': int(sum([len(c.split()) for c in choices]) + len(prompt.split())),
        #     }
        # }
        # completion['id'] = 10
        # completion['choices'] = choices
        # content = json.dumps(completion)
    except Exception as exc:
        # Chain explicitly so the root cause is recorded as ``__cause__`` and
        # shows up as the direct cause in tracebacks and logs.
        raise FauxPilotException(
            message=str(exc),
            type="invalid_request_error",
            param=None,
            code=None,
        ) from exc

    # NOTE(review): any non-null value selects streaming, including an
    # explicit ``false``. Confirm how ``codegen`` decides between a streamed
    # and a plain result before changing this check, so both stay in agreement.
    if data.get("stream") is not None:
        return EventSourceResponse(
            content=content,
            status_code=200,
            media_type="text/event-stream"
        )
    else:
        return Response(
            status_code=200,
            content=content,
            media_type="application/json"
        )

if __name__ == "__main__":
    # Script entry point: serve the ``app`` object of module ``app`` on all
    # network interfaces, port 5000.
    uvicorn.run(app="app:app", port=5000, host="0.0.0.0")


# curl request to test the API
# curl -X POST "http://localhost:5000/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"

# curl -X POST "https://huggingface.co/spaces/ncoop57/santacoder-openai/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"

# curl -X POST "https://ncoop57-santacoder-openai.hf.space/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"