Spaces:

ncoop57
/

santacoder-openai

Runtime error

ncoop57

Get minimum working openai server

bab8078 almost 2 years ago

3.83 kB

	import logging
	import os
	import torch
	import json
	import torch
	import time
	import uvicorn
	from fastapi import FastAPI, Request, Response
	from fastapi.responses import JSONResponse
	from sse_starlette.sse import EventSourceResponse

	from config.log_config import uvicorn_logger
	from models import OpenAIinput
	from utils.codegen import CodeGenProxy
	from utils.errors import FauxPilotException
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	# `import logging` by itself does not load the `logging.config` submodule, so it
	# is imported explicitly here instead of relying on some other package having
	# imported it first.
	import logging.config

	# Apply the project's dict-based logging configuration.
	logging.config.dictConfig(uvicorn_logger)

	# token = os.environ.get("HUB_TOKEN", None)
	# device = "cuda:0" if torch.cuda.is_available() else "cpu"

	# tokenizer = AutoTokenizer.from_pretrained("bigcode/christmas-models", use_auth_token=token)
	# model = AutoModelForCausalLM.from_pretrained("bigcode/christmas-models", trust_remote_code=True, use_auth_token=token).to(device)
	# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

	# Environment variables are always strings, so they are normalised before being
	# handed to the proxy:
	#   * TRITON_PORT is converted to int so it has the same type as the default.
	#   * TRITON_VERBOSITY is treated as a flag: unset, empty, "0", "false", "no" and
	#     "off" (case-insensitive) mean False; any other value means True. Passing
	#     the raw string would make values such as "False" or "0" truthy.
	_triton_verbosity = os.environ.get("TRITON_VERBOSITY", "")

	codegen = CodeGenProxy(
	    host=os.environ.get("TRITON_HOST", "triton"),
	    port=int(os.environ.get("TRITON_PORT", 8001)),
	    verbose=_triton_verbosity.strip().lower() not in {"", "0", "false", "no", "off"},
	)

	# FastAPI application object; `docs_url="/"` serves the interactive docs at the
	# root path.
	app = FastAPI(
	    title="FauxPilot",
	    # Adjacent string literals are joined verbatim, so the first fragment must end
	    # with a space to keep "CodeGen" and "models" as separate words.
	    description=(
	        "This is an attempt to build a locally hosted version of GitHub Copilot. It uses the SalesForce CodeGen "
	        "models inside of NVIDIA's Triton Inference Server with the FasterTransformer backend."
	    ),
	    docs_url="/",
	    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
	)

	@app.exception_handler(FauxPilotException)
	async def fauxpilot_handler(request: Request, exc: FauxPilotException):
	    """Turn a ``FauxPilotException`` into an HTTP 400 JSON response.

	    Args:
	        request: The request being handled when the exception was raised (unused).
	        exc: The raised exception; its ``json()`` result becomes the response body.

	    Returns:
	        A ``JSONResponse`` with status code 400.
	    """
	    error_payload = exc.json()
	    return JSONResponse(content=error_payload, status_code=400)

	@app.post("/v1/engines/codegen/completions")
	@app.post("/v1/completions")
	async def completions(data: OpenAIinput):
	    """Serve a completion request on either of the two registered routes.

	    The validated request model is converted to a plain dict and passed to the
	    module-level ``codegen`` proxy; whatever it returns is used as the response
	    body.

	    Args:
	        data: The parsed request body.

	    Returns:
	        An ``EventSourceResponse`` (``text/event-stream``) when the request's
	        ``stream`` field is not ``None``, otherwise a plain ``Response`` with
	        media type ``application/json``. Both use status code 200.

	    Raises:
	        FauxPilotException: If the ``codegen`` call raises any ``Exception``. The
	            original exception's message is carried over and the original is
	            attached as the explicit cause.
	    """
	    payload = data.dict()
	    try:
	        content = codegen(data=payload)
	        # Disabled experiment: build the completion locally with the transformers
	        # pipeline instead of calling the proxy.
	        # prompt = data.get("prompt")
	        # choices = [pipe(prompt, do_sample=True, top_p=0.95, max_new_tokens=50)[0]['generated_text']]
	        # completion = {
	        #     'id': None, # fill in
	        #     'model': 'codegen',
	        #     'object': 'text_completion',
	        #     'created': int(time.time()),
	        #     'choices': None, # fill in
	        #     'usage': {
	        #         'completion_tokens': int(sum([len(c.split()) for c in choices])),
	        #         'prompt_tokens': int(len(prompt.split())),
	        #         'total_tokens': int(sum([len(c.split()) for c in choices]) + len(prompt.split())),
	        #     }
	        # }
	        # completion['id'] = 10
	        # completion['choices'] = choices
	        # content = json.dumps(completion)
	    except Exception as exc:
	        # Every failure is reported to the client as an invalid-request error;
	        # chaining with `from` keeps the original exception available as the cause.
	        raise FauxPilotException(
	            message=str(exc),
	            type="invalid_request_error",
	            param=None,
	            code=None,
	        ) from exc

	    # NOTE(review): only `None` selects the non-streaming branch, so an explicit
	    # `"stream": false` also gets an event-stream response. Presumably this mirrors
	    # how CodeGenProxy decides what kind of content to return -- confirm before
	    # changing it.
	    if payload.get("stream") is not None:
	        return EventSourceResponse(
	            content=content,
	            status_code=200,
	            media_type="text/event-stream",
	        )

	    return Response(
	        status_code=200,
	        content=content,
	        media_type="application/json",
	    )

	if __name__ == "__main__":
	    # Script entry point: start uvicorn on all interfaces, port 5000. The import
	    # string "app:app" assumes this file is importable as module `app`.
	    uvicorn.run(
	        "app:app",
	        host="0.0.0.0",
	        port=5000,
	    )


	# curl request to test the API
	# curl -X POST "http://localhost:5000/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"

	# curl -X POST "https://huggingface.co/spaces/ncoop57/santacoder-openai/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"

	# curl -X POST "https://ncoop57-santacoder-openai.hf.space/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"