Spaces:

ncoop57
/

santacoder-openai

Runtime error

ncoop57

Get minimum working openai server

bab8078 over 1 year ago

No virus

3.83 kB

	import logging
	import os
	import torch
	import json
	import torch
	import time
	import uvicorn
	from fastapi import FastAPI, Request, Response
	from fastapi.responses import JSONResponse
	from sse_starlette.sse import EventSourceResponse

	from config.log_config import uvicorn_logger
	from models import OpenAIinput
	from utils.codegen import CodeGenProxy
	from utils.errors import FauxPilotException
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	# Import the submodule explicitly: a bare `import logging` does not guarantee
	# that `logging.config` is loaded, so do not rely on another package importing it.
	import logging.config
	# Apply the project's logging configuration dictionary.
	logging.config.dictConfig(uvicorn_logger)

	# token = os.environ.get("HUB_TOKEN", None)
	# device = "cuda:0" if torch.cuda.is_available() else "cpu"

	# tokenizer = AutoTokenizer.from_pretrained("bigcode/christmas-models", use_auth_token=token)
	# model = AutoModelForCausalLM.from_pretrained("bigcode/christmas-models", trust_remote_code=True, use_auth_token=token).to(device)
	# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

	# Proxy object the completions endpoint calls; its connection settings come
	# from the environment.
	# NOTE(review): TRITON_PORT is a string when set via the environment but the
	# fallback is the int 8001 - confirm CodeGenProxy accepts both.
	codegen = CodeGenProxy(
	    host=os.environ.get("TRITON_HOST", "triton"),
	    port=os.environ.get("TRITON_PORT", 8001),
	    # Environment values are always strings, so "0" or "false" would otherwise
	    # be truthy. Unset and the usual "off" spellings map to False; any other
	    # value enables verbose output, as before.
	    verbose=os.environ.get("TRITON_VERBOSITY", "").strip().lower()
	    not in ("", "0", "false", "no", "off"),
	)

	# FastAPI application object; the interactive docs are served at the root path.
	app = FastAPI(
	    title="FauxPilot",
	    # Adjacent string literals are joined with no separator, so the first piece
	    # must end with a space to avoid rendering "CodeGenmodels".
	    description=(
	        "This is an attempt to build a locally hosted version of GitHub Copilot. It uses the SalesForce CodeGen "
	        "models inside of NVIDIA's Triton Inference Server with the FasterTransformer backend."
	    ),
	    docs_url="/",
	    swagger_ui_parameters={"defaultModelsExpandDepth": -1},
	)

	@app.exception_handler(FauxPilotException)
	async def fauxpilot_handler(request: Request, exc: FauxPilotException):
	    """Turn a ``FauxPilotException`` into an HTTP 400 JSON response.

	    The response body is whatever ``exc.json()`` produces; ``request`` is
	    accepted to satisfy the handler signature and is not used.
	    """
	    error_payload = exc.json()
	    return JSONResponse(content=error_payload, status_code=400)

	@app.post("/v1/engines/codegen/completions")
	@app.post("/v1/completions")
	async def completions(data: OpenAIinput):
	    """Handle a completion request on either registered route.

	    The request model is converted to a dict and passed to ``codegen``.

	    Returns:
	        An ``EventSourceResponse`` when the request set ``stream`` to a truthy
	        value, otherwise a ``Response`` with media type ``application/json``.

	    Raises:
	        FauxPilotException: of type ``invalid_request_error`` wrapping any
	            exception raised by ``codegen``; the original is kept as the cause.
	    """
	    payload = data.dict()
	    try:
	        content = codegen(data=payload)
	        # Disabled draft of a local transformers-pipeline implementation:
	        # prompt = data.get("prompt")
	        # choices = [pipe(prompt, do_sample=True, top_p=0.95, max_new_tokens=50)[0]['generated_text']]
	        # completion = {
	        # 'id': None, # fill in
	        # 'model': 'codegen',
	        # 'object': 'text_completion',
	        # 'created': int(time.time()),
	        # 'choices': None, # fill in
	        # 'usage': {
	        # 'completion_tokens': int(sum([len(c.split()) for c in choices])),
	        # 'prompt_tokens': int(len(prompt.split())),
	        # 'total_tokens': int(sum([len(c.split()) for c in choices]) + len(prompt.split())),
	        # }
	        # }
	        # completion['id'] = 10
	        # completion['choices'] = choices
	        # content = json.dumps(completion)
	    except Exception as exc:
	        # Chain the cause so the original traceback is not lost.
	        raise FauxPilotException(
	            message=str(exc),
	            type="invalid_request_error",
	            param=None,
	            code=None,
	        ) from exc

	    # Use truthiness rather than `is not None`: an explicit `"stream": false`
	    # must get a plain JSON response, not a server-sent-event stream.
	    if payload.get("stream"):
	        return EventSourceResponse(
	            content=content,
	            status_code=200,
	            media_type="text/event-stream",
	        )
	    return Response(
	        status_code=200,
	        content=content,
	        media_type="application/json",
	    )

	# Launch uvicorn only when this file is executed directly as a script.
	if __name__ == "__main__":
	    uvicorn.run(
	        "app:app",
	        host="0.0.0.0",
	        port=5000,
	    )


	# Example curl requests for testing the API (local server first, then the hosted Space)
	# curl -X POST "http://localhost:5000/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"

	# curl -X POST "https://huggingface.co/spaces/ncoop57/santacoder-openai/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"

	# curl -X POST "https://ncoop57-santacoder-openai.hf.space/v1/engines/codegen/completions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"prompt\": \"import numpy as np\"}"