matthoffner committed on
Commit
406422f
1 Parent(s): 280efe7

Update main.py

Files changed (1)
main.py +34 -0
main.py CHANGED
@@ -1,8 +1,10 @@
+import json
 from typing import List
 import fastapi
 import markdown
 import uvicorn
 from ctransformers import AutoModelForCausalLM
+from fastapi import HTTPException
 from fastapi.responses import HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from sse_starlette.sse import EventSourceResponse
@@ -57,6 +59,38 @@ class ChatCompletionRequest(BaseModel):
 system_message = "Below is a conversation between a human user and a helpful AI coding assistant."
 
 @app.post("/v1/chat/completions")
+async def chat(request: ChatCompletionRequest):
+    kwargs = request.dict()
+    dialogue_template = DialogueTemplate(
+        system=system_message, messages=kwargs['messages']
+    )
+    prompt = dialogue_template.get_inference_prompt()
+    tokens = llm.tokenize(prompt)
+
+    try:
+        chat_chunks = llm.generate(tokens)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+    async def format_response(chat_chunks: Generator) -> Any:
+        for chat_chunk in chat_chunks:
+            response = {
+                'choices': [
+                    {
+                        'message': {
+                            'role': 'system',
+                            'content': llm.detokenize(chat_chunk)
+                        },
+                        'finish_reason': 'stop' if llm.detokenize(chat_chunk) == "[DONE]" else 'unknown'
+                    }
+                ]
+            }
+            yield f"data: {json.dumps(response)}\n\n"
+        yield "event: done\ndata: {}\n\n"
+
+    return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
+
+@app.post("/v0/chat/completions")
 async def chat(request: ChatCompletionRequest, response_mode=None):
     kwargs = request.dict()
     dialogue_template = DialogueTemplate(
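
For context, the new /v1 endpoint streams Server-Sent Events: one "data:" frame per generated token, followed by a terminal "event: done" frame. Below is a minimal client sketch for consuming that stream; the host, port, and example message are illustrative assumptions, not part of the commit:

# Hypothetical client for the /v1/chat/completions SSE stream added above.
# Assumes the app is served at http://localhost:8000 (not stated in the commit).
import json

import requests  # third-party HTTP client, used here for brevity

payload = {"messages": [{"role": "user", "content": "Write hello world in Python."}]}

with requests.post(
    "http://localhost:8000/v1/chat/completions",
    json=payload,
    stream=True,  # keep the connection open and read frames as they arrive
) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separators and the "event: done" line
        chunk = json.loads(line[len("data: "):])
        if not chunk.get("choices"):
            break  # the final frame carries an empty JSON object
        print(chunk["choices"][0]["message"]["content"], end="", flush=True)

Each frame mirrors the response dict built in format_response, so every token's text lands in choices[0].message.content.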