matthoffner committed on
Commit 9e9fcb0
1 Parent(s): 463d78b

Update main.py

Files changed (1)
  1. main.py +34 -25
main.py CHANGED
@@ -92,35 +92,44 @@ async def chat(request: ChatCompletionRequest):
 
     return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
 
-@app.post("/v2/chat/completions")
+async def stream_response(send: Callable) -> None:
+    async with send:
+        try:
+            iterator: Generator = llm.generate(tokens)
+            async for chat_chunk in iterator:
+                response = {
+                    'choices': [
+                        {
+                            'message': {
+                                'role': 'system',
+                                'content': llm.detokenize(chat_chunk)
+                            },
+                            'finish_reason': 'stop' if llm.detokenize(chat_chunk) == "[DONE]" else 'unknown'
+                        }
+                    ]
+                }
+                await send({
+                    "type": "http.response.body",
+                    "body": f"data: {json.dumps(response)}\n\n".encode("utf-8"),
+                    "more_body": True,
+                })
+            await send({
+                "type": "http.response.body",
+                "body": b"event: done\ndata: {}\n\n",
+                "more_body": False,
+            })
+        except Exception as e:
+            print(f"Exception in event publisher: {str(e)}")
+
 async def chatV2(request: Request, body: ChatCompletionRequest):
     combined_messages = ' '.join([message.content for message in body.messages])
     tokens = llm.tokenize(combined_messages)
 
-    send_chan, recv_chan = create_memory_object_stream(10)
-
-    async def event_publisher(inner_send_chan):
-        async with inner_send_chan:
-            try:
-                iterator: Generator = await run_sync(llm.generate, tokens)
-                for chat_chunk in iterator:
-                    response = {
-                        'choices': [
-                            {
-                                'message': {
-                                    'role': 'system',
-                                    'content': llm.detokenize(chat_chunk)
-                                },
-                                'finish_reason': 'stop' if llm.detokenize(chat_chunk) == "[DONE]" else 'unknown'
-                            }
-                        ]
-                    }
-                    await inner_send_chan.send(f"data: {json.dumps(response)}\n\n")
-                await inner_send_chan.send("event: done\ndata: {}\n\n")
-            except Exception as e:
-                print(f"Exception in event publisher: {str(e)}")
-
-    return StreamingResponse(recv_chan, media_type="text/event-stream", data_sender_callable=partial(event_publisher, send_chan))
+    return ResponseGenerator(stream_response)
+
+@app.post("/v2/chat/completions")
+async def chatV2(request: Request, body: ChatCompletionRequest):
+    return await chatV2(request, body)
 
 @app.post("/v0/chat/completions")
 async def chat(request: ChatCompletionRequestV0, response_mode=None):
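
For context: the new handler in this commit emits Server-Sent Events by calling the raw ASGI send callable with "http.response.body" messages and returns a ResponseGenerator wrapper that is not shown in this hunk. Below is a minimal, self-contained sketch of the same SSE streaming pattern using FastAPI's built-in StreamingResponse. The llm.tokenize / llm.generate / llm.detokenize calls mirror the names in the diff; the stub model, request models, and route name are illustrative assumptions, not the code in this commit.

# Minimal sketch (not part of the commit): the same SSE stream expressed with
# FastAPI's StreamingResponse and a generator of "data:" frames.
import json
from typing import List

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel


class _StubLLM:
    # Stand-in for the real model object used in main.py (illustrative only).
    def tokenize(self, text: str) -> List[str]:
        return text.split()

    def generate(self, tokens: List[str]):
        for token in tokens:
            yield token

    def detokenize(self, token: str) -> str:
        return token + " "


llm = _StubLLM()
app = FastAPI()


class Message(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    messages: List[Message]


@app.post("/v2/chat/completions")
async def chat_v2(body: ChatCompletionRequest):
    combined = " ".join(m.content for m in body.messages)
    tokens = llm.tokenize(combined)

    def sse():
        # One "data:" frame per generated chunk, then an explicit done event,
        # matching the frames emitted by stream_response in the diff.
        for chunk in llm.generate(tokens):
            payload = {
                "choices": [
                    {"message": {"role": "system", "content": llm.detokenize(chunk)}}
                ]
            }
            yield f"data: {json.dumps(payload)}\n\n"
        yield "event: done\ndata: {}\n\n"

    return StreamingResponse(sse(), media_type="text/event-stream")

With this pattern the server framework drains the generator and handles chunked transfer itself, which is the behavior the raw ASGI send calls in the commit reproduce by hand.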