matthoffner committed
Commit
0207752
1 Parent(s): dcac576

Update main.py

Files changed (1)
  1. main.py +7 -12
main.py CHANGED
@@ -6,7 +6,7 @@ from functools import partial
 import fastapi
 import uvicorn
 from fastapi import HTTPException, Depends, Request
-from fastapi.responses import HTMLResponse, StreamingResponse
+from fastapi.responses import HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from sse_starlette.sse import EventSourceResponse
 from anyio import create_memory_object_stream
@@ -87,7 +87,7 @@ async def chat(request: ChatCompletionRequest):
 
     return EventSourceResponse(generate_response(chat_chunks, llm))
 
-async def stream_response(tokens: Any) -> None:
+async def stream_response(tokens, llm):
     try:
         iterator: Generator = llm.generate(tokens)
         for chat_chunk in iterator:
@@ -102,22 +102,17 @@ async def stream_response(tokens: Any) -> None:
                     }
                 ]
             }
-            yield f"data: {json.dumps(response)}\n\n"
-            yield b"event: done\ndata: {}\n\n"
+            yield dict(data=json.dumps(response))
+            yield dict(data="[DONE]")
     except Exception as e:
         print(f"Exception in event publisher: {str(e)}")
 
-
-async def chatV2(request: Request, body: ChatCompletionRequest):
-    combined_messages = ' '.join([message.content for message in body.messages])
-    tokens = llm.tokenize(combined_messages)
-
-    return StreamingResponse(stream_response(tokens))
-
 @app.post("/v2/chat/completions")
 async def chatV2_endpoint(request: Request, body: ChatCompletionRequest):
-    return await chatV2(request, body)
+    combined_messages = ' '.join([message.content for message in body.messages])
+    tokens = llm.tokenize(combined_messages)
 
+    return EventSourceResponse(stream_response(tokens, llm))
 
 @app.post("/v0/chat/completions")
 async def chat(request: ChatCompletionRequestV0, response_mode=None):
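
The net effect is that /v2/chat/completions now lets sse_starlette do the SSE framing: each yielded dict becomes a "data: ..." frame, where the old code built the "data:" strings and a done event by hand for StreamingResponse. Below is a minimal, self-contained sketch of that pattern; the fake_token_stream generator is a stand-in for this repo's llm.generate and is not part of the commit.

import asyncio
import json

from fastapi import FastAPI
from sse_starlette.sse import EventSourceResponse

app = FastAPI()

async def fake_token_stream():
    # Stand-in for llm.generate(tokens); yields OpenAI-style chunk payloads.
    for token in ["Hello", ",", " world"]:
        chunk = {"choices": [{"delta": {"content": token}}]}
        # EventSourceResponse serializes each yielded dict into an SSE frame
        # ("data: ...\n\n"), so the handler no longer formats frames by hand.
        yield dict(data=json.dumps(chunk))
        await asyncio.sleep(0)
    # Final sentinel event, mirroring the OpenAI streaming convention.
    yield dict(data="[DONE]")

@app.get("/sse-demo")
async def sse_demo():
    return EventSourceResponse(fake_token_stream())

Clients consuming the stream can stop reading once they see a data payload of [DONE], which is the convention the new final yield follows.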