Update api/utils.py
Browse files- api/utils.py +35 -10
api/utils.py
CHANGED
@@ -122,6 +122,7 @@ async def process_streaming_response(request: ChatRequest):
|
|
122 |
|
123 |
json_data = build_json_data(request, h_value, model_prefix)
|
124 |
|
|
|
125 |
async with httpx.AsyncClient() as client:
|
126 |
try:
|
127 |
async with client.stream(
|
@@ -133,20 +134,44 @@ async def process_streaming_response(request: ChatRequest):
|
|
133 |
) as response:
|
134 |
response.raise_for_status()
|
135 |
async for chunk in response.aiter_text():
|
136 |
-
timestamp = int(datetime.now().timestamp())
|
137 |
if chunk:
|
138 |
-
|
139 |
-
if
|
140 |
-
|
141 |
-
# Remove the blocked message if present
|
142 |
-
if BLOCKED_MESSAGE in content:
|
143 |
logger.info("Blocked message detected in response.")
|
144 |
-
|
145 |
-
if not
|
146 |
-
continue # Skip if
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
149 |
|
|
|
|
|
150 |
yield f"data: {json.dumps(create_chat_completion_data('', request.model, timestamp, 'stop'))}\n\n"
|
151 |
yield "data: [DONE]\n\n"
|
152 |
except httpx.HTTPStatusError as e:
|
|
|
122 |
|
123 |
json_data = build_json_data(request, h_value, model_prefix)
|
124 |
|
125 |
+
buffer = ""
|
126 |
async with httpx.AsyncClient() as client:
|
127 |
try:
|
128 |
async with client.stream(
|
|
|
134 |
) as response:
|
135 |
response.raise_for_status()
|
136 |
async for chunk in response.aiter_text():
|
|
|
137 |
if chunk:
|
138 |
+
buffer += chunk
|
139 |
+
# Check if buffer contains the blocked message
|
140 |
+
if BLOCKED_MESSAGE in buffer:
|
|
|
|
|
141 |
logger.info("Blocked message detected in response.")
|
142 |
+
buffer = buffer.replace(BLOCKED_MESSAGE, '').strip()
|
143 |
+
if not buffer:
|
144 |
+
continue # Skip if buffer is empty after removal
|
145 |
+
|
146 |
+
# Process the buffer to yield content
|
147 |
+
while buffer:
|
148 |
+
# Decide on a suitable chunk size or yield the entire buffer
|
149 |
+
content_to_yield = buffer
|
150 |
+
buffer = ""
|
151 |
+
# Remove the prefix if present
|
152 |
+
cleaned_content = strip_model_prefix(content_to_yield, model_prefix)
|
153 |
+
timestamp = int(datetime.now().timestamp())
|
154 |
+
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
155 |
+
|
156 |
+
# After streaming is done, check if any remaining content is in the buffer
|
157 |
+
if buffer:
|
158 |
+
# Remove the blocked message if present
|
159 |
+
if BLOCKED_MESSAGE in buffer:
|
160 |
+
logger.info("Blocked message detected in remaining buffer.")
|
161 |
+
buffer = buffer.replace(BLOCKED_MESSAGE, '').strip()
|
162 |
+
if not buffer:
|
163 |
+
pass # No content to yield
|
164 |
+
else:
|
165 |
+
cleaned_content = strip_model_prefix(buffer, model_prefix)
|
166 |
+
timestamp = int(datetime.now().timestamp())
|
167 |
+
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
168 |
+
else:
|
169 |
+
cleaned_content = strip_model_prefix(buffer, model_prefix)
|
170 |
+
timestamp = int(datetime.now().timestamp())
|
171 |
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
172 |
|
173 |
+
# Signal the end of the streaming
|
174 |
+
timestamp = int(datetime.now().timestamp())
|
175 |
yield f"data: {json.dumps(create_chat_completion_data('', request.model, timestamp, 'stop'))}\n\n"
|
176 |
yield "data: [DONE]\n\n"
|
177 |
except httpx.HTTPStatusError as e:
|