Update main.py
main.py
CHANGED
@@ -277,6 +277,51 @@ async def llm_agent(query: LLMAgentQueryModel, background_tasks: BackgroundTasks
 
     return StreamingResponse(process_response(), media_type="text/event-stream")
 
+@app.post("/v2/llm-agent")
+async def llm_agent_v2(query: LLMAgentQueryModel, background_tasks: BackgroundTasks, api_key: str = Depends(verify_api_key)):
+    """
+    LLM agent endpoint that provides responses based on user queries, maintaining conversation history.
+    Accepts custom system messages and allows selection of different models.
+    Requires API Key authentication via X-API-Key header.
+    """
+    logger.info(f"Received LLM agent query: {query.prompt}")
+
+    # Generate a new conversation ID if not provided
+    if not query.conversation_id:
+        query.conversation_id = str(uuid4())
+
+    # Initialize or retrieve conversation history
+    if query.conversation_id not in conversations:
+        system_message = query.system_message or "You are a helpful assistant."
+        conversations[query.conversation_id] = [
+            {"role": "system", "content": system_message}
+        ]
+    elif query.system_message:
+        # Update system message if provided
+        conversations[query.conversation_id][0] = {"role": "system", "content": query.system_message}
+
+    # Add user's prompt to conversation history
+    conversations[query.conversation_id].append({"role": "user", "content": query.prompt})
+    last_activity[query.conversation_id] = time.time()
+
+    # Limit tokens in the conversation history
+    limited_conversation = limit_conversation_history(conversations[query.conversation_id])
+
+    def process_response():
+        full_response = ""
+        for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
+            full_response += content
+            yield json.dumps({"type": "response", "content": content}) + "\n"
+
+        # Add the assistant's response to the conversation history
+        conversations[query.conversation_id].append({"role": "assistant", "content": full_response})
+
+        background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.prompt, full_response)
+        logger.info(f"Completed LLM agent response for query: {query.prompt}")
+
+    return StreamingResponse(process_response(), media_type="text/event-stream")
+
+
 import edge_tts
 import io
 
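For reference, the fields the new endpoint reads from query (prompt, conversation_id, system_message, model_id, user_id) imply a request model along the following lines. The actual LLMAgentQueryModel is defined elsewhere in main.py, so the types and defaults below are only an inferred sketch:

from typing import Optional
from pydantic import BaseModel

# Inferred sketch only: the real LLMAgentQueryModel lives elsewhere in
# main.py; field types and defaults here are assumptions based on how
# llm_agent_v2 accesses the query object.
class LLMAgentQueryModel(BaseModel):
    prompt: str                            # user message forwarded to the model
    user_id: str                           # consumed by the update_db background task
    conversation_id: Optional[str] = None  # generated server-side when omitted
    system_message: Optional[str] = None   # overrides the default system prompt
    model_id: Optional[str] = None         # passed through to chat_with_llama_stream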
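Note that process_response() yields newline-delimited JSON objects ({"type": "response", "content": ...}) rather than SSE "data:" frames, even though the response is declared as text/event-stream, so clients should read the body line by line. A minimal client sketch, assuming the app runs at http://localhost:8000 and YOUR_API_KEY is a placeholder for a key accepted by verify_api_key:

import json
import requests

# Stream the v2 endpoint and print chunks as they arrive. The URL, port, and
# API key are placeholder assumptions; the NDJSON line format matches what
# process_response() yields in the diff above.
response = requests.post(
    "http://localhost:8000/v2/llm-agent",
    headers={"X-API-Key": "YOUR_API_KEY"},
    json={"prompt": "Hello!", "user_id": "demo-user"},
    stream=True,
)
response.raise_for_status()

for line in response.iter_lines():
    if not line:
        continue
    chunk = json.loads(line)
    if chunk.get("type") == "response":
        print(chunk["content"], end="", flush=True)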