# LLM_Ariphes / api.py — Flask HTTP wrapper around the app module's chat backend.
from flask import Flask, request, jsonify
from app import generate_chat_completion  # project-local generation backend (app.py)
import time
# Flask application instance; the routes below attach to it via decorators.
app = Flask(__name__)
@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """OpenAI-style chat-completions endpoint.

    Expects a JSON body with:
        messages    -- required non-empty list of role/content dicts
        max_tokens  -- optional, default 560
        temperature -- optional, default 0.8

    Returns a minimal OpenAI-shaped JSON response containing the assistant's
    latest message and the wall-clock generation time, or a JSON error with
    status 400 (bad request) / 500 (backend failure).
    """
    # request.json raises (or yields None -> AttributeError on .get) when the
    # body is missing, malformed, or lacks the JSON content type; silent=True
    # turns all of those into None so bad input gets a clean 400, not a 500.
    data = request.get_json(silent=True) or {}
    messages = data.get('messages', [])
    max_tokens = data.get('max_tokens', 560)
    temperature = data.get('temperature', 0.8)

    if not messages or not isinstance(messages, list):
        return jsonify({"error": "A valid 'messages' list is required."}), 400

    try:
        start_time = time.time()
        # Expecting plain role-content dicts (not Gradio tuples)
        result = generate_chat_completion(
            message_history=messages,
            max_tokens=max_tokens,
            temperature=temperature
        )
        # Get only the assistant's latest message. NOTE(review): assumes the
        # backend returns either a history list or a single message — confirm
        # against app.generate_chat_completion.
        assistant_msg = result[-1] if isinstance(result, list) else result
        elapsed = time.time() - start_time
        return jsonify({
            "model": "mistralai/Mistral-7B-Instruct-v0.2",
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": assistant_msg
                }
            }],
            "usage": {
                "generation_time": round(elapsed, 2)
            }
        })
    except Exception as e:
        # Top-level API boundary: surface any backend failure as JSON + 500.
        return jsonify({"error": str(e)}), 500
@app.route('/')
def health_check():
    """Liveness probe: plain-text confirmation that the API is running."""
    status_code = 200
    return "LLM API is running", status_code
if __name__ == '__main__':
    # Bind on all interfaces so the server is reachable from outside the
    # container; port 8081 must match the deployment's exposed port.
    app.run(host='0.0.0.0', port=8081)