"""Minimal Flask service that forwards a prompt to an Ollama chat model."""

from flask import Flask, request, jsonify
import ollama

app = Flask(__name__)

# Name of the Ollama model used to answer every request.
MODEL_NAME = "mistral"


@app.route('/generate', methods=['POST'])
def generate_response():
    """Generate a model reply for the text posted to ``/generate``.

    Expects a JSON object body with a non-empty string under the
    ``input_text`` key.

    Returns:
        A JSON response of the form ``{"response": <text>}`` with status 200
        on success, or ``{"error": <message>}`` with status 400 when the
        request body is invalid and status 500 when the model call fails.
    """
    # silent=True yields None for a missing, non-JSON or malformed body
    # instead of raising, so bad input is reported as a clean 400 below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    input_text = payload.get('input_text')
    if not isinstance(input_text, str) or not input_text.strip():
        return jsonify({"error": "User message must contain content."}), 400

    # Only the model call is guarded: validation problems are handled above,
    # so anything raised here is a genuine backend failure.
    try:
        chat_stream = ollama.chat(
            model=MODEL_NAME,
            messages=[{'role': 'user', 'content': input_text}],
            stream=True,
        )
        app.logger.info("Prompt sent to model %s", MODEL_NAME)

        # The stream iterator is exhausted once the model has finished, so
        # no explicit end-of-response sentinel check is required.
        response = ''.join(
            chunk['message']['content'] for chunk in chat_stream
        )
    except Exception as e:  # top-level boundary: log, then report to client
        app.logger.exception("Model request failed")
        return jsonify({"error": str(e)}), 500

    app.logger.debug("Model response: %s", response)
    return jsonify({"response": response})


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8000)