from flask import Flask, request, jsonify, render_template
from llama_cpp import Llama

app = Flask(__name__)

# Initialize the LLM: download the quantized GGUF model from the Hugging Face Hub
# on first run (cached afterwards) and load it with llama.cpp
llm = Llama.from_pretrained(
    repo_id="unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
    filename="DeepSeek-R1-Distill-Qwen-1.5B-Q2_K.gguf",
)

@app.route('/')
def index():
    # Serve the chat UI from templates/index.html
    return render_template('index.html')

@app.route('/chat', methods=['POST'])
def chat():
    # Tolerate missing or non-JSON bodies instead of raising
    data = request.get_json(silent=True) or {}
    message = data.get('message', '')

    # Format the message for chat completion
    messages = [
        {"role": "user", "content": message}
    ]

    try:
        response = llm.create_chat_completion(messages=messages)
        return jsonify({
            "response": response['choices'][0]['message']['content']
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
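
# A quick way to exercise the /chat endpoint once the server is running.
# This is an illustrative example, not part of the app itself; adjust the
# host/port if you change the app.run() call above:
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Why is the sky blue?"}'
#
# Expected shape of a successful response:
#   {"response": "<model output>"}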