|
from flask import Flask, request, jsonify, render_template |
|
from llama_cpp import Llama |
|
|
|
# Flask application instance; serves the chat UI and the generation API.
app = Flask(__name__)



# Load the GGUF model once at import time (blocking; may take several
# seconds). Path is relative to the working directory — run from repo root.
# NOTE(review): no existence check; a missing file raises here at startup.
llm = Llama(model_path="./yuna-ai-v3-q3_k_m.gguf", verbose=False)
|
|
|
@app.route('/')
def index():
    """Serve the chat front-end page."""
    page = render_template('index.html')
    return page
|
|
|
@app.route('/api/generate', methods=['POST'])
def generate():
    """Generate a single short model reply to the user's message.

    Expects a JSON body ``{"message": "<text>"}``. Returns
    ``{"response": "<model text>"}`` on success, or a JSON error with
    HTTP 400 when the body is missing, not JSON, or lacks ``message``.
    """
    # Fix: the original `request.json['message']` raised an unhandled
    # TypeError/KeyError (HTTP 500) on a non-JSON or incomplete body.
    payload = request.get_json(silent=True)
    if not payload or 'message' not in payload:
        return jsonify({'error': "missing 'message' field"}), 400

    user_message = payload['message']

    # Stop on speaker tags or newline so the model emits only Yuna's turn.
    output = llm(
        f"Yuki: {user_message}\nYuna:",
        max_tokens=16,
        stop=["Yuki:", "Yuna:", "\n"],
        echo=False
    )

    return jsonify({'response': output['choices'][0]['text']})
|
|
|
if __name__ == '__main__':

    # Development server; binds all interfaces (0.0.0.0) on port 7860.
    # NOTE(review): exposes the API to the local network — confirm this
    # is intended, and use a production WSGI server for deployment.
    app.run(host='0.0.0.0', port=7860)