Update app.py
app.py
CHANGED
@@ -1,4 +1,4 @@
-from flask import Flask, request, jsonify,
+from flask import Flask, request, jsonify, send_file
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import threading, time
@@ -6,85 +6,34 @@ import threading, time
 app = Flask(__name__)
 start_time = time.time()
 
-
-
-FILE = "unsloth.Q8_0.gguf"  # ~9 GB quant
+REPO = "TheBloke/Qwen2.5-1.8B-Chat-GGUF"
+FILE = "qwen2_5-1.8b-chat.Q4_K_M.gguf"
 
-print("🔽 Downloading model
+print("🔽 Downloading model...")
 MODEL_PATH = hf_hub_download(REPO, FILE, local_dir=".", local_dir_use_symlinks=False)
 
-print("🔄 Loading model
+print("🔄 Loading model...")
 llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=8)
 
-# 🏠 Serve HTML directly from code
 @app.route("/", methods=["GET"])
-def
-    return
-    <!DOCTYPE html>
-    <html lang="en">
-    <head>
-      <meta charset="UTF-8" />
-      <title>🤖 Chat with LawLLaMA</title>
-      <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-      <style>
-        body { font-family: sans-serif; background: #f4f4f4; padding: 20px; margin: 0; }
-        h2 { text-align: center; color: #333; }
-        textarea { width: 100%; font-size: 16px; padding: 10px; border: 1px solid #ccc; border-radius: 6px; margin-top: 10px; }
-        button { width: 100%; padding: 12px; background: #4CAF50; color: white; border: none; border-radius: 6px; font-size: 16px; margin-top: 10px; cursor: pointer; }
-        button:hover { background: #45a049; }
-        #response { margin-top: 20px; background: #fff; padding: 15px; border-radius: 6px; border-left: 5px solid #4CAF50; white-space: pre-wrap; color: #333; }
-      </style>
-    </head>
-    <body>
-      <h2>💬 LawLLaMA 8B Chat</h2>
-      <textarea id="msg" rows="4" placeholder="Type your legal or coding question..."></textarea>
-      <button onclick="send()">Send</button>
-      <div id="response">Reply will appear here...</div>
-
-      <script>
-        async function send() {
-          const msg = document.getElementById("msg").value.trim();
-          const resBox = document.getElementById("response");
-          if (!msg) {
-            alert("Please enter a message.");
-            return;
-          }
-          resBox.innerText = "⏳ Thinking...";
-          try {
-            const res = await fetch("/chat", {
-              method: "POST",
-              headers: { "Content-Type": "application/json" },
-              body: JSON.stringify({ message: msg })
-            });
-            const data = await res.json();
-            resBox.innerText = data.reply ? "🧠 " + data.reply : "⚠️ " + data.error;
-          } catch (err) {
-            resBox.innerText = "❌ Error: " + err.message;
-          }
-        }
-      </script>
-    </body>
-    </html>
-    """)
+def root():
+    return send_file("index.html")
 
 @app.route("/chat", methods=["POST"])
 def chat():
     msg = request.json.get("message", "").strip()
     if not msg:
         return jsonify({"error": "Empty message"}), 400
-    prompt = f"
-    out = llm(prompt, max_tokens=
+    prompt = f"<|user|>\n{msg}\n<|assistant|>"
+    out = llm(prompt, max_tokens=300, temperature=0.7, stop=["<|user|>", "<|assistant|>"])
     return jsonify({"reply": out["choices"][0]["text"].strip()})
 
 @app.route("/status")
 def status():
     return jsonify({
-        "
+        "uptime": round(time.time() - start_time),
         "model": FILE
     })
 
-def run_app():
-    app.run(host="0.0.0.0", port=7860)
-
 if __name__ == "__main__":
-
+    app.run(host="0.0.0.0", port=7860)