dexsecon committed on
Commit
d02f5a2
·
verified ·
1 Parent(s): d86d3ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -62
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from flask import Flask, request, jsonify, render_template_string
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
  import threading, time
@@ -6,85 +6,34 @@ import threading, time
6
  app = Flask(__name__)
7
  start_time = time.time()
8
 
9
- # 🔧 CONFIG
10
- REPO = "sens2010/law_llama3_8B_8bit_GUFF"
11
- FILE = "unsloth.Q8_0.gguf" # ~9 GB quant
12
 
13
- print("🔽 Downloading model")
14
  MODEL_PATH = hf_hub_download(REPO, FILE, local_dir=".", local_dir_use_symlinks=False)
15
 
16
- print("🔄 Loading model… (this may take a while)")
17
  llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=8)
18
 
19
- # 🏠 Serve HTML directly from code
20
  @app.route("/", methods=["GET"])
21
- def homepage():
22
- return render_template_string("""
23
- <!DOCTYPE html>
24
- <html lang="en">
25
- <head>
26
- <meta charset="UTF-8" />
27
- <title>🤖 Chat with LawLLaMA</title>
28
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
29
- <style>
30
- body { font-family: sans-serif; background: #f4f4f4; padding: 20px; margin: 0; }
31
- h2 { text-align: center; color: #333; }
32
- textarea { width: 100%; font-size: 16px; padding: 10px; border: 1px solid #ccc; border-radius: 6px; margin-top: 10px; }
33
- button { width: 100%; padding: 12px; background: #4CAF50; color: white; border: none; border-radius: 6px; font-size: 16px; margin-top: 10px; cursor: pointer; }
34
- button:hover { background: #45a049; }
35
- #response { margin-top: 20px; background: #fff; padding: 15px; border-radius: 6px; border-left: 5px solid #4CAF50; white-space: pre-wrap; color: #333; }
36
- </style>
37
- </head>
38
- <body>
39
- <h2>💬 LawLLaMA 8B Chat</h2>
40
- <textarea id="msg" rows="4" placeholder="Type your legal or coding question..."></textarea>
41
- <button onclick="send()">Send</button>
42
- <div id="response">Reply will appear here...</div>
43
-
44
- <script>
45
- async function send() {
46
- const msg = document.getElementById("msg").value.trim();
47
- const resBox = document.getElementById("response");
48
- if (!msg) {
49
- alert("Please enter a message.");
50
- return;
51
- }
52
- resBox.innerText = "⏳ Thinking...";
53
- try {
54
- const res = await fetch("/chat", {
55
- method: "POST",
56
- headers: { "Content-Type": "application/json" },
57
- body: JSON.stringify({ message: msg })
58
- });
59
- const data = await res.json();
60
- resBox.innerText = data.reply ? "🧠 " + data.reply : "⚠️ " + data.error;
61
- } catch (err) {
62
- resBox.innerText = "❌ Error: " + err.message;
63
- }
64
- }
65
- </script>
66
- </body>
67
- </html>
68
- """)
69
 
70
  @app.route("/chat", methods=["POST"])
71
  def chat():
72
  msg = request.json.get("message", "").strip()
73
  if not msg:
74
  return jsonify({"error": "Empty message"}), 400
75
- prompt = f"# User:\n{msg}\n# Assistant:\n"
76
- out = llm(prompt, max_tokens=256, temperature=0.2, stop=["# User:", "# Assistant:"])
77
  return jsonify({"reply": out["choices"][0]["text"].strip()})
78
 
79
  @app.route("/status")
80
  def status():
81
  return jsonify({
82
- "uptime_s": int(time.time() - start_time),
83
  "model": FILE
84
  })
85
 
86
- def run_app():
87
- app.run(host="0.0.0.0", port=7860)
88
-
89
  if __name__ == "__main__":
90
- threading.Thread(target=run_app).start()
 
1
from flask import Flask, request, jsonify, send_file
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import threading, time

app = Flask(__name__)
# Process start time, used by /status to report uptime.
start_time = time.time()

# GGUF chat model to fetch from the Hugging Face Hub.
# NOTE(review): repo id looks suspect — TheBloke published no Qwen2.5 1.8B
# repo (the 1.8B size belongs to Qwen1.5); confirm REPO/FILE actually exist
# before deploying, otherwise hf_hub_download fails at startup.
REPO = "TheBloke/Qwen2.5-1.8B-Chat-GGUF"
FILE = "qwen2_5-1.8b-chat.Q4_K_M.gguf"

print("🔽 Downloading model...")
# Download into the working directory; local_dir_use_symlinks is deprecated
# (and ignored) in recent huggingface_hub releases — kept for older versions.
MODEL_PATH = hf_hub_download(REPO, FILE, local_dir=".", local_dir_use_symlinks=False)

print("🔄 Loading model...")
# 2048-token context, 8 CPU threads; loaded once at import time and shared
# by all request handlers below.
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=8)
 
 
18
@app.route("/", methods=["GET"])
def root():
    """Serve the chat UI.

    Returns the static index.html that ships next to app.py, or a JSON 404
    when it is absent. This commit removed the old inline template but only
    changed app.py, so a bare send_file("index.html") — which also resolves
    against the process CWD, not this file — would raise and produce an
    opaque HTTP 500.
    """
    import os  # local import keeps this route self-contained

    # Resolve relative to this module, not the current working directory.
    page = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    if not os.path.isfile(page):
        return jsonify({"error": "index.html not found next to app.py"}), 404
    return send_file(page)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
@app.route("/chat", methods=["POST"])
def chat():
    """Run one chat turn through the loaded GGUF model.

    Expects a JSON body {"message": str}; returns {"reply": str} on success
    or {"error": str} with HTTP 400 on bad input.
    """
    # get_json(silent=True) returns None instead of raising Flask's HTML
    # 415/400 error page when the body is missing or not JSON, so this API
    # always answers in JSON. str() guards against non-string "message"
    # values (e.g. a number), which would crash .strip() with a 500.
    payload = request.get_json(silent=True) or {}
    msg = str(payload.get("message", "")).strip()
    if not msg:
        return jsonify({"error": "Empty message"}), 400
    # NOTE(review): Qwen chat models are trained on ChatML
    # (<|im_start|>user ... <|im_end|>); these <|user|>/<|assistant|> tags
    # are not that template — verify against the model card, as a mismatched
    # template degrades output quality.
    prompt = f"<|user|>\n{msg}\n<|assistant|>"
    out = llm(prompt, max_tokens=300, temperature=0.7, stop=["<|user|>", "<|assistant|>"])
    return jsonify({"reply": out["choices"][0]["text"].strip()})
30
 
31
@app.route("/status")
def status():
    """Report process uptime in whole seconds and the loaded model file."""
    uptime_seconds = round(time.time() - start_time)
    payload = {
        "uptime": uptime_seconds,
        "model": FILE,
    }
    return jsonify(payload)
37
 
 
 
 
38
if __name__ == "__main__":
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces
    # port — presumably this runs as a Space, confirm if deployed elsewhere.
    app.run(host="0.0.0.0", port=7860)