AhmadA82 commited on
Commit
e16f88f
·
verified ·
1 Parent(s): 3e6a9ce
Files changed (1) hide show
  1. app.py +81 -76
app.py CHANGED
@@ -1,77 +1,82 @@
1
- import os
2
- from huggingface_hub import hf_hub_download
3
- from llama_cpp import Llama
4
- import gradio as gr
5
-
6
- # التأكد من أن مجلد كاش هوغينغ فيس قابل للكتابة
7
- os.makedirs("/home/user/app/data/cache", exist_ok=True)
8
-
9
- MODEL_REPO = "QuantFactory/Qwen2.5-7B-Instruct-GGUF"
10
- MODEL_FILE = "Qwen2.5-7B-Instruct.Q4_K_M.gguf"
11
- MODEL_PATH = f"/home/user/app/data/cache/{MODEL_FILE}" # استخدام مجلد الكاش بدلاً من /tmp
12
-
13
- # تحميل النموذج إذا لم يكن موجودًا
14
- if not os.path.exists(MODEL_PATH):
15
- hf_hub_download(
16
- repo_id=MODEL_REPO,
17
- filename=MODEL_FILE,
18
- local_dir="/home/user/app/data/cache",
19
- )
20
-
21
- llm = Llama(
22
- model_path=MODEL_PATH,
23
- n_ctx=16000,
24
- n_threads=6,
25
- n_gpu_layers=0,
26
- verbose=False
27
- )
28
-
29
- SYSTEM_PROMPT = """<|im_start|>system
30
- You are Qwen, created by Alibaba Cloud. You are an AI development assistant. Follow these rules:
31
- 1. If request is simple (single file, <50 lines), handle it directly
32
- 2. For complex requests (multiple files, >50 lines), just respond with "CODER"
33
- 3. Always check code for errors before sending
34
- 4. Never execute unsafe code<|im_end|>
35
- """
36
-
37
- def format_prompt(messages):
38
- chat = []
39
- for role, content in messages:
40
- if role == "system":
41
- chat.append(f"<|im_start|>system\n{content}<|im_end|>")
42
- elif role == "user":
43
- chat.append(f"<|im_start|>user\n{content}<|im_end|>")
44
- else:
45
- chat.append(f"<|im_start|>assistant\n{content}<|im_end|>")
46
- chat.append("<|im_start|>assistant\n")
47
- return "\n".join(chat)
48
-
49
- def generate_reply(message, history):
50
- messages = [("system", SYSTEM_PROMPT.strip())]
51
- for user_msg, bot_msg in history:
52
- messages.append(("user", user_msg))
53
- messages.append(("assistant", bot_msg))
54
- messages.append(("user", message))
55
- prompt = format_prompt(messages)
56
- output = llm(
57
- prompt,
58
- max_tokens=1024,
59
- temperature=0.7,
60
- top_p=0.9,
61
- repeat_penalty=1.05,
62
- stop=["<|im_end|>"]
63
- )
64
- reply = output["choices"][0]["text"].split("<|im_end|>")[0].strip()
65
- history.append((message, reply))
66
- return "", history
67
-
68
- with gr.Blocks(title="Qwen 2.5 Chat (Q4_K_M)") as demo:
69
- gr.Markdown("## 🤖 Qwen 2.5 Chat (Q4_K_M) - Arabic Ready")
70
- chatbot = gr.Chatbot(label="المحادثة")
71
- with gr.Row():
72
- msg = gr.Textbox(placeholder="اكتب سؤالك هنا...", label="سؤالك")
73
- clear = gr.Button("مسح المحادثة")
74
- msg.submit(generate_reply, [msg, chatbot], [msg, chatbot])
75
- clear.click(lambda: ("", []), None, [msg, chatbot])
76
-
 
 
 
 
 
77
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Writable Hugging Face cache directory (persistent storage on the Space).
# Hoisted into one constant — the original repeated this path three times.
CACHE_DIR = "/home/user/app/data/cache"
os.makedirs(CACHE_DIR, exist_ok=True)

MODEL_REPO = "QuantFactory/Qwen2.5-7B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-7B-Instruct.Q4_K_M.gguf"
MODEL_PATH = f"{CACHE_DIR}/{MODEL_FILE}"  # use the cache dir instead of /tmp

# Download the model only if it is not already cached.
if not os.path.exists(MODEL_PATH):
    hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        local_dir=CACHE_DIR,
    )

# Sanity check: report whether the model file is actually on disk before loading.
if os.path.exists(MODEL_PATH):
    print(f"Model found at {MODEL_PATH}")
else:
    print(f"Model not found at {MODEL_PATH}")

llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=16000,      # context window in tokens; large enough for long chats
    n_threads=6,      # CPU threads — presumably matched to the Space's vCPUs; TODO confirm
    n_gpu_layers=0,   # CPU-only inference
    verbose=False,
)
33
+
34
# System instructions for the assistant. NOTE: this is *plain* content —
# format_prompt() wraps the system message in <|im_start|>system ... <|im_end|>
# itself, so embedding those markers here (as the original did) produced
# doubled/nested ChatML tags in every prompt sent to the model.
SYSTEM_PROMPT = """You are Qwen, created by Alibaba Cloud. You are an AI development assistant. Follow these rules:
1. If request is simple (single file, <50 lines), handle it directly
2. For complex requests (multiple files, >50 lines), just respond with "CODER"
3. Always check code for errors before sending
4. Never execute unsafe code"""
41
+
42
def format_prompt(messages):
    """Render (role, content) pairs into Qwen's ChatML prompt format.

    Roles "system" and "user" keep their own tag; any other role is
    rendered as the assistant. A trailing open assistant tag is appended
    so the model continues generation from there.
    """
    rendered = []
    for role, content in messages:
        tag = role if role in ("system", "user") else "assistant"
        rendered.append(f"<|im_start|>{tag}\n{content}<|im_end|>")
    rendered.append("<|im_start|>assistant\n")
    return "\n".join(rendered)
53
+
54
def generate_reply(message, history):
    """Run one chat turn against the local llama.cpp model.

    Rebuilds the full conversation (system prompt + prior turns + the new
    user message), queries `llm`, and appends the new (user, assistant)
    pair to `history`. Returns ("", history) so Gradio clears the textbox
    and refreshes the chatbot in a single callback.
    """
    conversation = [("system", SYSTEM_PROMPT.strip())]
    for past_user, past_bot in history:
        conversation.append(("user", past_user))
        conversation.append(("assistant", past_bot))
    conversation.append(("user", message))

    result = llm(
        format_prompt(conversation),
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        repeat_penalty=1.05,
        stop=["<|im_end|>"],
    )
    # Defensive trim: drop anything after the end-of-turn marker (the stop
    # sequence should already prevent it), then strip surrounding whitespace.
    raw_text = result["choices"][0]["text"]
    reply = raw_text.split("<|im_end|>")[0].strip()
    history.append((message, reply))
    return "", history
72
+
73
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Qwen 2.5 Chat (Q4_K_M) - Arabic Ready")
    chatbot = gr.Chatbot(label="المحادثة")
    with gr.Row():
        # Fixed garbled label: "س跪الك" (mojibake with a stray CJK char)
        # -> "سؤالك" ("your question").
        msg = gr.Textbox(placeholder="اكتب سؤالك هنا...", label="سؤالك")
        clear = gr.Button("مسح المحادثة")
    # Enter in the textbox sends the turn; outputs clear the box and update the chat.
    msg.submit(generate_reply, [msg, chatbot], [msg, chatbot])
    # The clear button resets both the textbox and the conversation history.
    clear.click(lambda: ("", []), None, [msg, chatbot])

demo.launch(server_name="0.0.0.0", server_port=7860)