CMLL committed on
Commit
ed0a7ea
·
verified ·
1 Parent(s): 44ac364

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -60
app.py CHANGED
@@ -1,76 +1,51 @@
1
- # ライブラリのインストール
 
2
  import os
3
- import subprocess
4
-
5
- # 必要なライブラリのインストール
6
- subprocess.check_call(["pip", "install", "llama-cpp-python"])
7
- subprocess.check_call(["pip", "install", "gradio"])
8
 
9
- # モデルのダウンロード
10
  model_url = "https://huggingface.co/CMLL/ZhongJing-2-1_8b-GGUF/resolve/main/ZhongJing1_5-1_8b-fp16.gguf"
11
- model_path = "ggml-model.gguf"
12
- if not os.path.exists(model_path):
13
- subprocess.check_call(["wget", model_url, "-O", model_path])
14
 
15
- # ウェブUIの起動
16
- import gradio as gr
17
- import copy
18
- import time
19
- from llama_cpp import Llama
20
 
21
- llm = Llama(
22
- model_path=model_path,
23
- n_ctx=2048,
24
- # n_gpu_layers=100, # CPUで実行する場合は削除
25
- )
26
 
27
- history = []
 
 
 
28
 
29
- system_message = """
30
- You are a helpful TCM medical assistant named 仲景中医大语言模型.
31
- """
 
 
32
 
33
- def generate_text(message, history):
34
- temp = ""
35
- input_prompt = f"{system_message}"
36
- for interaction in history:
37
- input_prompt = input_prompt + "\nUSER: " + str(interaction[0]) + "\nASSISTANT: " + str(interaction[1])
38
- input_prompt = input_prompt + "\nUSER: " + str(message) + "\nASSISTANT: "
39
 
40
- output = llm.create_completion(
41
- input_prompt,
42
- temperature=0.7,
43
- top_p=0.3,
44
- top_k=40,
45
- repeat_penalty=1.1,
46
- max_tokens=1024,
47
- stop=[
48
- "ASSISTANT:",
49
- "USER:",
50
- "SYSTEM:",
51
- ],
52
- stream=True,
53
- )
54
- for out in output:
55
- stream = copy.deepcopy(out)
56
- temp += stream["choices"][0]["text"]
57
- yield temp
58
 
59
- history.append((message, temp))
 
 
 
 
60
 
 
61
 
62
- demo = gr.ChatInterface(
63
- generate_text,
64
- title="ZhongJingGPT-V2-1_8B-GGUF chatbot using llama-cpp-python",
65
- description="",
66
- examples=["日本の四国にある県名を挙げてください。"],
67
- cache_examples=True,
68
- retry_btn=None,
69
- undo_btn="Remove last",
70
- clear_btn="Clear all",
71
- )
72
 
73
- demo.launch(debug=True, share=True, max_threads=10)
74
 
75
 
76
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
  import os
 
 
 
 
 
4
 
5
# URL of the GGUF model file on the Hugging Face Hub.
model_url = "https://huggingface.co/CMLL/ZhongJing-2-1_8b-GGUF/resolve/main/ZhongJing1_5-1_8b-fp16.gguf"
 
 
 
7
 
8
# Download the model from the Hugging Face Hub.
def download_model(url, model_dir="models"):
    """Download the GGUF model referenced by *url* into *model_dir*.

    The repo id and filename are parsed from a Hub URL of the form
    ``https://huggingface.co/<org>/<repo>/resolve/<rev>/<path>`` so the
    *url* argument is actually honoured (previously it was ignored and the
    repo id / filename were hardcoded).  Falls back to the original
    hardcoded values if the URL does not match that shape, keeping the
    existing call site's behavior identical.

    Returns the local filesystem path of the downloaded file.
    """
    from urllib.parse import urlparse

    os.makedirs(model_dir, exist_ok=True)
    # Hub path looks like "<org>/<repo>/resolve/<revision>/<filename...>".
    parts = urlparse(url).path.lstrip("/").split("/")
    if len(parts) >= 5 and parts[2] == "resolve":
        repo_id = "/".join(parts[:2])
        filename = "/".join(parts[4:])
    else:
        # Backward-compatible fallback: the values the code previously hardcoded.
        repo_id = "CMLL/ZhongJing-2-1_8b-GGUF"
        filename = "ZhongJing1_5-1_8b-fp16.gguf"
    return hf_hub_download(repo_id=repo_id, filename=filename, local_dir=model_dir)

model_path = download_model(model_url)
 
 
 
 
15
 
16
# Clone and build llama.cpp, which provides the CLI binary used for inference.
# Failures are raised explicitly: previously os.system() return codes were
# silently ignored, so a failed clone or build went undetected until runtime.
if not os.path.exists("llama.cpp"):
    if os.system("git clone https://github.com/ggerganov/llama.cpp.git") != 0:
        raise RuntimeError("Failed to clone llama.cpp")
    if os.system("cd llama.cpp && mkdir build && cd build && cmake .. && make") != 0:
        raise RuntimeError("Failed to build llama.cpp")

# Write the system prompt file consumed by llama.cpp's -f flag.
prompts_dir = "llama.cpp/prompts"
os.makedirs(prompts_dir, exist_ok=True)
# encoding is explicit: the prompt contains non-ASCII (Chinese) characters,
# and the platform-default encoding may not be able to represent them.
with open(os.path.join(prompts_dir, "TcmChat.txt"), "w", encoding="utf-8") as f:
    f.write("You are a helpful TCM medical assistant named 仲景中医大语言模型.\n")
26
 
27
# Gradio callback: run one turn of chat through the llama.cpp CLI.
def chat_with_model(user_input, history):
    """Generate a reply for *user_input* and append the turn to *history*.

    Returns the updated history twice because the Gradio click handler wires
    the outputs to both the chatbot display and the session state.

    Fixes two defects in the original implementation:
    - ``prompt`` was constructed but never passed to the model, so the
      user's message never reached inference (the CLI only read the static
      prompt file).
    - the command was built as an f-string and run through ``os.popen`` in
      interactive mode (``-i``), which both allowed shell injection via
      ``user_input`` and blocked waiting for terminal input.
    """
    import subprocess  # local import: keeps the script's top-level imports unchanged

    prompt = f"You are a helpful TCM medical assistant named 仲景中医大语言模型.\nUser: {user_input}\nAssistant:"
    # Argument list with shell=False: user input cannot be interpreted by a shell.
    result = subprocess.run(
        [
            "./llama.cpp/build/bin/main",
            "-m", model_path,
            "-n", "256",
            "--repeat_penalty", "1.0",
            "-r", "User:",   # stop generating when the model starts a new user turn
            "-p", prompt,    # pass the full conversation prompt to the model
        ],
        capture_output=True,
        text=True,
    )
    response = result.stdout
    history.append((user_input, response))
    return history, history
33
 
34
# Gradio UI: a chatbot display plus a textbox and submit button wired to
# chat_with_model.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    # Conversation history shared across turns; chat_with_model appends
    # (user, assistant) tuples to it.
    state = gr.State([])

    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(show_label=False, placeholder="Enter your message...")
        with gr.Column():
            submit_btn = gr.Button("Submit")

    # On click: pass the textbox value and the history state to the handler;
    # its two return values update the chatbot display and the state.
    submit_btn.click(chat_with_model, [user_input, state], [chatbot, state])

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
48
 
 
49
 
50
 
51