nchen909 committed on
Commit
6805d46
1 Parent(s): 00e079e

Upload folder using huggingface_hub

Files changed (2)
  1. .gradio/certificate.pem +31 -0
  2. app_new.py +153 -124
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
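For reference, the committed .gradio/certificate.pem appears to be the ISRG Root X1 root certificate (the Let's Encrypt root) that Gradio caches locally when share links are used (share=True). A minimal sketch to confirm this locally, assuming the third-party cryptography package is installed:

from cryptography import x509

# Hedged sketch: inspect the certificate added in this commit.
with open(".gradio/certificate.pem", "rb") as f:
    cert = x509.load_pem_x509_certificate(f.read())

# Expected to print something like:
#   CN=ISRG Root X1,O=Internet Security Research Group,C=US
print(cert.subject.rfc4514_string())
print(cert.not_valid_before, "->", cert.not_valid_after)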
app_new.py CHANGED
Before (removed lines marked "-"):
@@ -1,63 +1,97 @@
 import gradio as gr
-
 import os
-
 from huggingface_hub.file_download import http_get
 from llama_cpp import Llama

-
 SYSTEM_PROMPT = "You are Apollo, a multilingual medical model. You communicate with people and assist them."


 def get_message_tokens(model, role, content):
 content = f"{role}\n{content}\n</s>"
 content = content.encode("utf-8")
 return model.tokenize(content, special=True)

-
 def get_system_tokens(model):
 system_message = {"role": "system", "content": SYSTEM_PROMPT}
 return get_message_tokens(model, **system_message)

-
- def load_model(
- directory: str = ".",
- model_name: str = "apollo2-7b-q4_k_m.gguf",
- model_url: str = "https://huggingface.co/nchen909/Apollo2-7B-Q4_K_M-GGUF/resolve/main/apollo2-7b-q4_k_m.gguf"
- ):
 final_model_path = os.path.join(directory, model_name)
-
- print("Downloading all files...")
 if not os.path.exists(final_model_path):
 with open(final_model_path, "wb") as f:
 http_get(model_url, f)
 os.chmod(final_model_path, 0o777)
- print("Files downloaded!")
-
- model = Llama(
- model_path=final_model_path,
- n_ctx=1024
- )
-
- print("Model loaded!")
 return model

-
- MODEL = load_model()
-

 def user(message, history):
 new_history = history + [[message, None]]
 return "", new_history

-
- def bot(
- history,
- system_prompt,
- top_p,
- top_k,
- temp
- ):
 model = MODEL
 tokens = get_system_tokens(model)[:]

@@ -74,132 +108,127 @@ def bot(

 role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
 tokens.extend(role_tokens)
- generator = model.generate(
- tokens,
- top_k=top_k,
- top_p=top_p,
- temp=temp
- )

 partial_text = ""
 for i, token in enumerate(generator):
 if token == model.token_eos():
 break
 partial_text += model.detokenize([token]).decode("utf-8", "ignore")
 history[-1][1] = partial_text
 yield history

-
 with gr.Blocks(
- theme=gr.themes.Soft()
 ) as demo:
- favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
 gr.Markdown(
- f"""<h1><center>{favicon}Saiga2 13B GGUF Q4_K</center></h1>

- This is a demo of a **Russian**-speaking LLaMA2-based model. If you are interested in other languages, please check other models, such as [MPT-7B-Chat](https://huggingface.co/spaces/mosaicml/mpt-7b-chat).

- Это демонстрационная версия [квантованной Сайги-2 с 13 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga2_13b_ggml), работающая на CPU.
-
- Сайга-2 — это разговорная языковая модель, которая основана на [LLaMA-2](https://ai.meta.com/llama/) и дообучена на корпусах, сгенерированных ChatGPT, таких как [ru_turbo_alpaca](https://huggingface.co/datasets/IlyaGusev/ru_turbo_alpaca), [ru_turbo_saiga](https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga) и [gpt_roleplay_realm](https://huggingface.co/datasets/IlyaGusev/gpt_roleplay_realm).
 """
 )
 with gr.Row():
- with gr.Column(scale=5):
- system_prompt = gr.Textbox(label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
- chatbot = gr.Chatbot(label="Диалог")
- with gr.Column(min_width=80, scale=1):
- with gr.Tab(label="Параметры генерации"):
- top_p = gr.Slider(
- minimum=0.0,
- maximum=1.0,
- value=0.9,
- step=0.05,
- interactive=True,
- label="Top-p",
- )
- top_k = gr.Slider(
- minimum=10,
- maximum=100,
- value=30,
- step=5,
 interactive=True,
- label="Top-k",
 )
- temp = gr.Slider(
- minimum=0.0,
- maximum=2.0,
- value=0.01,
- step=0.01,
 interactive=True,
- label="Температура"
 )
- with gr.Row():
- with gr.Column():
- msg = gr.Textbox(
- label="Отправить сообщение",
- placeholder="Отправить сообщение",
- show_label=False,
 )
- with gr.Column():
- with gr.Row():
- submit = gr.Button("Отправить")
- stop = gr.Button("Остановить")
- clear = gr.Button("Очистить")
- with gr.Row():
- gr.Markdown(
- """ПРЕДУПРЕЖДЕНИЕ: Модель может генерировать фактически или этически некорректные тексты. Мы не несём за это ответственность."""
- )

- # Pressing Enter
- submit_event = msg.submit(
- fn=user,
- inputs=[msg, chatbot],
- outputs=[msg, chatbot],
- queue=False,
- ).success(
- fn=bot,
- inputs=[
- chatbot,
- system_prompt,
- top_p,
- top_k,
- temp
- ],
- outputs=chatbot,
- queue=True,
- )

- # Pressing the button
- submit_click_event = submit.click(
 fn=user,
 inputs=[msg, chatbot],
 outputs=[msg, chatbot],
 queue=False,
 ).success(
 fn=bot,
- inputs=[
- chatbot,
- system_prompt,
- top_p,
- top_k,
- temp
- ],
 outputs=chatbot,
 queue=True,
 )

- # Stop generation
- stop.click(
- fn=None,
- inputs=None,
- outputs=None,
- cancels=[submit_event, submit_click_event],
- queue=False,
- )
-
- # Clear history
- clear.click(lambda: None, None, chatbot, queue=False)
-
 demo.queue(max_size=128)
 demo.launch(show_error=True, share=True)
 
After (added lines marked "+"):
 import gradio as gr
 import os
 from huggingface_hub.file_download import http_get
 from llama_cpp import Llama

 SYSTEM_PROMPT = "You are Apollo, a multilingual medical model. You communicate with people and assist them."

+ # Define the directory dynamically
+ dir = "."

 def get_message_tokens(model, role, content):
 content = f"{role}\n{content}\n</s>"
 content = content.encode("utf-8")
 return model.tokenize(content, special=True)

 def get_system_tokens(model):
 system_message = {"role": "system", "content": SYSTEM_PROMPT}
 return get_message_tokens(model, **system_message)

+ def load_model(directory, model_name, model_url):
 final_model_path = os.path.join(directory, model_name)
+ print(f"Checking model: {model_name}")
 if not os.path.exists(final_model_path):
+ print(f"Downloading model: {model_name}")
 with open(final_model_path, "wb") as f:
 http_get(model_url, f)
 os.chmod(final_model_path, 0o777)
+ print(f"Model {model_name} ready!")
+ model = Llama(model_path=final_model_path, n_ctx=1024)
+ print(f"Model {model_name} loaded successfully!")
 return model

+ MODEL_OPTIONS = {
+ "Apollo 0.5B": {
+ "directory": dir,
+ "model_name": "apollo-0.5b.gguf",
+ "model_url": "https://huggingface.co/path_to_apollo_0.5b_model"
+ },
+ "Apollo 2B": {
+ "directory": dir,
+ "model_name": "apollo-2b.gguf",
+ "model_url": "https://huggingface.co/path_to_apollo_2b_model"
+ },
+ "Apollo 7B": {
+ "directory": dir,
+ "model_name": "Apollo-7B-q8_0.gguf",
+ "model_url": "https://huggingface.co/FreedomIntelligence/Apollo-7B-GGUF/resolve/main/Apollo-7B-q8_0.gguf"
+ },
+ "Apollo2 0.5B": {
+ "directory": dir,
+ "model_name": "Apollo-0.5B-q8_0.gguf",
+ "model_url": "https://huggingface.co/FreedomIntelligence/Apollo-0.5B-GGUF/resolve/main/Apollo-0.5B-q8_0.gguf"
+ },
+ "Apollo2 2B": {
+ "directory": dir,
+ "model_name": "Apollo-2B-q8_0.gguf",
+ "model_url": "https://huggingface.co/FreedomIntelligence/Apollo-2B-GGUF/resolve/main/Apollo-2B-q8_0.gguf"
+ },
+ "Apollo2 7B": {
+ "directory": dir,
+ "model_name": "apollo2-7b-q8_0.gguf",
+ "model_url": "https://huggingface.co/nchen909/Apollo2-7B-Q8_0-GGUF/resolve/main/apollo2-7b-q8_0.gguf"
+ }
+ }
+
+ MODEL = None
+
+ def get_model_key(model_type, model_size):
+ return f"{model_type} {model_size}"
+
+ def initialize_model(model_type="Apollo2", model_size="7B"):
+ global MODEL
+ model_key = get_model_key(model_type, model_size)
+ try:
+ print(f"Initializing model: {model_key}")
+ selected_model = MODEL_OPTIONS[model_key]
+ MODEL = load_model(
+ directory=selected_model["directory"],
+ model_name=selected_model["model_name"],
+ model_url=selected_model["model_url"]
+ )
+ print(f"Model initialized: {model_key}")
+ except Exception as e:
+ print(f"Failed to initialize model {model_key}: {e}")
+ MODEL = None

 def user(message, history):
 new_history = history + [[message, None]]
 return "", new_history

+ def bot(history, top_p, top_k, temp):
+ global MODEL
+ if MODEL is None:
+ raise RuntimeError("Model has not been initialized. Please select a model to load.")
 model = MODEL
 tokens = get_system_tokens(model)[:]


 role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
 tokens.extend(role_tokens)
+
+ generator = model.generate(tokens, top_k=top_k, top_p=top_p, temp=temp)

 partial_text = ""
 for i, token in enumerate(generator):
 if token == model.token_eos():
 break
+
 partial_text += model.detokenize([token]).decode("utf-8", "ignore")
 history[-1][1] = partial_text
 yield history

 with gr.Blocks(
+ theme=gr.themes.Monochrome(),
+ analytics_enabled=False,
 ) as demo:
+ favicon = '<img src="https://huggingface.co/FreedomIntelligence/Apollo2-7B/resolve/main/assets/apollo_medium_final.png" width="148px" style="display: inline">'
 gr.Markdown(
+ f"""# {favicon} Apollo GGUF Playground

+ This is a demo of multilingual medical model series **[Apollo](https://huggingface.co/FreedomIntelligence/Apollo-7B-GGUF)**, GGUF version. [Apollo1](https://arxiv.org/abs/2403.03640) covers 6 languages. [Apollo2](https://arxiv.org/abs/2410.10626) covers 50 languages.

 """
 )
 with gr.Row():
+ with gr.Column(scale=3):
+ chatbot = gr.Chatbot(label="Conversation")
+ msg = gr.Textbox(
+ label="Send Message",
+ placeholder="Send Message",
+ show_label=False,
+ elem_id="send-message-box"
+ )
+ with gr.Column(scale=1):
+ # Keep model_type and model_size in the same gr.Row
+ with gr.Row(equal_height=False):
+ model_type = gr.Dropdown(
+ choices=["Apollo", "Apollo2"],
+ value="Apollo2",
+ label="Select Model",
 interactive=True,
+ elem_id="model-type-dropdown",
 )
+ model_size = gr.Dropdown(
+ choices=["0.5B", "2B", "7B"],
+ value="7B",
+ label="Select Size",
 interactive=True,
+ elem_id="model-size-dropdown",
 )
+ #gr.Markdown("### Generation Parameters")
+ top_p = gr.Slider(
+ minimum=0.0,
+ maximum=1.0,
+ value=0.9,
+ step=0.05,
+ interactive=True,
+ label="Top-p",
 )
+ top_k = gr.Slider(
+ minimum=10,
+ maximum=100,
+ value=30,
+ step=5,
+ interactive=True,
+ label="Top-k",
+ )
+ temp = gr.Slider(
+ minimum=0.0,
+ maximum=2.0,
+ value=0.01,
+ step=0.01,
+ interactive=True,
+ label="Temperature"
+ )
+ with gr.Row(equal_height=False):
+ submit = gr.Button("Send", elem_id="send-btn")
+ stop = gr.Button("Stop", elem_id="stop-btn")
+ clear = gr.Button("Clear", elem_id="clear-btn")


+ def update_model(model_type, model_size):
+ initialize_model(model_type, model_size)
+
+ model_type.change(update_model, [model_type, model_size], None)
+ model_size.change(update_model, [model_type, model_size], None)
+
+ msg.submit(
 fn=user,
 inputs=[msg, chatbot],
 outputs=[msg, chatbot],
 queue=False,
 ).success(
 fn=bot,
+ inputs=[chatbot, top_p, top_k, temp],
 outputs=chatbot,
 queue=True,
 )

 demo.queue(max_size=128)
+ demo.css = """
+ footer {display: none !important;}
+ #send-message-box {width: 100%;}
+ #send-btn, #stop-btn, #clear-btn {
+ display: inline-block; /* force inline-block */
+ width: 30%; /* make each button 30% of the parent container's width */
+ margin-right: 2px; /* add spacing between buttons */
+ text-align: center; /* center the button content */
+ }
+
+ .gr-row {
+ display: flex !important; /* force flex layout */
+ flex-direction: row !important; /* arrange items horizontally */
+ justify-content: space-between; /* adjust spacing between components */
+ align-items: center; /* center vertically */
+ flex-wrap: nowrap; /* keep buttons from wrapping */
+ }
+ """
+
+
+ # Initialize the default model at startup
+ initialize_model("Apollo2", "7B")
+
 demo.launch(show_error=True, share=True)
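For context, the prompt template and streaming loop that bot() relies on can be exercised without launching the Gradio UI. The sketch below is not part of the commit; it mirrors the helpers in app_new.py but talks to llama_cpp directly, and it assumes the "Apollo2 7B" GGUF file from MODEL_OPTIONS has already been downloaded to the working directory.

from llama_cpp import Llama

SYSTEM_PROMPT = "You are Apollo, a multilingual medical model. You communicate with people and assist them."

# Assumed local path; mirrors the "Apollo2 7B" entry in MODEL_OPTIONS.
model = Llama(model_path="apollo2-7b-q8_0.gguf", n_ctx=1024)

def message_tokens(role, content):
    # Same template as get_message_tokens(): "<role>\n<content>\n</s>"
    return model.tokenize(f"{role}\n{content}\n</s>".encode("utf-8"), special=True)

# Build the prompt exactly as bot() does: system turn, user turn, then a "bot\n" header.
tokens = message_tokens("system", SYSTEM_PROMPT)
tokens += message_tokens("user", "What is hypertension?")
tokens += model.tokenize("bot\n".encode("utf-8"), special=True)

# Stream tokens with the same sampling defaults the UI exposes.
reply = ""
for token in model.generate(tokens, top_k=30, top_p=0.9, temp=0.01):
    if token == model.token_eos():
        break
    reply += model.detokenize([token]).decode("utf-8", "ignore")
print(reply)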