DFofanov78 committed on
Commit 12420cf
1 Parent(s): a40e9ff

Update app.py

Files changed (1):
  1. app.py +186 -50
app.py CHANGED
@@ -1,63 +1,199 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
 ):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-
 if __name__ == "__main__":
-    demo.launch()
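For reference, the hosted-inference pattern being removed here can be exercised outside Gradio. A minimal sketch (not part of the commit), assuming `huggingface_hub` v0.22+ and that the public Inference API still serves `HuggingFaceH4/zephyr-7b-beta`:

    from huggingface_hub import InferenceClient

    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
    messages = [
        {"role": "system", "content": "You are a friendly Chatbot."},
        {"role": "user", "content": "Hello!"},
    ]

    # Stream the reply; each chunk carries an incremental text delta.
    for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
        delta = chunk.choices[0].delta.content
        if delta:  # the final chunk may carry no content
            print(delta, end="", flush=True)

The replacement below drops this hosted client entirely: it downloads a GGUF checkpoint once at startup and runs it locally through llama_cpp.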
 import gradio as gr
+import os
+
+from huggingface_hub.file_download import http_get
+from llama_cpp import Llama
+
+SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
+
+def get_message_tokens(model, role, content):
+    content = f"{role}\n{content}\n</s>"
+    content = content.encode("utf-8")
+    return model.tokenize(content, special=True)
+
+
+def get_system_tokens(model):
+    system_message = {"role": "system", "content": SYSTEM_PROMPT}
+    return get_message_tokens(model, **system_message)
+
+
+def load_model(
+    directory: str = ".",
+    model_name: str = "RKF-v1-8b-Instruct-q4_k_m-gguf-unsloth.Q4_K_M.gguf",
+    model_url: str = "https://huggingface.co/DFofanov78/RKF-v1-8b-Instruct-q4_k_m-gguf/resolve/main/RKF-v1-8b-Instruct-q4_k_m-gguf-unsloth.Q4_K_M.gguf"
 ):
+    final_model_path = os.path.join(directory, model_name)
+
+    print("Downloading all files...")
+    if not os.path.exists(final_model_path):
+        with open(final_model_path, "wb") as f:
+            http_get(model_url, f)
+        os.chmod(final_model_path, 0o777)
+    print("Files downloaded!")
+
+    model = Llama(
+        model_path=final_model_path,
+        n_ctx=1024
+    )
+
+    print("Model loaded!")
+    return model
+
+
+MODEL = load_model()
+
+def user(message, history):
+    new_history = history + [[message, None]]
+    return "", new_history
+
+
+def bot(
+    history,
+    system_prompt,
+    top_p,
+    top_k,
+    temp
+):
+    model = MODEL
+    tokens = get_system_tokens(model)[:]
+
+    for user_message, bot_message in history[:-1]:
+        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
+        tokens.extend(message_tokens)
+        if bot_message:
+            message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
+            tokens.extend(message_tokens)
+
+    last_user_message = history[-1][0]
+    message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
+    tokens.extend(message_tokens)
+
+    role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
+    tokens.extend(role_tokens)
+    generator = model.generate(
+        tokens,
+        top_k=top_k,
         top_p=top_p,
+        temp=temp
+    )
+
+    partial_text = ""
+    for i, token in enumerate(generator):
+        if token == model.token_eos():
+            break
+        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
+        history[-1][1] = partial_text
+        yield history
+
+
+with gr.Blocks(
+    theme=gr.themes.Soft()
+) as demo:
+    favicon = '<img src="https://cdn.midjourney.com/b88e5beb-6324-4820-8504-a1a37a9ba36d/0_1.png" width="48px" style="display: inline">'
+    gr.Markdown(
+        f"""<h1><center>{favicon}Saiga2 13B GGUF Q4_K</center></h1>
+        This is a demo of a **Russian**-speaking LLaMA2-based model. If you are interested in other languages, please check other models, such as [MPT-7B-Chat](https://huggingface.co/spaces/mosaicml/mpt-7b-chat).
+        Это демонстрационная версия [квантованной Сайги-2 с 13 миллиардами параметров](https://huggingface.co/IlyaGusev/saiga2_13b_ggml), работающая на CPU.
+        Сайга-2 — это разговорная языковая модель, которая основана на [LLaMA-2](https://ai.meta.com/llama/) и дообучена на корпусах, сгенерированных ChatGPT, таких как [ru_turbo_alpaca](https://huggingface.co/datasets/IlyaGusev/ru_turbo_alpaca), [ru_turbo_saiga](https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga) и [gpt_roleplay_realm](https://huggingface.co/datasets/IlyaGusev/gpt_roleplay_realm).
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=5):
+            system_prompt = gr.Textbox(label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
+            chatbot = gr.Chatbot(label="Диалог")
+        with gr.Column(min_width=80, scale=1):
+            with gr.Tab(label="Параметры генерации"):
+                top_p = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.9,
+                    step=0.05,
+                    interactive=True,
+                    label="Top-p",
+                )
+                top_k = gr.Slider(
+                    minimum=10,
+                    maximum=100,
+                    value=30,
+                    step=5,
+                    interactive=True,
+                    label="Top-k",
+                )
+                temp = gr.Slider(
+                    minimum=0.0,
+                    maximum=2.0,
+                    value=0.01,
+                    step=0.01,
+                    interactive=True,
+                    label="Температура"
+                )
+    with gr.Row():
+        with gr.Column():
+            msg = gr.Textbox(
+                label="Отправить сообщение",
+                placeholder="Отправить сообщение",
+                show_label=False,
+            )
+        with gr.Column():
+            with gr.Row():
+                submit = gr.Button("Отправить")
+                stop = gr.Button("Остановить")
+                clear = gr.Button("Очистить")
+    with gr.Row():
+        gr.Markdown(
+            """ПРЕДУПРЕЖДЕНИЕ: Модель может генерировать фактически или этически некорректные тексты. Мы не несём за это ответственность."""
+        )
+
+    # Pressing Enter
+    submit_event = msg.submit(
+        fn=user,
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=False,
+    ).success(
+        fn=bot,
+        inputs=[
+            chatbot,
+            system_prompt,
+            top_p,
+            top_k,
+            temp
+        ],
+        outputs=chatbot,
+        queue=True,
+    )
+
+    # Pressing the button
+    submit_click_event = submit.click(
+        fn=user,
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=False,
+    ).success(
+        fn=bot,
+        inputs=[
+            chatbot,
+            system_prompt,
+            top_p,
+            top_k,
+            temp
+        ],
+        outputs=chatbot,
+        queue=True,
+    )
+
+    # Stop generation
+    stop.click(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        cancels=[submit_event, submit_click_event],
+        queue=False,
+    )
+
+    # Clear history
+    clear.click(lambda: None, None, chatbot, queue=False)
+
 if __name__ == "__main__":
+    demo.queue(max_size=128)
+    demo.launch(show_error=True)
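The token assembly in `bot` implements a simple Saiga-style chat template. A sketch of the equivalent prompt as a plain string (`render_prompt` is a hypothetical helper for illustration; the committed code tokenizes each turn separately with `special=True` rather than concatenating one string):

    SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."

    def render_prompt(history):
        # Mirrors get_message_tokens: every turn is "{role}\n{content}\n</s>",
        # and generation starts after a dangling "bot\n" role header.
        parts = [f"system\n{SYSTEM_PROMPT}\n</s>"]
        for user_message, bot_message in history:
            parts.append(f"user\n{user_message}\n</s>")
            if bot_message:
                parts.append(f"bot\n{bot_message}\n</s>")
        parts.append("bot\n")
        return "".join(parts)

    # The first turn of a fresh chat, as the model sees it:
    print(render_prompt([["Привет!", None]]))

One design note: the model is loaded with n_ctx=1024 and `bot` does no truncation, so long chat histories will eventually overflow the context window; anyone reusing this pattern should trim older turns before tokenizing.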