Spaces:

r3gm
/

LlamaVerse

Running on Zero

App Files Files Community

r3gm committed on May 13

Commit

7455667

•

1 Parent(s): fe3aef4

Upload 6 files

Browse files

Files changed (6) hide show

README.md +8 -7
app.py +269 -0
assets/avatar_llama.jpeg +0 -0
assets/avatar_user.jpeg +0 -0
assets/background.png +0 -0
requirements.txt +3 -0

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
-title: LlamaVerse
-emoji: 📚
-colorFrom: purple
-colorTo: red
 sdk: gradio
-sdk_version: 4.31.0
 app_file: app.py
-pinned: false
 license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: 🦙 Llama 3 70B
+emoji: 💬🖥️
+colorFrom: yellow
+colorTo: green
 sdk: gradio
+sdk_version: 4.28.3
 app_file: app.py
+pinned: true
 license: mit
+short_description: Llama 3 70B powered by the LLaMA.cpp backend
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,269 @@

+import spaces
+import gradio as gr
+from huggingface_hub import hf_hub_download
+from llama_cpp_cuda_tensorcores import Llama
+# Hugging Face Hub repository and GGUF file that the script entry point
+# downloads with hf_hub_download().
+REPO_ID = "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF"
+MODEL_NAME = "Meta-Llama-3-70B-Instruct.Q3_K_L.gguf"
+# Passed to the model as ``n_ctx``.
+MAX_CONTEXT_LENGTH = 8192
+# When True the model config uses n_gpu_layers=-1, otherwise 0.
+CUDA = True
+# Default system message; also the "Assistant" entry of TASK_PROMPT.
+SYSTEM_PROMPT = "You are a helpful, smart, kind, and efficient AI assistant. You always fulfill the user's requests to the best of your ability."
+# Stop sequences handed to the model call as ``stop=``.
+TOKEN_STOP = ["<|eot_id|>"]
+# Prompt templates. The literal words SYSTEM_PROMPT / USER_PROMPT inside the
+# strings are placeholders substituted with str.replace() in
+# ChatLLM.apply_chat_template; they are not references to the variables above.
+SYS_MSG = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nSYSTEM_PROMPT<|eot_id|>\n"
+USER_PROMPT = (
+    "<|start_header_id|>user<|end_header_id|>\n\nUSER_PROMPT<|eot_id|>\n"
+)
+# Header appended after every user turn, immediately before the assistant text.
+ASSIS_PROMPT = "<|start_header_id|>assistant<|end_header_id|>\n\n"
+# Terminator appended after an assistant reply that is already non-empty.
+END_ASSIS_PREVIOUS_RESPONSE = "<|eot_id|>\n"
+# Preset system messages keyed by the label shown in the "Prompt selector"
+# dropdown; selecting a label copies its value into the system message box.
+TASK_PROMPT = {
+    "Assistant": SYSTEM_PROMPT,
+    "Translate": "You are an expert translator. Translate the following text into English.",
+    "Summarization": "Summarizing information is my specialty. Let me know what you'd like summarized.",
+    "Grammar correction": "Grammar is my forte! Feel free to share the text you'd like me to proofread and correct.",
+    "Stable diffusion prompt generator": "You are a stable diffusion prompt generator. Break down the user's text and create a more elaborate prompt.",
+    "Play Trivia": "Engage the user in a trivia game on various topics.",
+    "Share Fun Facts": "Share interesting and fun facts on various topics.",
+    "Explain code": "You are an expert programmer guiding someone through a piece of code step by step, explaining each line and its function in detail.",
+    "Paraphrase Master": "You have the knack for transforming complex or verbose text into simpler, clearer language while retaining the original meaning and essence.",
+    "Recommend Movies": "Recommend movies based on the user's preferences.",
+    "Offer Motivational Quotes": "Offer motivational quotes to inspire the user.",
+    "Recommend Books": "Recommend books based on the user's favorite genres or interests.",
+    "Philosophical discussion": "Engage the user in a philosophical discussion",
+    "Music recommendation": "Tune time! What kind of music are you in the mood for? I'll find the perfect song for you.",
+    "Generate a Joke": "Generate a witty joke suitable for a stand-up comedy routine.",
+    "Roleplay as a Detective": "Roleplay as a detective interrogating a suspect in a murder case.",
+    "Act as a News Reporter": "Act as a news reporter covering breaking news about an alien invasion.",
+    "Play as a Space Explorer": "Play as a space explorer encountering a new alien civilization.",
+    "Be a Medieval Knight": "Imagine yourself as a medieval knight embarking on a quest to rescue a princess.",
+    "Act as a Superhero": "Act as a superhero saving a city from a supervillain's evil plot.",
+    "Play as a Pirate Captain": "Play as a pirate captain searching for buried treasure on a remote island.",
+    "Be a Famous Celebrity": "Imagine yourself as a famous celebrity attending a glamorous red-carpet event.",
+    "Design a New Invention": "Imagine you're an inventor tasked with designing a revolutionary new invention that will change the world.",
+    "Act as a Time Traveler": "You've just discovered time travel! Describe your adventures as you journey through different eras.",
+    "Play as a Magical Girl": "You are a magical girl with extraordinary powers, battling dark forces to protect your city and friends.",
+    "Act as a Shonen Protagonist": "You are a determined and spirited shonen protagonist on a quest for strength, friendship, and victory.",
+    "Roleplay as a Tsundere Character": "You are a tsundere character, initially cold and aloof but gradually warming up to others through unexpected acts of kindness.",
+}
+# Stylesheet passed to gr.Blocks(css=...): sets the page background image
+# from the assets directory (which the launch call lists in allowed_paths).
+css = ".gradio-container {background-image: url('file=./assets/background.png'); background-size: cover; background-position: center; background-repeat: no-repeat;}"
+class ChatLLM:
+    def __init__(self, config_model):
+        self.llm = None
+        self.config_model = config_model
+        # self.load_cpp_model()
+    def load_cpp_model(self):
+        self.llm = Llama(**config_model)
+    def apply_chat_template(
+        self,
+        history,
+        system_message,
+    ):
+        history = history or []
+        messages = SYS_MSG.replace("SYSTEM_PROMPT", system_message.strip())
+        for msg in history:
+            messages += (
+                USER_PROMPT.replace("USER_PROMPT", msg[0]) + ASSIS_PROMPT + msg[1]
+            )
+            messages += END_ASSIS_PREVIOUS_RESPONSE if msg[1] else ""
+        print(messages)
+        # messages = messages[:-1]
+        return messages
+    @spaces.GPU(duration=120)
+    def response(
+        self,
+        history,
+        system_message,
+        max_tokens,
+        temperature,
+        top_p,
+        top_k,
+        repeat_penalty,
+    ):
+        messages = self.apply_chat_template(history, system_message)
+        history[-1][1] = ""
+        if not self.llm:
+            print("Loading model")
+            self.load_cpp_model()
+        for output in self.llm(
+            messages,
+            echo=False,
+            stream=True,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            repeat_penalty=repeat_penalty,
+            stop=TOKEN_STOP,
+        ):
+            answer = output["choices"][0]["text"]
+            history[-1][1] += answer
+            # stream the response
+            yield history, history
+def user(message, history):
+    history = history or []
+    # Append the user's message to the conversation history
+    history.append([message, ""])
+    return "", history
+def clear_chat(chat_history_state, chat_message):
+    chat_history_state = []
+    chat_message = ""
+    return chat_history_state, chat_message
+def gui(llm_chat):
+    """Build the Gradio Blocks interface for the chat demo and return it.
+    The app is only constructed here; launching is left to the caller.
+    Args:
+        llm_chat: object whose ``response`` method is registered as the
+            streaming generation callback.
+    Returns:
+        The constructed ``gr.Blocks`` app.
+    """
+    with gr.Blocks(theme="NoCrypt/miku", css=css) as app:
+        gr.Markdown("# Llama 3 70B Instruct GGUF")
+        gr.Markdown(
+            f"""
+                ### This demo utilizes the repository ID {REPO_ID} with the model {MODEL_NAME}, powered by the LLaMA.cpp backend.
+                """
+        )
+        with gr.Row():
+            # Wider column (scale=2): the conversation display.
+            with gr.Column(scale=2):
+                chatbot = gr.Chatbot(
+                    label="Chat",
+                    height=700,
+                    avatar_images=(
+                        "assets/avatar_user.jpeg",
+                        "assets/avatar_llama.jpeg",
+                    ),
+                )
+            # Narrower column (scale=1): message input, buttons and settings.
+            with gr.Column(scale=1):
+                with gr.Row():
+                    message = gr.Textbox(
+                        label="Message",
+                        placeholder="Ask me anything.",
+                        lines=3,
+                    )
+                with gr.Row():
+                    submit = gr.Button(value="Send message", variant="primary")
+                    clear = gr.Button(value="New chat", variant="primary")
+                    stop = gr.Button(value="Stop", variant="secondary")
+                with gr.Accordion("Contextual Prompt Editor"):
+                    default_task = "Assistant"
+                    task_prompts_gui = gr.Dropdown(
+                        TASK_PROMPT,
+                        value=default_task,
+                        label="Prompt selector",
+                        visible=True,
+                        interactive=True,
+                    )
+                    system_msg = gr.Textbox(
+                        TASK_PROMPT[default_task],
+                        label="System Message",
+                        placeholder="system prompt",
+                        lines=4,
+                    )
+                    def task_selector(choice):
+                        """Return an update that sets the system message to the chosen preset."""
+                        return gr.update(value=TASK_PROMPT[choice])
+                    # Picking a preset overwrites the system message box.
+                    task_prompts_gui.change(
+                        task_selector,
+                        [task_prompts_gui],
+                        [system_msg],
+                    )
+                with gr.Accordion("Advanced settings", open=False):
+                    with gr.Column():
+                        max_tokens = gr.Slider(
+                            20, 4096, label="Max Tokens", step=20, value=400
+                        )
+                        temperature = gr.Slider(
+                            0.2, 2.0, label="Temperature", step=0.1, value=0.8
+                        )
+                        top_p = gr.Slider(
+                            0.0, 1.0, label="Top P", step=0.05, value=0.95
+                        )
+                        top_k = gr.Slider(
+                            0, 100, label="Top K", step=1, value=40
+                        )
+                        repeat_penalty = gr.Slider(
+                            0.0,
+                            2.0,
+                            label="Repetition Penalty",
+                            step=0.1,
+                            value=1.1,
+                        )
+                # Conversation history shared between the callbacks below;
+                # user() fills it with [user_message, assistant_reply] pairs.
+                chat_history_state = gr.State()
+                # "New chat" has two handlers: this one resets the history
+                # state and the message box ...
+                clear.click(
+                    clear_chat,
+                    inputs=[chat_history_state, message],
+                    outputs=[chat_history_state, message],
+                    queue=False,
+                )
+                # ... and this one blanks the chat display.
+                clear.click(lambda: None, None, chatbot, queue=False)
+                # "Send message" runs in two steps: record the user's turn and
+                # empty the message box, then stream the model's reply into
+                # both the chat display and the history state.
+                submit_click_event = submit.click(
+                    fn=user,
+                    inputs=[message, chat_history_state],
+                    outputs=[message, chat_history_state],
+                    queue=True,
+                ).then(
+                    fn=llm_chat.response,
+                    inputs=[
+                        chat_history_state,
+                        system_msg,
+                        max_tokens,
+                        temperature,
+                        top_p,
+                        top_k,
+                        repeat_penalty,
+                    ],
+                    outputs=[chatbot, chat_history_state],
+                    queue=True,
+                )
+                # "Stop" runs no function of its own; it only cancels the
+                # submit chain registered above.
+                stop.click(
+                    fn=None,
+                    inputs=None,
+                    outputs=None,
+                    cancels=[submit_click_event],
+                    queue=False,
+                )
+    return app
+# Script entry point: fetch the model file, build the chat helper and UI,
+# then serve the app.
+if __name__ == "__main__":
+    # Download the GGUF file from the Hub and get its local path.
+    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME)
+    # Keyword arguments for the Llama constructor.
+    config_model = {
+        "model_path": model_path,
+        "n_ctx": MAX_CONTEXT_LENGTH,
+        # -1 when CUDA is enabled, 0 otherwise
+        # (presumably "offload all layers" vs "CPU only" -- confirm against
+        # the llama-cpp-python docs).
+        "n_gpu_layers": -1 if CUDA else 0,
+    }
+    llm_chat = ChatLLM(config_model)
+    app = gui(llm_chat)
+    app.queue(default_concurrency_limit=40)
+    app.launch(
+        max_threads=40,
+        share=False,
+        show_error=True,
+        quiet=False,
+        debug=True,
+        # Lets the app serve the avatar and background files under ./assets/.
+        allowed_paths=["./assets/"],
+    )

assets/avatar_llama.jpeg ADDED Viewed

assets/avatar_user.jpeg ADDED Viewed

assets/background.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.69+cpuavx2-cp310-cp310-linux_x86_64.whl
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.69+cu121-cp310-cp310-linux_x86_64.whl
+torch==2.2.0