hiyouga committed on
Commit
e1e6dcd
1 Parent(s): ea4be8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -25
app.py CHANGED
@@ -6,27 +6,17 @@ from threading import Thread
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
 
8
 
9
- TITLE = "<h1><center>LLaMA Board: A One-stop Web UI for Getting Started with LLaMA Factory</center></h1>"
10
-
11
- DESCRIPTION = "<h3><center>Visit <a href='' target='_blank'>LLaMA Factory</a> for details.</center></h3>"
12
-
13
- CSS = r"""
14
- .duplicate-button {
15
- margin: auto !important;
16
- color: white !important;
17
- background: black !important;
18
- border-radius: 100vh !important;
19
- }
20
- """
21
 
 
22
 
23
  tokenizer = AutoTokenizer.from_pretrained("shenzhi-wang/Llama3-8B-Chinese-Chat")
24
  model = AutoModelForCausalLM.from_pretrained("shenzhi-wang/Llama3-8B-Chinese-Chat", device_map="auto")
25
 
26
 
27
- @spaces.GPU(duration=120)
28
- def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int):
29
- conversation = []
30
  for prompt, answer in history:
31
  conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
32
 
@@ -54,16 +44,17 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
54
  yield output
55
 
56
 
57
- with gr.Blocks(fill_height=True, css=CSS) as demo:
58
- gr.HTML(TITLE)
59
- gr.HTML(DESCRIPTION)
60
- gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
61
-
62
  gr.ChatInterface(
63
  fn=stream_chat,
64
  fill_height=True,
65
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
66
  additional_inputs=[
 
 
 
 
 
67
  gr.Slider(
68
  minimum=0,
69
  maximum=1,
@@ -82,13 +73,12 @@ with gr.Blocks(fill_height=True, css=CSS) as demo:
82
  ),
83
  ],
84
  examples=[
85
- ['How to setup a human base on Mars? Give short answer.'],
86
- ['Explain theory of relativity to me like I’m 8 years old.'],
87
- ['What is 9,000 * 9,000?'],
88
- ['Write a pun-filled happy birthday message to my friend Alex.'],
89
- ['Justify why a penguin might make a good king of the jungle.']
90
  ],
91
  cache_examples=False,
 
 
92
  )
93
 
94
 
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
 
8
 
9
+ TITLE = "Chat with Llama3-8B-Chinese"
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ DESCRIPTION = "Visit <a href='https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat' target='_blank'>our model page</a> for details."
12
 
13
  tokenizer = AutoTokenizer.from_pretrained("shenzhi-wang/Llama3-8B-Chinese-Chat")
14
  model = AutoModelForCausalLM.from_pretrained("shenzhi-wang/Llama3-8B-Chinese-Chat", device_map="auto")
15
 
16
 
17
+ @spaces.GPU
18
+ def stream_chat(message: str, history: list, system: str, temperature: float, max_new_tokens: int):
19
+ conversation = [{"role": "system", "content": system}]
20
  for prompt, answer in history:
21
  conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
22
 
 
44
  yield output
45
 
46
 
47
+ with gr.Blocks(fill_height=True) as demo:
 
 
 
 
48
  gr.ChatInterface(
49
  fn=stream_chat,
50
  fill_height=True,
51
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
52
  additional_inputs=[
53
+ gr.Text(
54
+ value="You are a helpful assistant.",
55
+ label="System",
56
+ render=False,
57
+ ),
58
  gr.Slider(
59
  minimum=0,
60
  maximum=1,
 
73
  ),
74
  ],
75
  examples=[
76
+ ["我的蓝牙耳机坏了,我该去看牙科还是耳鼻喉科?"],
77
+ ["今日行军进展如何", "扮演诸葛亮和我对话。"],
 
 
 
78
  ],
79
  cache_examples=False,
80
+ title=TITLE,
81
+ description=DESCRIPTION,
82
  )
83
 
84