ernie4.5_21b_a3b_thinking_demo

Running

App Files Files Community

jzhang533 committed on Apr 25

Commit

050dcec

1 Parent(s): 1665267

launch ernie x1 turbo

Browse files

Signed-off-by: Zhang Jun <jzhang533@gmail.com>

Files changed (2) hide show

README.md +1 -1
app.py +64 -0

README.md CHANGED Viewed

@@ -8,7 +8,7 @@ sdk_version: 5.26.0
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: ernie x1
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 app_file: app.py
 pinned: false
 license: apache-2.0
+short_description: BAIDU's LLM, https://yiyan.baidu.com/
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import os
+import gradio as gr
+from openai import OpenAI
+title = "ERNIE X1 Turbo: BAIDU's Reasoning LLM"
+description = """
+- Official Website: <https://yiyan.baidu.com/> (UI in Chinese)
+- API services: [Qianfan Large Model Platform](https://cloud.baidu.com/product-s/qianfan_home) (cloud platform providing LLM services, UI in Chinese)
+- [ERNIE 4.5 Turbo Demo](https://huggingface.co/spaces/PaddlePaddle/ernie_demo) |  [ERNIE X1 Turbo Demo](https://huggingface.co/spaces/PaddlePaddle/ernie_x1_demo)
+"""
+qianfan_api_key = os.getenv("QIANFAN_TOKEN")
+qianfan_model = "ernie-x1-turbo-32k"
+client = OpenAI(base_url="https://qianfan.baidubce.com/v2", api_key=qianfan_api_key)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+):
+    messages = [{"role": "system", "content": system_message}]
+    messages.append({"role": "user", "content": message})
+    response = client.chat.completions.create(
+        model=qianfan_model,
+        messages=messages,
+        max_completion_tokens=max_tokens,
+        stream=True,
+    )
+    reasoning_content = "**Thinking**:\n"
+    content = "\n\n**Answer**: \n"
+    for chunk in response:
+        if hasattr(chunk.choices[0].delta, 'reasoning_content'):
+            token = chunk.choices[0].delta.reasoning_content
+            if token:
+                reasoning_content += token
+                yield reasoning_content
+        elif hasattr(chunk.choices[0].delta, 'content'):
+            token = chunk.choices[0].delta.content
+            if token:
+                content += token
+                yield reasoning_content + content
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="", label="System message"),
+        gr.Slider(minimum=2, maximum=16384, value=10240, step=1, label="Max new tokens"),
+    ],
+    title=title,
+    description=description,
+    type='messages',
+)
+if __name__ == "__main__":
+    demo.launch()