SmolLM-Instruct

Running

vilarin committed on Jul 9

Commit

3fb77c6

•

1 Parent(s): 9746484

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import time
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
@@ -9,14 +10,14 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = os.environ.get("MODEL_ID", None)
 MODEL_NAME = MODEL_ID.split("/")[-1]
-TITLE = "<h1><center>MiniCPM-1B-chat</center></h1>"
 DESCRIPTION = f"""
 <h3>MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
 """
 PLACEHOLDER = """
 <center>
-<p>MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 1.2B parameters excluding embeddings.</p>
 </center>
 """
@@ -36,11 +37,12 @@ h3 {
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
     trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
 def stream_chat(
     message: str,
     history: list,
@@ -52,6 +54,7 @@ def stream_chat(
 ):
     print(f'message: {message}')
     print(f'history: {history}')
     resp, history = model.chat(
         tokenizer,
         query = message,
@@ -124,7 +127,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
             ["Tell me a random fun fact about the Roman Empire."],
             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
         ],
-        cache_examples="lazy",
     )

 import os
 import time
+import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 MODEL_ID = os.environ.get("MODEL_ID", None)
 MODEL_NAME = MODEL_ID.split("/")[-1]
+TITLE = "<h1><center>MiniCPM-S-1B-chat</center></h1>"
 DESCRIPTION = f"""
 <h3>MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
 """
 PLACEHOLDER = """
 <center>
+<p>MiniCPM is an End-Size LLM with only 1.2B parameters excluding embeddings.</p>
 </center>
 """
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
+    device_map='auto',
     low_cpu_mem_usage=True,
     trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
+@spaces.GPU()
 def stream_chat(
     message: str,
     history: list,
 ):
     print(f'message: {message}')
     print(f'history: {history}')
+    torch.manual_seed(0)
     resp, history = model.chat(
         tokenizer,
         query = message,
             ["Tell me a random fun fact about the Roman Empire."],
             ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
         ],
+        cache_examples=False,
     )