Mistral-lab

Running on Zero

App Files Files Community

vilarin committed on Jul 9

Commit

6d1d1e9

•

1 Parent(s): 4ed884e

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -12

app.py CHANGED Viewed

@@ -1,23 +1,22 @@
 import os
 import time
-import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
-MODEL_LIST = ["internlm/internlm2_5-7b-chat", "internlm/internlm2_5-7b-chat-1m"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = os.environ.get("MODEL_ID", None)
 MODEL_NAME = MODEL_ID.split("/")[-1]
-TITLE = "<h1><center>internlm2.5-7b-chat</center></h1>"
 DESCRIPTION = f"""
 <h3>MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
 """
 PLACEHOLDER = """
 <center>
-<p>InternLM2.5 has open-sourced a 7 billion parameter base model<br> and a chat model tailored for practical scenarios.</p>
 </center>
 """
@@ -36,13 +35,12 @@ h3 {
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    torch_dtype=torch.float16,
-    trust_remote_code=True).cuda()
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-model = model.eval()
-@spaces.GPU()
 def stream_chat(
     message: str,
     history: list,
@@ -54,11 +52,11 @@ def stream_chat(
 ):
     print(f'message: {message}')
     print(f'history: {history}')
-    for resp, history in model.stream_chat(
         tokenizer,
         query = message,
         history = history,
-        max_new_tokens = max_new_tokens,
         do_sample = False if temperature == 0 else True,
         top_p = top_p,
         top_k = top_k,
@@ -92,7 +90,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 maximum=8192,
                 step=1,
                 value=1024,
-                label="Max New Tokens",
                 render=False,
             ),
             gr.Slider(

 import os
 import time
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
+MODEL_LIST = ["openbmb/MiniCPM-1B-sft-bf16", "openbmb/MiniCPM-S-1B-sft"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = os.environ.get("MODEL_ID", None)
 MODEL_NAME = MODEL_ID.split("/")[-1]
+TITLE = "<h1><center>MiniCPM-1B-chat</center></h1>"
 DESCRIPTION = f"""
 <h3>MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
 """
 PLACEHOLDER = """
 <center>
+<p>MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 1.2B parameters excluding embeddings.</p>
 </center>
 """
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    device_map='auto',
+    trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 def stream_chat(
     message: str,
     history: list,
 ):
     print(f'message: {message}')
     print(f'history: {history}')
+    for resp, history in model.chat(
         tokenizer,
         query = message,
         history = history,
+        max_length = max_new_tokens,
         do_sample = False if temperature == 0 else True,
         top_p = top_p,
         top_k = top_k,
                 maximum=8192,
                 step=1,
                 value=1024,
+                label="Max Length",
                 render=False,
             ),
             gr.Slider(