Hermes-3-Llama-3.2-3B

Running on Zero

App Files Files Community

vilarin committed on Jul 5

Commit

99a7a45

•

1 Parent(s): d5d8ee3

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -17

app.py CHANGED Viewed

@@ -8,14 +8,13 @@ from threading import Thread
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = "THUDM/glm-4-9b-chat"
-MODEL_ID2 = "THUDM/glm-4-9b-chat-1m"
-MODELS = os.environ.get("MODELS")
-MODEL_NAME = MODELS.split("/")[-1]
-TITLE = "<h1><center>GLM-4-9B</center></h1>"
-DESCRIPTION = f'<h3><center>MODEL: <a href="https://hf.co/{MODELS}">{MODEL_NAME}</a></center></h3>'
 CSS = """
 .duplicate-button {
@@ -26,18 +25,26 @@ CSS = """
 }
 """
-model = AutoModelForCausalLM.from_pretrained(
-        MODELS,
         torch_dtype=torch.bfloat16,
         low_cpu_mem_usage=True,
         trust_remote_code=True,
         ).to(0).eval()
-tokenizer = AutoTokenizer.from_pretrained(MODELS,trust_remote_code=True)
 @spaces.GPU
-def stream_chat(message: str, history: list, temperature: float, max_length: int):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
@@ -46,7 +53,14 @@ def stream_chat(message: str, history: list, temperature: float, max_length: int
     conversation.append({"role": "user", "content": message})
     print(f"Conversation is -\n{conversation}")
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
@@ -68,14 +82,10 @@ def stream_chat(message: str, history: list, temperature: float, max_length: int
             buffer += new_text
             yield buffer
-chatbot = gr.Chatbot(height=450)
 with gr.Blocks(css=CSS) as demo:
     gr.HTML(TITLE)
-    gr.HTML(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
         fn=stream_chat,
@@ -99,6 +109,10 @@ with gr.Blocks(css=CSS) as demo:
                 label="Max Length",
                 render=False,
             ),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
@@ -108,7 +122,7 @@ with gr.Blocks(css=CSS) as demo:
         ],
         cache_examples=False,
     )
 if __name__ == "__main__":
     demo.launch()

 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+MODEL_LIST = "THUDM/glm-4-9b-chat, THUDM/glm-4-9b-chat-1m, THUDM/codegeex4-all-9b"
+#MODELS = os.environ.get("MODELS")
+#MODEL_NAME = MODELS.split("/")[-1]
+TITLE = "<h1><center>GLM SPACE</center></h1>"
+PLACEHOLDER = f'<h3><center>Feel Free To Test GLM</center></h3>'
 CSS = """
 .duplicate-button {
 }
 """
+model_chat = AutoModelForCausalLM.from_pretrained(
+        "THUDM/glm-4-9b-chat",
         torch_dtype=torch.bfloat16,
         low_cpu_mem_usage=True,
         trust_remote_code=True,
         ).to(0).eval()
+tokenizer_chat = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-chat",trust_remote_code=True)
+model_code = AutoModelForCausalLM.from_pretrained(
+    "THUDM/codegeex4-all-9b",
+    torch_dtype=torch.bfloat16,
+    low_cpu_mem_usage=True,
+    trust_remote_code=True
+).to(device).eval()
+tokenizer_code = AutoTokenizer.from_pretrained("THUDM/codegeex4-all-9b", trust_remote_code=True)
 @spaces.GPU
+def stream_chat(message: str, history: list, temperature: float, max_length: int, model: str):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
     conversation.append({"role": "user", "content": message})
     print(f"Conversation is -\n{conversation}")
+    if mode == "glm-4-9b-chat":
+        tokenizer = tokenizer_chat
+        model = model_chat
+    else:
+        model = model_code
+        tokenizer = tokenizer_code
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
             buffer += new_text
             yield buffer
+chatbot = gr.Chatbot(height=600, placeholder = PLACEHOLDER)
 with gr.Blocks(css=CSS) as demo:
     gr.HTML(TITLE)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
         fn=stream_chat,
                 label="Max Length",
                 render=False,
             ),
+            choice = gr.Radio(
+                ["glm-4-9b-chat", "codegeex4-all-9b"],
+                label="Load Model"
+            ),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
         ],
         cache_examples=False,
     )
 if __name__ == "__main__":
     demo.launch()