Tonic committed on
Commit ee5b6dd
1 Parent(s): 3e040ae

improve interface and ZeroGPU logic

Files changed (1)
  1. app.py +14 -30
app.py CHANGED
@@ -1,23 +1,14 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-from globe import title, description, customtool , presentation1, presentation2, joinus
+from globe import title, description, customtool, presentation1, presentation2, joinus
 import spaces
 
 model_path = "nvidia/Mistral-NeMo-Minitron-8B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path)
 
-# # Extract config info from model's configuration
-# config_info = model.config
-
-# # Create a Markdown string to display the complete model configuration information
-# model_info_md = "### Model Configuration: Mistral-NeMo-Minitron-8B-Instruct\n\n"
-# for key, value in config_info.to_dict().items():
-#     model_info_md += f"- **{key.replace('_', ' ').capitalize()}**: {value}\n"
-
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-# pipe.tokenizer = tokenizer
 
 def create_prompt(system_message, user_message, tool_definition="", context=""):
     if tool_definition:
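The commit message calls out ZeroGPU logic: import spaces pulls in Hugging Face's ZeroGPU helper, which attaches a GPU only while a decorated function is running, while module-level code (the model loading above) executes on CPU. A minimal sketch of the usual pattern, assuming the Space decorates its generation entry point (spaces.GPU is the real decorator; the function signature below is just illustrative):

    import spaces

    @spaces.GPU  # GPU is allocated only for the duration of this call
    def generate_response(message, history, system_message, max_tokens,
                          temperature, top_p, use_pipeline, tool_definition, context):
        ...  # body as in app.py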
@@ -43,22 +34,13 @@ def generate_response(message, history, system_message, max_tokens, temperature,
     full_prompt = create_prompt(system_message, message, tool_definition, context)
 
     if use_pipeline:
-        prompt = [{"role": "system", "content": system_message}, {"role": "user", "content": message}]
-        response = pipe(prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, stop_strings=["<extra_id_1>"])[0]['generated_text']
+        response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
     else:
-        tokenized_chat = tokenizer.apply_chat_template(
-            [
-                {"role": "system", "content": system_message},
-                {"role": "user", "content": message},
-            ],
-            tokenize=True,
-            add_generation_prompt=True,
-            return_tensors="pt"
-        )
+        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
 
         with torch.no_grad():
             output_ids = model.generate(
-                tokenized_chat['input_ids'],
+                inputs.input_ids,
                 max_new_tokens=max_tokens,
                 temperature=temperature,
                 top_p=top_p,
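Both branches now consume the same full_prompt string, so the pipeline path and the manual path stay in sync. One step the manual branch typically needs after generate(), sketched here since the decode code sits outside this hunk: slicing off the prompt tokens, because model.generate returns the prompt followed by the completion:

    with torch.no_grad():
        output_ids = model.generate(
            inputs.input_ids,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
    # generate() echoes the prompt; keep only the newly generated tokens
    new_tokens = output_ids[0][inputs.input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)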
@@ -84,12 +66,11 @@ with gr.Blocks() as demo:
         with gr.Column(scale=1):
             with gr.Group():
                 gr.Markdown(presentation1)
-        # with gr.Column(scale=1):
-        #     with gr.Group():
-        #         gr.Markdown(model_info_md)
+        with gr.Column(scale=1):
+            with gr.Group():
+                gr.Markdown(joinus)
     with gr.Row():
-        with gr.Column(scale=3):
-            chatbot = gr.Chatbot(label="🤖 Mistral-NeMo", height=400)
+        with gr.Column(scale=1):
             msg = gr.Textbox(label="User Input", placeholder="Ask a question or request a task...")
             with gr.Accordion(label="🧪Advanced Settings", open=False):
                 system_message = gr.Textbox(
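Together with the next hunk, this turns the old single wide column (scale=3) into two equal columns inside one gr.Row: inputs and settings on the left, the chatbot on the right. A skeleton of the resulting layout, assuming the nesting the diff implies:

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=1):   # left: user input and advanced settings
                msg = gr.Textbox(label="User Input")
            with gr.Column(scale=1):   # right: conversation history
                chatbot = gr.Chatbot(label="🤖 Mistral-NeMo", height=400)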
@@ -111,13 +92,16 @@ with gr.Blocks() as demo:
             with gr.Column(visible=False) as tool_options:
                 tool_definition = gr.Code(
                     label="Tool Definition (JSON)",
-                    value="{}",
+                    value=customtool,
                     lines=15,
                     language="json"
                 )
             with gr.Row():
                 clear = gr.Button("Clear")
                 send = gr.Button("Send")
+        with gr.Column(scale=1):
+            chatbot = gr.Chatbot(label="🤖 Mistral-NeMo", height=400)
+
 
     def user(user_message, history):
         return "", history + [[user_message, None]]
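value=customtool seeds the JSON editor with a real example instead of the empty object "{}". The actual contents of customtool live in globe.py and are not part of this diff; a function-style tool definition for this kind of prompt format might look something like the following (purely hypothetical values):

    import json

    # Hypothetical example of the kind of tool definition customtool might hold;
    # the real value is defined in globe.py and is not shown in this commit.
    example_tool = json.dumps({
        "name": "get_current_weather",
        "description": "Get the current weather for a given city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"},
            },
            "required": ["city"],
        },
    }, indent=2)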
@@ -143,5 +127,5 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-    demo.queue
-    demo.launch()
+    demo.queue()
+    demo.launch()
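The last hunk fixes a real bug: demo.queue without parentheses is a bare attribute reference that does nothing, so the app previously launched without a request queue. Calling it enables Gradio's queue:

    if __name__ == "__main__":
        demo.queue()   # enable the request queue (needed to serialize concurrent users)
        demo.launch()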
 