Tonic committed on
Commit ee5b6dd
1 Parent(s): 3e040ae

improve interface and ZeroGPU logic

Files changed (1)
  1. app.py +14 -30
app.py CHANGED
@@ -1,23 +1,14 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-from globe import title, description, customtool , presentation1, presentation2, joinus
+from globe import title, description, customtool, presentation1, presentation2, joinus
 import spaces
 
 model_path = "nvidia/Mistral-NeMo-Minitron-8B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(model_path)
 
-# # Extract config info from model's configuration
-# config_info = model.config
-
-# # Create a Markdown string to display the complete model configuration information
-# model_info_md = "### Model Configuration: Mistral-NeMo-Minitron-8B-Instruct\n\n"
-# for key, value in config_info.to_dict().items():
-#     model_info_md += f"- **{key.replace('_', ' ').capitalize()}**: {value}\n"
-
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-# pipe.tokenizer = tokenizer
 
 def create_prompt(system_message, user_message, tool_definition="", context=""):
     if tool_definition:
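The commit message calls out ZeroGPU logic: import spaces pulls in Hugging Face's ZeroGPU helper, which attaches a GPU only while a decorated function is running, while module-level code (the model loading above) executes on CPU. A minimal sketch of the usual pattern, assuming the Space decorates its generation entry point (spaces.GPU is the real decorator; the function signature below is just illustrative):

    import spaces

    @spaces.GPU  # GPU is allocated only for the duration of this call
    def generate_response(message, history, system_message, max_tokens,
                          temperature, top_p, use_pipeline, tool_definition, context):
        ...  # body as in app.py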
@@ -43,22 +34,13 @@ def generate_response(message, history, system_message, max_tokens, temperature,
     full_prompt = create_prompt(system_message, message, tool_definition, context)
 
     if use_pipeline:
-        prompt = [{"role": "system", "content": system_message}, {"role": "user", "content": message}]
-        response = pipe(prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, stop_strings=["<extra_id_1>"])[0]['generated_text']
+        response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
     else:
-        tokenized_chat = tokenizer.apply_chat_template(
-            [
-                {"role": "system", "content": system_message},
-                {"role": "user", "content": message},
-            ],
-            tokenize=True,
-            add_generation_prompt=True,
-            return_tensors="pt"
-        )
+        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
 
         with torch.no_grad():
             output_ids = model.generate(
-                tokenized_chat['input_ids'],
+                inputs.input_ids,
                 max_new_tokens=max_tokens,
                 temperature=temperature,
                 top_p=top_p,
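Both branches now consume the same full_prompt string, so the pipeline path and the manual path stay in sync. One step the manual branch typically needs after generate(), sketched here since the decode code sits outside this hunk: slicing off the prompt tokens, because model.generate returns the prompt followed by the completion:

    with torch.no_grad():
        output_ids = model.generate(
            inputs.input_ids,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
    # generate() echoes the prompt; keep only the newly generated tokens
    new_tokens = output_ids[0][inputs.input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)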
@@ -84,12 +66,11 @@ with gr.Blocks() as demo:
         with gr.Column(scale=1):
             with gr.Group():
                 gr.Markdown(presentation1)
-        # with gr.Column(scale=1):
-        #     with gr.Group():
-        #         gr.Markdown(model_info_md)
+        with gr.Column(scale=1):
+            with gr.Group():
+                gr.Markdown(joinus)
     with gr.Row():
-        with gr.Column(scale=3):
-            chatbot = gr.Chatbot(label="🤖 Mistral-NeMo", height=400)
+        with gr.Column(scale=1):
             msg = gr.Textbox(label="User Input", placeholder="Ask a question or request a task...")
             with gr.Accordion(label="🧪Advanced Settings", open=False):
                 system_message = gr.Textbox(
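Together with the next hunk, this turns the old single wide column (scale=3) into two equal columns inside one gr.Row: inputs and settings on the left, the chatbot on the right. A skeleton of the resulting layout, assuming the nesting the diff implies:

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=1):   # left: user input and advanced settings
                msg = gr.Textbox(label="User Input")
            with gr.Column(scale=1):   # right: conversation history
                chatbot = gr.Chatbot(label="🤖 Mistral-NeMo", height=400)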
@@ -111,13 +92,16 @@ with gr.Blocks() as demo:
             with gr.Column(visible=False) as tool_options:
                 tool_definition = gr.Code(
                     label="Tool Definition (JSON)",
-                    value="{}",
+                    value=customtool,
                     lines=15,
                     language="json"
                 )
             with gr.Row():
                 clear = gr.Button("Clear")
                 send = gr.Button("Send")
+        with gr.Column(scale=1):
+            chatbot = gr.Chatbot(label="🤖 Mistral-NeMo", height=400)
+
 
     def user(user_message, history):
         return "", history + [[user_message, None]]
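value=customtool seeds the JSON editor with a real example instead of the empty object "{}". The actual contents of customtool live in globe.py and are not part of this diff; a function-style tool definition for this kind of prompt format might look something like the following (purely hypothetical values):

    import json

    # Hypothetical example of the kind of tool definition customtool might hold;
    # the real value is defined in globe.py and is not shown in this commit.
    example_tool = json.dumps({
        "name": "get_current_weather",
        "description": "Get the current weather for a given city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"},
            },
            "required": ["city"],
        },
    }, indent=2)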
@@ -143,5 +127,5 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
-    demo.queue
-    demo.launch()
+    demo.queue()
+    demo.launch()
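The last hunk fixes a real bug: demo.queue without parentheses is a bare attribute reference that does nothing, so the app previously launched without a request queue. Calling it enables Gradio's queue:

    if __name__ == "__main__":
        demo.queue()   # enable the request queue (needed to serialize concurrent users)
        demo.launch()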
 