import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from globe import title, description, customtool, presentation1, presentation2, joinus
import spaces
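# Load the OpenReasoning-Nemotron-14B tokenizer and model once at startup;
# device_map="auto" lets accelerate place the bfloat16 weights on the available GPU(s).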
model_path = "nvidia/OpenReasoning-Nemotron-14B"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map="auto")
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")
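# Build the plain-text, step-by-step prompt expected by the model, optionally
# embedding a tool definition and extra context.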
def create_prompt(system_message, user_message, tool_definition="", context=""):
    # OpenReasoning-Nemotron uses a plain-text, think-step-by-step prompt format
    if tool_definition:
        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
System Context: {system_message}
Tool Definition: {tool_definition}
Context: {context}
User: {user_message}
Assistant: Let me think about this step by step."""
    else:
        return f"""You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
System Context: {system_message}
User: {user_message}
Assistant: Let me think about this step by step."""
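# ZeroGPU: the spaces.GPU decorator below reserves a GPU for this call, for up to 94 seconds.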
@spaces.GPU(duration=94)
def generate_response(message, history, system_message, max_tokens, temperature, top_p, do_sample, use_pipeline=False, tool_definition="", context=""):
    full_prompt = create_prompt(system_message, message, tool_definition, context)
    if use_pipeline:
        response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=do_sample)[0]['generated_text']
    else:
        max_model_length = model.config.max_position_embeddings if hasattr(model.config, 'max_position_embeddings') else 8192
        max_length = max_model_length - max_tokens
        inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
        input_ids = inputs['input_ids'].to(model.device)
        attention_mask = inputs['attention_mask'].to(model.device)
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample,
                attention_mask=attention_mask
            )
        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Extract the assistant response (everything after "Assistant: ")
    assistant_response = response.split("Assistant: ")[-1].strip()
    if tool_definition and "<toolcall>" in assistant_response:
        tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
        assistant_response += f"\n\nTool Call: {tool_call}\n\nNote: This is a simulated tool call. In a real scenario, the tool would be executed and its output would be used to generate a final response."
    return assistant_response
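# Gradio chat callbacks: `user` appends the new message to the history,
# `bot` fills in the assistant reply for the latest turn.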
def user(user_message, history):
    return "", history + [[user_message, None]]
def bot(history, system_prompt, max_length, temperature, top_p, advanced_checkbox, use_pipeline, tool_definition):
    user_message = history[-1][0]
    do_sample = advanced_checkbox
    bot_message = generate_response(user_message, history, system_prompt, max_length, temperature, top_p, do_sample, use_pipeline, tool_definition)
    history[-1][1] = bot_message
    return history
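# UI: title, description and presentation panels, then a chat column with
# optional advanced settings and function-calling (tool) options.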
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(title)
    with gr.Row():
        gr.Markdown(description)
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown(presentation1)
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown(joinus)
    with gr.Row():
        with gr.Column(scale=2):
            system_prompt = gr.TextArea(label="📑Context", placeholder="add context here...", lines=5)
            user_input = gr.TextArea(label="🤷🏻‍♂️User Input", placeholder="Hi there my name is Tonic!", lines=2)
            advanced_checkbox = gr.Checkbox(label="🧪 Advanced Settings", value=False)
            with gr.Column(visible=False) as advanced_settings:
                max_length = gr.Slider(label="📏Max Length", minimum=12, maximum=64000, value=2048, step=1)
                temperature = gr.Slider(label="🌡️Temperature", minimum=0.01, maximum=1.0, value=0.7, step=0.01)
                top_p = gr.Slider(label="⚛️Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.9, step=0.01)
                use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
                use_tool = gr.Checkbox(label="Use Function Calling", value=False)
            with gr.Column(visible=False) as tool_options:
                tool_definition = gr.Code(
                    label="Tool Definition (JSON)",
                    value=customtool,
                    lines=15,
                    language="json"
                )
            generate_button = gr.Button(value="🤖OpenReasoning-Nemotron-14B")
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="🤖OpenReasoning-Nemotron-14B")

    generate_button.click(
        user,
        [user_input, chatbot],
        [user_input, chatbot],
        queue=False
    ).then(
        bot,
        [chatbot, system_prompt, max_length, temperature, top_p, advanced_checkbox, use_pipeline, tool_definition],
        chatbot
    )

    advanced_checkbox.change(
        fn=lambda x: gr.update(visible=x),
        inputs=[advanced_checkbox],
        outputs=[advanced_settings]
    )
    use_tool.change(
        fn=lambda x: gr.update(visible=x),
        inputs=[use_tool],
        outputs=[tool_options]
    )
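# Note: mcp_server=True also exposes this demo as an MCP server (this assumes
# gradio is installed with the [mcp] extra); ssr_mode=False disables server-side rendering.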
if __name__ == "__main__":
demo.queue()
demo.launch(ssr_mode=False, mcp_server=True) |