Hugging Face Space — Circuit Sparsity Model Demo (Space status at capture time: runtime error).
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer once at startup so every request reuses them
# instead of paying the load cost per call.
print("Loading model...")
# trust_remote_code=True: this repo ships custom modeling/tokenizer code
# that must be executed to load the checkpoint — acceptable only because
# the source ("openai/circuit-sparsity") is known and trusted.
tok = AutoTokenizer.from_pretrained("openai/circuit-sparsity", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "openai/circuit-sparsity",
    trust_remote_code=True,
    torch_dtype="auto",  # use the dtype stored in the checkpoint
)
# Prefer GPU when one is visible; otherwise run on CPU.
model.to("cuda" if torch.cuda.is_available() else "cpu")
print(f"Model loaded on {model.device}")
def generate(prompt, max_new_tokens, temperature, top_p):
    """Sample a continuation of ``prompt`` from the loaded model.

    Args:
        prompt: Input text to continue.
        max_new_tokens: Maximum number of tokens to generate (coerced to int,
            since Gradio sliders deliver floats).
        temperature: Softmax temperature for sampling (> 0).
        top_p: Nucleus-sampling probability mass.

    Returns:
        The prompt plus the generated continuation, decoded to text.
    """
    # Keep the full encoding (input_ids AND attention_mask) so generate()
    # does not have to guess which positions are padding — passing only
    # input_ids triggers a warning and can alter sampling on models whose
    # pad and eos tokens coincide.
    enc = tok(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    with torch.no_grad():
        out = model.generate(
            **enc,
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            # Explicit pad token silences the "no pad token set" warning on
            # models that only define an eos token.
            pad_token_id=tok.pad_token_id if tok.pad_token_id is not None else tok.eos_token_id,
            return_dict_in_generate=False,
        )
    return tok.decode(out[0], skip_special_tokens=True)
# Build the Gradio interface: prompt + sampling controls on the left,
# generated text on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Circuit Sparsity Model Demo")
    with gr.Row():
        with gr.Column():
            prompt_box = gr.Textbox(
                label="Prompt",
                lines=8,
                value="def square_sum(xs):\n return sum(x * x for x in xs)\n\nsquare_sum([1, 2, 3])\n",
            )
            with gr.Row():
                tokens_slider = gr.Slider(
                    minimum=1, maximum=256, value=64, step=1, label="Max New Tokens"
                )
                temp_slider = gr.Slider(
                    minimum=0.1, maximum=2.0, value=0.8, step=0.1, label="Temperature"
                )
                top_p_slider = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top P"
                )
            run_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            result_box = gr.Textbox(label="Output", lines=12)
    # Wire the button to the sampling function.
    run_btn.click(
        generate,
        inputs=[prompt_box, tokens_slider, temp_slider, top_p_slider],
        outputs=result_box,
    )

if __name__ == "__main__":
    demo.launch()