import spaces
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model_name = "Salesforce/xLAM-1b-fc-r"
title = f"# 🚀 Eval Model: {model_name}"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set random seed for reproducibility
torch.random.manual_seed(0)


@spaces.GPU
def generate_response(query):
    # Wrap the raw query as a single user turn and tokenize it with the
    # model's chat template.
    messages = [{"role": "user", "content": query}]
    inputs = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Greedy decoding (do_sample=False) keeps the output deterministic.
    outputs = model.generate(
        inputs,
        max_new_tokens=512,
        do_sample=False,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens so the response box does not
    # echo the prompt back.
    result = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
    return result


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(title)
    with gr.Row():
        with gr.Column():
            query_input = gr.Code(label="User Content", lines=20, language="json")
            submit_button = gr.Button("Generate Response")
        with gr.Column():
            output = gr.Code(label="Response", lines=20, language="json")

    submit_button.click(generate_response, inputs=[query_input], outputs=output)

if __name__ == "__main__":
    demo.launch()
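
# --- Example input (illustrative sketch) ---------------------------------
# Something like the JSON below could be pasted into the "User Content"
# box. xLAM-1b-fc-r is a function-calling model, and the exact fields it
# expects are defined by its chat template / model card; the field names
# here ("query", "tools") are assumptions for illustration, not a
# confirmed schema.
#
# {
#   "query": "What's the weather like in New York?",
#   "tools": [
#     {
#       "name": "get_weather",
#       "description": "Get the current weather for a given location",
#       "parameters": {"location": "New York"}
#     }
#   ]
# }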