import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer import torch import spaces model_name = "MBZUAI-Paris/Atlas-Chat-9B" dtype = torch.bfloat16 model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype=dtype, device_map="auto" ) tokenizer = AutoTokenizer.from_pretrained(model_name) @spaces.GPU def chat(input_text, history=[]): # Tokenize the input and generate response inputs = tokenizer(input_text, return_tensors="pt").to(model.device) outputs = model.generate(**inputs, max_new_tokens=150) response = tokenizer.decode(outputs[0], skip_special_tokens=True) # Update the conversation history history.append((input_text, response)) return history, history with gr.Blocks() as app: gr.Markdown("