import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import spaces

MODEL_PATH = "benhaotang/mistral-small-physics-finetuned-bnb-4bit"
MODEL_URL = f"https://huggingface.co/{MODEL_PATH}"

def load_model():
    # The checkpoint is already 4-bit quantized (bnb-4bit); this config only
    # enables fp32 CPU offload for any layers that do not fit on the GPU.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="auto",
        torch_dtype=torch.float16,
        offload_folder="offload_folder",
        quantization_config=bnb_config
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    return model, tokenizer

model, tokenizer = load_model()

# Request a ZeroGPU slot for up to 110 seconds per generation call.
@spaces.GPU(duration=110)
def generate_response(prompt, max_new_tokens=1024):
    # Move the tokenized prompt to the same device as the model's first layer.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens caps the generated continuation, independent of prompt length.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(
            label="Enter your physics question",
            placeholder="Ask me anything about physics...",
            lines=5
        ),
    ],
    outputs=gr.Textbox(label="Response", lines=10),
    title="Physics AI Assistant",
    description=f"""Ask questions about physics concepts, and I'll provide detailed explanations.

Model: [{MODEL_PATH}]({MODEL_URL})""",
    examples=[
        ["Give me a short introduction to renormalization group (RG) flow in physics."],
        ["What is quantum entanglement?"],
        ["Explain the concept of gauge symmetry in physics."]
    ]
)

demo.launch()
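
# Note: on a busy Space, all users share the single @spaces.GPU worker, so
# requests are processed one at a time. A minimal sketch of enabling Gradio's
# built-in request queue in place of the plain launch above (max_size=20 is
# an assumed, illustrative value, not a requirement of this demo):
#
#     demo.queue(max_size=20).launch()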