# Spaces: Running on Zero
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Hugging Face Hub repository ID of the model served by this app.
MODEL_PATH = "benhaotang/mistral-small-physics-finetuned-bnb-4bit"
# Public model page; linked from the UI description.
MODEL_URL = f"https://huggingface.co/{MODEL_PATH}"
def load_model():
    """Load the causal language model and its tokenizer from ``MODEL_PATH``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True,
    )
    # Use the module-level constant rather than repeating the repo ID, so the
    # model, the tokenizer and the UI link can never drift apart.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="auto",
        torch_dtype=torch.float16,
        offload_folder="offload_folder",
        quantization_config=bnb_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    return model, tokenizer
# Load once at import time so every request reuses the same model/tokenizer.
model, tokenizer = load_model()
# ZeroGPU Spaces only attach a GPU while a function decorated with
# ``spaces.GPU`` is running, so the inference entry point must carry it.
@spaces.GPU
def generate_response(prompt, max_length=1024):
    """Generate a text completion for ``prompt`` with the module-level model.

    Args:
        prompt: The user's question as plain text.
        max_length: Forwarded unchanged as ``max_length`` to ``model.generate``.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=max_length)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
# Single-textbox UI: one question in, one generated answer out.
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(
            label="Enter your physics question",
            placeholder="Ask me anything about physics...",
            lines=5,
        ),
    ],
    outputs=gr.Textbox(label="Response", lines=10),
    title="Physics AI Assistant",
    # Built from the constants so the link text and target always match the
    # model that is actually loaded.
    description=(
        "Ask questions about physics concepts, and I'll provide detailed explanations.\n"
        f"Model: [{MODEL_PATH}]({MODEL_URL})"
    ),
    examples=[
        ["Give me a short introduction to renormalization group(RG) flow in physics?"],
        ["What is quantum entanglement?"],
        ["Explain the concept of gauge symmetry in physics."],
    ],
)

demo.launch()