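# app.py -- "Update app.py" by benhaotang (commit fafa393, verified), 1.9 kB.
# (Header copied from the Hugging Face file viewer; its "raw" / "history" /
# "blame" links are page chrome, not part of the program.)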
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import spaces
# Hugging Face Hub repository id of the physics fine-tuned model.
MODEL_PATH = "benhaotang/mistral-small-physics-finetuned-bnb-4bit"
# Public model page on the Hub; linked from the interface description.
MODEL_URL = "https://huggingface.co/" + MODEL_PATH
def load_model(model_path=None):
    """Load the causal language model and its tokenizer.

    Args:
        model_path: Hub repository id or local directory to load from.
            When omitted, the module-level ``MODEL_PATH`` is used, so
            existing ``load_model()`` calls keep working unchanged.

    Returns:
        A ``(model, tokenizer)`` tuple, both loaded from ``model_path``.
    """
    if model_path is None:
        # Single source of truth for the repo id instead of repeating the
        # literal for the model and again for the tokenizer.
        model_path = MODEL_PATH

    # NOTE(review): the repository name suggests an already 4-bit-quantized
    # checkpoint, while this config enables neither 4-bit nor 8-bit loading
    # and only sets the fp32 CPU-offload flag. Kept exactly as it was;
    # confirm whether this config has the intended effect.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.float16,
        offload_folder="offload_folder",
        quantization_config=bnb_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer
# Load the model and tokenizer once at import time; generate_response reads
# these two module-level globals on every call.
model, tokenizer = load_model()
# NOTE(review): duration=80 is presumably the per-call GPU time budget in
# seconds -- confirm against the `spaces` package documentation.
@spaces.GPU(duration=80)
def generate_response(prompt, max_length=2048):
    """Generate a text completion for ``prompt`` with the global model.

    Args:
        prompt: The user's question as plain text.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence decoded to a string with special
        tokens removed.
    """
    # Send the encoded prompt to the device the model reports for itself
    # rather than guessing from CUDA availability, so inputs and weights
    # agree however `device_map="auto"` placed the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Single-textbox question/answer UI wired to generate_response.
demo = gr.Interface(
    title="Physics AI Assistant",
    description=(
        "Ask questions about physics concepts, and I'll provide detailed explanations.\n"
        f"Model: [benhaotang/mistral-small-physics-finetuned-bnb-4bit]({MODEL_URL})"
    ),
    fn=generate_response,
    inputs=[
        gr.Textbox(
            lines=5,
            label="Enter your physics question",
            placeholder="Ask me anything about physics...",
        ),
    ],
    outputs=gr.Textbox(lines=10, label="Response"),
    # Each example is a one-element row because the interface has one input.
    examples=[
        [question]
        for question in (
            "Give me a short introduction to renormalization group(RG) flow in physics?",
            "What is quantum entanglement?",
            "Explain the concept of gauge symmetry in physics.",
        )
    ],
)

# Start the web app as soon as the module is executed.
demo.launch()