|
import gradio as gr |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig |
|
import torch |
|
import spaces |
|
|
|
# Hugging Face Hub repository id passed to ``from_pretrained``.
MODEL_PATH = "benhaotang/mistral-small-physics-finetuned-bnb-4bit"

# Model page on the Hub, derived from the repository id; linked from the
# interface description.
MODEL_URL = "https://huggingface.co/" + MODEL_PATH
|
|
|
def load_model():
    """Load the causal language model and its tokenizer from ``MODEL_PATH``.

    Both objects are resolved from the module-level ``MODEL_PATH`` constant,
    so the checkpoint that is loaded always matches the model link shown in
    the interface description.

    Returns:
        tuple: ``(model, tokenizer)`` as returned by
        ``AutoModelForCausalLM.from_pretrained`` and
        ``AutoTokenizer.from_pretrained``.
    """
    # 8-bit loading is explicitly disabled; the fp32 CPU-offload flag is
    # switched on so that modules placed off-GPU may stay on the CPU.
    # NOTE(review): the repository name suggests pre-quantized 4-bit weights;
    # confirm how this config combines with the one stored in the checkpoint.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        # Let accelerate decide where each module lives.
        device_map="auto",
        torch_dtype=torch.float16,
        # Directory handed to ``from_pretrained`` for offloaded weights.
        offload_folder="offload_folder",
        quantization_config=bnb_config,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    return model, tokenizer
|
|
|
# Load once at import time; generate_response reads these module-level
# objects on every request instead of reloading the checkpoint.
model, tokenizer = load_model()
|
|
|
@spaces.GPU(duration=80)
def generate_response(prompt, max_length=2048):
    """Generate the model's reply to a physics question.

    Args:
        prompt: Question text entered by the user.
        max_length: Forwarded to ``model.generate`` as ``max_length``; in
            transformers this caps the total sequence length (prompt tokens
            plus newly generated tokens).

    Returns:
        str: The first generated sequence decoded with special tokens
        removed, or a short hint when ``prompt`` is empty or whitespace.
    """
    # Nothing to answer: skip the GPU round-trip for blank submissions.
    if not prompt or not prompt.strip():
        return "Please enter a physics question."

    # Put the inputs on the device the model was actually placed on rather
    # than guessing from global CUDA availability; with device_map="auto"
    # the two are not guaranteed to agree.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_length=max_length)

    # Only the first returned sequence is decoded.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
# Single-textbox Gradio UI: one question in, one generated answer out.
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(
            label="Enter your physics question",
            placeholder="Ask me anything about physics...",
            lines=5,
        ),
    ],
    outputs=gr.Textbox(label="Response", lines=10),
    title="Physics AI Assistant",
    # Link text and target both come from the module constants so they
    # cannot drift apart from the checkpoint that is actually loaded.
    description=f"""Ask questions about physics concepts, and I'll provide detailed explanations.

Model: [{MODEL_PATH}]({MODEL_URL})""",
    # Each example is a one-element list matching the single input component.
    examples=[
        ["Give me a short introduction to renormalization group(RG) flow in physics?"],
        ["What is quantum entanglement?"],
        ["Explain the concept of gauge symmetry in physics."],
    ],
)

# Start the web server for the interface.
demo.launch()