Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,899 Bytes
1d556e8 582aa9d 1d556e8 fafa393 582aa9d 433da6f a209453 582aa9d 1d556e8 582aa9d fafa393 582aa9d 1d556e8 582aa9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import spaces
# Hugging Face Hub repository id of the model served by this app.
MODEL_PATH = "benhaotang/mistral-small-physics-finetuned-bnb-4bit"
# Public model page for MODEL_PATH; interpolated into the UI description as a link.
MODEL_URL = f"https://huggingface.co/{MODEL_PATH}"
def load_model(model_path=None, offload_folder="offload_folder"):
    """Load the causal language model and its tokenizer.

    Args:
        model_path: Repository id or local path handed to ``from_pretrained``.
            Defaults to the module-level ``MODEL_PATH`` so the loaded model
            and the link shown in the UI cannot drift apart.
        offload_folder: Directory passed to ``from_pretrained`` as
            ``offload_folder``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    if model_path is None:
        # Resolved at call time (not as a default argument) so the constant is
        # looked up when the function runs.
        model_path = MODEL_PATH

    # 8-bit loading is explicitly disabled; fp32 CPU offload is enabled.
    # NOTE(review): the repo name suggests the weights are already stored in
    # bnb 4-bit form -- confirm whether this config is still required.
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=False,
        llm_int8_enable_fp32_cpu_offload=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.float16,
        offload_folder=offload_folder,
        quantization_config=bnb_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer
# Load once at import time; generate_response reads these module-level globals
# on every call instead of reloading the model.
model, tokenizer = load_model()
@spaces.GPU(duration=110)  # presumably the per-call GPU time budget -- confirm in the spaces docs
def generate_response(prompt, max_length=1024):
    """Generate a text answer for ``prompt`` with the module-level model.

    Args:
        prompt: Text to tokenize and feed to the model.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        skipped.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    # Prefer the GPU when torch reports one; otherwise stay on the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    encoded = encoded.to(device)
    generated = model.generate(**encoded, max_length=max_length)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
# Gradio UI: one multi-line question box in, one text box out, wired to
# generate_response. Only the prompt is exposed, so max_length keeps its default.
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(
            label="Enter your physics question",
            placeholder="Ask me anything about physics...",
            lines=5,
        ),
    ],
    outputs=gr.Textbox(label="Response", lines=10),
    title="Physics AI Assistant",
    # The link text comes from MODEL_PATH so it always matches MODEL_URL.
    description=(
        "Ask questions about physics concepts, and I'll provide detailed explanations.\n"
        f"Model: [{MODEL_PATH}]({MODEL_URL})"
    ),
    examples=[
        ["Give me a short introduction to renormalization group(RG) flow in physics?"],
        ["What is quantum entanglement?"],
        ["Explain the concept of gauge symmetry in physics."],
    ],
)

# Start the web server for the interface.
demo.launch()