billwang37 committed on
Commit
54b1ef1
·
verified ·
1 Parent(s): 29f5266

Create app.py

Browse files

transformers
peft
bitsandbytes
accelerate
torch
gradio
spaces

Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
5
+ from peft import PeftModel
6
+ import os
7
+
8
+ BASE_MODEL = "Qwen/Qwen2.5-Math-72B-Instruct"
9
+ ADAPTER_REPO = "billwang37/mathbio-qwen-72b-round2"
10
+ HF_TOKEN = os.environ.get("HF_TOKEN")
11
+
12
+ # Load in 4-bit to fit on ZeroGPU H200
13
+ bnb_config = BitsAndBytesConfig(
14
+ load_in_4bit=True,
15
+ bnb_4bit_compute_dtype=torch.bfloat16,
16
+ bnb_4bit_quant_type="nf4",
17
+ )
18
+
19
+ print("Loading tokenizer...")
20
+ tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, token=HF_TOKEN)
21
+
22
+ print("Loading base model in 4-bit...")
23
+ base_model = AutoModelForCausalLM.from_pretrained(
24
+ BASE_MODEL,
25
+ quantization_config=bnb_config,
26
+ torch_dtype=torch.bfloat16,
27
+ device_map="auto",
28
+ )
29
+
30
+ print("Loading LoRA adapter...")
31
+ model = PeftModel.from_pretrained(base_model, ADAPTER_REPO, token=HF_TOKEN)
32
+ model.eval()
33
+ print("Model ready.")
34
+
35
+ SYSTEM_PROMPT = "You are MathBioAgent, an expert AI assistant specialized in mathematical biology, epidemiology, operator learning, and partial differential equations."
36
+
37
@spaces.GPU(duration=120)
def chat(message, history):
    """Generate one assistant reply for the Gradio chat UI.

    Rebuilds the whole conversation (system prompt + prior turns + the new
    user message), renders it with the model's chat template, generates with
    sampling, and decodes only the newly produced tokens.
    """
    # Flatten gradio's (user, assistant) pair history into chat messages.
    conversation = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        conversation.extend(
            [
                {"role": "user", "content": user_turn},
                {"role": "assistant", "content": assistant_turn},
            ]
        )
    conversation.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=1024,
            temperature=0.3,
            do_sample=True,
            top_p=0.9,
        )

    # Drop the prompt tokens; decode only the model's continuation.
    prompt_len = encoded.input_ids.shape[1]
    return tokenizer.decode(generated[0][prompt_len:], skip_special_tokens=True)
58
+
59
# Example prompts shown beneath the chat input box.
_EXAMPLES = [
    "What is R0 for an SIR model with beta=0.4 and gamma=0.1?",
    "Derive the stability condition for the SEIR endemic equilibrium.",
    "Explain the Keller-Segel chemotaxis model.",
]

# Chat front-end; `chat` is invoked per user message with the running history.
demo = gr.ChatInterface(
    chat,
    examples=_EXAMPLES,
    title="MathBio AI — Specialized for Mathematical Biology",
    description="Ask questions about epidemic modeling, PDEs, operator learning, and mathematical biology. Based on Qwen2.5-Math-72B fine-tuned on 27K research examples.",
)

demo.launch()