dotnet-runtime / app.py
kotlarmilos's picture
Add space
9613ebd
# import gradio as gr
# def greet(name):
# return "Hello " + name + "!!"
# demo = gr.Interface(fn=greet, inputs="text", outputs="text")
# demo.launch()
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import gradio as gr
# Checkpoint identifiers: the Hub id of the base model and the location of the
# LoRA adapter (a local directory here; a Hub repo name works too if pushed).
base_model = "microsoft/Phi-4-mini-instruct"
lora_path = "./phi4-lora-finetuned-10k"

tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)

# 4-bit NF4 weights with double quantization; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the quantized base model first, then attach the LoRA adapter to it.
# NOTE(review): trust_remote_code=True permits running model code shipped with
# the checkpoint repo -- confirm it is still required for this model.
base = AutoModelForCausalLM.from_pretrained(
    base_model,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(base, lora_path)
def generate(prompt: str) -> str:
    """Generate a sampled completion for ``prompt`` with the LoRA-adapted model.

    Args:
        prompt: Text to continue. It is tokenized as-is; no chat template is
            applied here.

    Returns:
        The decoded first output sequence with special tokens removed, or an
        empty string when ``prompt`` is empty or ``None``.
    """
    # A blank textbox gives the model nothing to continue from, so answer with
    # an empty string instead of running generation on an empty prompt.
    if not prompt:
        return ""

    # Encode the prompt and move the tensors to the device the model lives on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Sampling (not greedy decoding) at temperature 0.7, capped at 256 new tokens.
    output = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )
    # NOTE(review): output[0] presumably still starts with the prompt tokens, so
    # the returned text echoes the prompt -- confirm that this is intended.
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Wrap generate() in a text-in / text-out Gradio interface and start serving it.
demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()