# Trial / app.py
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
# Base model and the QLoRA adapter fine-tuned on top of it
base_model = "TinyPixel/Llama-2-7B-bf16-sharded"
tuned_adapter = "newronai/llama-2-7b-QLoRA-Trial1"
# Optional 4-bit quantization config (uncomment to load the base model in 4-bit):
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# bnb_config = BitsAndBytesConfig()  # alternative: default config, no quantization
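# Note: 4-bit loading via bitsandbytes generally requires a CUDA GPU; on
# CPU-only hardware the full-precision path below is the safer default.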
# Load the adapter config (not used directly below)
config = PeftConfig.from_pretrained(tuned_adapter)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    cache_dir="cache",
    # quantization_config=bnb_config,
)
# Attach the fine-tuned LoRA adapter on top of the base model
model = PeftModel.from_pretrained(model, tuned_adapter)
print("Model Downloaded")
tokenizer = AutoTokenizer.from_pretrained(base_model, cache_dir="cache")
# LLaMA tokenizers ship without a pad token, so reuse EOS for padding
tokenizer.pad_token = tokenizer.eos_token
print("Tokenizer Ready")
def question_answer(context, question):
    # Simple prompt template joining both inputs (an assumption; the exact
    # prompt format used during fine-tuning is not known from this script)
    prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"
    input_tokens = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    output = model.generate(input_tokens, max_new_tokens=128)
    output_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    return output_text
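# Example call with hypothetical inputs, once the model has finished loading:
# question_answer("Llama 2 is a family of large language models released by Meta.",
#                 "Who released Llama 2?")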
gr.Interface(
    fn=question_answer,
    inputs=[gr.Textbox(lines=7, label="Context Paragraph"),
            gr.Textbox(lines=2, label="Question")],
    outputs=gr.Textbox(label="Answer"),
).launch()