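# Gradio demo: question answering with the QLoRA adapter
# newronai/llama-2-7b-QLoRA-Trial1 applied on top of a sharded
# Llama-2-7B base model (TinyPixel/Llama-2-7B-bf16-sharded), loaded in 8-bit.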
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
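
# Base checkpoint and the QLoRA adapter fine-tuned on top of it.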
base_model = "TinyPixel/Llama-2-7B-bf16-sharded"
tuned_adapter = "newronai/llama-2-7b-QLoRA-Trial1"

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# bnb_config = BitsAndBytesConfig()
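# The commented-out BitsAndBytesConfig above would load the base model in
# 4-bit NF4 with float16 compute instead of the 8-bit path used below; to
# try it, pass quantization_config=bnb_config in place of load_in_8bit=True.
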
config = PeftConfig.from_pretrained(tuned_adapter)
# Load the base model in 8-bit (bitsandbytes); device_map is required for
# 8-bit loading, and cache_dir (not use_cache) sets the download cache path.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    cache_dir="cache",
    load_in_8bit=True,
    device_map="auto",
    # quantization_config=bnb_config
)
# Attach the LoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(model, tuned_adapter)
print("Model Downloaded")
tokenizer = AutoTokenizer.from_pretrained(base_model, cache_dir="cache")
# Llama has no pad token by default; reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token
print("Tokenizer Ready")
def question_answer(context, question):
    # Combine the context paragraph and the question into a single prompt
    # (the original encoded only the question; this template is one simple choice).
    prompt = f"{context}\n\nQuestion: {question}\nAnswer:"
    input_tokens = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    output = model.generate(input_tokens, max_new_tokens=256)
    output_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    return output_text
gr.Interface(
    fn=question_answer,
    inputs=[
        gr.Textbox(lines=7, label="Context Paragraph"),
        gr.Textbox(lines=2, label="Question"),
    ],
    outputs=gr.Textbox(label="Answer"),
).launch()
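# Note: input ids are moved to "cuda" above, so a GPU runtime is required.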