import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
import torch
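# Gradio demo for a fine-tuned causal LM: loads the checkpoint below in 4-bit
# on GPU (or in full precision on CPU) and serves a simple text-in/text-out UI.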
model_id = "truongghieu/deci-finetuned_Prj2"
# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 4-bit quantization settings (only used when a GPU is available)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16", bnb_4bit_use_double_quant=True
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# Load the 4-bit quantized model when a GPU is available;
# otherwise fall back to the non-quantized fine-tuned model on CPU
if torch.cuda.is_available():
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, quantization_config=bnb_config)
else:
    model = AutoModelForCausalLM.from_pretrained("truongghieu/deci-finetuned", trust_remote_code=True)
generation_config = GenerationConfig(
    penalty_alpha=0.6,  # contrastive-search penalty; has no effect while do_sample=True
    do_sample=True,
    top_k=3,
    temperature=0.5,
    repetition_penalty=1.2,
    max_new_tokens=50,
    pad_token_id=tokenizer.eos_token_id,
)
# Define a function that takes a text input and generates a text output
def generate_text(text):
    input_text = f'###Human: "{text}"'
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    output_ids = model.generate(input_ids, generation_config=generation_config)
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return output_text
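# Note: the decoded output_text still contains the prompt, since output_ids
# holds the input tokens followed by the generated ones; decoding
# output_ids[0][input_ids.shape[-1]:] instead would return only the new tokens.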
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text")
iface.launch()
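# Passing share=True to launch() would additionally expose a temporary
# public URL, e.g. iface.launch(share=True)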