File size: 1,515 Bytes
d49f601
f308f42
e544d42
f165e87
17018a6
84de4b9
5fcd6db
f165e87
 
17018a6
3ee1657
f308f42
 
 
 
 
5fcd6db
3ee1657
b27ed41
84de4b9
b27ed41
84de4b9
ef44ed5
f165e87
e544d42
 
f165e87
84de4b9
e544d42
 
96d175f
e544d42
 
 
f308f42
 
e88841b
 
e602433
c22d12b
e544d42
4db03af
e544d42
e88841b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig,BitsAndBytesConfig

import torch

# Hub id of the fine-tuned causal LM served by this app.
model_id = "truongghieu/deci-finetuned_Prj2"

# Pick the device once; generate_text() moves inputs onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 4-bit NF4 quantization settings; only used on the GPU branch below
# (bitsandbytes quantization requires CUDA).
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16", bnb_4bit_use_double_quant=True
    )


tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# GPU: load the quantized Prj2 checkpoint; CPU: load full-precision weights.
# NOTE(review): the CPU branch loads a *different* repo ("deci-finetuned",
# not "deci-finetuned_Prj2") — presumably its non-quantized counterpart;
# confirm this is intentional and not a stale id.
if torch.cuda.is_available():
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, quantization_config=bnb_config)
else:
    model = AutoModelForCausalLM.from_pretrained("truongghieu/deci-finetuned", trust_remote_code=True)


# Decoding settings shared by every generate_text() call.
# NOTE(review): penalty_alpha selects contrastive search only when
# do_sample=False; with do_sample=True it is presumably ignored by
# transformers — confirm whether sampling or contrastive search was intended.
generation_config = GenerationConfig(
    penalty_alpha=0.6,
    do_sample=True,          # sample instead of greedy decoding
    top_k=3,                 # restrict sampling to the 3 most likely tokens
    temperature=0.5,         # < 1.0 sharpens the distribution
    repetition_penalty=1.2,  # discourage repeating earlier tokens
    max_new_tokens=50,       # cap on generated (not prompt) tokens
    pad_token_id=tokenizer.eos_token_id
)



# Define a function that takes a text input and generates a text output
def generate_text(text):
    """Generate a model reply for *text*.

    Wraps the user text in the '###Human: "..."' prompt template the model
    was fine-tuned on, runs generation with the module-level
    ``generation_config``, and returns the decoded sequence (prompt
    included, special tokens stripped).
    """
    input_text = f'###Human: \"{text}\"'
    # Tokenize via __call__ so we also get an attention mask.  Because
    # pad_token_id is set to eos_token_id in generation_config, generate()
    # cannot reliably infer the mask on its own — passing it explicitly
    # avoids the transformers warning and unreliable results.
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    output_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        generation_config=generation_config,
    )
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return output_text

# Minimal Gradio UI: one text box in, one text box out, backed by
# generate_text().  launch() blocks and serves the app.
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text")
iface.launch()