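# Gradio demo: serve microsoft/phi-2 as a simple text-generation app.
# The model is loaded once at startup with 4-bit NF4 quantization so it
# fits on a single consumer GPU.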
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "microsoft/phi-2"

# 4-bit NF4 quantization with fp16 compute keeps phi-2 within a few GB of VRAM.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
# Load the model once, quantized. The original script loaded it a second time
# in full precision and generated from that copy, which silently bypassed the
# quantization and doubled memory use.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="cuda:0",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id  # phi-2 defines no pad token

def generate_answer(question):
    # Move the tokenized prompt to the same device as the quantized model.
    inputs = tokenizer(question, return_tensors="pt").to(model.device)
    # max_new_tokens bounds the generated text regardless of prompt length;
    # max_length would count the prompt tokens against the budget.
    outputs = model.generate(
        **inputs,
        max_new_tokens=250,
        num_return_sequences=1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

iface = gr.Interface(
    fn=generate_answer,
    inputs="text",
    outputs="text",
    title="The Art of Prompt Engineering",
    description="Define your prompt, preferably in German",
)

iface.launch(share=True)  # share=True exposes the app via a temporary public URL