# Hugging Face Space app: chat demo for janny127/autotrain-7qmts-cs1er.
# (The "Spaces: Sleeping" lines were status text scraped from the Spaces
# web page, not part of the program.)
import gradio as gr
import torch  # needed for torch.float32 below; was missing in the original
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
# Model checkpoint for the chat assistant.
model = "janny127/autotrain-7qmts-cs1er"
tokenizer = AutoTokenizer.from_pretrained(model)

# BUG FIX: generate_answer() stops generation on CHAT_EOS_TOKEN_ID, but the
# original file never defined it (NameError on first use). Derive it from the
# tokenizer's own end-of-sequence token.
CHAT_EOS_TOKEN_ID = tokenizer.eos_token_id

# NOTE(review): this deliberately rebinds the imported `pipeline` factory to
# the constructed pipeline object; the rest of the file calls the global
# `pipeline(...)`, so the name is kept for compatibility.
pipeline = pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float32,  # CPU-friendly dtype for a Spaces container
    device_map="auto",
)
def generate_answer(query, sample_num=3):
    """Generate candidate answers for *query* with the global text pipeline.

    Args:
        query: The user's question, inserted into a ChatML-style template.
        sample_num: Number of sampled completions to return.

    Returns:
        list[str]: ``sample_num`` generated answers with the prompt
        scaffolding stripped from each.
    """
    # ChatML-style chat template wrapped around the raw query.
    formatted_prompt = (
        f"<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"
    )
    sequences = pipeline(
        formatted_prompt,
        do_sample=True,
        top_k=50,
        top_p=0.9,
        num_return_sequences=sample_num,
        repetition_penalty=1.1,
        max_new_tokens=150,
        # BUG FIX: the original passed CHAT_EOS_TOKEN_ID, which is never
        # defined in this file and raised NameError at call time; stop at
        # the tokenizer's end-of-sequence token instead.
        eos_token_id=tokenizer.eos_token_id,
    )
    # Each result echoes the prompt; strip it so only the reply remains.
    return [seq["generated_text"].replace(formatted_prompt, "") for seq in sequences]
def _chat_fn(message, history):
    """Gradio adapter: ChatInterface calls fn(message, history).

    BUG FIX: the original passed generate_answer directly, so the chat
    history list landed in `sample_num` (TypeError inside the pipeline),
    and the list return value is not a valid ChatInterface response.
    Join the sampled answers into one displayable string instead.
    """
    return "\n\n".join(generate_answer(message))


interface = gr.ChatInterface(
    fn=_chat_fn,
    stop_btn=None,
)

with gr.Blocks() as demo:
    interface.render()

demo.launch()