"""Gradio chat demo backed by the ``facebook/blenderbot-400M-distill`` model."""

from transformers import BlenderbotTokenizer, BlenderbotForConditionalGeneration
import torch
import gradio as gr

mname = "facebook/blenderbot-400M-distill"
model = BlenderbotForConditionalGeneration.from_pretrained(mname)
tokenizer = BlenderbotTokenizer.from_pretrained(mname)

# Number of most recent prompt tokens kept when the prompt is too long.
MAX_INPUT_TOKENS = 128


def take_last_tokens(inputs, note_history, history, max_tokens=MAX_INPUT_TOKENS):
    """Keep only the last ``max_tokens`` tokens of an over-long prompt.

    When ``inputs['input_ids']`` has more than ``max_tokens`` columns, the
    ``input_ids`` and ``attention_mask`` entries of ``inputs`` are replaced
    (in place, on the mapping itself) by the trailing ``max_tokens`` columns
    of their first row, the first two space-separated pieces of
    ``note_history[0]`` are removed, and the oldest entry of ``history`` is
    dropped.  Otherwise all three arguments are returned untouched.

    Args:
        inputs: Tokenizer output holding 2-D ``input_ids`` and
            ``attention_mask`` tensors.
        note_history: Single-element list with the flattened conversation text.
        history: List of ``(user_message, bot_response)`` turns.
        max_tokens: Maximum number of tokens to keep; must be positive.

    Returns:
        The tuple ``(inputs, note_history, history)`` after trimming.

    Raises:
        ValueError: If ``max_tokens`` is smaller than 1.
    """
    if max_tokens < 1:
        # A slice of ``-0:`` would silently keep everything instead of nothing.
        raise ValueError("max_tokens must be a positive integer")

    if inputs['input_ids'].shape[1] > max_tokens:
        # ``[:1, ...]`` keeps only the first row, so the result stays (1, N).
        inputs['input_ids'] = inputs['input_ids'][:1, -max_tokens:]
        inputs['attention_mask'] = inputs['attention_mask'][:1, -max_tokens:]
        # NOTE(review): turns are joined with a plain space, so this removes
        # two *words*, not two turns -- confirm whether a dedicated turn
        # separator was intended here.
        note_history = [' '.join(note_history[0].split(' ')[2:])]
        history = history[1:]
    return inputs, note_history, history


def add_note_to_history(note, note_history):
    """Return the conversation text extended with ``note``.

    Args:
        note: Text to add at the end of the conversation.
        note_history: List of text pieces already in the conversation.  It is
            not modified.

    Returns:
        A single-element list holding every piece of ``note_history`` followed
        by ``note``, joined with single spaces.
    """
    return [' '.join([*note_history, note])]


title = "Blenderbot Tokenizer with Conditional Generation State of the Art"
description = """Blenderbot"""


def chat(message, history):
    """Generate a reply to ``message`` and record the turn in ``history``.

    Args:
        message: The user's new message.
        history: List of earlier ``(user_message, bot_response)`` turns, or a
            falsy value when the conversation is just starting.

    Returns:
        The updated history twice: once for the chatbot display and once for
        the session state.
    """
    history = history or []

    # Flatten the earlier turns and the new message into one prompt string.
    previous_turns = [str(turn[0]) + ' ' + str(turn[1]) for turn in history]
    prompt = add_note_to_history(message, previous_turns)

    inputs = tokenizer(prompt, return_tensors="pt")
    # Only the trimmed inputs and history are needed from here on.
    inputs, _, history = take_last_tokens(inputs, prompt, history)

    reply_ids = model.generate(**inputs)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]

    # Store the whole message and the whole reply; splitting the flattened
    # text on spaces would only recover the last two words.
    history.append((message, response))
    return history, history


gr.Interface(
    fn=chat,
    theme="huggingface",
    css=".footer {display:none !important}",
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
    title=title,
    description=description,
    allow_flagging="never",
).launch()