from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

# Single checkpoint identifier handed to both from_pretrained calls, so the
# tokenizer and the model cannot drift apart.
_CHECKPOINT = "microsoft/GODEL-v1_1-base-seq2seq"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
from IPython.core import history
def predict(input, history=None):
    """Generate one chatbot reply and return the extended conversation.

    The previous exchanges and the new user message are flattened into a
    single dialog string (turns separated by ' EOS '), prefixed with a fixed
    instruction, tokenized, and passed to ``model.generate`` with sampling
    enabled. The decoded reply is appended to the conversation as an
    ``(input, response)`` pair.

    Args:
        input: The new user message. The name shadows the builtin but is kept
            so existing keyword callers keep working.
        history: Earlier exchanges, each a 2-item sequence of
            ``(user_message, bot_response)``. ``None`` or an empty sequence
            starts a fresh conversation. The passed-in object is not mutated.

    Returns:
        A ``(history, history)`` tuple holding the same updated list twice,
        matching the two outputs the Gradio interface in this file declares
        ("chatbot" and "state").
    """
    instruction = 'Instruction: given a dialog context, you need to response empathically'
    knowledge = ''
    top_p = 0.9
    min_length = 8
    max_length = 64

    # A `[]` default would be created once and shared by every call, leaking
    # one conversation into the next. Copying also keeps the caller's list
    # untouched; the updated conversation is handed back via the return value.
    history = list(history) if history else []

    # Flatten [(user, bot), ...] into [user, bot, user, bot, ...]. Unlike
    # `sum(history, ())`, this is linear and accepts list exchanges as well
    # as tuple exchanges.
    turns = [utterance for exchange in history for utterance in exchange]
    turns.append(input)
    dialog = ' EOS '.join(turns)
    print(dialog)

    # `knowledge` is empty, so the query ends with a trailing space; the
    # format is left exactly as before so the model sees the same prompt.
    query = f"{instruction} [CONTEXT] {dialog} {knowledge}"

    input_ids = tokenizer.encode(query, return_tensors='pt')
    generated = model.generate(
        input_ids,
        min_length=min_length,
        max_length=max_length,
        top_p=top_p,
        do_sample=True,
    ).tolist()

    response = tokenizer.decode(generated[0], skip_special_tokens=True)
    history.append((input, response))

    return history, history


import gradio as gr
# Wire `predict` to the UI: a text box and a state value go in; a chatbot
# transcript and the updated state come out.
demo = gr.Interface(
    fn=predict,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
)
demo.launch(debug=True, share=True)