import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Load the chat-tuned BLOOM model once at startup (network/disk I/O).
tokenizer = AutoTokenizer.from_pretrained("haidlir/bloom-chatml-id")
model = AutoModelForCausalLM.from_pretrained("haidlir/bloom-chatml-id")

# Model-facing system prompt (Indonesian) — kept byte-identical.
SYSTEM_PROMPT = "Kamu adalah BaGoEs, sebuah chatbot. Beri jawaban pendek dan singkat."


def predict(message, history):
    """Generate an assistant reply for gr.ChatInterface.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turn pairs supplied by gr.ChatInterface.

    Returns
    -------
    str
        The decoded model reply, special tokens removed, whitespace-stripped.
    """
    # Build the ChatML message list. FIX: the system message must come
    # FIRST — the original appended it after the history and just before
    # the final user turn, which violates the ChatML ordering the model
    # was fine-tuned on and weakens the system prompt.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # FIX: return_dict=True also yields the attention_mask, so generate()
    # does not have to infer it from the pad token (transformers warns and
    # may misbehave otherwise).
    model_inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    generated = model.generate(
        **model_inputs,
        generation_config=GenerationConfig(max_new_tokens=512),
    )

    # Decode only the newly generated tail, skipping the prompt tokens.
    prompt_len = model_inputs["input_ids"].shape[1]
    return tokenizer.decode(
        generated[0][prompt_len:], skip_special_tokens=True
    ).strip()


# Launch the chat UI at import time (script-style app, e.g. HF Spaces).
gr.ChatInterface(predict).launch()