import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Load the chat-tuned BLOOM model once at startup (network/disk I/O).
tokenizer = AutoTokenizer.from_pretrained("haidlir/bloom-chatml-id")
model = AutoModelForCausalLM.from_pretrained("haidlir/bloom-chatml-id")

# Model-facing system prompt (Indonesian) — kept byte-identical.
SYSTEM_PROMPT = "Kamu adalah BaGoEs, sebuah chatbot. Beri jawaban pendek dan singkat."


def predict(message, history):
    """Generate an assistant reply for gr.ChatInterface.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turn pairs supplied by gr.ChatInterface.

    Returns
    -------
    str
        The decoded model reply, special tokens removed, whitespace-stripped.
    """
    # Build the ChatML message list. FIX: the system message must come
    # FIRST — the original appended it after the history and just before
    # the final user turn, which violates the ChatML ordering the model
    # was fine-tuned on and weakens the system prompt.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # FIX: return_dict=True also yields the attention_mask, so generate()
    # does not have to infer it from the pad token (transformers warns and
    # may misbehave otherwise).
    model_inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    )
    generated = model.generate(
        **model_inputs,
        generation_config=GenerationConfig(max_new_tokens=512),
    )

    # Decode only the newly generated tail, skipping the prompt tokens.
    prompt_len = model_inputs["input_ids"].shape[1]
    return tokenizer.decode(
        generated[0][prompt_len:], skip_special_tokens=True
    ).strip()


# Launch the chat UI at import time (script-style app, e.g. HF Spaces).
gr.ChatInterface(predict).launch()