"""Gradio demo that classifies Amharic news text into one of six categories.

The script loads the ``xlm-roberta-base`` sequence-classification model,
attaches a LoRA adapter to it, wraps both in a ``text-classification``
pipeline and exposes the pipeline through a small Gradio Blocks UI.
"""

import gradio as gr
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

model_id = "xlm-roberta-base"
peft_model_id = "rasyosef/xlm-roberta-base-lora-amharic-news-classification"

# The position of each category in this list is used as its class id below.
categories = ['ሀገር አቀፍ ዜና', 'መዝናኛ', 'ስፖርት', 'ቢዝነስ', 'ዓለም አቀፍ ዜና', 'ፖለቲካ']
id2label = {i: lbl for i, lbl in enumerate(categories)}
label2id = {lbl: i for i, lbl in enumerate(categories)}

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels=len(categories),  # 6
    id2label=id2label,
    label2id=label2id,
)
# Attach the LoRA adapter weights on top of the base model.
model.load_adapter(peft_model_id)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)


def predict(text: str) -> dict:
    """Classify a single piece of Amharic text.

    Args:
        text: The news text to classify.

    Returns:
        The first (and only) element of the pipeline output for ``text``;
        per the transformers documentation this is a dict holding the
        predicted ``label`` and its ``score``.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Classifying blank input would yield a meaningless label; surface a
        # clear message in the UI instead.
        raise gr.Error("Please enter some Amharic text to classify.")

    # truncation=True is forwarded to the tokenizer so that long articles are
    # cut to the model's maximum sequence length instead of failing inside
    # the model's forward pass.
    return classifier([text], truncation=True)[0]


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Amharic News Article Classification
        This RoBERTa model (xlm-roberta-base) was finetuned using Low-Rank Adaptation (LoRA) that classifies amharic news articles into one of the following 6 categories.
        - ሀገር አቀፍ ዜና (Local News)
        - መዝናኛ (Entertainment)
        - ስፖርት (Sports)
        - ቢዝነስ (Business)
        - ዓለም አቀፍ ዜና (International News)
        - ፖለቲካ (Politics)
        """
    )
    with gr.Row():
        with gr.Column():
            # Named input_text rather than `input` to avoid shadowing the builtin.
            input_text = gr.Textbox(
                label="Amharic text",
                placeholder="Enter text here",
                lines=3,
            )
            classify_btn = gr.Button(value="Classify")
        with gr.Column():
            output = gr.Textbox(label="Predicted class")

    classify_btn.click(predict, inputs=input_text, outputs=output)

    # Clickable sample inputs that fill the text box.
    examples = gr.Examples(
        examples=[
            "ኢትዮጵያ ፕሪምየር ሊግ 6ኛ ሳምንት የእሁድ ጨዋታዎች ቅድመ ዳሰሳ",
            "በአፄ ቴዎድሮስ የንግስና ቦታ ደረስጌ ማሪያም ተጀምሮ የቆመው የሙዚየሙ ግንባታ ተጠናቀቆ ስራ እንዲጀምር ነዋሪዎች ጠይቀዋል።",
        ],
        inputs=[input_text],
    )

# Only start the web server when the file is executed as a script, so that
# importing this module (e.g. to reuse `predict`) does not launch the app.
if __name__ == "__main__":
    demo.launch()