StevenLimcorn committed on
Commit
8a8ccdb
1 Parent(s): 76e95b2

Initial commit of LazarusNLP Demo

Browse files
__pycache__/script.cpython-311.pyc ADDED
Binary file (2.28 kB). View file
 
__pycache__/utils.cpython-311.pyc ADDED
Binary file (4.86 kB). View file
 
__pycache__/utils.cpython-39.pyc ADDED
Binary file (3.11 kB). View file
 
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils import (
2
+ SentenceSimilarity,
3
+ pos_tagging,
4
+ text_analysis,
5
+ text_interface,
6
+ sentence_similarity,
7
+ )
8
+ from script import details
9
+ from transformers import pipeline
10
+ import gradio as gr
11
+ from functools import partial
12
+
13
# Model registry for every demo tab.  Keys must match the keys of
# `details` in script.py, which supplies each tab's examples/description.
#
# FIX: the two text-classification pipelines were swapped — the
# "Sentiment Analysis" key loaded the *emotion* model
# (StevenLimcorn/indonesian-roberta-base-emotion-classifier) and the
# "Emotion Classifier" key loaded the *sentiment* model
# (w11wo/indonesian-roberta-base-sentiment-classifier).  The mapping now
# agrees with the descriptions in script.py's `details` dict.
pipes = {
    "Sentiment Analysis": pipeline(
        "text-classification",
        model="w11wo/indonesian-roberta-base-sentiment-classifier",
        tokenizer="w11wo/indonesian-roberta-base-sentiment-classifier",
    ),
    "Emotion Classifier": pipeline(
        "text-classification",
        model="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
        tokenizer="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
    ),
    "summarization": pipeline(
        "summarization",
        model="LazarusNLP/IndoNanoT5-base-IndoSum",
        tokenizer="LazarusNLP/IndoNanoT5-base-IndoSum",
    ),
    "sentence-similarity": SentenceSimilarity(model="LazarusNLP/all-indobert-base-v2"),
    "POS Tagging": pipeline(model="w11wo/indonesian-roberta-base-posp-tagger"),
}
32
+
33
if __name__ == "__main__":
    # NOTE(review): this file was recovered from a flattened diff, so the
    # nesting of the gr.Blocks layouts below is reconstructed — verify the
    # Row/Column placement against the original file.

    # Demos that share the generic single-textbox classifier layout built
    # by utils.text_interface.
    classifiers = ["Sentiment Analysis", "Emotion Classifier"]

    # Summarization tab — generated directly from the HF pipeline.
    summary_interface = gr.Interface.from_pipeline(
        pipes["summarization"],
        title="Summarization",
        examples=details["summarization"]["examples"],
        description=details["summarization"]["description"],
        allow_flagging="never",
    )

    # POS-tagging tab — shows each token highlighted with its predicted tag.
    pos_interface = gr.Interface(
        fn=partial(pos_tagging, pipe=pipes["POS Tagging"]),
        inputs=[
            gr.Textbox(placeholder="Masukan kalimat di sini...", label="Input Text"),
        ],
        outputs=[gr.HighlightedText()],
        title="POS Tagging",
        examples=details["POS Tagging"]["examples"],
        description=details["POS Tagging"]["description"],
        allow_flagging="never",
    )

    # Text-analysis tab — runs sentiment, emotion and POS tagging on one
    # input and shows all three results side by side.
    with gr.Blocks() as text_analysis_interface:
        gr.Markdown("# Text Analysis")
        gr.Markdown(details["Text Analysis"]["description"])
        input_text = gr.Textbox(lines=5, label="Input Text")
        with gr.Row():
            smsa = gr.Label(label="Sentiment Analysis")
            emot = gr.Label(label="Emotion Classification")
            pos = gr.HighlightedText(label="POS Tagging")
        btn = gr.Button("Analyze")
        # text_analysis returns (sentiment, emotion, pos) matching outputs.
        btn.click(
            fn=partial(text_analysis, pipes=pipes),
            inputs=[input_text],
            outputs=[smsa, emot, pos],
        )
        gr.Examples(
            details["Text Analysis"]["examples"],
            inputs=input_text,
            outputs=[smsa, emot, pos],
        )

    # Document-search tab — semantic similarity between a query and the
    # uploaded .txt documents (utils.sentence_similarity).
    with gr.Blocks() as sentence_similarity_interface:
        gr.Markdown("# Document Search 🔍")
        gr.Markdown(details["sentence-similarity"]["description"])
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(lines=5, label="Query")
                file_input = gr.File(
                    label="Documents", file_types=[".txt"], file_count="multiple"
                )
                button = gr.Button("Search...")
            output = gr.Label()
        button.click(
            fn=partial(sentence_similarity, pipe=pipes["sentence-similarity"]),
            inputs=[input_text, file_input],
            outputs=[output],
        )

    # All tabs plus their titles, in display order; the two lists must stay
    # index-aligned for gr.TabbedInterface below.
    demo_interface = {
        "demo": [
            text_interface(
                pipes[name],
                details[name]["examples"],
                name,
                name,
                details[name]["description"],
            )
            for name in classifiers
        ]
        + [
            sentence_similarity_interface,
            summary_interface,
            pos_interface,
            text_analysis_interface,
        ],
        "titles": classifiers
        + ["Document Search", "Summarization", "POS Tagging", "Text Analysis"],
    }

    # Earlier layout attempt kept for reference:
    # with gr.Blocks() as demo:
    #     with gr.Column():
    #         gr.Markdown("# Title")
    #         gr.TabbedInterface(
    #             demo_interface["demo"], demo_interface["titles"], theme="soft"
    #         )

    demo = gr.TabbedInterface(
        demo_interface["demo"], demo_interface["titles"], theme="soft"
    )
    # debug=True blocks and streams errors to the console — useful in Spaces.
    demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.19.1
2
+ scipy==1.12.0
3
+ sentence_transformers==2.3.1
4
+ transformers==4.37.2
script.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Per-demo UI metadata, keyed by the same names as the `pipes` dict in
# app.py.  Each entry carries:
#   "examples"    — example inputs pre-filled in the demo tab (may be empty),
#   "description" — the markdown blurb shown above the tab's inputs.
details = {
    "Sentiment Analysis": {
        # Indonesian app-review snippets covering negative and positive tone.
        "examples": [
            "saya kecewa karena pengeditan biodata penumpang dilakukan by sistem tanpa konfirmasi dan solusi permasalahan nya pun dianggap sepele karena dibiarkan begitu saja sedang pelayanan pelanggan yang sudah berkali-berkali dihubungi pun hanya seperti mengulur waktu.",
            "saya sudah transfer ratusan ribu dan sesuai nominal transfer. tapi tiket belum muncul juga. harus diwaspadai ini aplikasi ini.",
            "keren sekali aplikasi ini bisa menunjukan data diri secara detail, sangat di rekomendasikan untuk di pakai.",
        ],
        "description": "A sentiment-text-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's SmSA dataset consisting of Indonesian comments and reviews.",
    },
    "Emotion Classifier": {
        # Informal Indonesian tweets with varied emotional register.
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "description": "An emotion classifier based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's EmoT dataset",
    },
    "summarization": {
        # No canned examples — users paste their own article text.
        "examples": [],
        "description": "This model is a fine-tuned version of LazarusNLP/IndoNanoT5-base on the indonlg dataset.",
    },
    "POS Tagging": {
        # Same example sentences as the Emotion Classifier tab.
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "description": "A part-of-speech token-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's POSP dataset consisting of tag-labelled news.",
    },
    "Text Analysis": {
        # Same example sentences as the Emotion Classifier tab.
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "description": "A tool to showcase the full capabilities of text analysis LazarusNLP has to offer.",
    },
    "sentence-similarity": {
        # Examples are file uploads, so none are pre-filled here.
        "examples": [],
        "description": "A semantic search tool to get the most related documents 📖 based on user's query.",
    },
}
utils.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from functools import partial
3
+ from transformers import pipeline
4
+ from sentence_transformers import SentenceTransformer, util
5
+ from scipy.special import softmax
6
+ import os
7
+
8
+
9
class SentenceSimilarity:
    """Semantic-search helper wrapping a SentenceTransformer bi-encoder.

    Calling an instance with a query and a corpus yields one
    softmax-normalised relevance score per corpus entry, ordered by the
    entry's position in the corpus.
    """

    def __init__(self, model: str):
        # `model` is a model name or local path understood by
        # SentenceTransformer.
        self.model = SentenceTransformer(model)

    def __call__(self, query: str, corpus: list[str]):
        embedded_query = self.model.encode(query)
        embedded_corpus = self.model.encode(corpus)
        # semantic_search ranks hits by score; take the hits for our single
        # query and re-order them so the i-th score lines up with the i-th
        # corpus document.
        hits = util.semantic_search(embedded_query, embedded_corpus)[0]
        hits.sort(key=lambda hit: hit["corpus_id"])
        return softmax([hit["score"] for hit in hits])
21
+
22
+
23
# Sentence Similarity
def sentence_similarity(text: str, documents: list[str], pipe: "SentenceSimilarity"):
    """Score each uploaded document against a query for the Document Search tab.

    Args:
        text: the user's search query.
        documents: paths to plain-text files (as produced by gr.File).
        pipe: a SentenceSimilarity instance (or any callable accepting
            ``query=`` and ``corpus=`` and returning one score per document).

    Returns:
        Mapping of document basename -> relevance score, in a shape gr.Label
        can display.
    """
    doc_texts = []
    for doc in documents:
        # FIX: the original opened each file without ever closing it (handle
        # leak) and relied on the platform default encoding.
        with open(doc, "r", encoding="utf-8") as f:
            doc_texts.append(f.read())
    answer = pipe(query=text, corpus=doc_texts)
    return {os.path.basename(doc): prob for doc, prob in zip(documents, answer)}
31
+
32
+
33
# Text Analysis
def cls_inference(input: list[str], pipe) -> dict:
    """Run a text-classification pipeline and shape its output for gr.Label.

    FIX: the original signature annotated the return as ``str`` although a
    dict is returned, and annotated ``pipe`` with ``pipeline`` — a factory
    *function*, not a type.

    Args:
        input: text (or batch of texts) to classify.  NOTE(review): despite
            the list annotation, only the scores of the first item are used.
        pipe: a transformers text-classification pipeline, or any callable
            with the same ``(input, top_k=None)`` signature.

    Returns:
        Mapping of label -> score for the first input item.
    """
    # top_k=None asks the pipeline for scores over *all* labels, not just
    # the single best one.
    results = pipe(input, top_k=None)
    return {x["label"]: x["score"] for x in results[0]}
37
+
38
+
39
def text_interface(
    pipe, examples: list[str], output_label: str, title: str, desc: str
):
    """Build the standard single-textbox classification demo around ``pipe``.

    FIX: the original annotated ``pipe`` with ``pipeline`` — a factory
    *function*, not a type — so the annotation was dropped.

    Args:
        pipe: a transformers text-classification pipeline.
        examples: example inputs shown under the textbox.
        output_label: label on the gr.Label output component.
        title: tab title.
        desc: markdown description shown above the inputs.

    Returns:
        A gr.Interface wired to ``cls_inference`` with ``pipe`` pre-bound.
    """
    return gr.Interface(
        fn=partial(cls_inference, pipe=pipe),
        inputs=[
            gr.Textbox(lines=5, label="Input Text"),
        ],
        title=title,
        description=desc,
        outputs=[gr.Label(label=output_label)],
        examples=examples,
        allow_flagging="never",
    )
53
+
54
+
55
# POSP
def pos_tagging(text: str, pipe):
    """Run a token-classification pipeline and package the result.

    FIX: the original annotated ``pipe`` with ``pipeline`` — a factory
    *function*, not a type — so the annotation was dropped.

    Args:
        text: the sentence to tag.
        pipe: a transformers token-classification pipeline, or any callable
            returning a list of entity dicts for ``text``.

    Returns:
        A ``{"text": ..., "entities": ...}`` dict in the shape
        gr.HighlightedText expects.
    """
    output = pipe(text)
    return {"text": text, "entities": output}
59
+
60
+
61
# Text Analysis
def text_analysis(text, pipes: dict):
    """Run sentiment, emotion and POS pipelines on one text.

    Args:
        text: the input sentence.
        pipes: the app's pipeline registry; must contain the keys
            "Sentiment Analysis", "Emotion Classifier" and "POS Tagging".

    Returns:
        A ``(sentiment, emotion, pos)`` tuple matching the three output
        components of the Text Analysis tab.
    """
    sentiment = cls_inference(text, pipes["Sentiment Analysis"])
    emotion = cls_inference(text, pipes["Emotion Classifier"])
    tags = pos_tagging(text, pipes["POS Tagging"])
    return sentiment, emotion, tags