StevenLimcorn committed on
Commit
8a8ccdb
1 Parent(s): 76e95b2

Initial commit of LazarusNLP Demo

Browse files
__pycache__/script.cpython-311.pyc ADDED
Binary file (2.28 kB). View file
 
__pycache__/utils.cpython-311.pyc ADDED
Binary file (4.86 kB). View file
 
__pycache__/utils.cpython-39.pyc ADDED
Binary file (3.11 kB). View file
 
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils import (
2
+ SentenceSimilarity,
3
+ pos_tagging,
4
+ text_analysis,
5
+ text_interface,
6
+ sentence_similarity,
7
+ )
8
+ from script import details
9
+ from transformers import pipeline
10
+ import gradio as gr
11
+ from functools import partial
12
+
13
# Model registry for every demo tab.  Keys must match the keys of
# `details` in script.py, which supplies each tab's examples/description.
#
# FIX: the two text-classification pipelines were swapped — the
# "Sentiment Analysis" key loaded the *emotion* model
# (StevenLimcorn/indonesian-roberta-base-emotion-classifier) and the
# "Emotion Classifier" key loaded the *sentiment* model
# (w11wo/indonesian-roberta-base-sentiment-classifier).  The mapping now
# agrees with the descriptions in script.py's `details` dict.
pipes = {
    "Sentiment Analysis": pipeline(
        "text-classification",
        model="w11wo/indonesian-roberta-base-sentiment-classifier",
        tokenizer="w11wo/indonesian-roberta-base-sentiment-classifier",
    ),
    "Emotion Classifier": pipeline(
        "text-classification",
        model="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
        tokenizer="StevenLimcorn/indonesian-roberta-base-emotion-classifier",
    ),
    "summarization": pipeline(
        "summarization",
        model="LazarusNLP/IndoNanoT5-base-IndoSum",
        tokenizer="LazarusNLP/IndoNanoT5-base-IndoSum",
    ),
    "sentence-similarity": SentenceSimilarity(model="LazarusNLP/all-indobert-base-v2"),
    "POS Tagging": pipeline(model="w11wo/indonesian-roberta-base-posp-tagger"),
}
32
+
33
if __name__ == "__main__":
    # NOTE(review): this file was recovered from a flattened diff, so the
    # nesting of the gr.Blocks layouts below is reconstructed — verify the
    # Row/Column placement against the original file.

    # Demos that share the generic single-textbox classifier layout built
    # by utils.text_interface.
    classifiers = ["Sentiment Analysis", "Emotion Classifier"]

    # Summarization tab — generated directly from the HF pipeline.
    summary_interface = gr.Interface.from_pipeline(
        pipes["summarization"],
        title="Summarization",
        examples=details["summarization"]["examples"],
        description=details["summarization"]["description"],
        allow_flagging="never",
    )

    # POS-tagging tab — shows each token highlighted with its predicted tag.
    pos_interface = gr.Interface(
        fn=partial(pos_tagging, pipe=pipes["POS Tagging"]),
        inputs=[
            gr.Textbox(placeholder="Masukan kalimat di sini...", label="Input Text"),
        ],
        outputs=[gr.HighlightedText()],
        title="POS Tagging",
        examples=details["POS Tagging"]["examples"],
        description=details["POS Tagging"]["description"],
        allow_flagging="never",
    )

    # Text-analysis tab — runs sentiment, emotion and POS tagging on one
    # input and shows all three results side by side.
    with gr.Blocks() as text_analysis_interface:
        gr.Markdown("# Text Analysis")
        gr.Markdown(details["Text Analysis"]["description"])
        input_text = gr.Textbox(lines=5, label="Input Text")
        with gr.Row():
            smsa = gr.Label(label="Sentiment Analysis")
            emot = gr.Label(label="Emotion Classification")
            pos = gr.HighlightedText(label="POS Tagging")
        btn = gr.Button("Analyze")
        # text_analysis returns (sentiment, emotion, pos) matching outputs.
        btn.click(
            fn=partial(text_analysis, pipes=pipes),
            inputs=[input_text],
            outputs=[smsa, emot, pos],
        )
        gr.Examples(
            details["Text Analysis"]["examples"],
            inputs=input_text,
            outputs=[smsa, emot, pos],
        )

    # Document-search tab — semantic similarity between a query and the
    # uploaded .txt documents (utils.sentence_similarity).
    with gr.Blocks() as sentence_similarity_interface:
        gr.Markdown("# Document Search 🔍")
        gr.Markdown(details["sentence-similarity"]["description"])
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(lines=5, label="Query")
                file_input = gr.File(
                    label="Documents", file_types=[".txt"], file_count="multiple"
                )
                button = gr.Button("Search...")
            output = gr.Label()
        button.click(
            fn=partial(sentence_similarity, pipe=pipes["sentence-similarity"]),
            inputs=[input_text, file_input],
            outputs=[output],
        )

    # All tabs plus their titles, in display order; the two lists must stay
    # index-aligned for gr.TabbedInterface below.
    demo_interface = {
        "demo": [
            text_interface(
                pipes[name],
                details[name]["examples"],
                name,
                name,
                details[name]["description"],
            )
            for name in classifiers
        ]
        + [
            sentence_similarity_interface,
            summary_interface,
            pos_interface,
            text_analysis_interface,
        ],
        "titles": classifiers
        + ["Document Search", "Summarization", "POS Tagging", "Text Analysis"],
    }

    # Earlier layout attempt kept for reference:
    # with gr.Blocks() as demo:
    #     with gr.Column():
    #         gr.Markdown("# Title")
    #         gr.TabbedInterface(
    #             demo_interface["demo"], demo_interface["titles"], theme="soft"
    #         )

    demo = gr.TabbedInterface(
        demo_interface["demo"], demo_interface["titles"], theme="soft"
    )
    # debug=True blocks and streams errors to the console — useful in Spaces.
    demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.19.1
2
+ scipy==1.12.0
3
+ sentence_transformers==2.3.1
4
+ transformers==4.37.2
script.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Per-demo UI metadata, keyed by the same names as the `pipes` dict in
# app.py.  Each entry carries:
#   "examples"    — example inputs pre-filled in the demo tab (may be empty),
#   "description" — the markdown blurb shown above the tab's inputs.
details = {
    "Sentiment Analysis": {
        # Indonesian app-review snippets covering negative and positive tone.
        "examples": [
            "saya kecewa karena pengeditan biodata penumpang dilakukan by sistem tanpa konfirmasi dan solusi permasalahan nya pun dianggap sepele karena dibiarkan begitu saja sedang pelayanan pelanggan yang sudah berkali-berkali dihubungi pun hanya seperti mengulur waktu.",
            "saya sudah transfer ratusan ribu dan sesuai nominal transfer. tapi tiket belum muncul juga. harus diwaspadai ini aplikasi ini.",
            "keren sekali aplikasi ini bisa menunjukan data diri secara detail, sangat di rekomendasikan untuk di pakai.",
        ],
        "description": "A sentiment-text-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's SmSA dataset consisting of Indonesian comments and reviews.",
    },
    "Emotion Classifier": {
        # Informal Indonesian tweets with varied emotional register.
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "description": "An emotion classifier based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's EmoT dataset",
    },
    "summarization": {
        # No canned examples — users paste their own article text.
        "examples": [],
        "description": "This model is a fine-tuned version of LazarusNLP/IndoNanoT5-base on the indonlg dataset.",
    },
    "POS Tagging": {
        # Same example sentences as the Emotion Classifier tab.
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "description": "A part-of-speech token-classification model based on the RoBERTa model. The model was originally the pre-trained Indonesian RoBERTa Base model, which is then fine-tuned on indonlu's POSP dataset consisting of tag-labelled news.",
    },
    "Text Analysis": {
        # Same example sentences as the Emotion Classifier tab.
        "examples": [
            "Siapa sih di dunia yg ngga punya hater? Rasul yg mulia aja punya. Budha aja punya. Nabi Isa aja punya. Nah apalagi eloh ama gueh .... ya kaaan",
            "saya ganteng, kalau tidak-suka mati saja kamu",
            "Bahaha.. dia ke kasir after me. Sambil ngangkat keresek belanjaanku, masih sempet liat mas nya nyodorin barang belanjaannya",
        ],
        "description": "A tool to showcase the full capabilities of text analysis LazarusNLP has to offer.",
    },
    "sentence-similarity": {
        # Examples are file uploads, so none are pre-filled here.
        "examples": [],
        "description": "A semantic search tool to get the most related documents 📖 based on user's query.",
    },
}
utils.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from functools import partial
3
+ from transformers import pipeline
4
+ from sentence_transformers import SentenceTransformer, util
5
+ from scipy.special import softmax
6
+ import os
7
+
8
+
9
class SentenceSimilarity:
    """Semantic-search helper wrapping a SentenceTransformer bi-encoder.

    Calling an instance with a query and a corpus yields one
    softmax-normalised relevance score per corpus entry, ordered by the
    entry's position in the corpus.
    """

    def __init__(self, model: str):
        # `model` is a model name or local path understood by
        # SentenceTransformer.
        self.model = SentenceTransformer(model)

    def __call__(self, query: str, corpus: list[str]):
        embedded_query = self.model.encode(query)
        embedded_corpus = self.model.encode(corpus)
        # semantic_search ranks hits by score; take the hits for our single
        # query and re-order them so the i-th score lines up with the i-th
        # corpus document.
        hits = util.semantic_search(embedded_query, embedded_corpus)[0]
        hits.sort(key=lambda hit: hit["corpus_id"])
        return softmax([hit["score"] for hit in hits])
21
+
22
+
23
# Sentence Similarity
def sentence_similarity(text: str, documents: list[str], pipe: "SentenceSimilarity"):
    """Score each uploaded document against a query for the Document Search tab.

    Args:
        text: the user's search query.
        documents: paths to plain-text files (as produced by gr.File).
        pipe: a SentenceSimilarity instance (or any callable accepting
            ``query=`` and ``corpus=`` and returning one score per document).

    Returns:
        Mapping of document basename -> relevance score, in a shape gr.Label
        can display.
    """
    doc_texts = []
    for doc in documents:
        # FIX: the original opened each file without ever closing it (handle
        # leak) and relied on the platform default encoding.
        with open(doc, "r", encoding="utf-8") as f:
            doc_texts.append(f.read())
    answer = pipe(query=text, corpus=doc_texts)
    return {os.path.basename(doc): prob for doc, prob in zip(documents, answer)}
31
+
32
+
33
# Text Analysis
def cls_inference(input: list[str], pipe) -> dict:
    """Run a text-classification pipeline and shape its output for gr.Label.

    FIX: the original signature annotated the return as ``str`` although a
    dict is returned, and annotated ``pipe`` with ``pipeline`` — a factory
    *function*, not a type.

    Args:
        input: text (or batch of texts) to classify.  NOTE(review): despite
            the list annotation, only the scores of the first item are used.
        pipe: a transformers text-classification pipeline, or any callable
            with the same ``(input, top_k=None)`` signature.

    Returns:
        Mapping of label -> score for the first input item.
    """
    # top_k=None asks the pipeline for scores over *all* labels, not just
    # the single best one.
    results = pipe(input, top_k=None)
    return {x["label"]: x["score"] for x in results[0]}
37
+
38
+
39
def text_interface(
    pipe, examples: list[str], output_label: str, title: str, desc: str
):
    """Build the standard single-textbox classification demo around ``pipe``.

    FIX: the original annotated ``pipe`` with ``pipeline`` — a factory
    *function*, not a type — so the annotation was dropped.

    Args:
        pipe: a transformers text-classification pipeline.
        examples: example inputs shown under the textbox.
        output_label: label on the gr.Label output component.
        title: tab title.
        desc: markdown description shown above the inputs.

    Returns:
        A gr.Interface wired to ``cls_inference`` with ``pipe`` pre-bound.
    """
    return gr.Interface(
        fn=partial(cls_inference, pipe=pipe),
        inputs=[
            gr.Textbox(lines=5, label="Input Text"),
        ],
        title=title,
        description=desc,
        outputs=[gr.Label(label=output_label)],
        examples=examples,
        allow_flagging="never",
    )
53
+
54
+
55
# POSP
def pos_tagging(text: str, pipe):
    """Run a token-classification pipeline and package the result.

    FIX: the original annotated ``pipe`` with ``pipeline`` — a factory
    *function*, not a type — so the annotation was dropped.

    Args:
        text: the sentence to tag.
        pipe: a transformers token-classification pipeline, or any callable
            returning a list of entity dicts for ``text``.

    Returns:
        A ``{"text": ..., "entities": ...}`` dict in the shape
        gr.HighlightedText expects.
    """
    output = pipe(text)
    return {"text": text, "entities": output}
59
+
60
+
61
# Text Analysis
def text_analysis(text, pipes: dict):
    """Run sentiment, emotion and POS pipelines on one text.

    Args:
        text: the input sentence.
        pipes: the app's pipeline registry; must contain the keys
            "Sentiment Analysis", "Emotion Classifier" and "POS Tagging".

    Returns:
        A ``(sentiment, emotion, pos)`` tuple matching the three output
        components of the Text Analysis tab.
    """
    sentiment = cls_inference(text, pipes["Sentiment Analysis"])
    emotion = cls_inference(text, pipes["Emotion Classifier"])
    tags = pos_tagging(text, pipes["POS Tagging"])
    return sentiment, emotion, tags