Upload 2 files
- app.py +18 -29
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,21 +1,11 @@
 import gradio as gr
 from transformers import AutoModelForSequenceClassification
 from transformers import AutoTokenizer
-import pandas as pd
 import random
+import numpy as np
 import torch
 
 
-README = """
-# Movie Review Score Discriminator
-It is a program that classifies whether it is positive or negative by entering movie reviews.
-You can choose between the Korean version and the English version.
-## Usage
-
-"""
-
-
-
 
 id2label = {0: "NEGATIVE", 1: "POSITIVE"}
 label2id = {"NEGATIVE": 0, "POSITIVE": 1}
@@ -26,7 +16,6 @@ description = "It is a program that classifies whether it is positive or negativ
 
 
 
-
 def tokenized_data(tokenizer, inputs):
     return tokenizer.batch_encode_plus(
         [inputs],
@@ -37,12 +26,9 @@ def tokenized_data(tokenizer, inputs):
 
 
 
-
-examples_eng = ["the greatest musicians ", "cold movie "]
-examples_kor = ["긍정", "부정"]
-
 examples = []
 df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
+random.seed(100)
 for i in range(2):
     idx = random.randint(0, 50)
     examples.append(['Eng', df.iloc[idx, 0]])
@@ -60,13 +46,13 @@ eng_model = AutoModelForSequenceClassification.from_pretrained(
 )
 
 
-kor_model_name = "
+kor_model_name = "klue/roberta-small"
 kor_step = 2400
-kor_tokenizer = AutoTokenizer.from_pretrained(kor_model_name
-kor_file_name = "{}-{}.pt".format(kor_model_name, kor_step)
+kor_tokenizer = AutoTokenizer.from_pretrained(kor_model_name)
+kor_file_name = "{}-{}.pt".format(kor_model_name.replace('/', '_'), kor_step)
 kor_state_dict = torch.load(kor_file_name)
 kor_model = AutoModelForSequenceClassification.from_pretrained(
-    kor_model_name
+    kor_model_name, num_labels=2, id2label=id2label, label2id=label2id,
     state_dict=kor_state_dict
 )
 
@@ -86,28 +72,31 @@ def builder(lang, text):
     logits = model(input_ids=inputs['input_ids'],
                    attention_mask=inputs['attention_mask']).logits
 
+
+
+    m = torch.nn.Softmax(dim=1)
+    output = m(logits)
+    # print(logits, output)
+
     prediction = torch.argmax(logits, axis=1)
 
+    return {id2label[1]: output[0][1].item(), id2label[0]: output[0][0].item()}
     return id2label[prediction.item()]
 
 
-def builder2(inputs):
-    return eng_model(inputs)
 
-
-
+demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"],
+                    # outputs=gr.Label(num_top_classes=2),
+                    outputs='label',
                     title=title, description=description, examples=examples)
 
-# demo2 = gr.Interface(builder2, inputs="text", outputs="text",
-#                      title=title, theme="peach",
-#                      allow_flagging="auto",
-#                      description=description, examples=examples)
 
 # demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
 #                      title=title, theme="peach",
 #                      allow_flagging="auto",
 #                      description=description, examples=examples)
-
+output = []
+
 if __name__ == "__main__":
     # print(examples)
     demo.launch()
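For readers skimming the diff: the reworked builder now softmaxes the classifier logits and returns a {label: confidence} dict, which is the shape the new outputs='label' component expects. Below is a minimal standalone sketch of that pattern, not taken from the commit; the helper name scores_from_logits and the example logits are illustrative only.

import torch

id2label = {0: "NEGATIVE", 1: "POSITIVE"}

def scores_from_logits(logits: torch.Tensor) -> dict:
    # logits shape: (1, 2); softmax over dim=1 converts them to class probabilities
    probs = torch.nn.Softmax(dim=1)(logits)
    # a Gradio Label output renders a dict of {class_name: confidence}
    return {id2label[1]: probs[0][1].item(),
            id2label[0]: probs[0][0].item()}

# example with made-up logits that favour the positive class
print(scores_from_logits(torch.tensor([[-1.2, 2.3]])))
# -> roughly {'POSITIVE': 0.97, 'NEGATIVE': 0.03}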
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ gradio
 datasets
 transformers
 torch
-pandas
+pandas
+numpy