mdj1412 committed on
Commit
81eed54
·
1 Parent(s): 1b7928b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +18 -29
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,21 +1,11 @@
1
  import gradio as gr
2
  from transformers import AutoModelForSequenceClassification
3
  from transformers import AutoTokenizer
4
- import pandas as pd
5
  import random
 
6
  import torch
7
 
8
 
9
- README = """
10
- # Movie Review Score Discriminator
11
- It is a program that classifies whether it is positive or negative by entering movie reviews.
12
- You can choose between the Korean version and the English version.
13
- ## Usage
14
-
15
- """
16
-
17
-
18
-
19
 
20
  id2label = {0: "NEGATIVE", 1: "POSITIVE"}
21
  label2id = {"NEGATIVE": 0, "POSITIVE": 1}
@@ -26,7 +16,6 @@ description = "It is a program that classifies whether it is positive or negativ
26
 
27
 
28
 
29
-
30
  def tokenized_data(tokenizer, inputs):
31
  return tokenizer.batch_encode_plus(
32
  [inputs],
@@ -37,12 +26,9 @@ def tokenized_data(tokenizer, inputs):
37
 
38
 
39
 
40
-
41
- examples_eng = ["the greatest musicians ", "cold movie "]
42
- examples_kor = ["긍정", "부정"]
43
-
44
  examples = []
45
  df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
 
46
  for i in range(2):
47
  idx = random.randint(0, 50)
48
  examples.append(['Eng', df.iloc[idx, 0]])
@@ -60,13 +46,13 @@ eng_model = AutoModelForSequenceClassification.from_pretrained(
60
  )
61
 
62
 
63
- kor_model_name = "klue_roberta-small"
64
  kor_step = 2400
65
- kor_tokenizer = AutoTokenizer.from_pretrained(kor_model_name.replace('_', '/'))
66
- kor_file_name = "{}-{}.pt".format(kor_model_name, kor_step)
67
  kor_state_dict = torch.load(kor_file_name)
68
  kor_model = AutoModelForSequenceClassification.from_pretrained(
69
- kor_model_name.replace('_', '/'), num_labels=2, id2label=id2label, label2id=label2id,
70
  state_dict=kor_state_dict
71
  )
72
 
@@ -86,28 +72,31 @@ def builder(lang, text):
86
  logits = model(input_ids=inputs['input_ids'],
87
  attention_mask=inputs['attention_mask']).logits
88
 
 
 
 
 
 
 
89
  prediction = torch.argmax(logits, axis=1)
90
 
 
91
  return id2label[prediction.item()]
92
 
93
 
94
- def builder2(inputs):
95
- return eng_model(inputs)
96
 
97
-
98
- demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"], outputs="text",
 
99
  title=title, description=description, examples=examples)
100
 
101
- # demo2 = gr.Interface(builder2, inputs="text", outputs="text",
102
- # title=title, theme="peach",
103
- # allow_flagging="auto",
104
- # description=description, examples=examples)
105
 
106
  # demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
107
  # title=title, theme="peach",
108
  # allow_flagging="auto",
109
  # description=description, examples=examples)
110
-
 
111
  if __name__ == "__main__":
112
  # print(examples)
113
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import AutoModelForSequenceClassification
3
  from transformers import AutoTokenizer
 
4
  import random
5
+ import numpy as np
6
  import torch
7
 
8
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  id2label = {0: "NEGATIVE", 1: "POSITIVE"}
11
  label2id = {"NEGATIVE": 0, "POSITIVE": 1}
 
16
 
17
 
18
 
 
19
  def tokenized_data(tokenizer, inputs):
20
  return tokenizer.batch_encode_plus(
21
  [inputs],
 
26
 
27
 
28
 
 
 
 
 
29
  examples = []
30
  df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
31
+ random.seed(100)
32
  for i in range(2):
33
  idx = random.randint(0, 50)
34
  examples.append(['Eng', df.iloc[idx, 0]])
 
46
  )
47
 
48
 
49
+ kor_model_name = "klue/roberta-small"
50
  kor_step = 2400
51
+ kor_tokenizer = AutoTokenizer.from_pretrained(kor_model_name)
52
+ kor_file_name = "{}-{}.pt".format(kor_model_name.replace('/', '_'), kor_step)
53
  kor_state_dict = torch.load(kor_file_name)
54
  kor_model = AutoModelForSequenceClassification.from_pretrained(
55
+ kor_model_name, num_labels=2, id2label=id2label, label2id=label2id,
56
  state_dict=kor_state_dict
57
  )
58
 
 
72
  logits = model(input_ids=inputs['input_ids'],
73
  attention_mask=inputs['attention_mask']).logits
74
 
75
+
76
+
77
+ m = torch.nn.Softmax(dim=1)
78
+ output = m(logits)
79
+ # print(logits, output)
80
+
81
  prediction = torch.argmax(logits, axis=1)
82
 
83
+ return {id2label[1]: output[0][1].item(), id2label[0]: output[0][0].item()}
84
  return id2label[prediction.item()]
85
 
86
 
 
 
87
 
88
+ demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"],
89
+ # outputs=gr.Label(num_top_classes=2),
90
+ outputs='label',
91
  title=title, description=description, examples=examples)
92
 
 
 
 
 
93
 
94
  # demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
95
  # title=title, theme="peach",
96
  # allow_flagging="auto",
97
  # description=description, examples=examples)
98
+ output = []
99
+
100
  if __name__ == "__main__":
101
  # print(examples)
102
  demo.launch()
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio
2
  datasets
3
  transformers
4
  torch
5
- pandas
 
 
2
  datasets
3
  transformers
4
  torch
5
+ pandas
6
+ numpy