mdj1412 committed on
Commit
509d266
•
1 Parent(s): d6fbd75

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +68 -34
  2. examples.csv +51 -0
  3. klue:roberta-small-2400.pt +3 -0
  4. roberta-base-1900.pt +3 -0
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import gradio as gr
2
- from datasets import load_dataset
 
3
  import random
 
4
 
5
 
6
  README = """
@@ -12,10 +14,6 @@ README = """
12
  """
13
 
14
 
15
- model_name = "roberta-base"
16
- learning_rate = 5e-5
17
- batch_size_train = 64
18
- step = 1900
19
 
20
 
21
  id2label = {0: "NEGATIVE", 1: "POSITIVE"}
@@ -26,46 +24,82 @@ title = "Movie Review Score Discriminator"
26
  description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
27
 
28
 
29
- examples = ["the greatest musicians ", "cold movie "]
30
- # imdb_dataset = load_dataset('imdb')
31
- # examples = []
32
- # for i in range(3):
33
- # idx = random.randrange(len(imdb_dataset['train']))
34
- # examples.append(imdb_dataset['train'][idx]['text'])
35
 
36
 
 
 
 
 
 
 
 
37
 
38
- def fn(text):
39
- return "hello, " + text
40
 
41
 
42
- # demo1 = gr.Interface.load("models/cardiffnlp/twitter-roberta-base-sentiment", inputs="text", outputs="text",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # title=title, theme="peach",
44
  # allow_flagging="auto",
45
  # description=description, examples=examples)
46
 
47
- # demo = gr.Interface(fn=greet, inputs="text", outputs="text")
48
-
49
- # demo2 = gr.Interface(fn=greet, inputs="text", outputs="text",
50
  # title=title, theme="peach",
51
  # allow_flagging="auto",
52
  # description=description, examples=examples)
53
-
54
- here = gr.Interface(fn,
55
- inputs= gr.inputs.Textbox( lines=1, placeholder=None, default="", label=None),
56
- outputs='text',
57
- title="Sentiment analysis of movie reviews",
58
- description=description,
59
- theme="peach",
60
- allow_flagging="auto",
61
- flagging_dir='flagging records')
62
-
63
-
64
- demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
65
- title=title, theme="peach",
66
- allow_flagging="auto",
67
- description=description, examples=examples)
68
 
69
  if __name__ == "__main__":
70
- # here.launch()
71
- demo3.launch()
 
1
import random

import gradio as gr
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
6
 
7
 
8
  README = """
 
14
  """
15
 
16
 
 
 
 
 
17
 
18
 
19
  id2label = {0: "NEGATIVE", 1: "POSITIVE"}
 
24
  description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
25
 
26
 
 
 
 
 
 
 
27
 
28
 
29
def tokenized_data(tokenizer, inputs):
    """Encode a batch of texts with ``tokenizer.batch_encode_plus``.

    Args:
        tokenizer: Object exposing ``batch_encode_plus``.
        inputs: The batch of texts, forwarded unchanged as the first
            positional argument.

    Returns:
        Whatever ``batch_encode_plus`` returns when asked for PyTorch
        tensors ("pt"), padded to ``max_length`` and truncated at 64.
    """
    encode_options = {
        "return_tensors": "pt",
        "padding": "max_length",
        "max_length": 64,
        "truncation": True,
    }
    return tokenizer.batch_encode_plus(inputs, **encode_options)
36
 
 
 
37
 
38
 
39
+
40
# Static sample inputs per language (not referenced by the interface below).
examples_eng = ["the greatest musicians ", "cold movie "]
examples_kor = ["긍정", "부정"]

# Sample two random rows from the bundled example sheet. Each row contributes
# its first column (English) followed by its second column (Korean), so
# `examples` is a flat list of the form [eng, kor, eng, kor].
examples = []
df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
for _ in range(2):
    # randrange(len(df)) stays inside the table. The previous
    # randint(0, 50) is inclusive and could return 50, one past the last
    # valid position of the 50-row sheet, raising IndexError.
    idx = random.randrange(len(df))
    examples.append(df.iloc[idx, 0])
    examples.append(df.iloc[idx, 1])

# Interfaces loaded by model id through gr.Interface.load.
# NOTE(review): `model_kor` points at an English Twitter sentiment model;
# confirm this is the intended Korean backend.
model_kor = gr.Interface.load("models/cardiffnlp/twitter-roberta-base-sentiment")
model_eng = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng")
55
+
56
+
57
+
58
def builder(version, inputs):
    """Classify a movie review as NEGATIVE or POSITIVE.

    Args:
        version: Language selector. 'Eng' selects the "roberta-base"
            checkpoint saved at step 1900; any other value selects
            "klue/roberta-small" saved at step 2400.
        inputs: The review text. A single string is expected; a
            one-element list or tuple of strings is also accepted.

    Returns:
        The ``id2label`` entry for the highest-scoring class.
    """
    if version == 'Eng':
        model_name = "roberta-base"
        step = 1900
    else:
        model_name = "klue/roberta-small"
        step = 2400

    # tokenized_data() uses the tokenizer's batch API, which needs a
    # list/tuple of texts; the UI textbox supplies one bare string.
    texts = [inputs] if isinstance(inputs, str) else inputs

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    encoded = tokenized_data(tokenizer, texts)

    # Checkpoints are stored flat next to this script with ':' in place of
    # the '/' in the model id (e.g. "klue:roberta-small-2400.pt"), so the
    # raw model id must not be used as a path.
    file_name = "{}-{}.pt".format(model_name.replace("/", ":"), step)
    # map_location keeps loading working on CPU-only hosts even when the
    # weights were saved from a GPU.
    state_dict = torch.load(file_name, map_location="cpu")
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=2, id2label=id2label, label2id=label2id,
        state_dict=state_dict
    )

    model.eval()
    with torch.no_grad():
        logits = model(input_ids=encoded['input_ids'],
                       attention_mask=encoded['attention_mask']).logits

    prediction = torch.argmax(logits, dim=1)

    # .item() requires exactly one prediction, i.e. a single input text.
    return id2label[prediction.item()]
84
+
85
+
86
def builder2(inputs):
    """Pass ``inputs`` to the module-level ``model_eng`` and return its result."""
    prediction = model_eng(inputs)
    return prediction
88
+
89
+
90
# `examples` is a flat list alternating English and Korean texts
# ([eng, kor, eng, kor]). The interface has two inputs (version dropdown and
# review text), so each example row must be a [version, text] pair instead
# of one row holding all four texts.
example_rows = [
    ['Eng' if position % 2 == 0 else 'Kor', text]
    for position, text in enumerate(examples)
]

demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"], outputs="text",
                    title=title, description=description, examples=example_rows)

# demo2 = gr.Interface(builder2, inputs="text", outputs="text",
#                      title=title, theme="peach",
#                      allow_flagging="auto",
#                      description=description, examples=examples)

# demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
#                           title=title, theme="peach",
#                           allow_flagging="auto",
#                           description=description, examples=examples)

if __name__ == "__main__":
    demo.launch()
    # demo3.launch()
examples.csv ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eng kor
2
+ 0 of saucy 1점도아깝닀4λͺ…λ³΄λ‹€μž¬λ―Έμ—†μ–΄μ„œ2λͺ…λ‚˜κ°
3
+ 1 cold movie 맀트릭슀?γ„΄γ„΄ 짜μž₯ 묻은 μ‘΄μœ…
4
+ 2 redundant concept 개인의 μ„ νƒμœΌλ‘œ 1점을 μ€¬μŠ΅λ‹ˆλ‹€
5
+ 3 in world cinema λ³΄λŠ”λ‚΄λ‚΄ λ‹ˆ μƒκ°λ§Œ 났닀.
6
+ 4 on all cylinders μ˜ν™”λ³΄λ‹€κ°€ μž λ“ μ μ€ μ²˜μŒμ΄λ„€μš”
7
+ 5 sit through , λ”°λœ»ν•œ μ˜ν™”μ—μš”~^^μΆ”μ²œν•΄μš”!
8
+ 6 heroes λ³„λ‘œμ—μš” 생각보닀 λ…ΈμžΌμž„
9
+ 7 sharply μ’‹μ•„μš” κ°€μ‘±λ“€κ³Ό 보기 μ’‹μ•„μš”
10
+ 8 sometimes dry β™‘ 재밌게 μž˜λ΄€μŠ΅λ‹ˆλ‹€γ…Žγ…Ž
11
+ 9 disappointments 반제 ν˜ΈλΉ— μ‚¬λž‘ν•΄μš”~
12
+ 10 the horrors λ˜₯도 이런 κ±°λŒ€ν•œ λ˜₯이 μ—†μ—ˆλ‹€..
13
+ 11 many pointless κ°œμ§€λ¦½λ‹ˆλ‹€ λ‚˜λ§Œλ‹Ήν• μˆœμ—†μ§€
14
+ 12 a beautifully 이게무슨...λ§Œν™”λ„€ λ§Œν™” γ…‰γ…‰γ…‰
15
+ 13 a doa 7광ꡬ와 μŒλ²½μ„ μ΄λ£¨λŠ” λ§μž‘
16
+ 14 no apparent joy μ˜ν™” 보닀가 쀑간에 λ‚˜μ™”μŠ΅λ‹ˆλ‹€
17
+ 15 seem fresh μ΅œμ•… κ·Έλƒ₯ 보지 λ§ˆμ„Έμš”μ§„μ§œ λ…ΈμžΌ
18
+ 16 weak and 짱ꡬ κ·Ήμž₯νŒμ€ μ–Έμ œλ‚˜ μ΅œκ³ μ—μš”
19
+ 17 skip this dreck , λ‚΄ μ‹œκ°„μ€ μ†Œμ€‘ν•œ κ±°λ‹€.
20
+ 18 generates κ²λ‚˜ μž¬λ°ŒλŠ”λ””,,,,
21
+ 19 funny yet κ·Έλƒ₯ 개재밌음 평점 믿으면 μ•ˆλ¨
22
+ 20 in memory μž¬λ°‹κ²Œ μž˜λ΄£μŠ΅λ‹ˆλ‹€ λ„ˆλ¬΄μ’‹μŠ΅λ‹ˆλ‹€μš”
23
+ 21 hawaiian shirt λ°₯ λ¨ΉμœΌλ©΄μ„œ 보기 쒋은 μ˜ν™”
24
+ 22 grievous but μž¬λ―Έμ™€ 감동을 κ²ΈλΉ„ν•œ λͺ…μž‘μž…λ‹ˆλ‹€!!
25
+ 23 hopeless μž¬κ°œλ΄‰ κ°μ‚¬ν•©λ‹ˆλ‹€.μ •λ§λ‘œ
26
+ 24 bring tissues . 끝더 이상 μ„€λͺ…이 ν•„μš”ν• κΉŒ.
27
+ 25 just too silly μ—­μ‹œ 믿보 ν™©.μ •.λ―Ό λ°°μš°λ‹˜~^^
28
+ 26 cinematic bon bons μ—°μΆœ+μ—°κΈ°+μŠ€ν† λ¦¬+μ˜μƒλ―Έ+OST
29
+ 27 irritates and 좔얡에 묻어두지 κ·Έλž¬λƒ
30
+ 28 collapse μ΄μ‹œλŒ€ 졜고의 μ½”λ―Έλ”” μ˜ν™”
31
+ 29 no lika da 재미있게 κ΄€λžŒν•˜μ˜€μŠ΅λ‹ˆλ‹€
32
+ 30 a welcome relief μŠ€λ§ˆμš°κ·Έλž‘ μžˆμ„λ• 슀릴이 λ§Žλ‹€.
33
+ 31 , compelling 처음으둜 κ·Ήμž₯μ—μ„œ μž€μŠ΅λ‹ˆλ‹€
34
+ 32 infectiously λ„ˆλ¬΄λ‚˜λ„ μž˜λ΄€μ–΄μš” κ΅Ώμž…λ‹ˆλŒœ
35
+ 33 imax in short γ…ˆγ„Ήκ²Œ 웃기고 μžΌμžˆλ„€.γ…‹
36
+ 34 i hate it . 연말에 보면 λ­‰ν΄ν•˜λ‹€ 정말
37
+ 35 a good one κ·Έλƒ₯ κ²Œμž„μœΌλ‘œ 내지 κ·Έλž¬λƒ.
38
+ 36 , plodding picture μ§„μ§œ κ°•μΆ” 졜고의 ν•œκ΅­μ˜ν™”
39
+ 37 inane and awful μ§„μ§œμ΅œμ•…μž…λ‹ˆλ‹€...λͺ…μ ˆμ—λ³΄μ„Έμš”
40
+ 38 whole mess λŒ€λ§μž‘ λ³΄μ§€λ§ˆμ„Έμš” 돈 μ•„κΉŒμ›€
41
+ 39 enjoy the ride 이거 λ³Ό μ‹œκ°„μ— μ•Όλ™μ΄λ‚˜ 봐라
42
+ 40 the horror λ„ˆλ¬΄λ„ˆλ¬΄ 재밌음 λ²„μ¦ˆ 졜고
43
+ 41 a dim 3μ‹œκ°„μ΄ μ „ν˜€ 아깝지 μ•Šμ€
44
+ 42 amazingly lame . μ‘Έμž‘μ΄λ‹€..
45
+ 43 to spare wildlife λ…Έμš°μžΌμŠ€γ…‘ 이만작 μ—΄μž μ±„μš°κΈ°
46
+ 44 carnage and 2022λ…„ 졜고 ν•œκ΅­μ˜ν™”
47
+ 45 second fiddle μž¬λ―Έμ—†λ‹€λ„ˆλ¬΄μž¬λ―Έμ—†λ‹€OST지겹닀
48
+ 46 a stylish exercise λ‚˜λ¦„ 재밌게 λ΄„ κ°€λ³κ²Œ 보기 쒋은듯
49
+ 47 than this mess 와...κ°λ…νŒμ΄ 더쒋닀... 더긴데
50
+ 48 valuable messages κ°‘μžκΈ° λ„κ²Œμž γ„Ήγ…‡γ…‹γ…‹
51
+ 49 usual worst 별점 1점도 μ£ΌκΈ°κ°€ μ•„κΉŒμš΄ μ˜ν™”..
klue:roberta-small-2400.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b572a576888999c3696750507168b1ec8c194b93e3b0a5fb69d5932cb61a410
3
+ size 272408049
roberta-base-1900.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f0dcb5d42751656f47868d0b1cd793c33bd2c497df57dde5514a2b15a791d05
3
+ size 498658641