mdj1412 committed on
Commit
6c5db00
1 Parent(s): e3822e3

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +26 -3
  2. lid.176.ftz +3 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -5,6 +5,7 @@ import random
5
  import numpy as np
6
  import pandas as pd
7
  import torch
 
8
 
9
 
10
 
@@ -13,7 +14,21 @@ label2id = {"NEGATIVE": 0, "POSITIVE": 1}
13
 
14
 
15
  title = "Movie Review Score Discriminator"
16
- description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
 
@@ -58,10 +73,18 @@ kor_model = AutoModelForSequenceClassification.from_pretrained(
58
 
59
 
60
  def builder(lang, text):
 
 
 
 
 
 
 
 
61
  if lang == 'Eng':
62
  model = eng_model
63
  tokenizer = eng_tokenizer
64
- else:
65
  model = kor_model
66
  tokenizer = kor_tokenizer
67
 
@@ -85,7 +108,7 @@ def builder(lang, text):
85
 
86
 
87
 
88
- demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"],
89
  outputs=gr.Label(num_top_classes=2, label='Res', color='CadetBlue'),
90
  # outputs='label',
91
  title=title, description=description, examples=examples)
 
5
  import numpy as np
6
  import pandas as pd
7
  import torch
8
+ import fasttext
9
 
10
 
11
 
 
14
 
15
 
16
  title = "Movie Review Score Discriminator"
17
+ description = "It is a program that classifies whether it is positive or negative by entering movie reviews. \
18
+ You can choose between the Korean version and the English version. \
19
+ It also provides a version called Any, which determines whether it is Korean or English and predicts it."
20
+
21
+
22
+ class LanguageIdentification:
23
+ def __init__(self):
24
+ pretrained_lang_model = "./lid.176.ftz"
25
+ self.model = fasttext.load_model(pretrained_lang_model)
26
+
27
+ def predict_lang(self, text):
28
+ predictions = self.model.predict(text, k=2) # returns top 2 matching languages
29
+ return predictions
30
+
31
+ LANGUAGE = LanguageIdentification()
32
 
33
 
34
 
 
73
 
74
 
75
  def builder(lang, text):
76
+ if lang == 'Any':
77
+ pred = LANGUAGE.predict_lang(text)
78
+ if pred[0][0] == '__label__ko':
79
+ lang = 'Kor'
80
+ else: # '__label__en'
81
+ lang = 'Eng'
82
+ # else:
83
+ # raise NotImplementedError("It's neither Korean nor English.")
84
  if lang == 'Eng':
85
  model = eng_model
86
  tokenizer = eng_tokenizer
87
+ if lang == 'Kor':
88
  model = kor_model
89
  tokenizer = kor_tokenizer
90
 
 
108
 
109
 
110
 
111
+ demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Any', 'Eng', 'Kor']), "text"],
112
  outputs=gr.Label(num_top_classes=2, label='Res', color='CadetBlue'),
113
  # outputs='label',
114
  title=title, description=description, examples=examples)
lid.176.ftz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
3
+ size 938013
requirements.txt CHANGED
@@ -3,4 +3,5 @@ datasets
3
  transformers
4
  torch
5
  pandas
6
- numpy
 
 
3
  transformers
4
  torch
5
  pandas
6
+ numpy
7
+ fasttext