Spaces:

mdj1412
/

movie_review_score_discriminator

Running

App Files Files Community

mdj1412 committed on Jan 25, 2023

Commit

6c5db00

1 Parent(s): e3822e3

Upload 3 files

Browse files

Files changed (3) hide show

app.py +26 -3
lid.176.ftz +3 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import random
 import numpy as np
 import pandas as pd
 import torch
@@ -13,7 +14,21 @@ label2id = {"NEGATIVE": 0, "POSITIVE": 1}
 title = "Movie Review Score Discriminator"
-description = "It is a program that classifies whether it is positive or negative by entering movie reviews. You can choose between the Korean version and the English version."
@@ -58,10 +73,18 @@ kor_model = AutoModelForSequenceClassification.from_pretrained(
 def builder(lang, text):
     if lang == 'Eng':
         model = eng_model
         tokenizer = eng_tokenizer
-    else:
         model = kor_model
         tokenizer = kor_tokenizer
@@ -85,7 +108,7 @@ def builder(lang, text):
-demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"],
                             outputs=gr.Label(num_top_classes=2, label='Res', color='CadetBlue'),
                             # outputs='label',
                             title=title, description=description, examples=examples)

 import numpy as np
 import pandas as pd
 import torch
+import fasttext
 title = "Movie Review Score Discriminator"
+description = "It is a program that classifies whether it is positive or negative by entering movie reviews. \
+                You can choose between the Korean version and the English version. \
+                It also provides a version called Any, which determines whether it is Korean or English and predicts it."
+class LanguageIdentification:
+    def __init__(self):
+        pretrained_lang_model = "./lid.176.ftz"
+        self.model = fasttext.load_model(pretrained_lang_model)
+    def predict_lang(self, text):
+        predictions = self.model.predict(text, k=2) # returns top 2 matching languages
+        return predictions
+LANGUAGE = LanguageIdentification()
 def builder(lang, text):
+    if lang == 'Any':
+        pred = LANGUAGE.predict_lang(text)
+        if pred[0][0] == '__label__ko':
+            lang = 'Kor'
+        else: # '__label__en'
+            lang = 'Eng'
+        # else:
+        #     raise NotImplementedError("It's neither Korean nor English.")
     if lang == 'Eng':
         model = eng_model
         tokenizer = eng_tokenizer
+    if lang == 'Kor':
         model = kor_model
         tokenizer = kor_tokenizer
+demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Any', 'Eng', 'Kor']), "text"],
                             outputs=gr.Label(num_top_classes=2, label='Res', color='CadetBlue'),
                             # outputs='label',
                             title=title, description=description, examples=examples)

lid.176.ftz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
+size 938013

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ datasets
 transformers
 torch
 pandas
-numpy

 transformers
 torch
 pandas
+numpy
+fasttext