Spaces:

iamkb
/

zero-shot-nlp-classifier-multi-lang

Runtime error

kaushikbar committed on Feb 16, 2022

Commit

33a2a6e

1 Parent(s): 83f2778

Used fasttext for language detection

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import datetime
 import gradio as gr
 from langdetect import detect, DetectorFactory, detect_langs
 from transformers import pipeline
 models = {'en': 'Narsil/deberta-large-mnli-zero-cls', # English
@@ -34,6 +36,8 @@ classifiers = {'en': pipeline("zero-shot-classification", hypothesis_template=hy
                'no': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['no'],
                               model=models['no'])}
 def prep_examples():
     example_text1 = "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most \
     people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment. \
@@ -100,8 +104,10 @@ def detect_lang(sequence, labels):
     seq_lang = 'en'
     try:
-        seq_lang = detect(sequence)
-        lbl_lang = detect(labels)
     except:
         print("Language detection failed!",
               "Date:{}, Sequence:{}, Labels:{}".format(

 import datetime
 import gradio as gr
+from huggingface_hub import hf_hub_download
 from langdetect import detect, DetectorFactory, detect_langs
+import fasttext
 from transformers import pipeline
 models = {'en': 'Narsil/deberta-large-mnli-zero-cls', # English
                'no': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['no'],
                               model=models['no'])}
+fasttext_model = fasttext.load_model(hf_hub_download("julien-c/fasttext-language-id", "model.bin"))
 def prep_examples():
     example_text1 = "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most \
     people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment. \
     seq_lang = 'en'
     try:
+        # seq_lang = detect(sequence)
+        # lbl_lang = detect(labels)
+        seq_lang = fasttext_model.predict(sequence)[0][0].split("__label__")[1]
+        lbl_lang = fasttext_model.predict(labels)[0][0].split("__label__")[1]
     except:
         print("Language detection failed!",
               "Date:{}, Sequence:{}, Labels:{}".format(