# lang_id_flask / config.py
# MSP RAJA
# fixed output
# a30b891
from transformers import pipeline
# Score cut-off exposed to the rest of the app; presumably the minimum
# classifier confidence needed to accept a prediction — TODO confirm in callers.
THRESHOLD = 0.80

# Checkpoint identifier handed to the pipeline below as its model.
model_ckpt = "papluca/xlm-roberta-base-language-detection"

# Text-classification pipeline, constructed once when this module is imported.
pipe = pipeline(
    task="text-classification",
    model=model_ckpt,
)
# Lookup table from short language code to a human-readable language name.
# NOTE(review): presumably these codes are the labels emitted by the
# classifier configured in this module — confirm against the model card.
labels = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "de": "German",
    "el": "Modern Greek",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "it": "Italian",
    "ja": "Japanese",
    "nl": "Dutch",
    "pl": "Polish",
    "pt": "Portuguese",
    "ru": "Russian",
    "sw": "Swahili",
    "th": "Thai",
    "tr": "Turkish",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh": "Chinese",
}