update
Browse files- lang_id_examples.json +45 -0
- main.py +2 -0
lang_id_examples.json
CHANGED
@@ -8,5 +8,50 @@
|
|
8 |
"普段使いとバイクに乗るときのブーツ兼用として購入しました。",
|
9 |
"ja",
|
10 |
"langid"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
]
|
12 |
]
|
|
|
8 |
"普段使いとバイクに乗るときのブーツ兼用として購入しました。",
|
9 |
"ja",
|
10 |
"langid"
|
11 |
+
],
|
12 |
+
[
|
13 |
+
"Am Anfang schuf Gott Himmel und Erde.",
|
14 |
+
"de",
|
15 |
+
"langid"
|
16 |
+
],
|
17 |
+
[
|
18 |
+
"En el principio creó Dios los cielos y la tierra",
|
19 |
+
"es",
|
20 |
+
"langid"
|
21 |
+
],
|
22 |
+
[
|
23 |
+
"Alussa loi Jumala taivaan ja maan.",
|
24 |
+
"fi",
|
25 |
+
"langid"
|
26 |
+
],
|
27 |
+
[
|
28 |
+
"Au commencement, Dieu créa les cieux et la terre.",
|
29 |
+
"fr",
|
30 |
+
"langid"
|
31 |
+
],
|
32 |
+
[
|
33 |
+
"आदि में परमेश्वर ने आकाश और पृथ्वी की सृष्टि की।",
|
34 |
+
"hi",
|
35 |
+
"langid"
|
36 |
+
],
|
37 |
+
[
|
38 |
+
"hello tum kaise ho? Kya tumne Batman Begins ke bare mein suna hai? Kya great movie hai!",
|
39 |
+
"hi_en",
|
40 |
+
"langid"
|
41 |
+
],
|
42 |
+
[
|
43 |
+
"I begynnelsen skapte Gud himmelen og jorden.",
|
44 |
+
"no",
|
45 |
+
"langid"
|
46 |
+
],
|
47 |
+
[
|
48 |
+
"Член ООН с 1991 года, ЕС и НАТО — с 2004 года.",
|
49 |
+
"ru",
|
50 |
+
"langid"
|
51 |
+
],
|
52 |
+
[
|
53 |
+
"Thina, Bantu baseNingizimu Afrika, Siyakukhumbula ukucekelwa phansi kwamalungelo okwenzeka eminyakeni eyadlula",
|
54 |
+
"zu",
|
55 |
+
"langid"
|
56 |
]
|
57 |
]
|
main.py
CHANGED
@@ -45,6 +45,8 @@ def click_lang_id_button(text: str, ground_true: str, model_name: str):
|
|
45 |
global lang_id_identifier
|
46 |
global fasttext_model
|
47 |
|
|
|
|
|
48 |
if model_name == "langid":
|
49 |
label, prob = lang_id_identifier.classify(text)
|
50 |
elif model_name == "fasttext":
|
|
|
45 |
global lang_id_identifier
|
46 |
global fasttext_model
|
47 |
|
48 |
+
text = str(text).strip()
|
49 |
+
|
50 |
if model_name == "langid":
|
51 |
label, prob = lang_id_identifier.classify(text)
|
52 |
elif model_name == "fasttext":
|