Spaces:
Sleeping
Sleeping
Added translator
Browse files
Added translator to translate the text to get accurate output
app.py
CHANGED
@@ -3,10 +3,21 @@ from transformers import pipeline
|
|
3 |
import openpyxl
|
4 |
import tempfile
|
5 |
import pycountry
|
|
|
|
|
6 |
|
7 |
classifier = pipeline("zero-shot-classification",
|
8 |
model="LogicSpine/address-large-text-classifier")
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def check_for_third(address: str) -> bool:
|
11 |
countries = [country.name.lower() for country in pycountry.countries]
|
12 |
old_country_names = [
|
@@ -26,7 +37,9 @@ def check_for_third(address: str) -> bool:
|
|
26 |
"gold coast",
|
27 |
"nyasaland",
|
28 |
"korea",
|
29 |
-
"russia"
|
|
|
|
|
30 |
]
|
31 |
countries = countries + old_country_names
|
32 |
if "," in address:
|
@@ -194,23 +207,23 @@ def swapper(i1, i2, i3):
|
|
194 |
for data in inputs:
|
195 |
original_data = data
|
196 |
|
197 |
-
if check_for_third(data):
|
198 |
if third is None:
|
199 |
third = data
|
200 |
else:
|
201 |
third, data = data, third
|
202 |
-
if check_for_first(data):
|
203 |
first_candidates.append(data)
|
204 |
-
elif check_for_second(data):
|
205 |
if second is None:
|
206 |
second = data
|
207 |
else:
|
208 |
second, data = data, second
|
209 |
|
210 |
-
elif check_for_first(data):
|
211 |
first_candidates.append(data)
|
212 |
|
213 |
-
elif check_for_second(data):
|
214 |
if second is None:
|
215 |
second = data
|
216 |
else:
|
|
|
3 |
import openpyxl
|
4 |
import tempfile
|
5 |
import pycountry
|
6 |
+
from deep_translator import GoogleTranslator
|
7 |
+
from langdetect import detect
|
8 |
|
9 |
classifier = pipeline("zero-shot-classification",
|
10 |
model="LogicSpine/address-large-text-classifier")
|
11 |
|
12 |
+
def translate_text(text: str) -> str:
    """Return *text* in English, translating it only when necessary.

    The input is stripped of surrounding whitespace and its language is
    guessed with ``langdetect.detect``. Text detected as English is returned
    unchanged; anything else is passed through ``GoogleTranslator`` with an
    auto-detected source language and ``'en'`` as the target.

    Args:
        text: Raw text to normalise to English.

    Returns:
        The stripped text itself when it is empty, detected as English, or
        has no detectable language; otherwise the translator's output.
    """
    text = text.strip()
    if not text:
        # Nothing to detect or translate; detect() raises on empty input.
        return text

    # Local import: the file only brings `detect` into scope at module level.
    from langdetect import LangDetectException

    try:
        detected_lang = detect(text)
    except LangDetectException:
        # Raised when the text carries no language features (for example a
        # string of digits or punctuation only); there is nothing to
        # translate, so hand the text back unchanged instead of crashing.
        return text

    if detected_lang == 'en':
        return text

    # Build the translator only once a translation is actually required.
    return GoogleTranslator(source='auto', target='en').translate(text)
|
20 |
+
|
21 |
def check_for_third(address: str) -> bool:
|
22 |
countries = [country.name.lower() for country in pycountry.countries]
|
23 |
old_country_names = [
|
|
|
37 |
"gold coast",
|
38 |
"nyasaland",
|
39 |
"korea",
|
40 |
+
"russia",
|
41 |
+
"usa",
|
42 |
+
"uk"
|
43 |
]
|
44 |
countries = countries + old_country_names
|
45 |
if "," in address:
|
|
|
207 |
for data in inputs:
|
208 |
original_data = data
|
209 |
|
210 |
+
if check_for_third(translate_text(data)):
|
211 |
if third is None:
|
212 |
third = data
|
213 |
else:
|
214 |
third, data = data, third
|
215 |
+
if check_for_first(translate_text(data)):
|
216 |
first_candidates.append(data)
|
217 |
+
elif check_for_second(translate_text(data)):
|
218 |
if second is None:
|
219 |
second = data
|
220 |
else:
|
221 |
second, data = data, second
|
222 |
|
223 |
+
elif check_for_first(translate_text(data)):
|
224 |
first_candidates.append(data)
|
225 |
|
226 |
+
elif check_for_second(translate_text(data)):
|
227 |
if second is None:
|
228 |
second = data
|
229 |
else:
|