5m4ck3r committed on
Commit
6d85c23
1 Parent(s): 56d3173

Added translator

Browse files

Added a translator that translates the input text to English to get more accurate output

Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -3,10 +3,21 @@ from transformers import pipeline
3
  import openpyxl
4
  import tempfile
5
  import pycountry
 
 
6
 
7
  classifier = pipeline("zero-shot-classification",
8
  model="LogicSpine/address-large-text-classifier")
9
 
 
 
 
 
 
 
 
 
 
10
  def check_for_third(address: str) -> bool:
11
  countries = [country.name.lower() for country in pycountry.countries]
12
  old_country_names = [
@@ -26,7 +37,9 @@ def check_for_third(address: str) -> bool:
26
  "gold coast",
27
  "nyasaland",
28
  "korea",
29
- "russia"
 
 
30
  ]
31
  countries = countries + old_country_names
32
  if "," in address:
@@ -194,23 +207,23 @@ def swapper(i1, i2, i3):
194
  for data in inputs:
195
  original_data = data
196
 
197
- if check_for_third(data):
198
  if third is None:
199
  third = data
200
  else:
201
  third, data = data, third
202
- if check_for_first(data):
203
  first_candidates.append(data)
204
- elif check_for_second(data):
205
  if second is None:
206
  second = data
207
  else:
208
  second, data = data, second
209
 
210
- elif check_for_first(data):
211
  first_candidates.append(data)
212
 
213
- elif check_for_second(data):
214
  if second is None:
215
  second = data
216
  else:
 
3
  import openpyxl
4
  import tempfile
5
  import pycountry
6
+ from deep_translator import GoogleTranslator
7
+ from langdetect import detect
8
 
9
  classifier = pipeline("zero-shot-classification",
10
  model="LogicSpine/address-large-text-classifier")
11
 
12
def translate_text(text: str) -> str:
    """Return *text* in English, translating it only when necessary.

    Surrounding whitespace is stripped first. The language is then detected
    with ``langdetect``; text detected as English is returned as-is, and
    anything else is sent to ``GoogleTranslator`` with an English target.

    Args:
        text: Raw text to normalise to English.

    Returns:
        The stripped text when it is empty, when its language cannot be
        detected, or when it is detected as English. Otherwise the
        translated text, falling back to the stripped text if the
        translator returns nothing.
    """
    text = text.strip()
    if not text:
        # Nothing to detect or translate; also avoids the detection error
        # that langdetect raises for empty input.
        return text

    # Imported locally so the exception type is in scope without having to
    # modify the file's top-level import block.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        detected_lang = detect(text)
    except LangDetectException:
        # Raised when the text has no usable features (e.g. digits or
        # punctuation only). Such text has nothing to translate.
        return text

    if detected_lang == 'en':
        return text

    # Build the translator only once we know a translation is needed.
    translator = GoogleTranslator(source='auto', target='en')
    translated = translator.translate(text)
    # Callers pass the result on as a string, so never hand back a falsy
    # value in place of the original text.
    return translated or text
20
+
21
  def check_for_third(address: str) -> bool:
22
  countries = [country.name.lower() for country in pycountry.countries]
23
  old_country_names = [
 
37
  "gold coast",
38
  "nyasaland",
39
  "korea",
40
+ "russia",
41
+ "usa",
42
+ "uk"
43
  ]
44
  countries = countries + old_country_names
45
  if "," in address:
 
207
  for data in inputs:
208
  original_data = data
209
 
210
+ if check_for_third(translate_text(data)):
211
  if third is None:
212
  third = data
213
  else:
214
  third, data = data, third
215
+ if check_for_first(translate_text(data)):
216
  first_candidates.append(data)
217
+ elif check_for_second(translate_text(data)):
218
  if second is None:
219
  second = data
220
  else:
221
  second, data = data, second
222
 
223
+ elif check_for_first(translate_text(data)):
224
  first_candidates.append(data)
225
 
226
+ elif check_for_second(translate_text(data)):
227
  if second is None:
228
  second = data
229
  else: