Abdul-Ib committed on
Commit
4593e66
1 Parent(s): abe481f

Update normalizer.py

Browse files
Files changed (1) hide show
  1. normalizer.py +69 -13
normalizer.py CHANGED
@@ -4,6 +4,12 @@ import pandas as pd
4
  from aiogoogletrans import Translator
5
  from spellchecker import SpellChecker
6
  from nltk.tokenize import RegexpTokenizer
 
 
 
 
 
 
7
 
8
 
9
  class Normalizer:
@@ -353,7 +359,7 @@ class Normalizer:
353
  print(f"An error occurred during character repetition removal: {e}")
354
  return word
355
 
356
- def translate_text(self, text: str) -> str:
357
  """
358
  Translate the given text to English and return the translated text.
359
 
@@ -364,19 +370,14 @@ class Normalizer:
364
  - str: The translated text.
365
  """
366
  try:
367
- loop = asyncio.get_event_loop()
368
- translated_text = (
369
- loop.run_until_complete(self._translator.translate(text))
370
- .text.lower()
371
- .strip()
372
- )
373
  except Exception as e:
374
  print(f"Text Translation failed: {e}")
375
- translated_text = (
376
- text.lower().strip()
377
- ) # Use original text if translation fails
378
  return translated_text
379
 
 
380
  def check_spelling(self, query: str) -> str:
381
  """
382
  Check the spelling of the input query and return the corrected version.
@@ -388,9 +389,8 @@ class Normalizer:
388
  - str: The corrected query.
389
  """
390
  try:
391
- # Detect the language of the input query using Google Translate API
392
- # input_language = self._translator.detect(query)
393
- input_language = "en" if query.encode().isalpha() else "ar"
394
 
395
  # Initialize SpellChecker with detected language, fallback to English if language detection fails
396
  try:
@@ -423,6 +423,62 @@ class Normalizer:
423
  except Exception as e:
424
  print(f"An error occurred during spelling check: {e}")
425
  return query
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
 
427
  def clean_text(self, text):
428
  """
 
4
  from aiogoogletrans import Translator
5
  from spellchecker import SpellChecker
6
  from nltk.tokenize import RegexpTokenizer
7
+ from langdetect import detect
8
+ from fake_useragent import UserAgent
9
+ import json
10
+ import requests
11
+ from fastapi import HTTPException
12
+ from main import logger
13
 
14
 
15
  class Normalizer:
 
359
  print(f"An error occurred during character repetition removal: {e}")
360
  return word
361
 
362
+ async def translate_text(self, text: str) -> str:
363
  """
364
  Translate the given text to English and return the translated text.
365
 
 
370
  - str: The translated text.
371
  """
372
  try:
373
+ translated_text = await self._translator.translate(text, dest="en")
374
+ translated_text = translated_text.text.lower().strip()
 
 
 
 
375
  except Exception as e:
376
  print(f"Text Translation failed: {e}")
377
+ translated_text = text.lower().strip() # Use original text if translation fails
 
 
378
  return translated_text
379
 
380
+
381
  def check_spelling(self, query: str) -> str:
382
  """
383
  Check the spelling of the input query and return the corrected version.
 
389
  - str: The corrected query.
390
  """
391
  try:
392
+ # Detect the language of the input query
393
+ input_language = detect(query)
 
394
 
395
  # Initialize SpellChecker with detected language, fallback to English if language detection fails
396
  try:
 
423
  except Exception as e:
424
  print(f"An error occurred during spelling check: {e}")
425
  return query
426
+
427
def query_suggestions(self, query: str) -> str:
    """
    Get the top suggestion for a query string from Google's Suggest API.

    Parameters:
        query (str): The query string for which a suggestion is to be retrieved.

    Returns:
        str: The first suggested query, or the original query when it is blank
        or when the API returns no suggestions.

    Raises:
        HTTPException: With the upstream status code when the Suggest API
            answers with a non-200 status, or with status 500 when anything
            else fails (language detection, network error, unexpected payload).
    """
    # A blank query has nothing to detect a language from and nothing to
    # complete, so hand it back unchanged instead of failing with a 500.
    if not query or not query.strip():
        return query

    try:
        # Only Arabic is special-cased; any other detected language maps to "en".
        lang = "ar" if detect(query) == "ar" else "en"

        # Pass the query through `params` so it is URL-encoded (spaces still
        # become "+"); characters such as "&" or "#" can no longer break or
        # inject query-string parameters.
        # NOTE(review): the language code is sent as `gl` and `hl` is fixed to
        # "sa", exactly as before -- confirm this is the intended mapping.
        params = {"output": "firefox", "gl": lang, "hl": "sa", "q": query}

        # Chrome user-agent string supplied by fake_useragent.
        headers = {"user-agent": UserAgent().chrome}

        # The timeout keeps a stalled upstream from blocking the caller forever.
        response = requests.get(
            "http://suggestqueries.google.com/complete/search",
            params=params,
            headers=headers,
            timeout=10,
            verify=True,
        )

        if response.status_code != 200:
            raise HTTPException(
                status_code=response.status_code,
                detail=f"An error occurred during the request to Google Suggest API: {response.text}",
            )

        # The code reads the candidate suggestions from index 1 of the payload.
        suggestions = json.loads(response.text)

        # If suggestions are available, return the first one.
        if suggestions[1]:
            return suggestions[1][0]

        # No suggestions returned: fall back to the original query.
        return query

    except HTTPException:
        # Keep the upstream status code instead of re-wrapping it as a 500.
        raise
    except Exception as e:
        # Any other failure is reported to the API client as a 500.
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred during the request to Google Suggest API: {e}",
        ) from e
482
 
483
  def clean_text(self, text):
484
  """