# Created by Leandro Carneiro at 19/01/2024
# Description:
# ------------------------------------------------
"""Search news sites with Google Custom Search and store the page texts.

The module offers helpers to query the Custom Search API restricted to a
set of domains, download the text of the pages found, and save those texts
(plus a file-name -> URL index) in a local folder.
"""
import os.path
import time

from googleapiclient.discovery import build
import requests
from bs4 import BeautifulSoup

import constants


def google_search_api(search_term, api_key, cse_id, **kwargs):
    """Run one Custom Search query and return its result items.

    Args:
        search_term: Query string sent as ``q``.
        api_key: Developer key used to build the ``customsearch`` v1 service.
        cse_id: Search engine identifier sent as ``cx``.
        **kwargs: Extra keyword arguments forwarded to ``cse().list``.

    Returns:
        The value stored under ``'items'`` in the API response, or ``-1`` if
        anything fails (including a response without an ``'items'`` key).
    """
    try:
        service = build("customsearch", "v1", developerKey=api_key)
        res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
        return res['items']
    except Exception:
        # Callers test for this sentinel instead of catching exceptions.
        return -1


def search_google(subject, sites):
    """Collect result links for ``subject`` restricted to each site in turn.

    The API key and search-engine id are read from the ``GOOGLE_KEY`` and
    ``GOOGLE_SEARCH`` environment variables; the number of results requested
    per site comes from ``constants.num_sites``.

    Args:
        subject: Search subject, combined with a ``site:`` filter per domain.
        sites: Iterable of domains to search.

    Returns:
        A list of links. When the search for a domain fails, the domain
        itself is added instead. Links containing ``pdf`` (case-insensitive)
        are skipped. If an unexpected exception occurs, its message is
        printed and returned as a string instead of a list.
    """
    try:
        results = []
        for site in sites:
            print(' Buscando notícias no domínio: ' + site)
            query = f"{subject} site:{site}"
            sites_searched = google_search_api(
                query,
                os.environ['GOOGLE_KEY'],
                os.environ['GOOGLE_SEARCH'],
                num=constants.num_sites,
            )
            if sites_searched == -1:
                # Search failed: fall back to the domain itself.
                results.append(site)
                continue
            for item in sites_searched:
                link = item['link']
                # NOTE(review): substring match, so any URL containing "pdf"
                # anywhere is treated as a PDF file.
                if 'pdf' not in link.lower():
                    results.append(link)
                else:
                    print(' Arquivo PDF encontrado: ' + link)
            # time.sleep(3)  # pause between queries, currently disabled
        print(' Total de sites encontrados: ' + str(len(results)))
        return results
    except Exception as e:
        print(str(e))
        return str(e)


def retrieve_text_from_site(sites, timeout=30):
    """Download each URL and extract its visible text.

    Args:
        sites: Iterable of URLs to fetch.
        timeout: Value passed as ``timeout`` to ``requests.get`` so that an
            unresponsive server cannot block the run indefinitely.

    Returns:
        A list with one entry per URL, in order: the text extracted by
        BeautifulSoup, or an error message starting with
        ``'Erro na recuperação do texto: '`` when the download or parsing
        fails.
    """
    result = []
    for site in sites:
        print(' Baixando texto do site: ' + site)
        try:
            response = requests.get(site, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            result.append(soup.get_text())
        except Exception as e:
            # Keep positions aligned with ``sites`` by storing the error text.
            result.append('Erro na recuperação do texto: ' + str(e))
    return result


def delete_base(local_base):
    """Remove every entry found directly inside ``local_base``.

    Args:
        local_base: Path of the folder to empty.

    Returns:
        ``0`` on success, or the exception message as a string if listing
        the folder or removing an entry fails (removal stops at the first
        failure).
    """
    try:
        for entry in os.listdir(local_base):
            os.remove(os.path.join(local_base, entry))
        return 0
    except Exception as e:
        return str(e)


def save_on_base(sites, texts, local_base):
    """Write each text to ``news<i>.txt`` and index it in ``filename_url.csv``.

    Args:
        sites: URLs, one per text.
        texts: Texts to save; ``texts[i]`` belongs to the i-th site.
        local_base: Folder where the files are written.

    Returns:
        ``0`` on success, or the exception message as a string on failure
        (files written before the failure are left in place).
    """
    try:
        index_path = os.path.join(local_base, 'filename_url.csv')
        for i, site in enumerate(sites):
            filename = f'news{i}.txt'
            with open(os.path.join(local_base, filename), 'w', encoding='utf-8') as file:
                file.write(texts[i])
            # Appended per item so the index only lists texts already saved.
            with open(index_path, 'a', encoding='utf-8') as file:
                file.write(filename + ';' + site + '\n')
        return 0
    except Exception as e:
        return str(e)