# Created by Leandro Carneiro at 19/01/2024
# Description:
# ------------------------------------------------
import os.path
import time

from googlesearch import search
import requests
from bs4 import BeautifulSoup

import constants


def search_google(subject, sites):
    """Search Google for *subject* restricted to each domain in *sites*.

    Parameters:
        subject: free-text search subject.
        sites: iterable of domain names; each is queried with a `site:` filter.

    Returns:
        A list of result URLs on success, or the exception message string on
        failure (callers must check the return type — this mirrors the module's
        existing error convention).
    """
    try:
        results = []
        for site in sites:
            print(' Buscando notícias no domínio: ' + site)
            query = f"{subject} site:{site}"
            # num_sites is a project-level knob from the constants module.
            results.extend(search(query, num_results=constants.num_sites))
        print(' Total de sites encontrados: ' + str(len(results)))
        return results
    except Exception as e:
        print(str(e))
        return str(e)


def retrieve_text_from_site(sites):
    """Download each URL in *sites* and extract its visible text.

    Parameters:
        sites: iterable of URLs.

    Returns:
        A list of extracted page texts (same order as *sites*) on success, or
        the exception message string on failure. Note the first failing site
        aborts the whole batch — preserved from the original design.
    """
    try:
        result = []
        for site in sites:
            print(' Baixando texto do site: ' + site)
            # Fix: without a timeout a single unresponsive server hangs the
            # whole crawl forever; 30 s is generous for a news page.
            response = requests.get(site, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            result.append(soup.get_text())
        return result
    except Exception as e:
        return str(e)


def delete_base(local_base):
    """Remove every regular file inside the *local_base* directory.

    Returns:
        0 on success, or the exception message string on failure.
    """
    try:
        for entry in os.listdir(local_base):
            file_path = os.path.join(local_base, entry)
            # Fix: os.remove raises on directories; only delete plain files so
            # a stray subdirectory doesn't abort the cleanup.
            if os.path.isfile(file_path):
                os.remove(file_path)
        return 0
    except Exception as e:
        return str(e)


def save_on_base(sites, texts, local_base):
    """Persist downloaded texts to news<i>.txt files plus a filename→URL index.

    Parameters:
        sites: list of source URLs.
        texts: list of page texts, parallel to *sites*.
        local_base: directory that receives the files.

    Returns:
        0 on success, or the exception message string on failure (e.g. when
        *texts* is shorter than *sites*, preserving the original IndexError
        path).
    """
    try:
        # Fix: open the CSV index once instead of re-opening it in append
        # mode on every iteration; net file content is identical.
        index_path = os.path.join(local_base, 'filename_url.csv')
        with open(index_path, 'a', encoding='utf-8') as index_file:
            for i, site in enumerate(sites):
                filename = f'news{i}.txt'
                with open(os.path.join(local_base, filename), 'w', encoding='utf-8') as file:
                    file.write(texts[i])
                index_file.write(filename + ';' + site + '\n')
        return 0
    except Exception as e:
        return str(e)