"""Scrape post texts from a Facebook group using a headless Firefox browser.

Logs in, opens the group, expands truncated posts, collects the visible
post texts, and returns those that were fully expanded.
"""

import os
import time

from selenium import webdriver
from selenium.common.exceptions import ElementNotInteractableException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options as FirefoxOptions

# SECURITY(review): these credentials were hard-coded in source control.
# They are kept only as backward-compatible fallbacks; supply them via the
# FB_EMAIL / FB_PASSWORD environment variables and rotate the old values.
_FB_EMAIL = os.environ.get('FB_EMAIL', 'reaznadlan@gmail.com')
_FB_PASSWORD = os.environ.get('FB_PASSWORD', 'hadad070707')

# NOTE(review): these XPaths match Facebook's auto-generated CSS class names
# and will break whenever Facebook redeploys its frontend — confirm they are
# still current before relying on this scraper.
_POST_LINK_XPATH = (
    '//div[@class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 '
    'x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu '
    'x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz xt0b8zv '
    'xzsf02u x1s688f"]'
)
_POST_TEXT_XPATH = (
    '//span[@class="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq '
    'x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x xudqn12 x3x7a5m '
    'x6prxxf xvq8zen xo1l8bm xzsf02u x1yc453h"]'
)

# Module-level result holder, populated when run as a script.
posts_content = []


def scrapper_func():
    """Log in to Facebook, scrape the lands.israel group, and return post texts.

    Returns:
        list[str]: de-duplicated post texts in first-seen order, excluding any
        post that still contains a "See more" truncation stub.
    """
    options = FirefoxOptions()
    options.add_argument('--headless')
    # NOTE(review): executable_path/service_log_path are the Selenium 3 style
    # and were removed in Selenium 4.10+; migrate to a Service object when
    # upgrading the selenium dependency.
    browser = webdriver.Firefox(
        options=options,
        executable_path='geckodriver-v0.33.0-win32/geckodriver.exe',
        service_log_path=os.devnull,
    )

    collected = []  # post texts, in the order first seen
    seen = set()    # O(1) duplicate check (was a linear scan of the list)
    try:
        # --- log in ------------------------------------------------------
        browser.get('https://www.facebook.com/login')
        browser.find_element("xpath", '//*[@id="email"]').send_keys(_FB_EMAIL)
        password = browser.find_element("xpath", '//*[@id="pass"]')
        password.send_keys(_FB_PASSWORD)
        password.send_keys(Keys.RETURN)
        time.sleep(15)  # crude fixed wait for the login redirect to settle

        # --- expand truncated posts while scrolling ----------------------
        browser.get('https://www.facebook.com/groups/lands.israel/')
        for _ in range(5):
            for post in browser.find_elements("xpath", _POST_LINK_XPATH):
                if post.is_displayed() and post.is_enabled():
                    try:
                        # RETURN on the link expands the "See more" stub.
                        post.send_keys(Keys.RETURN)
                    except ElementNotInteractableException:
                        continue
            browser.execute_script('window.scrollTo(0, document.body.scrollHeight);')
            time.sleep(5)
        browser.execute_script('window.scrollTo(0, 0);')

        # --- collect post texts, skipping duplicates ---------------------
        for _ in range(20):
            for post in browser.find_elements("xpath", _POST_TEXT_XPATH):
                text = post.text
                if text not in seen:
                    seen.add(text)
                    collected.append(text)
            browser.execute_script('window.scrollBy(0, 2000);')
            time.sleep(1)
        browser.execute_script('window.scrollTo(0, 0);')

        # Posts never expanded still show the "See more" stub — drop them.
        return [post for post in collected if "See more" not in str(post)]
    finally:
        browser.quit()  # always release the driver, even on failure


if __name__ == "__main__":
    posts_content = scrapper_func()