|
import time |
|
import os |
|
from time import sleep |
|
from selenium import webdriver |
|
from selenium.webdriver.common.keys import Keys |
|
from selenium.webdriver.firefox.options import Options as FirefoxOptions |
|
from selenium.common.exceptions import ElementNotInteractableException |
|
|
|
# Module-level holder for the scraped post texts. It starts empty and is
# rebound to the scraper's result when this file is executed as a script.
posts_content = []
|
|
|
def scrapper_func(*, email=None, password=None,
                  group_url='https://www.facebook.com/groups/lands.israel/'):
    """Log in to Facebook with headless Firefox and collect group post texts.

    The function logs in, opens ``group_url``, presses RETURN on the
    expandable elements of the feed while scrolling to the bottom, then
    scrolls down step by step collecting the text of every matching span.

    Args:
        email: Login e-mail. When ``None``, the ``FB_EMAIL`` environment
            variable is used, falling back to the account that was
            originally embedded in this function.
        password: Login password. When ``None``, the ``FB_PASSWORD``
            environment variable is used, falling back to the originally
            embedded value.
        group_url: URL of the group page to scrape.

    Returns:
        A list of unique post texts in first-seen order, excluding every
        text that still contains the literal ``"See more"``.

    Raises:
        Any Selenium exception raised while driving the browser (other than
        ``ElementNotInteractableException`` during expansion) propagates to
        the caller. The browser is always quit first.
    """
    # SECURITY: real credentials are embedded below as fallbacks purely to
    # keep existing no-argument callers working. They should be removed from
    # source control and supplied via arguments or environment variables.
    if email is None:
        email = os.environ.get('FB_EMAIL', 'reaznadlan@gmail.com')
    if password is None:
        password = os.environ.get('FB_PASSWORD', 'hadad070707')

    # Class-based locators copied verbatim; they depend on Facebook's
    # generated class names.
    # NOTE(review): presumably the first one matches the "See more"
    # expanders and the second one the post body text - confirm on the page.
    expander_xpath = '//div[@class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz xt0b8zv xzsf02u x1s688f"]'
    post_text_xpath = '//span[@class="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x xudqn12 x3x7a5m x6prxxf xvq8zen xo1l8bm xzsf02u x1yc453h"]'

    options = FirefoxOptions()
    options.add_argument('--headless')
    # NOTE(review): the executable_path / service_log_path keyword arguments
    # are deprecated in Selenium 4 in favour of a Service object - confirm
    # the installed Selenium version before upgrading.
    browser = webdriver.Firefox(
        options=options,
        executable_path='geckodriver-v0.33.0-win32/geckodriver.exe',
        service_log_path=os.devnull,
    )

    try:
        # --- Log in ---------------------------------------------------
        browser.get('https://www.facebook.com/login')
        email_field = browser.find_element("xpath", '//*[@id="email"]')
        email_field.send_keys(email)
        password_field = browser.find_element("xpath", '//*[@id="pass"]')
        password_field.send_keys(password)
        password_field.send_keys(Keys.RETURN)

        # Fixed pause to let the login complete before navigating away.
        time.sleep(15)

        browser.get(group_url)

        # --- Expand posts while loading more of the feed ---------------
        for _ in range(5):
            for element in browser.find_elements("xpath", expander_xpath):
                if element.is_displayed() and element.is_enabled():
                    try:
                        element.send_keys(Keys.RETURN)
                    except ElementNotInteractableException:
                        # Best effort: skip elements that cannot take keys.
                        continue
            browser.execute_script(
                'window.scrollTo(0, document.body.scrollHeight);')
            time.sleep(5)

        browser.execute_script('window.scrollTo(0, 0);')

        # --- Collect unique texts while scrolling down ------------------
        collected = []
        seen = set()  # O(1) duplicate check; `collected` keeps the order
        for _ in range(20):
            for span in browser.find_elements("xpath", post_text_xpath):
                text = span.text  # read once: each access is a driver call
                if text not in seen:
                    seen.add(text)
                    collected.append(text)
            browser.execute_script('window.scrollBy(0, 2000);')
            time.sleep(1)

        browser.execute_script('window.scrollTo(0, 0);')

        # Drop texts that still show the "See more" marker.
        return [text for text in collected if "See more" not in text]
    finally:
        # Always release the browser process, even on failure.
        browser.quit()
|
|
|
if __name__ == "__main__":
    # Run the scraper only when this file is executed as a script; the
    # result is stored in the module-level ``posts_content`` name.
    posts_content = scrapper_func()