# ARAZIM/models/scrapper.py
import time
import os
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.common.exceptions import ElementNotInteractableException
posts_content = []
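
# A hedged alternative for the browser setup in scrapper_func() below:
# Selenium 4.10+ removed the ``executable_path`` and ``service_log_path``
# keywords used there. The sketch below assumes either that Selenium Manager
# (4.6+) can locate geckodriver on its own, or that an explicit path is passed
# to a Service object. It is an illustration, not part of the original script.
def make_headless_firefox(driver_path=None):
    """Build a headless Firefox driver; driver_path is optional (hypothetical helper)."""
    from selenium.webdriver.firefox.service import Service

    opts = FirefoxOptions()
    opts.add_argument('--headless')
    if driver_path:
        # Explicit geckodriver location, e.g. 'geckodriver-v0.33.0-win32/geckodriver.exe'
        return webdriver.Firefox(options=opts, service=Service(executable_path=driver_path))
    # Otherwise let Selenium Manager resolve the driver from PATH / its cache.
    return webdriver.Firefox(options=opts)
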
def scrapper_func():
    # Launch headless Firefox with a pinned geckodriver binary.
    # (A Selenium 4.6+ alternative is sketched in make_headless_firefox() above.)
    options = FirefoxOptions()
    options.add_argument('--headless')
    browser = webdriver.Firefox(options=options, executable_path='geckodriver-v0.33.0-win32/geckodriver.exe', service_log_path=os.devnull)
    posts_content = []
    try:
        # Log in to Facebook. The credentials are hardcoded here; an
        # environment-variable alternative is sketched after this function.
        browser.get('https://www.facebook.com/login')
        username = browser.find_element("xpath", '//*[@id="email"]')
        username.send_keys('reaznadlan@gmail.com')
        # username.send_keys(Keys.RETURN)
        # my_password = getpass.getpass()
        # Wait till it loads then move forward
        password = browser.find_element("xpath", '//*[@id="pass"]')
        password.send_keys('hadad070707')
        password.send_keys(Keys.RETURN)
        # Delay so the login redirect can finish before navigating on.
        time.sleep(15)  # Adjust the delay duration as needed

        # Open the target group feed.
        browser.get('https://www.facebook.com/groups/lands.israel/')

        # Expand truncated posts: send RETURN to every visible post link,
        # then scroll to the bottom to load more, five times in total.
        for _ in range(5):
            posts = browser.find_elements("xpath", '//div[@class="x1i10hfl xjbqb8w x6umtig x1b1mbwd xaqea5y xav7gou x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz xt0b8zv xzsf02u x1s688f"]')
            for post in posts:
                if post.is_displayed() and post.is_enabled():
                    try:
                        post.send_keys(Keys.RETURN)
                    except ElementNotInteractableException:
                        continue
            browser.execute_script('window.scrollTo(0, document.body.scrollHeight);')
            sleep(5)

        # Scroll back to the top of the page.
        browser.execute_script('window.scrollTo(0, 0);')

        # Collect the post texts while scrolling, skipping duplicates.
        for _ in range(20):
            posts = browser.find_elements("xpath", '//span[@class="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x xudqn12 x3x7a5m x6prxxf xvq8zen xo1l8bm xzsf02u x1yc453h"]')
            for post in posts:
                if post.text not in posts_content:
                    posts_content.append(post.text)
            browser.execute_script('window.scrollBy(0, 2000);')
            sleep(1)

        # Scroll back to the top of the page.
        browser.execute_script('window.scrollTo(0, 0);')

        # Keep only posts that were fully expanded (no trailing "See more").
        updated_posts = []
        for post in posts_content:
            if "See more" not in str(post):
                updated_posts.append(post)
        return updated_posts
    finally:
        browser.quit()  # Make sure to quit the browser when done
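
# A minimal sketch of loading the login credentials from the environment
# instead of hardcoding them in scrapper_func(). The variable names FB_EMAIL
# and FB_PASSWORD are illustrative assumptions, not part of the original script.
def load_credentials():
    """Read Facebook credentials from environment variables (hypothetical names)."""
    email = os.environ.get('FB_EMAIL', '')
    password = os.environ.get('FB_PASSWORD', '')
    if not email or not password:
        raise RuntimeError('Set FB_EMAIL and FB_PASSWORD before running the scrapper.')
    return email, password
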
if __name__ == "__main__":
    posts_content = scrapper_func()
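
# The __main__ block above only keeps the scraped posts in memory. Below is a
# minimal, hedged sketch of persisting them; the output path 'posts.txt' and
# the helper name are assumptions, not part of the original scrapper.
def save_posts(posts, path='posts.txt'):
    """Write one post per line to a UTF-8 text file (hypothetical helper)."""
    with open(path, 'w', encoding='utf-8') as fh:
        for text in posts:
            fh.write(text.replace('\n', ' ') + '\n')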