"""Selenium-driven Pixiv image search and download helpers.

The public entry point is :func:`getOrderedPixivImages`. The helpers share
state through module-level globals (``image_locations``, ``image_names``,
``ultimatium``, ``ai_mode``, ``prev_search``) that the entry point
initialises on every call.
"""
import os
import re
import sys
import time
import urllib.request
from datetime import date, datetime
from random import randint

# Must run before the project-local imports below so that ``commands`` and
# ``ai`` resolve from the parent directory.
sys.path.append("..")

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

from commands.driver_instance import create_url_headers, tab_handler
from commands.exec_path import imgList
from commands.universal import searchQuery, save_Search, continue_Search, contains_works
from ai.classifying_ai import img_classifier

# Alphabets used by XPath ``translate()`` to emulate case-insensitive matching.
_XPATH_UPPER = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
_XPATH_LOWER = "abcdefghijklmnopqrstuvwxyz"


def _prepend_query(url, params):
    """Return *url* with *params* placed in front of its existing query string.

    Works whether or not *url* already has a ``?`` part, so callers never
    index into a missing query component or emit a second ``?``.
    """
    base, _, query = url.partition("?")
    combined = "&".join(part for part in (params.strip("&"), query) if part)
    return f"{base}?{combined}" if combined else base


def getOrderedPixivImages(driver, exec_path, user_search, num_pics, num_pages,
                          searchTypes, viewRestriction, imageControl,
                          n_likes, n_bookmarks, n_views,
                          start_date=0, end_date=0,
                          user_name=0, pass_word=0):
    """Search Pixiv for *user_search* and download the matching images.

    Args:
        driver: Selenium WebDriver. It is quit before the normal return.
        exec_path: Object exposing ``folder_path`` (the download directory).
        user_search: Query passed to ``searchQuery``.
        num_pics: Maximum images to collect per result page.
        num_pages: Number of result pages to walk.
        searchTypes: Contains 0 to search the "popular works" section and/or
            1 to search the regular result grid.
        viewRestriction: Contains 0 for safe results and/or 1 for r-18.
        imageControl: Contains 0 for original-resolution downloads, 1 to
            resume a saved search, 2 to order by oldest.
        n_likes, n_bookmarks, n_views: Minimum popularity thresholds; falsy
            values disable the respective filter.
        start_date, end_date: ``YYYY-MM-DD`` strings. An invalid start date
            disables date filtering; an invalid end date becomes today.
        user_name, pass_word: Credentials used when not already logged in.

    Returns:
        List of paths of the downloaded images (``[]`` if no works found).
    """
    global image_locations, image_names, ultimatium, ai_mode, prev_search
    image_names = imgList(mode=1)
    image_locations = []
    prev_search = 0
    link = "https://www.pixiv.net/tags/illustration"
    success_login = False

    filters = {
        "likes": n_likes or 0,
        "bookmarks": n_bookmarks or 0,
        "viewcount": n_views or 0,
    }
    searchLimit = {"pagecount": num_pages, "imagecount": num_pics}
    start_date = start_date if date_handler(start_date) else ""
    end_date = end_date if date_handler(end_date) else date.today()

    if 1 in imageControl:
        continue_Search(driver, link, mode=0)
    else:
        driver.get(link)

    # Will use those when not logged in
    bar_search = generate_xpath_query("//input", "@placeholder", "search works")
    li_search = (
        generate_xpath_query("//h3", "text()", "works", "illustrations and manga", "illustrations")
        + "/ancestor::section[1]/div[2]//li"
    )
    premium_search = (
        generate_xpath_query("//h3", "text()", "popular works")
        + "/ancestor::section[1]/div[2]//li"
    )
    search_param = {
        "bar_search": bar_search,
        "li_search": li_search,
        "premium_search": premium_search,
    }

    # Check if logged in otherwise log in with credentials
    try:
        # Check for favorite button (only appears for logged in users)
        favorite_buttons = driver.find_elements(
            By.XPATH, case_insensitive_xpath_contains("//button", "Add to your favorites")
        )
        if favorite_buttons:
            success_login = True
        elif user_name and pass_word:
            print("Logging in...")
            if login_handler(driver, exec_path, user_name, pass_word):
                success_login = True
        if not success_login:
            print("Failed! You are not logged in...")
    except Exception as e:
        print(f"Failed! You are not logged in... Exception: {e}")

    if 1 not in imageControl:
        searchQuery(user_search, driver, search_param["bar_search"], isLoggedIn=success_login)
        time.sleep(2)
        if start_date:
            # The helper copes with URLs with or without an existing query,
            # so the logged-in and anonymous cases share one code path.
            driver.get(_prepend_query(driver.current_url, f"scd={start_date}&ecd={end_date}"))
            time.sleep(2)

    premiumSearch = 1 if 0 in searchTypes else 0
    freemiumSearch = 1 if 1 in searchTypes else 0
    pg_friendly = 1 if 0 in viewRestriction else 0
    r_18 = 1 if 1 in viewRestriction else 0
    ultimatium = 1 if 0 in imageControl else 0
    order_by_oldest = 1 if 2 in imageControl else 0
    ai_mode = 1

    if not contains_works(driver, search_param["li_search"]):
        print("No works found...")
        # NOTE(review): the driver is not quit on this path, unlike the
        # normal return below - confirm whether callers rely on that.
        return []

    if premiumSearch == 1:
        search_image(driver, exec_path, filters, search_param)

    # Switch to english
    try:
        english_span = driver.find_element(By.XPATH, "//span[contains(text(), 'English')]")
        driver.execute_script("arguments[0].click();", english_span)
    except Exception:
        pass  # best effort: the language switch is optional

    # Apply filters if logged in
    if success_login:
        try:
            driver.find_element(
                By.XPATH, "/html/body/div[1]/div[2]/div/div[3]/div/div[5]/nav/a[2]"
            ).click()
            print("Illustrations only")
            time.sleep(1)
            mode = ""
            order = ""
            if pg_friendly == 1 and r_18 == 1:
                print("PG Friendly and r-18")
            elif pg_friendly == 1:
                mode = "mode=safe&"
                print("PG Friendly")
            elif r_18 == 1:
                mode = "mode=r18&"
                print("r-18")
            if order_by_oldest == 1:
                order = "order=date&"
                print("Order by oldest")
            driver.get(_prepend_query(driver.current_url, f"{order}{mode}"))
        except Exception:
            pass  # best effort: continue with unfiltered results

    # Click show all results
    try:
        time.sleep(1)
        driver.find_element(
            By.XPATH, case_insensitive_xpath_contains("//div", "Show all")
        ).click()
    except Exception:
        pass  # the button is not always present

    # Images collected so far (premium section) do not count towards the
    # freemium quota checked inside search_image.
    prev_search = len(image_locations)
    if freemiumSearch:
        wanted = num_pics * num_pages
        while len(image_locations) < wanted:
            search_image(driver, exec_path, filters,
                         search_param=search_param, searchLimit=searchLimit)
            # NOTE(review): search_image already advances through valid_page
            # after each page; this extra call may skip a page - confirm.
            if len(image_locations) < wanted and not valid_page(driver):
                print("Reached end of search results")
                break

    driver.quit()
    return image_locations


def search_image(driver, exec_path, filters, search_param, searchLimit=None):
    """Collect images from the current results, walking up to ``pagecount`` pages.

    Args:
        driver: Selenium WebDriver positioned on a search-results page.
        exec_path: Object exposing ``folder_path``.
        filters: Minimum ``likes`` / ``bookmarks`` / ``viewcount`` values.
        search_param: Dict of XPath strings (``li_search``, ``premium_search``).
        searchLimit: ``{"pagecount": int, "imagecount": int}``. Defaults to
            ``{"pagecount": 1, "imagecount": 99}``; ``imagecount == 99`` is
            the sentinel ``awaitPageLoad`` uses to select the popular section.
    """
    if searchLimit is None:
        searchLimit = {"pagecount": 1, "imagecount": 99}

    # Searches using premium or freemium
    search_type = awaitPageLoad(driver=driver, searchLimit=searchLimit, search_param=search_param)
    if search_type == -1:
        return

    per_page = searchLimit["imagecount"]
    overall = per_page * searchLimit["pagecount"]

    # The main image searcher
    for _ in range(searchLimit["pagecount"]):
        temp_img_len = len(image_locations)
        try:
            WebDriverWait(driver, timeout=9).until(
                EC.presence_of_element_located((By.XPATH, search_param["li_search"] + "//a"))
            )
        except TimeoutException:
            print("\nTimed out waiting for the result grid...")
            return

        images = search_image_type(search_type, driver, search_param=search_param)
        for image in images:
            collected = len(image_locations)
            if collected - prev_search >= overall or collected - temp_img_len >= per_page:
                break

            image = image.find_element(By.XPATH, ".//a")
            imageLink = image.find_elements(By.XPATH, ".//img")
            if image.get_attribute("href").rsplit("/", 1)[-1] in image_names:
                print("\nImage already exists, moving to another image...")
                continue

            if ai_mode == 1 and process_ai_mode(imageLink, image, driver, exec_path):
                continue

            try:
                if sum(filters.values()) == 0 and len(imageLink):
                    # Dl the image directly from the grid
                    thumbnailDownloader(imageLink=imageLink, image=image,
                                        driver=driver, exec_path=exec_path)
                else:
                    # Dl the image from the image page (opens a new tab)
                    driver, _ = tab_handler(driver=driver, image=image)
                    WebDriverWait(driver, timeout=11).until(
                        EC.presence_of_element_located((By.XPATH, "//div[@role='presentation']"))
                    )
                    tempDL = driver.find_element(By.XPATH, "//div[@role='presentation']//img")
                    imagePopularity = parseImageData(
                        filters=filters, Data=driver.find_elements(By.TAG_NAME, "dd")
                    )
                    time.sleep(1)
                    # Check if image filters are satisfied
                    if filterOptions(filters, imagePopularity=imagePopularity):
                        tempDLLink = tempDL.get_attribute("src")
                        # Dl the original rez image
                        if ultimatium:
                            tempDLLink = tempDLLink.replace(
                                "img-master", "img-original"
                            ).replace("_master1200", "")
                        download_image(imageLink=tempDLLink, exec_path=exec_path, driver=driver)
                    else:
                        print("\nImage filters not satisfied...")
                    driver = tab_handler(driver=driver)
                time.sleep(0.3)
            # In case of stale element or any other errors
            except Exception:
                # A second window handle means the image tab is still open.
                if driver.window_handles[-1] != driver.window_handles[0]:
                    print("\nI ran into an error, moving on...")
                    driver = tab_handler(driver=driver)
                time.sleep(randint(1, 3) + randint(0, 9) / 10)
                continue

        save_Search(driver, mode=0)
        if not valid_page(driver):
            break


######## FUNCTIONS PRONE TO CHANGE ########
def login_handler(driver, exec_path, user_name, pass_word):
    """Open the login form, type the credentials and submit.

    Returns:
        True once the login button has been clicked. Whether the site
        accepted the credentials is not verified here.
    """
    time.sleep(5)
    login_btn = driver.find_elements(By.XPATH, "//*[@class='sc-oh3a2p-4 gHKmNu']//a")[1]
    login_btn.click()
    WebDriverWait(driver, timeout=11).until(
        EC.presence_of_element_located((By.XPATH, "//*[@class='sc-2o1uwj-0 elngKN']"))
    )
    user_btn = driver.find_element(
        By.XPATH, "//*[@class='sc-2o1uwj-0 elngKN']"
    ).find_elements(By.TAG_NAME, "fieldset")

    # Use a fresh chain per field so the second perform() cannot replay
    # actions still queued from the first.
    ActionChains(driver).click(user_btn[0]).send_keys(user_name).perform()
    time.sleep(0.5)
    ActionChains(driver).click(user_btn[1]).send_keys(pass_word).perform()

    # Log in button
    driver.find_element(By.XPATH, case_insensitive_xpath_contains("//button", "log in")).click()
    return True


def download_image(imageLink, exec_path, driver, mode=1):
    """Download *imageLink* into ``exec_path.folder_path``.

    Args:
        imageLink: URL of the image.
        exec_path: Object exposing ``folder_path``.
        driver: Selenium WebDriver (its current URL feeds the request headers).
        mode: Truthy records the file in ``image_locations``/``image_names``;
            falsy treats it as a temporary thumbnail and returns its path.

    Returns:
        The saved file path when *mode* is falsy, otherwise ``None``.
    """
    tempDLName = imageLink.rsplit("/", 1)[-1]
    img_loc = f"./{exec_path.folder_path}/{tempDLName}"
    if not ultimatium or not mode:
        installUrlOpeners(driver=driver, mode=0)
    else:
        installUrlOpeners(imageLink)

    try:
        requestUrlretrieve(imageLink=imageLink, img_loc=img_loc)
    except Exception:
        # Retry with a .png extension, and rename the target as well so the
        # file on disk carries the extension of what was downloaded.
        imageLink = imageLink.rsplit(".", 1)[0] + ".png"
        tempDLName = imageLink.rsplit("/", 1)[-1]
        img_loc = f"./{exec_path.folder_path}/{tempDLName}"
        requestUrlretrieve(imageLink, img_loc=img_loc)

    print(f"\n{imageLink}")
    if mode:
        image_locations.append(img_loc)
        image_names.append(tempDLName.split(".")[0])
        return None
    return img_loc


def thumbnailDownloader(imageLink, image, driver, exec_path, mode=1):
    """Download the image behind a grid thumbnail.

    Args:
        imageLink: List of ``<img>`` elements found inside *image*.
        image: The anchor element of the grid entry.
        mode: Forwarded to ``image_type`` and ``download_image``.

    Returns:
        Whatever ``download_image`` returns for the given *mode*.
    """
    imageLink = image_type(imageLink=imageLink, mode=mode)
    # Hover the thumbnail before downloading.
    ActionChains(driver=driver).move_to_element(
        image.find_element(By.XPATH, ".//img")
    ).perform()
    return download_image(imageLink=imageLink, exec_path=exec_path, driver=driver, mode=mode)


######## URLLIB LIBRARY ########
def installUrlOpeners(driver, mode=1):
    """Install the global urllib opener built by ``create_url_headers``.

    When ``ultimatium`` and *mode* are both truthy, *driver* is handed to
    ``create_url_headers`` unchanged (``download_image`` passes the image
    URL in that case); otherwise ``driver.current_url`` is used.
    """
    # Mode 0 means its a thumbnail
    if ultimatium and mode:
        urllib.request.install_opener(create_url_headers(driver))
    else:
        urllib.request.install_opener(create_url_headers(driver.current_url))


def requestUrlretrieve(imageLink, img_loc):
    """Download *imageLink* to the local path *img_loc*."""
    urllib.request.urlretrieve(imageLink, img_loc)


######## HELPER FUNCTIONS (UNLIKELY TO CHANGE) ########
# Handles the search type (premium or freemium)
def search_image_type(search_type, driver, search_param):
    """Return the grid ``<li>`` elements for the given search type.

    ``0`` selects the popular-works section and ``1`` the regular grid;
    any other value yields ``None``.
    """
    if search_type == 0:
        return driver.find_elements(By.XPATH, search_param["premium_search"])
    if search_type == 1:
        return driver.find_elements(By.XPATH, search_param["li_search"])
    return None


# Handles the image type (if mode then it is not a thumbnail, so switch it to view res else Max res)
def image_type(imageLink, mode=0):
    """Derive the download URL from the first thumbnail element's ``src``.

    With a falsy *mode* the thumbnail URL is returned untouched. Otherwise
    it is rewritten to the ``img-master`` variant, and further to the
    ``img-original`` variant when ``ultimatium`` is set.
    """
    imageLink = imageLink[0].get_attribute("src")
    if mode:
        # View res
        imageLink = re.sub(r"c/.*?/.*?/", "img-master/", imageLink)
        imageLink = imageLink.replace("square", "master").replace("custom", "master")
        if ultimatium:
            # Max res
            imageLink = imageLink.replace("img-master", "img-original").replace("_master1200", "")
    return imageLink


# Handles finding the popular or freemium section
def awaitPageLoad(driver, searchLimit, search_param, search_type=0):
    """Wait for the relevant result section to appear.

    Returns:
        *search_type* (default 0) when the popular section is found, 1 when
        the regular grid is found, and -1 when neither can be located.
    """
    # Waits on the page to load (for popular or freemium)
    if searchLimit["imagecount"] == 99:
        try:
            WebDriverWait(driver, timeout=12).until(
                EC.presence_of_element_located((By.XPATH, search_param["premium_search"]))
            )
            print("Premium section found, searching for images...")
        except Exception:
            print("No popular section")
            return -1
        return search_type

    try:
        WebDriverWait(driver, timeout=12).until(
            EC.presence_of_element_located((By.XPATH, search_param["li_search"]))
        )
        print("\nFreemium section found, searching for images...")
    except Exception:
        # One retry after a refresh before giving up.
        driver.refresh()
        time.sleep(12)
        if not driver.find_elements(By.XPATH, search_param["li_search"]):
            # Return the same failure code as the premium branch so the
            # caller's ``== -1`` check catches it.
            return -1
    return 1


def filterOptions(filters, imagePopularity):
    """Return True when every threshold in *filters* is met by *imagePopularity*."""
    return all(imagePopularity[key] >= minimum for key, minimum in filters.items())


def parseImageData(Data, filters):
    """Map the keys of *filters*, in order, to the integer text of *Data*.

    *Data* is a sequence of elements exposing ``.text``; thousands
    separators (``,``) are removed before conversion.
    """
    parsedData = {}
    for key, element in zip(filters, Data):
        parsedData[key] = int(element.text.replace(",", ""))
    # Preserve the failure when fewer elements than filter keys are present.
    if len(parsedData) < len(filters):
        raise IndexError("list index out of range")
    return parsedData


def valid_page(driver):
    """Navigate to the next results page if there is one.

    Returns:
        1 after navigating to the next page, 0 when the pager is missing,
        has no usable link, or already points at the current URL.
    """
    cur_url = driver.current_url
    try:
        next_page = (
            driver.find_element(By.XPATH, '//*[@class="sc-xhhh7v-0 kYtoqc"]')
            .find_elements(By.XPATH, ".//a")[-1]
            .get_attribute("href")
        )
        if not next_page or cur_url == next_page:
            return 0
        driver.get(next_page)
        return 1
    except Exception:
        return 0


def date_handler(sel_date):
    """Return 1 if *sel_date* is a valid ``YYYY-MM-DD`` string, else 0.

    Non-string values (such as the ``0`` defaults used by callers) and
    strings with fewer than three ``-`` separated parts count as invalid.
    """
    if not isinstance(sel_date, str):
        return 0
    parts = sel_date.split("-")
    try:
        datetime(int(parts[0]), int(parts[1]), int(parts[2]))
    except (ValueError, IndexError):
        return 0
    return 1


def process_ai_mode(imageLink, image, driver, exec_path):
    """Classify a grid thumbnail and report whether the image should be skipped.

    Returns:
        False when ``img_classifier`` approves the thumbnail; True when it
        rejects it or when anything fails along the way.
    """
    img_loc = None
    try:
        # Dl the image thumbnail from the grid
        img_loc = thumbnailDownloader(imageLink=imageLink, image=image,
                                      driver=driver, exec_path=exec_path, mode=0)
        if img_classifier(img_loc):
            print("AI Mode: I approve this image")
            return False
        print("AI Mode: Skipping this image")
        return True
    except Exception:
        return True
    finally:
        # The thumbnail is only needed for classification; always clean up.
        if img_loc:
            try:
                os.remove(img_loc)
            except OSError:
                pass


def case_insensitive_xpath_contains(xpath, text):
    """Build an XPath matching *xpath* nodes whose text contains *text*, ignoring ASCII case.

    *text* is embedded in single quotes without escaping, so it must not
    contain a single quote.
    """
    return (
        f"{xpath}[contains(translate(text(), '{_XPATH_UPPER}', '{_XPATH_LOWER}'), "
        f"'{text.lower()}')]"
    )


def generate_xpath_query(base_xpath, attribute, *args):
    """Build an XPath matching nodes whose *attribute* equals any of *args*, ignoring ASCII case.

    Each value is embedded in single quotes without escaping, so values
    must not contain a single quote.
    """
    conditions = " or ".join(
        f"translate({attribute}, '{_XPATH_UPPER}', '{_XPATH_LOWER}') = '{arg.lower()}'"
        for arg in args
    )
    return base_xpath + "[" + conditions + "]"