import time
import urllib.request
import os
from random import randint
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from commands.driver_instance import create_url_headers, tab_handler
from commands.exec_path import imgList
from commands.universal import contains_works, save_Search, continue_Search
from ai.classifying_ai import img_classifier

def getOrderedZerochanImages(driver, exec_path, user_search, num_pics, num_pages, n_likes, filters, imageControl):
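    """Download images for `user_search` from zerochan.net.

    Collects up to num_pics images per result page across num_pages pages,
    saves them into exec_path.folder_path and returns the list of saved file
    paths. n_likes is the like-count threshold an image must exceed; when
    imageControl is truthy the previously saved search is resumed instead of
    a new one being started.
    """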
    global image_locations, image_names, ai_mode
    image_names = imgList(mode=1)
    image_locations = []

    ai_mode = 1  # run every download through the image classifier
    # filters is rebuilt here so that n_likes always takes effect
    filters = {'likes': n_likes if n_likes else 0}
    searchLimit = {'pagecount': num_pages, 'imagecount': num_pics}
    user_search = user_search.replace(" ", "+").capitalize()
    link = "https://www.zerochan.net/" + user_search

    # Start a fresh search, or resume the previously saved one
    if imageControl:
        continue_Search(driver, link, mode=2)
    else:
        driver.get(link)

    if driver.current_url == "https://www.zerochan.net/":
        print("You tried to continue a search, but there was no previous search to continue from!")
        # Fall back to a default tag so the run can still proceed
        driver.get(driver.current_url + 'angry')

    
    # If the search resolved to a parent tag page, follow its first child tag link
    is_valid_search(driver)
    if not contains_works(driver, '//*[@id="thumbs2"]'):
        print("No works found...")
        return []

    curr_page = driver.current_url
    while len(image_locations) < num_pics * num_pages:
        search_image(driver, exec_path, filters, searchLimit=searchLimit)
        hit_like_limit = bool(image_locations) and image_locations[-1] == -1
        page_stalled = curr_page == driver.current_url and len(image_locations) < num_pics * num_pages
        if hit_like_limit or page_stalled:
            if hit_like_limit:
                # Drop the -1 sentinel appended when an image fell below the like threshold
                image_locations.pop()
            print("Reached end of search results")
            break
        curr_page = driver.current_url
    driver.quit()

    return image_locations

def search_image(driver, exec_path, filters, searchLimit):
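    """Crawl result pages and download the thumbnails that pass the filters.

    Walks up to searchLimit['pagecount'] pages, downloading at most
    searchLimit['imagecount'] images per page into exec_path.folder_path.
    Saved file paths are appended to the module-level image_locations list;
    a -1 sentinel marks the point where the like threshold was reached.
    """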
    filter_link = "https://www.zerochan.net/register"

    # The main image searcher
    for _ in range(searchLimit["pagecount"]):
        temp_img_len = len(image_locations)  # images collected before this page
        save_Search(driver=driver, mode=2)   # record the current search so it can be resumed later
        # Wait up to 11 s for the thumbnail grid to render
        WebDriverWait(driver, timeout=11).until(EC.presence_of_element_located((By.XPATH, "//*[@id='thumbs2']")))
        images = driver.find_elements(By.XPATH, "//*[@id='thumbs2']//li")
        # A trailing -1 sentinel means the like threshold was reached on the previous page
        if image_locations and image_locations[-1] == -1:
            break

        for image in images:
            tempImg = image.find_element(By.XPATH, ".//a").get_attribute("href")
            # Stop once either the overall quota or this page's quota is reached
            if (len(image_locations) >= searchLimit['imagecount'] * searchLimit['pagecount']
                    or len(image_locations) - temp_img_len >= searchLimit['imagecount']):
                break
            try:
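                # The first <a> under <p> usually holds the full-size download URL;
                # fall back to the second link when the extension is not an image type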
                tempDLLink = image.find_elements(By.XPATH, ".//p//a")[0].get_attribute("href")
                if tempDLLink.split(".")[-1] not in ["jpg","png","jpeg"]:
                    tempDLLink = image.find_elements(By.XPATH, ".//p//a")[1].get_attribute("href")
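                # Build a file name from the URL: keep only the extension dot and
                # strip any non-ASCII characters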
                tempDLAttr = tempDLLink.split("/")[-1]
                counts = tempDLAttr.count(".")-1
                tempDLAttr = tempDLAttr.replace(".", " ", counts).encode("ascii", "ignore").decode("ascii")

                # Skip thumbnails that only link to the registration page and files already downloaded
                if tempImg == filter_link or tempDLAttr in image_names:
                    print("\nImage already exists, moving to another image...")
                    continue

                # Sleep for a random 0.0-1.9 s between images to avoid hammering the site
                rand_time = randint(0, 1) + randint(0, 9) / 10
                time.sleep(rand_time)
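                # Download only when the thumbnail's like counter exceeds the configured threshold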
                if int(image.find_element(By.XPATH, './/*[@class="fav"]').get_property("text"))>filters["likes"]:
                    urllib.request.install_opener(create_url_headers(tempImg=tempImg))
                    urllib.request.urlretrieve(
                        tempDLLink, f"./{exec_path.folder_path}/{tempDLAttr}"
                    )
                    image_locations.append(f"./{exec_path.folder_path}/{tempDLAttr}")
                    image_names.append(f"{tempDLAttr}")
                    print(f"\n{tempDLLink}")
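                    # Let the image classifier veto the download; rejected files are deleted again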
                    if ai_mode:
                        if img_classifier(image_locations[-1]):
                            print("AI Mode: I approve this image")
                        else:
                            os.remove(image_locations[-1])
                            image_locations.pop()
                            image_names.pop()
                            print("AI Mode: Skipping this image")

                else:
                    # First image below the like threshold: leave a -1 sentinel and stop (ordered results)
                    image_locations.append(-1)
                    break

            # Stale elements, failed downloads, etc.: close any stray tab and move on
            except Exception:
                if len(driver.window_handles) > 1:
                    print("I ran into an error, closing the tab and moving on...")
                    driver = tab_handler(driver=driver)
                time.sleep(randint(1, 3) + randint(0, 9) / 10)
                continue

            
        # Advance to the next results page; stop when there is none
        if not valid_page(driver):
            break

def valid_page(driver):
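    """Follow the last pagination link, assumed to point to the next results page.

    Returns True when the navigation succeeds and False when no pagination
    link is left, which ends the crawl.
    """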
    try:
        next_page = driver.find_elements(By.XPATH, "//*[@class='pagination']//a")[-1].get_attribute("href")
        driver.get(next_page)
        return True
    except Exception:
        return False
    
def is_valid_search(driver):
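    """If the search resolved to a parent tag page, follow its first child tag link.

    Does nothing when the page has no 'children' section.
    """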
    try:
        child_link = driver.find_element(By.XPATH, "//*[@id='children']//a").get_attribute("href")
        if child_link:
            driver.get(child_link)
    except Exception:
        pass