import time
import os
import aiohttp
from random import randint
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from commands.driver_instance import create_url_headers, tab_handler
from commands.exec_path import imgList
from commands.universal import searchQuery, save_Search, continue_Search, contains_works
from ai.classifying_ai import img_classifier

async def getOrderedDanbooruImages(driver, user_search, num_pics, num_pages, filters, bl_tags, inc_tags, exec_path, imageControl):
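    """Search Danbooru for `user_search`, collecting up to `num_pics` images from
    each of up to `num_pages` result pages. The integer flags in `filters` control
    score ordering (0), exact tag matching (1), rating limits (2-4), and AI vetting
    (5); `bl_tags`/`inc_tags` are comma-separated tag strings. Returns the list of
    file paths of every downloaded image."""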
    global image_locations, bl_tags_list, inc_tags_list, image_names, ai_mode, rating_filters
    image_names = imgList(mode=0)
    image_locations = []
    link = "https://danbooru.donmai.us/"

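    # An imageControl value of 0 resumes the previously saved search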
    if 0 in imageControl:
        continue_Search(driver, link, mode=1)
    else:
        driver.get(link)

    # Rating filter creation (ratings to exclude from results)
    rating_filters = ["e"] if 2 in filters else []
    rating_filters = ["s", "e"] if 3 in filters else rating_filters
    rating_filters = ["q", "s", "e"] if 4 in filters else rating_filters

    # Tag list creation
    score = 1 if 0 in filters else 0
    match_type = 1 if 1 in filters else 0
    r_18 = pg_lenient() if 2 in filters else []
    r_18 = pg_strict() if 3 in filters else r_18
    ai_mode = 1 if 5 in filters else 0

    continue_search = 1 if imageControl else 0

    # Replace spaces with underscores so user input matches Danbooru's tag format
    user_search = user_search.replace(" ", "_")
    score = filter_score(score)

    bl_tags_list = create_filter_tag_list(bl_tags, r_18)
    inc_tags_list = create_tag_list(inc_tags, match_type) if inc_tags else []
    
    if 0 not in imageControl:
        searchQuery(user_search, driver, '//*[@name="tags"]', mode=1, score=score)
    
    if not contains_works(driver, '//*[@class="posts-container gap-2"]'):
        print("No works found...")
        return []
    
    if ai_mode:
        WebDriverWait(driver, timeout=11).until(EC.presence_of_element_located((By.XPATH, '//*[@class="popup-menu-content"]')))
        driver.get(driver.find_element(By.XPATH, '(//*[@class="popup-menu-content"]//li)[6]//a').get_attribute("href"))

    curr_page = driver.current_url
    while len(image_locations) < num_pics*num_pages:
        await pages_to_search(driver, num_pages, num_pics, exec_path)
        if curr_page == driver.current_url and len(image_locations) < num_pics*num_pages:
            print("Reached end of search results")
            break
        curr_page = driver.current_url
    driver.close()

    return image_locations

def filter_score(score):
    # Append Danbooru's "order:score" metatag when score ordering is requested
    return " order:score" if score else ""

async def pages_to_search(driver, num_pages, num_pics, exec_path):
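    """Walk up to `num_pages` result pages, harvesting images from each grid and
    stopping early once the quota is met or the paginator runs out."""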
    for i in range(num_pages):
        WebDriverWait(driver, timeout=11).until(EC.presence_of_element_located((By.XPATH, '//*[@class="posts-container gap-2"]')))
        # Selects the picture grids
        images = driver.find_element(
            By.XPATH, '//*[@class="posts-container gap-2"]'
        ).find_elements(By.CLASS_NAME, "post-preview-link")
        await grid_search(driver, num_pics, images, exec_path, num_pages)
        save_Search(driver, mode=1)
        if not valid_page(driver) or len(image_locations) >= num_pics*num_pages:
            break

async def grid_search(driver, num_pics, images, exec_path, num_pages):
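    """Scan the thumbnails on the current results page and download every image
    that passes the rating, tag, and (optionally) AI-classifier filters."""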
    temp_img_len = len(image_locations)
    for n_iter, image in enumerate(images):
        if len(image_locations) >= num_pics*num_pages or len(image_locations) - temp_img_len >= num_pics:
            break

        try:
            # Skip images that already exist on disk (compared by filename stem)
            img_name = image.find_element(By.XPATH, ".//img").get_attribute("src")
            img_name = img_name.split("/")[-1].split(".")[0].encode("ascii", "ignore").decode("ascii")
            if img_name in image_names:
                print("\nImage already exists, moving to another image...")
                continue

            # Has to be checked this way otherwise tags are not visible in headless mode
            img_tags = driver.find_elements(By.CLASS_NAME, "post-preview")[n_iter].get_attribute('data-tags')
            img_rating = driver.find_elements(By.CLASS_NAME, "post-preview")[n_iter].get_attribute('data-rating')

            if filter_ratings(img_rating, rating_filters) and filter_tags(bl_tags_list, inc_tags_list, img_tags):
                if ai_mode:
                    # Download the thumbnail so the classifier can vet it first
                    image_loc = await download_image(exec_path=exec_path, driver=driver, image=image)
                    approved = img_classifier(image_loc)
                    os.remove(image_loc)
                    if approved:
                        print("AI Mode: I approve this image")
                    else:
                        print("AI Mode: Skipping this image")
                        continue

                driver, tempImg = tab_handler(driver=driver,image=image)
                WebDriverWait(driver, timeout=15).until(EC.presence_of_element_located((By.XPATH, '//*[@id="post-option-download"]/a')))
                await download_image(exec_path=exec_path, driver=driver)
                driver = tab_handler(driver=driver)

            else:
                print("\nFilters did not match/Not an image, moving to another image...")

        except Exception:
            print("\nI ran into an error, closing the tab and moving on...")
            # Close the post tab if it is still open, then back off briefly
            if driver.window_handles[-1] != driver.window_handles[0]:
                driver = tab_handler(driver=driver)
            time.sleep(randint(0, 2) + randint(0, 9) / 10)

def filter_ratings(img_rating, rating_filters):
    # Pass the image unless its rating is in the excluded set
    return img_rating not in rating_filters

def filter_tags(bl_tags_list, inc_tags_list, img_tags):
    # Set of the picture's tags for O(1) membership checks
    img_tag_set = set(img_tags.split(" "))

    # Included tags: a trailing 1 sentinel (see create_tag_list) means every tag
    # must match; otherwise any single match suffices. Read the sentinel without
    # popping it so the shared list isn't mutated between calls.
    if inc_tags_list and inc_tags_list[-1] == 1:
        for tag in inc_tags_list[:-1]:
            if tag not in img_tag_set:
                return False
    elif inc_tags_list:
        if not any(tag in img_tag_set for tag in inc_tags_list):
            return False

    # Note that bl_tags_list is never empty since it filters videos
    for tag in bl_tags_list:
        if tag in img_tag_set:
            return False
    return True

def create_tag_list(inc_tags, match_type):
    # Normalise the comma-separated include list; append a 1 sentinel when every
    # tag must match (exact-match mode, read by filter_tags)
    temp_tags = [tag.lstrip().replace(" ", "_") for tag in inc_tags.split(",")]
    if match_type:
        temp_tags.append(1)
    return temp_tags

def create_filter_tag_list(bl_tags, r_18):
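    # Video posts are always blacklisted; user blacklist tags and any
    # rating-derived tags are appended on top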
    temp_tags = ["animated", "video", "sound"]
    if bl_tags:
        temp_tags += [tag.lstrip().replace(" ","_") for tag in bl_tags.split(",")]
    if r_18:
        temp_tags += r_18
    return temp_tags

# Find the next page and ensure it isn't the last page
def valid_page(driver):
    cur_url = driver.current_url
    driver.find_element(By.CLASS_NAME, "paginator-next").click()
    if cur_url == driver.current_url:
        return 0
    return 1

def pg_lenient():
    return ["sex", "penis", "vaginal", "completely_nude", "nude", "exposed_boobs", "ahegao", "cum", "no_panties", "no_bra",
            "nipple_piercing", "anal_fluid", "uncensored", "see-through", "pussy", "cunnilingus", "oral", "ass_focus",
            "anal", "sex_from_behind", "cum_on_clothes", "cum_on_face", "nipple", "nipples", "missionary",
            "fellatio", "rape", "breasts_out", "cum_in_pussy", "condom", "dildo", "sex_toy", "cum_in_mouth", "heavy_breathing", "cum_on_tongue",
            "panties", "panty_pull", "nude_cover", "underwear_only", "grabbing_own_breast", "ass_grab", "censored", "areola_slip", "areolae", "torn_pantyhose", "micro_bikini", "steaming_body"]

def pg_strict():
    return pg_lenient() + ["piercings", "cleavage", "boobs", "thongs", "fellatio_gesture", "mosaic_censoring", "ass",
                            "covered_nipples", "thigh_focus", "thighs", "bikini", "swimsuit", "grabbing_another's_breast", "huge_breasts",
                            "foot_focus", "licking_foot", "foot_worship", "shirt_lift", "clothes_lift", "underwear", "panties_under_pantyhose"]

async def download_image(exec_path, driver, image=0):
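    """Stream one image to disk. With `image` set, fetch the thumbnail `src`
    (used by AI mode and not recorded); otherwise fetch the full-resolution
    download link on the open post page and record the file in the global
    bookkeeping lists. Returns the local file path."""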
    if not image:
        tempDL = driver.find_element(By.XPATH, '//*[@id="post-option-download"]/a')
        tempDLAttr = tempDL.get_attribute("href")
        tempDLName = tempDL.get_attribute("download").encode('ascii', 'ignore').decode('ascii')
    else:
        tempDLAttr = image.find_element(By.XPATH, ".//img").get_attribute('src')
        tempDLName = tempDLAttr.split("/")[-1].encode("ascii", "ignore").decode("ascii")
    print(f"\n{tempDLAttr.split('?')[0]}")
    
    img_loc = f"./{exec_path.folder_path}/{tempDLName}"
    
    async with aiohttp.ClientSession() as session:
        async with session.get(tempDLAttr) as resp:
            with open(img_loc, 'wb') as fd:
                while True:
                    chunk = await resp.content.read(1024)
                    if not chunk:
                        break
                    fd.write(chunk)

    if not image:
        image_locations.append(img_loc)
        image_names.append(f"{tempDLName.split('.')[0]}")
    return img_loc
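
# Minimal usage sketch (illustrative only): `make_driver` stands in for a
# configured Selenium driver factory, and `exec_path` is assumed to expose the
# `folder_path` attribute used by download_image above.
#
#     import asyncio
#     driver = make_driver()
#     paths = asyncio.run(getOrderedDanbooruImages(
#         driver, "original", num_pics=5, num_pages=2,
#         filters=[0, 4], bl_tags="", inc_tags="",
#         exec_path=exec_path, imageControl=[],
#     ))
#     print(f"Downloaded {len(paths)} images")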