Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- app.py +131 -0
- funcs.py +88 -0
- module.py +143 -0
- requirements.txt +43 -0
app.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from module import generate_resume_urls
|
3 |
+
|
4 |
+
import os
|
5 |
+
import tempfile
|
6 |
+
import shutil
|
7 |
+
|
8 |
+
def write_result_to_file(result, filename="result.txt"):
    """Join a list of URLs with blank lines and write it to a file in a
    fresh temporary directory.

    Args:
        result: list of strings (generated resume-search URLs).
        filename: name of the output file inside the temp directory.

    Returns:
        Absolute path to the written file (served to the user by the
        Gradio ``File`` component).
    """
    # One blank line between entries keeps the output easy to scan.
    text = "\n\n".join(result)

    # mkdtemp gives a unique directory per call, so concurrent requests
    # never clobber each other's result files. NOTE: the directory is not
    # cleaned up here; the OS temp cleaner is expected to reclaim it.
    path = os.path.join(tempfile.mkdtemp(), filename)

    # Explicit UTF-8 so non-ASCII input links survive on any platform.
    # (The original called f.close() inside the with-block, which the
    # context manager already handles.)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)

    return path
|
19 |
+
|
20 |
+
def get_links(job_search_status_enable, gender_enable, salary_enable, education_level_enable, experience_enable, age_enable,
              links, use_filters, use_inc_values, job_search_status, gender, age_from, age_to, salary_from, salary_to, education_level, experience):
    """Gradio callback: expand each input search URL into many filtered
    resume-search URLs and write them to a downloadable file.

    Args:
        *_enable: checkbox flags selecting which filters participate.
        links: newline-separated hh.ru search URLs from the textarea.
        use_filters: when False, ignore all filters and generate the
            maximal URL set with generate_resume_urls defaults.
        use_inc_values: also emit URLs with impossible age/salary ranges
            (matches resumes that did not state age/salary).
        job_search_status, gender, age_from, age_to, salary_from,
        salary_to, education_level, experience: filter values from the UI.

    Returns:
        (path_to_result_file, "Кол-во ссылок: <N>") matching the
        (File, Label) outputs wired up in the UI.
    """
    result_links = []
    cout_slogan = "Кол-во ссылок: "

    # One URL per non-empty line of the textarea.
    links_list = [link for link in links.split("\n") if link]

    if not use_filters:
        # No filters: let generate_resume_urls use its full defaults.
        for link in links_list:
            result_links += generate_resume_urls(link)
    else:
        # Build the filter description ONCE. (The original rebuilt it
        # inside a per-link loop, appending every filter name to
        # selected_filters once per input link.)
        selected_filters = []
        filter_values = {}

        if age_enable:
            selected_filters.append("age")
            # range() is half-open; append the proper endpoint so the
            # interval is inclusive. A degenerate interval (age_to <=
            # age_from) keeps just age_from.
            filter_values["age"] = list(range(age_from, age_to))
            filter_values["age"].append(age_from if age_to - age_from <= 0 else age_to)

        if job_search_status_enable:
            selected_filters.append("job_search_status")
            filter_values["job_search_status"] = job_search_status

        if gender_enable:
            selected_filters.append("gender")
            filter_values["gender"] = gender

        if salary_enable:
            selected_filters.append("salary")
            filter_values["salary_from"] = salary_from
            filter_values["salary_to"] = salary_to

        if education_level_enable:
            selected_filters.append("education_level")
            filter_values["education_level"] = education_level

        if experience_enable:
            selected_filters.append("experience")
            filter_values["experience"] = experience

        for link in links_list:
            result_links += generate_resume_urls(link, selected_filters, filter_values, use_inc_values)

    cout_slogan += str(len(result_links))
    return write_result_to_file(result_links), cout_slogan
|
82 |
+
|
83 |
+
# --- Gradio UI -------------------------------------------------------------
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost — confirm the Row/Accordion grouping against the
# running app.
with gr.Blocks() as demo:
    gr.Markdown("# Модуль для увеличения кол-ва ссылок за счет добавления фильтра")

    # Input: one hh.ru resume-search URL per line.
    with gr.Row():
        links = gr.TextArea(label="Введите ссылки",placeholder="https://hh.ru/search/resume?text=Региональный+визуальный+мерчендайзер")

    # Master switch: when off, get_links ignores every filter below.
    use_filters = gr.Checkbox(label="Использовать Фильтры ( если выключенно, создается максимально кол-во ссылок )")
    with gr.Accordion("Фильтры",open=True):
        # Filter set
        use_inc_values = gr.Checkbox(label="Добавить к списку резюме, список без указанного возраста и зарплаты")
        with gr.Row():
            with gr.Row():
                job_search_status_enable = gr.Checkbox(label="Статус поиска работы",value=True)
                gender_enable = gr.Checkbox(label="Пол",value=True)
            with gr.Row():
                age_enable = gr.Checkbox(label="Возраст",value=True)
            with gr.Row():
                salary_enable = gr.Checkbox(label="Зарплата",value=True)

            education_level_enable = gr.Checkbox(label="Образование",value=True)
            experience_enable = gr.Checkbox(label="Опыт работы",value=True)

        # Concrete values for each enabled filter.
        with gr.Accordion("Настройка фильтров",open=False):
            with gr.Row():
                job_search_status = gr.CheckboxGroup(["unknown", "not_looking_for_job", "looking_for_offers", "active_search", "has_job_offer", "accepted_job_offer"], label="Статус поиска работы")
                gender = gr.Radio(["male", "female","both"], label="Пол")
            with gr.Row():
                age_from = gr.Slider(18, 70,value=18, step=1, label="Возраст от")
                # NOTE(review): default value 71 exceeds the slider
                # maximum of 70 — confirm intended default.
                age_to = gr.Slider(18, 70,value=71,step=1, label="Возраст до")
            with gr.Row():
                salary_from = gr.Slider(0, 1000000000,step=1,value=0, label="Зарплата от")
                salary_to = gr.Slider(0, 1000000000,step=1,value=1000000000,label="Зарплата до")

            education_level = gr.CheckboxGroup(["secondary", "special_secondary", "unfinished_higher", "bachelor", "master", "doctor", "candidate", "higher"], label="Уровень образования")
            experience = gr.CheckboxGroup(["noExperience", "between1And3", "between3And6", "moreThan6"], label="Опыт работы")

    with gr.Row():
        get_result_btn = gr.Button("Получить")
    with gr.Row():
        result_field = gr.File(label="Результат", interactive=False)
    with gr.Row():
        result_count_field = gr.Label(value="Кол-во ссылок")

    # The inputs list order must match the get_links parameter order.
    get_result_btn.click(fn=get_links,inputs=
        [ job_search_status_enable,gender_enable,salary_enable,education_level_enable,experience_enable,age_enable,
        links,use_filters,use_inc_values,job_search_status,gender,age_from,age_to,salary_from,salary_to,education_level,experience],
        outputs=[result_field,result_count_field])

demo.launch()
|
funcs.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib.parse
|
2 |
+
import scrapy
|
3 |
+
from scrapy.crawler import CrawlerProcess
|
4 |
+
from bs4 import BeautifulSoup
|
5 |
+
import json
|
6 |
+
|
7 |
+
from module_1_pre.funcs import generate_resume_urls, generate_specific_urls
|
8 |
+
|
9 |
+
|
10 |
+
class ResumeSpider(scrapy.Spider):
    """Crawl hh.ru resume-search result pages and record, for every
    generated search URL, how many result pages it has.

    Results are appended to ``resume_urls.json``, one JSON object per
    crawled URL.
    """

    name = "resume_spider"

    # Retry HTTP 400 responses up to 5 times (presumably used by hh.ru
    # for throttling — TODO confirm) and silence spider logging.
    custom_settings = {
        'RETRY_HTTP_CODES': [400],
        'RETRY_TIMES': 5,
        'LOG_ENABLED': False
    }

    def __init__(self, base_url=None,*args, **kwargs):
        # Remember the base search URL the crawl expands from.
        super(ResumeSpider, self).__init__(*args, **kwargs)
        self.base_url = base_url

    def start_requests(self):
        """Expand base_url into one request per filter combination."""
        urls = generate_resume_urls(self.base_url)
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse_general)

    def parse_general(self, response):
        """Handle a first-pass search page.

        If the page contains at least one resume and has fewer than 250
        result pages, record it; otherwise split the search further by
        employment type and re-crawl via parse_specific.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        main_element = soup.find('main', class_='resume-serp-content')

        # A page counts as non-empty only when the results container
        # holds an element carrying a resume id.
        if main_element and main_element.find('div', attrs={'data-resume-id': True}):
            page_count = self.get_page_count(soup)

            if page_count < 250:
                result = {
                    'url': response.url,
                    'pages': page_count
                }
                # NOTE(review): appending bare objects yields a file
                # that is not a single valid JSON document, and this
                # branch writes without an explicit encoding while
                # parse_specific uses utf-8 and a trailing comma —
                # confirm what the downstream consumer expects.
                with open('resume_urls.json', 'a') as f:
                    json.dump(result, f, ensure_ascii=False, indent=4)
                    f.write('\n')
            else:
                # Too many pages: narrow the search by employment type.
                self.base_url = response.url
                specific_urls = generate_specific_urls(self.base_url)
                for url in specific_urls:
                    yield scrapy.Request(url=url, callback=self.parse_specific)
        else:
            print(f'No resumes found on page: {response.url}')

    def parse_specific(self, response):
        """Record the page count of an employment-narrowed search page."""
        soup = BeautifulSoup(response.text, 'html.parser')
        page_count = self.get_page_count(soup)

        result = {
            'url': response.url,
            'pages': page_count
        }
        with open('resume_urls.json', 'a', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=4, separators=(',', ': '))
            f.write(',\n') # Add comma and newline for valid JSON array

    def get_page_count(self, soup):
        """Return the number of result pages shown in the pager, or 1
        when no pager is present (single page of results)."""
        pager = soup.find('div', class_='pager')
        if pager:
            last_page_link = pager.find_all('a', attrs={'data-qa': 'pager-page'})[-1]
            last_page = int(last_page_link.text.strip())
            return last_page
        return 1
|
70 |
+
|
71 |
+
|
72 |
+
def stage1(base_url):
    """Run ResumeSpider once over *base_url*, blocking until the crawl ends."""
    crawler_process = CrawlerProcess()
    crawler_process.crawl(ResumeSpider, base_url)
    # start() runs the Twisted reactor and returns only when finished.
    crawler_process.start()
|
76 |
+
|
77 |
+
|
78 |
+
|
79 |
+
|
80 |
+
# for url in urls:
|
81 |
+
# result = generate_resume_urls(url)
|
82 |
+
# print(result)
|
83 |
+
# print("\n")
|
84 |
+
|
85 |
+
# lens = len(['https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&job_search_status=unknown&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&job_search_status=not_looking_for_job&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&job_search_status=looking_for_offers&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&job_search_status=active_search&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&job_search_status=has_job_offer&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&job_search_status=accepted_job_offer&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&experience=moreThan6&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&experience=between3And6&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 
'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&experience=between1And3&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&experience=noExperience&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&gender=male&label=only_with_gender&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&gender=female&label=only_with_gender&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&education_level=higher&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&education_level=unfinished_higher&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&education_level=master&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&education_level=bachelor&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80', 
'https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&logic=normal&pos=full_text&exp_period=all_time&education_level=special_secondary&text=%D0%9A%D0%B0%D1%81%D1%81%D0%B8%D1%80'])
|
86 |
+
# print(lens)
|
87 |
+
|
88 |
+
|
module.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import urllib.parse
|
2 |
+
|
3 |
+
import urllib.parse
|
4 |
+
|
5 |
+
def generate_resume_urls(base_url, selected_filters=None, filter_values=None, use_incredible_values=True):
    """Expand one hh.ru resume-search URL into many URLs — one per
    combination of the selected filter values.

    Args:
        base_url: either a ``/resumes/<slug>`` page or a
            ``/search/resume?text=...`` query URL.
        selected_filters: names of filters to vary ("job_search_status",
            "gender", "age", "salary", "education_level", "experience");
            None means all of them.
        filter_values: overrides for the default value sets; None/empty
            entries keep the defaults.
        use_incredible_values: additionally emit URLs with deliberately
            impossible age/salary ranges, which match resumes that did
            NOT state an age or salary at all.

    Returns:
        list of generated search URLs.
    """
    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    # Derive the search text from either URL shape.
    if "/resumes/" in parsed_url.path:
        search_text = parsed_url.path.split("/")[-1].replace("_", " ")
    else:
        search_text = query_params.get("text", [""])[0]

    encoded_text = urllib.parse.quote(search_text)

    # Defaults: every possible value for every filter.
    filter_options = {
        "job_search_status": ["unknown", "not_looking_for_job", "looking_for_offers", "active_search", "has_job_offer", "accepted_job_offer"],
        "gender": ["male", "female"],
        "age": list(range(18, 71)),
        "salary": False,
        "salary_from": 0,
        "salary_to": 1000000000,
        "education_level": ["secondary", "special_secondary", "unfinished_higher", "bachelor", "master", "doctor", "candidate", "higher"],
        "experience": ["noExperience", "between1And3", "between3And6", "moreThan6"]
    }

    if filter_values is None:
        filter_values = {}
    if selected_filters is None:
        selected_filters = filter_options.keys()

    # Overlay caller-supplied values on top of the defaults.
    for key, value in filter_values.items():
        if key in ("salary", "salary_from", "salary_to"):
            filter_options[key] = value
        elif value is None:
            # Unset widget (e.g. an untouched Radio) — keep defaults.
            continue
        elif key == "gender" and isinstance(value, str):
            # The UI passes a single string. "both" means keep both
            # defaults; anything else narrows to that one value. (The
            # original assigned the raw string, so the product loop
            # below iterated its characters: gender=m, gender=a, ...)
            if value != "both":
                filter_options[key] = [value]
        elif len(value) > 0:
            filter_options[key] = value

    generated_urls = []

    # Cartesian product over every selected filter; an unselected filter
    # contributes a single None so the loop nest still runs once.
    for status in filter_options["job_search_status"] if "job_search_status" in selected_filters else [None]:
        for gender in filter_options["gender"] if "gender" in selected_filters else [None]:
            for age in filter_options["age"] if "age" in selected_filters else [None]:
                for experience in filter_options["experience"] if "experience" in selected_filters else [None]:
                    for education in filter_options["education_level"] if "education_level" in selected_filters else [None]:
                        url = f"https://hh.ru/search/resume?search_period=0&order_by=relevance&filter_exp_period=all_time&relocation=living_or_relocation&job_search_status_changed_by_user=true&logic=normal&pos=full_text&exp_period=all_time&text={encoded_text}"

                        if status:
                            url += f"&job_search_status={status}"
                        if gender:
                            url += f"&gender={gender}"
                        if age:
                            # Pin the age to a single year per URL.
                            url += f"&label=only_with_age"
                            url += f"&age_from={age}&age_to={age}"
                        if experience:
                            url += f"&experience={experience}"
                        if education:
                            url += f"&education_level={education}"

                        if "salary" in selected_filters:
                            url += f"&label=only_with_salary"
                            url += f"&salary_from={filter_options['salary_from']}&salary_to={filter_options['salary_to']}"

                        # Region / university restrictions pass through.
                        if "area" in query_params:
                            url += f"&area={query_params['area'][0]}"
                        if "university" in query_params:
                            url += f"&university={query_params['university'][0]}"

                        generated_urls.append(url)

    if use_incredible_values:
        # Second pass with impossible age/salary bounds baked into the
        # template: matches resumes with no stated age or salary.
        for status in filter_options["job_search_status"] if "job_search_status" in selected_filters else [None]:
            for gender in filter_options["gender"] if "gender" in selected_filters else [None]:
                for experience in filter_options["experience"] if "experience" in selected_filters else [None]:
                    for education in filter_options["education_level"] if "education_level" in selected_filters else [None]:
                        url = f"https://hh.ru/search/resume?search_period=0&order_by=relevance&salary_from=10000000&salary_to=1000000000000&filter_exp_period=all_time&relocation=living_or_relocation&job_search_status_changed_by_user=true&logic=normal&pos=full_text&exp_period=all_time&age_from=100000000&age_to=1000000&text={encoded_text}"

                        if status:
                            url += f"&job_search_status={status}"
                        if gender:
                            url += f"&gender={gender}"
                        if experience:
                            url += f"&experience={experience}"
                        if education:
                            url += f"&education_level={education}"

                        if "area" in query_params:
                            url += f"&area={query_params['area'][0]}"
                        if "university" in query_params:
                            url += f"&university={query_params['university'][0]}"

                        generated_urls.append(url)

    # Carry over any remaining query parameters of the original URL.
    # (The original implementation rebound the loop variable with
    # ``url += ...``, which silently dropped these parameters.)
    passthrough = "".join(
        f"&{key}={value[0]}"
        for key, value in query_params.items()
        if key not in ["text", "job_search_status", "experience", "gender", "age_from", "age_to", "education_level", "area", "university"]
    )
    if passthrough:
        generated_urls = [url + passthrough for url in generated_urls]

    return generated_urls
|
115 |
+
|
116 |
+
|
117 |
+
def generate_specific_urls(base_url):
    """Produce one URL per employment type, preserving every other query
    parameter of *base_url*.

    Used by the spider when a search has too many (>= 250) result
    pages: splitting by employment narrows each result set.

    Args:
        base_url: hh.ru resume-search URL.

    Returns:
        list of five URLs, one per employment option.
    """
    parsed_url = urllib.parse.urlparse(base_url)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    employment_options = ["full", "part", "project", "volunteer", "probation"]

    specific_urls = []
    for emp in employment_options:
        # Overwrite (or add) the employment parameter; all other
        # parameters — text, area, university, ... — pass through
        # unchanged. (Removed an unused experience_options list and two
        # no-op self-assignments from the original.)
        query_params["employment"] = [emp]
        new_query_string = urllib.parse.urlencode(query_params, doseq=True)
        specific_urls.append(parsed_url._replace(query=new_query_string).geturl())

    return specific_urls
|
140 |
+
|
141 |
+
|
142 |
+
# urls = generate_resume_urls("https://hh.ru/resumes/generalnyj-direktor-stroitelnoj-kompanii",use_incredible_values=True)
|
143 |
+
# print(len(urls))
|
requirements.txt
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
attrs==23.2.0
|
2 |
+
Automat==22.10.0
|
3 |
+
beautifulsoup4==4.12.3
|
4 |
+
certifi==2024.2.2
|
5 |
+
cffi==1.16.0
|
6 |
+
charset-normalizer==3.3.2
|
7 |
+
colorama==0.4.6
|
8 |
+
constantly==23.10.4
|
9 |
+
crochet==2.1.1
|
10 |
+
cryptography==42.0.5
|
11 |
+
cssselect==1.2.0
|
12 |
+
filelock==3.13.3
|
13 |
+
hyperlink==21.0.0
|
14 |
+
idna==3.6
|
15 |
+
incremental==22.10.0
|
16 |
+
itemadapter==0.8.0
|
17 |
+
itemloaders==1.1.0
|
18 |
+
jmespath==1.0.1
|
19 |
+
lxml==5.1.0
|
20 |
+
packaging==24.0
|
21 |
+
parsel==1.9.0
|
22 |
+
Protego==0.3.0
|
23 |
+
pyasn1==0.6.0
|
24 |
+
pyasn1_modules==0.4.0
|
25 |
+
pycparser==2.21
|
26 |
+
PyDispatcher==2.0.7
|
27 |
+
pyOpenSSL==24.1.0
|
28 |
+
queuelib==1.6.2
|
29 |
+
requests==2.31.0
|
30 |
+
requests-file==2.0.0
|
31 |
+
Scrapy==2.11.1
|
32 |
+
service-identity==24.1.0
|
33 |
+
six==1.16.0
|
34 |
+
soupsieve==2.5
|
35 |
+
tldextract==5.1.2
|
36 |
+
tqdm==4.66.2
|
37 |
+
Twisted==24.3.0
|
38 |
+
twisted-iocpsupport==1.0.4
|
39 |
+
typing_extensions==4.10.0
|
40 |
+
urllib3==2.2.1
|
41 |
+
w3lib==2.1.2
|
42 |
+
wrapt==1.16.0
|
43 |
+
zope.interface==6.2
|