neon_arch
committed on
Commit
•
0502a8f
1
Parent(s):
ab7348b
Improve code to evade bot detection (closes #8)
Browse files
- src/engines/duckduckgo.rs +3 -2
- src/engines/searx.rs +2 -1
src/engines/duckduckgo.rs
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
|
5 |
use std::collections::HashMap;
|
6 |
|
7 |
-
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
|
8 |
use scraper::{Html, Selector};
|
9 |
|
10 |
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
@@ -50,7 +50,8 @@ pub async fn results(
|
|
50 |
let mut header_map = HeaderMap::new();
|
51 |
header_map.insert(USER_AGENT, user_agent.parse()?);
|
52 |
header_map.insert(REFERER, "https://google.com/".parse()?);
|
53 |
-
header_map.insert(CONTENT_TYPE, "
|
|
|
54 |
|
55 |
// fetch the html from upstream duckduckgo engine
|
56 |
// TODO: Write better error handling code to handle no results case.
|
|
|
4 |
|
5 |
use std::collections::HashMap;
|
6 |
|
7 |
+
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
8 |
use scraper::{Html, Selector};
|
9 |
|
10 |
use crate::search_results_handler::aggregation_models::RawSearchResult;
|
|
|
50 |
let mut header_map = HeaderMap::new();
|
51 |
header_map.insert(USER_AGENT, user_agent.parse()?);
|
52 |
header_map.insert(REFERER, "https://google.com/".parse()?);
|
53 |
+
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
54 |
+
header_map.insert(COOKIE, "kl=wt-wt".parse()?);
|
55 |
|
56 |
// fetch the html from upstream duckduckgo engine
|
57 |
// TODO: Write better error handling code to handle no results case.
|
src/engines/searx.rs
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
//! by querying the upstream searx search engine instance with user provided query and with a page
|
3 |
//! number if provided.
|
4 |
|
5 |
-
use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
|
6 |
use scraper::{Html, Selector};
|
7 |
use std::collections::HashMap;
|
8 |
|
@@ -38,6 +38,7 @@ pub async fn results(
|
|
38 |
header_map.insert(USER_AGENT, user_agent.parse()?);
|
39 |
header_map.insert(REFERER, "https://google.com/".parse()?);
|
40 |
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
|
|
41 |
|
42 |
// fetch the html from upstream searx instance engine
|
43 |
// TODO: Write better error handling code to handle no results case.
|
|
|
2 |
//! by querying the upstream searx search engine instance with user provided query and with a page
|
3 |
//! number if provided.
|
4 |
|
5 |
+
use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
|
6 |
use scraper::{Html, Selector};
|
7 |
use std::collections::HashMap;
|
8 |
|
|
|
38 |
header_map.insert(USER_AGENT, user_agent.parse()?);
|
39 |
header_map.insert(REFERER, "https://google.com/".parse()?);
|
40 |
header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
|
41 |
+
header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
|
42 |
|
43 |
// fetch the html from upstream searx instance engine
|
44 |
// TODO: Write better error handling code to handle no results case.
|