neon_arch committed on
Commit
0502a8f
1 Parent(s): ab7348b

improve code to evade bot detection and close #8

Browse files
src/engines/duckduckgo.rs CHANGED
@@ -4,7 +4,7 @@
4
 
5
  use std::collections::HashMap;
6
 
7
- use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
8
  use scraper::{Html, Selector};
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
@@ -50,7 +50,8 @@ pub async fn results(
50
  let mut header_map = HeaderMap::new();
51
  header_map.insert(USER_AGENT, user_agent.parse()?);
52
  header_map.insert(REFERER, "https://google.com/".parse()?);
53
- header_map.insert(CONTENT_TYPE, "text/html; charset=UTF-8".parse()?);
 
54
 
55
  // fetch the html from upstream duckduckgo engine
56
  // TODO: Write better error handling code to handle no results case.
 
4
 
5
  use std::collections::HashMap;
6
 
7
+ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
8
  use scraper::{Html, Selector};
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
 
50
  let mut header_map = HeaderMap::new();
51
  header_map.insert(USER_AGENT, user_agent.parse()?);
52
  header_map.insert(REFERER, "https://google.com/".parse()?);
53
+ header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
54
+ header_map.insert(COOKIE, "kl=wt-wt".parse()?);
55
 
56
  // fetch the html from upstream duckduckgo engine
57
  // TODO: Write better error handling code to handle no results case.
src/engines/searx.rs CHANGED
@@ -2,7 +2,7 @@
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
- use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
6
  use scraper::{Html, Selector};
7
  use std::collections::HashMap;
8
 
@@ -38,6 +38,7 @@ pub async fn results(
38
  header_map.insert(USER_AGENT, user_agent.parse()?);
39
  header_map.insert(REFERER, "https://google.com/".parse()?);
40
  header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
 
41
 
42
  // fetch the html from upstream searx instance engine
43
  // TODO: Write better error handling code to handle no results case.
 
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
+ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
6
  use scraper::{Html, Selector};
7
  use std::collections::HashMap;
8
 
 
38
  header_map.insert(USER_AGENT, user_agent.parse()?);
39
  header_map.insert(REFERER, "https://google.com/".parse()?);
40
  header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
41
+ header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse()?);
42
 
43
  // fetch the html from upstream searx instance engine
44
  // TODO: Write better error handling code to handle no results case.