neon_arch committed on
Commit
c170de8
·
1 Parent(s): f8c3c8d

add code to evade ip blocking, improve pagination code and fix documentation

Browse files
Cargo.lock CHANGED
@@ -447,6 +447,16 @@ dependencies = [
447
  "bitflags",
448
  ]
449
 
 
 
 
 
 
 
 
 
 
 
450
  [[package]]
451
  name = "convert_case"
452
  version = "0.4.0"
@@ -1427,6 +1437,12 @@ version = "2.0.0"
1427
  source = "registry+https://github.com/rust-lang/crates.io-index"
1428
  checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
1429
 
 
 
 
 
 
 
1430
  [[package]]
1431
  name = "memchr"
1432
  version = "2.5.0"
@@ -2157,6 +2173,20 @@ dependencies = [
2157
  "rand_core 0.3.1",
2158
  ]
2159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2160
  [[package]]
2161
  name = "redox_syscall"
2162
  version = "0.1.57"
@@ -2526,6 +2556,12 @@ dependencies = [
2526
  "digest",
2527
  ]
2528
 
 
 
 
 
 
 
2529
  [[package]]
2530
  name = "sha2"
2531
  version = "0.10.6"
@@ -3291,6 +3327,9 @@ dependencies = [
3291
  "fake-useragent",
3292
  "handlebars",
3293
  "log",
 
 
 
3294
  "reqwest 0.11.17",
3295
  "rlua",
3296
  "scraper",
 
447
  "bitflags",
448
  ]
449
 
450
+ [[package]]
451
+ name = "combine"
452
+ version = "4.6.6"
453
+ source = "registry+https://github.com/rust-lang/crates.io-index"
454
+ checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
455
+ dependencies = [
456
+ "bytes 1.4.0",
457
+ "memchr",
458
+ ]
459
+
460
  [[package]]
461
  name = "convert_case"
462
  version = "0.4.0"
 
1437
  source = "registry+https://github.com/rust-lang/crates.io-index"
1438
  checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
1439
 
1440
+ [[package]]
1441
+ name = "md5"
1442
+ version = "0.7.0"
1443
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1444
+ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
1445
+
1446
  [[package]]
1447
  name = "memchr"
1448
  version = "2.5.0"
 
2173
  "rand_core 0.3.1",
2174
  ]
2175
 
2176
+ [[package]]
2177
+ name = "redis"
2178
+ version = "0.23.0"
2179
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2180
+ checksum = "3ea8c51b5dc1d8e5fd3350ec8167f464ec0995e79f2e90a075b63371500d557f"
2181
+ dependencies = [
2182
+ "combine",
2183
+ "itoa 1.0.6",
2184
+ "percent-encoding 2.2.0",
2185
+ "ryu",
2186
+ "sha1_smol",
2187
+ "url 2.3.1",
2188
+ ]
2189
+
2190
  [[package]]
2191
  name = "redox_syscall"
2192
  version = "0.1.57"
 
2556
  "digest",
2557
  ]
2558
 
2559
+ [[package]]
2560
+ name = "sha1_smol"
2561
+ version = "1.0.0"
2562
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2563
+ checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012"
2564
+
2565
  [[package]]
2566
  name = "sha2"
2567
  version = "0.10.6"
 
3327
  "fake-useragent",
3328
  "handlebars",
3329
  "log",
3330
+ "md5",
3331
+ "rand 0.6.5",
3332
+ "redis",
3333
  "reqwest 0.11.17",
3334
  "rlua",
3335
  "scraper",
Cargo.toml CHANGED
@@ -15,6 +15,9 @@ actix-web = {version="4.3.1"}
15
  actix-files = {version="0.6.2"}
16
  serde_json = {version="*"}
17
  fake-useragent = {version="*"}
18
- env_logger = "0.10.0"
19
- log = "0.4.17"
20
  rlua = {version="*"}
 
 
 
 
15
  actix-files = {version="0.6.2"}
16
  serde_json = {version="*"}
17
  fake-useragent = {version="*"}
18
+ env_logger = {version="0.10.0"}
19
+ log = {version="0.4.17"}
20
  rlua = {version="*"}
21
+ redis = {version="*"}
22
+ md5 = {version="*"}
23
+ rand={version="*"}
src/cache/cacher.rs ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //! This module provides the functionality to cache the aggregated results fetched and aggregated
2
+ //! from the upstream search engines in a json format.
3
+
4
+ use md5::compute;
5
+ use redis::{Client, Commands, Connection};
6
+
7
+ /// A named struct which stores the redis Connection url address to which the client will
8
+ /// connect.
9
+ ///
10
+ /// # Fields
11
+ ///
12
+ /// * `redis_connection_url` - It stores the redis Connection url address.
13
+ #[derive(Clone)]
14
+ pub struct RedisCache {
15
+ redis_connection_url: String,
16
+ }
17
+
18
+ impl RedisCache {
19
+ /// Constructs a new `RedisCache` with the given arguments needed for the struct.
20
+ ///
21
+ /// # Arguments
22
+ ///
23
+ /// * `redis_connection_url` - It stores the redis Connection url address.
24
+ pub fn new(redis_connection_url: String) -> Self {
25
+ RedisCache {
26
+ redis_connection_url,
27
+ }
28
+ }
29
+
30
+ /// A helper function which computes the hash of the url and formats and returns it as string.
31
+ ///
32
+ /// # Arguments
33
+ ///
34
+ /// * `url` - It takes a url as a string.
35
+ fn compute_url_hash(self, url: &str) -> String {
36
+ format!("{:?}", compute(url))
37
+ }
38
+
39
+ /// A function which fetches the cached json results as json string from the redis server.
40
+ ///
41
+ /// # Arguments
42
+ ///
43
+ /// * `url` - It takes an url as a string.
44
+ pub fn cached_results_json(self, url: String) -> Result<String, Box<dyn std::error::Error>> {
45
+ let hashed_url_string = self.clone().compute_url_hash(&url);
46
+ let mut redis_connection: Connection =
47
+ Client::open(self.redis_connection_url)?.get_connection()?;
48
+ Ok(redis_connection.get(hashed_url_string)?)
49
+ }
50
+
51
+ /// A function which caches the results by using the hashed `url` as the key and
52
+ /// `json results` as the value and stores it in redis server with ttl(time to live)
53
+ /// set to 60 seconds.
54
+ ///
55
+ /// # Arguments
56
+ ///
57
+ /// * `json_results` - It takes the json results string as an argument.
58
+ /// * `url` - It takes the url as a String.
59
+ pub fn cache_results(
60
+ self,
61
+ json_results: String,
62
+ url: String,
63
+ ) -> Result<(), Box<dyn std::error::Error>> {
64
+ let hashed_url_string = self.clone().compute_url_hash(&url);
65
+ let mut redis_connection: Connection =
66
+ Client::open(self.redis_connection_url)?.get_connection()?;
67
+
68
+ // put results_json into cache
69
+ redis_connection.set(hashed_url_string.clone(), json_results)?;
70
+
71
+ // Set the TTL for the key to 60 seconds
72
+ redis_connection
73
+ .expire::<String, u32>(hashed_url_string.clone(), 60)
74
+ .unwrap();
75
+
76
+ Ok(())
77
+ }
78
+ }
src/cache/mod.rs ADDED
@@ -0,0 +1 @@
 
 
1
+ pub mod cacher;
src/config_parser/parser.rs CHANGED
@@ -11,11 +11,15 @@ use std::fs;
11
  //
12
  /// * `port` - It stores the parsed port number option on which the server should launch.
13
  /// * `binding_ip_addr` - It stores the parsed ip address option on which the server should launch
 
 
 
14
  #[derive(Clone)]
15
  pub struct Config {
16
  pub port: u16,
17
  pub binding_ip_addr: String,
18
  pub style: Style,
 
19
  }
20
 
21
  impl Config {
@@ -44,6 +48,7 @@ impl Config {
44
  globals.get::<_, String>("theme")?,
45
  globals.get::<_, String>("colorscheme")?,
46
  ),
 
47
  })
48
  })
49
  }
 
11
  //
12
  /// * `port` - It stores the parsed port number option on which the server should launch.
13
  /// * `binding_ip_addr` - It stores the parsed ip address option on which the server should launch
14
+ /// * `style` - It stores the theming options for the website.
15
+ /// * `redis_connection_url` - It stores the redis connection url address on which the redis
16
+ /// client should connect.
17
  #[derive(Clone)]
18
  pub struct Config {
19
  pub port: u16,
20
  pub binding_ip_addr: String,
21
  pub style: Style,
22
+ pub redis_connection_url: String,
23
  }
24
 
25
  impl Config {
 
48
  globals.get::<_, String>("theme")?,
49
  globals.get::<_, String>("colorscheme")?,
50
  ),
51
+ redis_connection_url: globals.get::<_, String>("redis_connection_url")?,
52
  })
53
  })
54
  }
src/config_parser/parser_models.rs CHANGED
@@ -1,21 +1,24 @@
1
  //! This module provides public models for handling, storing and serializing parsed config file
2
  //! options from config.lua by grouping them together.
3
 
4
- use serde::Serialize;
5
 
6
- /// A named struct which stores, serializes and groups the parsed config file options of theme and
7
- /// colorscheme names into the Style struct which derives the `Clone` and `Serialize` traits
8
- /// where the `Clone` trait is derived for allowing the struct to be cloned and passed to the
9
- /// server as a shared data between all routes except `/robots.txt` and the `Serialize` trait
10
- /// has been derived for allowing the object to be serialized so that it can be passed to
11
- /// handlebars template files.
 
 
 
12
  ///
13
  /// # Fields
14
  //
15
  /// * `theme` - It stores the parsed theme option used to set a theme for the website.
16
  /// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
17
  /// theme being used.
18
- #[derive(Serialize, Clone)]
19
  pub struct Style {
20
  pub theme: String,
21
  pub colorscheme: String,
 
1
  //! This module provides public models for handling, storing and serializing parsed config file
2
  //! options from config.lua by grouping them together.
3
 
4
+ use serde::{Deserialize, Serialize};
5
 
6
+ /// A named struct which stores, deserializes, serializes and groups the parsed config file options
7
+ /// of theme and colorscheme names into the Style struct which derives the `Clone`, `Serialize`
8
+ /// and `Deserialize` traits where the `Clone` trait is derived for allowing the struct to be
9
+ /// cloned and passed to the server as a shared data between all routes except `/robots.txt` and
10
+ /// the `Serialize` trait has been derived for allowing the object to be serialized so that it
11
+ /// can be passed to handlebars template files and the `Deserialize` trait has been derived in
12
+ /// order to allow deserializing the json back to a struct in the aggregate function in
13
+ /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
14
+ /// it to the template files.
15
  ///
16
  /// # Fields
17
  //
18
  /// * `theme` - It stores the parsed theme option used to set a theme for the website.
19
  /// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
20
  /// theme being used.
21
+ #[derive(Serialize, Deserialize, Clone)]
22
  pub struct Style {
23
  pub theme: String,
24
  pub colorscheme: String,
src/engines/duckduckgo.rs CHANGED
@@ -2,9 +2,10 @@
2
  //! by querying the upstream duckduckgo search engine with user provided query and with a page
3
  //! number if provided.
4
 
5
- use std::collections::HashMap;
6
 
7
- use reqwest::header::USER_AGENT;
 
8
  use scraper::{Html, Selector};
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
@@ -17,7 +18,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
17
  /// # Arguments
18
  ///
19
  /// * `query` - Takes the user provided query to query to the upstream search engine with.
20
- /// * `page` - Takes an Option<u32> as argument which can be either None or a valid page number.
21
  /// * `user_agent` - Takes a random user agent string as an argument.
22
  ///
23
  /// # Errors
@@ -27,32 +28,41 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
27
  /// selector fails to initialize"
28
  pub async fn results(
29
  query: &str,
30
- page: Option<u32>,
31
  user_agent: &str,
32
  ) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
33
  // Page number can be missing or empty string and so appropriate handling is required
34
  // so that upstream server receives valid page number.
35
  let url: String = match page {
36
- Some(page_number) => {
37
- if page_number <= 1 {
38
- format!("https://html.duckduckgo.com/html/?q={query}&s=&dc=&v=1&o=json&api=/d.js")
39
- } else {
40
- format!(
41
- "https://duckduckgo.com/html/?q={}&s={}&dc={}&v=1&o=json&api=/d.js",
42
- query,
43
- page_number / 2 * 30,
44
- page_number / 2 * 30 + 1
45
- )
46
- }
47
  }
48
- None => format!("https://html.duckduckgo.com/html/?q={query}&s=&dc=&v=1&o=json&api=/d.js"),
49
  };
50
 
 
 
 
 
 
 
 
 
 
 
 
51
  // fetch the html from upstream duckduckgo engine
52
  // TODO: Write better error handling code to handle no results case.
53
  let results: String = reqwest::Client::new()
54
  .get(url)
55
- .header(USER_AGENT, user_agent)
56
  .send()
57
  .await?
58
  .text()
 
2
  //! by querying the upstream duckduckgo search engine with user provided query and with a page
3
  //! number if provided.
4
 
5
+ use std::{collections::HashMap, time::Duration};
6
 
7
+ use rand::Rng;
8
+ use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
9
  use scraper::{Html, Selector};
10
 
11
  use crate::search_results_handler::aggregation_models::RawSearchResult;
 
18
  /// # Arguments
19
  ///
20
  /// * `query` - Takes the user provided query to query to the upstream search engine with.
21
+ /// * `page` - Takes a u32 as an argument.
22
  /// * `user_agent` - Takes a random user agent string as an argument.
23
  ///
24
  /// # Errors
 
28
  /// selector fails to initialize"
29
  pub async fn results(
30
  query: &str,
31
+ page: u32,
32
  user_agent: &str,
33
  ) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
34
  // Page number can be missing or empty string and so appropriate handling is required
35
  // so that upstream server receives valid page number.
36
  let url: String = match page {
37
+ 1 => {
38
+ format!("https://html.duckduckgo.com/html/?q={query}&s=&dc=&v=1&o=json&api=/d.js")
39
+ }
40
+ _ => {
41
+ format!(
42
+ "https://duckduckgo.com/html/?q={}&s={}&dc={}&v=1&o=json&api=/d.js",
43
+ query,
44
+ (page / 2 + (page % 2)) * 30,
45
+ (page / 2 + (page % 2)) * 30 + 1
46
+ )
 
47
  }
 
48
  };
49
 
50
+ // Add a random delay before making the request.
51
+ let mut rng = rand::thread_rng();
52
+ let delay_secs = rng.gen_range(1, 10);
53
+ std::thread::sleep(Duration::from_secs(delay_secs));
54
+
55
+ // initializing HeaderMap and adding appropriate headers.
56
+ let mut header_map = HeaderMap::new();
57
+ header_map.insert(USER_AGENT, user_agent.parse()?);
58
+ header_map.insert(REFERER, "https://google.com/".parse()?);
59
+ header_map.insert(CONTENT_TYPE, "text/html; charset=UTF-8".parse()?);
60
+
61
  // fetch the html from upstream duckduckgo engine
62
  // TODO: Write better error handling code to handle no results case.
63
  let results: String = reqwest::Client::new()
64
  .get(url)
65
+ .headers(header_map) // add spoofed headers to emulate human behaviour
66
  .send()
67
  .await?
68
  .text()
src/engines/searx.rs CHANGED
@@ -2,10 +2,10 @@
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
- use std::collections::HashMap;
6
-
7
- use reqwest::header::USER_AGENT;
8
  use scraper::{Html, Selector};
 
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
11
 
@@ -17,7 +17,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
17
  /// # Arguments
18
  ///
19
  /// * `query` - Takes the user provided query to query to the upstream search engine with.
20
- /// * `page` - Takes an Option<u32> as argument which can be either None or a valid page number.
21
  /// * `user_agent` - Takes a random user agent string as an argument.
22
  ///
23
  /// # Errors
@@ -27,27 +27,29 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
27
  /// selector fails to initialize"
28
  pub async fn results(
29
  query: &str,
30
- page: Option<u32>,
31
  user_agent: &str,
32
  ) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
33
  // Page number can be missing or empty string and so appropriate handling is required
34
  // so that upstream server receives valid page number.
35
- let url: String = match page {
36
- Some(page_number) => {
37
- if page_number <= 1 {
38
- format!("https://searx.work/search?q={query}")
39
- } else {
40
- format!("https://searx.work/search?q={query}&pageno={page_number}",)
41
- }
42
- }
43
- None => format!("https://searx.work/search?q={query}"),
44
- };
 
 
45
 
46
  // fetch the html from upstream searx instance engine
47
  // TODO: Write better error handling code to handle no results case.
48
  let results: String = reqwest::Client::new()
49
  .get(url)
50
- .header(USER_AGENT, user_agent)
51
  .send()
52
  .await?
53
  .text()
 
2
  //! by querying the upstream searx search engine instance with user provided query and with a page
3
  //! number if provided.
4
 
5
+ use rand::Rng;
6
+ use reqwest::header::{HeaderMap, CONTENT_TYPE, REFERER, USER_AGENT};
 
7
  use scraper::{Html, Selector};
8
+ use std::{collections::HashMap, time::Duration};
9
 
10
  use crate::search_results_handler::aggregation_models::RawSearchResult;
11
 
 
17
  /// # Arguments
18
  ///
19
  /// * `query` - Takes the user provided query to query to the upstream search engine with.
20
+ /// * `page` - Takes a u32 as an argument.
21
  /// * `user_agent` - Takes a random user agent string as an argument.
22
  ///
23
  /// # Errors
 
27
  /// selector fails to initialize"
28
  pub async fn results(
29
  query: &str,
30
+ page: u32,
31
  user_agent: &str,
32
  ) -> Result<HashMap<String, RawSearchResult>, Box<dyn std::error::Error>> {
33
  // Page number can be missing or empty string and so appropriate handling is required
34
  // so that upstream server receives valid page number.
35
+ let url: String = format!("https://searx.work/search?q={query}&pageno={page}");
36
+
37
+ // Add random delay before making the request.
38
+ let mut rng = rand::thread_rng();
39
+ let delay_secs = rng.gen_range(1, 10);
40
+ std::thread::sleep(Duration::from_secs(delay_secs));
41
+
42
+ // initializing headers and adding appropriate headers.
43
+ let mut header_map = HeaderMap::new();
44
+ header_map.insert(USER_AGENT, user_agent.parse()?);
45
+ header_map.insert(REFERER, "https://google.com/".parse()?);
46
+ header_map.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse()?);
47
 
48
  // fetch the html from upstream searx instance engine
49
  // TODO: Write better error handling code to handle no results case.
50
  let results: String = reqwest::Client::new()
51
  .get(url)
52
+ .headers(header_map) // add spoofed headers to emulate human behaviours.
53
  .send()
54
  .await?
55
  .text()
src/lib.rs CHANGED
@@ -1,6 +1,7 @@
1
  //! This main library module provides the functionality to provide and handle the Tcp server
2
  //! and register all the routes for the `websurfx` meta search engine website.
3
 
 
4
  pub mod config_parser;
5
  pub mod engines;
6
  pub mod search_results_handler;
 
1
  //! This main library module provides the functionality to provide and handle the Tcp server
2
  //! and register all the routes for the `websurfx` meta search engine website.
3
 
4
+ pub mod cache;
5
  pub mod config_parser;
6
  pub mod engines;
7
  pub mod search_results_handler;
src/search_results_handler/aggregation_models.rs CHANGED
@@ -1,12 +1,12 @@
1
  //! This module provides public models for handling, storing and serializing of search results
2
  //! data scraped from the upstream search engines.
3
 
4
- use serde::Serialize;
5
 
6
  use crate::config_parser::parser_models::Style;
7
 
8
- /// A named struct to store and serialize the individual search result from all the scraped
9
- /// and aggregated search results from the upstream search engines.
10
  ///
11
  /// # Fields
12
  ///
@@ -16,7 +16,7 @@ use crate::config_parser::parser_models::Style;
16
  /// * `url` - The url to be displayed below the search result title in html.
17
  /// * `description` - The description of the search result.
18
  /// * `engine` - The names of the upstream engines from which this results were provided.
19
- #[derive(Debug, Serialize)]
20
  #[serde(rename_all = "camelCase")]
21
  pub struct SearchResult {
22
  pub title: String,
@@ -116,15 +116,15 @@ impl RawSearchResult {
116
  }
117
  }
118
 
119
- /// A named struct to store and serialize the all the search results scraped and aggregated
120
- /// from the upstream search engines.
121
  ///
122
  /// # Fields
123
  ///
124
  /// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
125
  /// `SearchResult` structs.
126
  /// * `page_query` - Stores the current pages search query `q` provided in the search url.
127
- #[derive(Serialize)]
128
  #[serde(rename_all = "camelCase")]
129
  pub struct SearchResults {
130
  pub results: Vec<SearchResult>,
 
1
  //! This module provides public models for handling, storing and serializing of search results
2
  //! data scraped from the upstream search engines.
3
 
4
+ use serde::{Deserialize, Serialize};
5
 
6
  use crate::config_parser::parser_models::Style;
7
 
8
+ /// A named struct to store, serialize and deserialize the individual search result from all the
9
+ /// scraped and aggregated search results from the upstream search engines.
10
  ///
11
  /// # Fields
12
  ///
 
16
  /// * `url` - The url to be displayed below the search result title in html.
17
  /// * `description` - The description of the search result.
18
  /// * `engine` - The names of the upstream engines from which this results were provided.
19
+ #[derive(Debug, Serialize, Deserialize)]
20
  #[serde(rename_all = "camelCase")]
21
  pub struct SearchResult {
22
  pub title: String,
 
116
  }
117
  }
118
 
119
+ /// A named struct to store, serialize and deserialize all the search results scraped and
120
+ /// aggregated from the upstream search engines.
121
  ///
122
  /// # Fields
123
  ///
124
  /// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
125
  /// `SearchResult` structs.
126
  /// * `page_query` - Stores the current pages search query `q` provided in the search url.
127
+ #[derive(Serialize, Deserialize)]
128
  #[serde(rename_all = "camelCase")]
129
  pub struct SearchResults {
130
  pub results: Vec<SearchResult>,
src/search_results_handler/aggregator.rs CHANGED
@@ -25,7 +25,7 @@ use crate::engines::{duckduckgo, searx};
25
  /// # Arguments
26
  ///
27
  /// * `query` - Accepts a string to query with the above upstream search engines.
28
- /// * `page` - Accepts an Option<u32> which could either be a None or a valid page number.
29
  ///
30
  /// # Error
31
  ///
@@ -34,7 +34,7 @@ use crate::engines::{duckduckgo, searx};
34
  /// containing appropriate values.
35
  pub async fn aggregate(
36
  query: &str,
37
- page: Option<u32>,
38
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
39
  let user_agent: String = random_user_agent();
40
  let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
 
25
  /// # Arguments
26
  ///
27
  /// * `query` - Accepts a string to query with the above upstream search engines.
28
+ /// * `page` - Accepts a u32 page number.
29
  ///
30
  /// # Error
31
  ///
 
34
  /// containing appropriate values.
35
  pub async fn aggregate(
36
  query: &str,
37
+ page: u32,
38
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
39
  let user_agent: String = random_user_agent();
40
  let mut result_map: HashMap<String, RawSearchResult> = HashMap::new();
src/server/routes.rs CHANGED
@@ -4,7 +4,11 @@
4
 
5
  use std::fs::read_to_string;
6
 
7
- use crate::{config_parser::parser::Config, search_results_handler::aggregator::aggregate};
 
 
 
 
8
  use actix_web::{get, web, HttpRequest, HttpResponse};
9
  use handlebars::Handlebars;
10
  use serde::Deserialize;
@@ -67,6 +71,9 @@ pub async fn search(
67
  config: web::Data<Config>,
68
  ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
69
  let params = web::Query::<SearchParams>::from_query(req.query_string())?;
 
 
 
70
  match &params.q {
71
  Some(query) => {
72
  if query.trim().is_empty() {
@@ -74,11 +81,63 @@ pub async fn search(
74
  .insert_header(("location", "/"))
75
  .finish())
76
  } else {
77
- let mut results_json: crate::search_results_handler::aggregation_models::SearchResults =
78
- aggregate(query, params.page).await?;
79
- results_json.add_style(config.style.clone());
80
- let page_content: String = hbs.render("search", &results_json)?;
81
- Ok(HttpResponse::Ok().body(page_content))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
  }
84
  None => Ok(HttpResponse::Found()
@@ -115,6 +174,3 @@ pub async fn settings(
115
  let page_content: String = hbs.render("settings", &config.style)?;
116
  Ok(HttpResponse::Ok().body(page_content))
117
  }
118
-
119
- // TODO: Write tests for tesing parameters for search function that if provided with something
120
- // other than u32 like alphabets and special characters than it should panic
 
4
 
5
  use std::fs::read_to_string;
6
 
7
+ use crate::{
8
+ cache::cacher::RedisCache,
9
+ config_parser::parser::Config,
10
+ search_results_handler::{aggregation_models::SearchResults, aggregator::aggregate},
11
+ };
12
  use actix_web::{get, web, HttpRequest, HttpResponse};
13
  use handlebars::Handlebars;
14
  use serde::Deserialize;
 
71
  config: web::Data<Config>,
72
  ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
73
  let params = web::Query::<SearchParams>::from_query(req.query_string())?;
74
+
75
+ //Initialize redis cache connection struct
76
+ let redis_cache = RedisCache::new(config.redis_connection_url.clone());
77
  match &params.q {
78
  Some(query) => {
79
  if query.trim().is_empty() {
 
81
  .insert_header(("location", "/"))
82
  .finish())
83
  } else {
84
+ // Initialize the page url as an empty string
85
+ let mut page_url = String::new();
86
+
87
+ // Find whether the page is valid page number if not then return
88
+ // the first page number and also construct the page_url accordingly
89
+ let page = match params.page {
90
+ Some(page_number) => {
91
+ if page_number <= 1 {
92
+ page_url = format!(
93
+ "http://{}:{}/search?q={}&page={}",
94
+ config.binding_ip_addr, config.port, query, 1
95
+ );
96
+ 1
97
+ } else {
98
+ page_url = format!(
99
+ "http://{}:{}/search?q={}&page={}",
100
+ config.binding_ip_addr, config.port, query, page_number
101
+ );
102
+
103
+ page_number
104
+ }
105
+ }
106
+ None => {
107
+ page_url = format!(
108
+ "http://{}:{}{}&page={}",
109
+ config.binding_ip_addr,
110
+ config.port,
111
+ req.uri(),
112
+ 1
113
+ );
114
+
115
+ 1
116
+ }
117
+ };
118
+
119
+ // fetch the cached results json.
120
+ let cached_results_json = redis_cache.clone().cached_results_json(page_url.clone());
121
+ // check if fetched results was indeed fetched or it was an error and if so
122
+ // handle the data accordingly.
123
+ match cached_results_json {
124
+ Ok(results_json) => {
125
+ let new_results_json: SearchResults = serde_json::from_str(&results_json)?;
126
+ let page_content: String = hbs.render("search", &new_results_json)?;
127
+ Ok(HttpResponse::Ok().body(page_content))
128
+ }
129
+ Err(_) => {
130
+ let mut results_json: crate::search_results_handler::aggregation_models::SearchResults =
131
+ aggregate(query, page).await?;
132
+ results_json.add_style(config.style.clone());
133
+ redis_cache.clone().cache_results(
134
+ serde_json::to_string(&results_json)?,
135
+ page_url.clone(),
136
+ )?;
137
+ let page_content: String = hbs.render("search", &results_json)?;
138
+ Ok(HttpResponse::Ok().body(page_content))
139
+ }
140
+ }
141
  }
142
  }
143
  None => Ok(HttpResponse::Found()
 
174
  let page_content: String = hbs.render("settings", &config.style)?;
175
  Ok(HttpResponse::Ok().body(page_content))
176
  }
 
 
 
tests/index.rs CHANGED
@@ -41,3 +41,5 @@ async fn test_index() {
41
  assert_eq!(res.text().await.unwrap(), template);
42
  }
43
 
 
 
 
41
  assert_eq!(res.text().await.unwrap(), template);
42
  }
43
 
44
+ // TODO: Write tests for testing parameters for search function that if provided with something
45
+ // other than u32 like alphabets and special characters then it should panic
websurfx/config.lua CHANGED
@@ -16,3 +16,6 @@ binding_ip_addr = "127.0.0.1" --ip address on the which server should be launche
16
  -- }}
17
  colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used for the website theme
18
  theme = "simple" -- the theme name which should be used for the website
 
 
 
 
16
  -- }}
17
  colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used for the website theme
18
  theme = "simple" -- the theme name which should be used for the website
19
+
20
+ -- Caching
21
+ redis_connection_url = "redis://127.0.0.1:8082" -- redis connection url address to which the client should connect.