neon_arch committed
Commit 493c56b
1 Parent(s): 453dbdc

⚙️ refactor: reorganize code & restructure codebase for better maintainability (#207)
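
Moves the model types scattered across the codebase into a new `crate::models` module and splits the monolithic `server/routes.rs`:

src/
├── models/            (new) aggregation_models, engine_models, parser_models, server_models
├── server/router.rs   (new) index, 404, robots.txt, about and settings pages
└── server/routes/     (new) search.rs carries over the search route

`parser_models` moves out of `config`, `engine_models` out of `engines`, `aggregation_models` out of `results`, and the `SearchParams` and `Cookie` types that previously lived inline in `routes.rs` become the new `models::server_models`.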

src/config/mod.rs CHANGED
@@ -2,4 +2,3 @@
 //! and convert the config options into rust readable form.
 
 pub mod parser;
-pub mod parser_models;
src/config/parser.rs CHANGED
@@ -3,7 +3,7 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::Style;
+use crate::models::parser_models::Style;
 use log::LevelFilter;
 use rlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
@@ -27,7 +27,7 @@ pub struct Config {
     /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
     /// It stores all the engine names that were enabled by the user.
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
     /// It stores the time (secs) which controls the server request timeout.
    pub request_timeout: u8,
    /// It stores the number of threads which controls the app will use to run.
@@ -99,7 +99,7 @@ impl Config {
                 .get::<_, HashMap<String, bool>>("upstream_search_engines")?
                 .into_iter()
                 .filter_map(|(key, value)| value.then_some(key))
-                .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
                 .collect(),
             request_timeout: globals.get::<_, u8>("request_timeout")?,
             threads,
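
For context, a minimal sketch of the enabled-engine filtering this hunk touches: the `upstream_search_engines` table from the Lua config arrives as a `HashMap<String, bool>`, and only entries that are `true` and that `EngineHandler::new` recognizes survive. The crate path `websurfx` is an assumption for illustration.

```rust
use std::collections::HashMap;
use websurfx::models::engine_models::EngineHandler;

fn main() {
    // Mirrors a Lua table like `upstream_search_engines = { duckduckgo = true, searx = false }`.
    let from_config: HashMap<String, bool> =
        HashMap::from([("duckduckgo".to_string(), true), ("searx".to_string(), false)]);

    let engines: Vec<EngineHandler> = from_config
        .into_iter()
        // keep only engine names whose value is `true`
        .filter_map(|(key, value)| value.then_some(key))
        // drop names that EngineHandler::new does not recognize
        .filter_map(|engine| EngineHandler::new(&engine))
        .collect();

    assert_eq!(engines.len(), 1); // only duckduckgo was enabled
}
```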
src/engines/duckduckgo.rs CHANGED
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
 use error_stack::{IntoReport, Report, Result, ResultExt};
 
src/engines/mod.rs CHANGED
@@ -4,5 +4,4 @@
 //! code. Moreover, it also provides a custom error for the upstream search engine handling code.
 
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
src/engines/searx.rs CHANGED
@@ -6,9 +6,9 @@ use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
 use scraper::{Html, Selector};
 use std::collections::HashMap;
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 use error_stack::{IntoReport, Report, Result, ResultExt};
 
 /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to
src/lib.rs CHANGED
@@ -9,12 +9,13 @@ pub mod cache;
 pub mod config;
 pub mod engines;
 pub mod handler;
+pub mod models;
 pub mod results;
 pub mod server;
 
 use std::net::TcpListener;
 
-use crate::server::routes;
+use crate::server::router;
 
 use actix_cors::Cors;
 use actix_files as fs;
@@ -81,12 +82,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result<Server> {
                 fs::Files::new("/images", format!("{}/images", public_folder_path))
                     .show_files_listing(),
             )
-            .service(routes::robots_data) // robots.txt
-            .service(routes::index) // index page
-            .service(routes::search) // search page
-            .service(routes::about) // about page
-            .service(routes::settings) // settings page
-            .default_service(web::route().to(routes::not_found)) // error page
+            .service(router::robots_data) // robots.txt
+            .service(router::index) // index page
+            .service(router::search) // search page
+            .service(router::about) // about page
+            .service(router::settings) // settings page
+            .default_service(web::route().to(router::not_found)) // error page
         })
         .workers(cloned_config_threads_opt as usize)
         // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080.
src/{results → models}/aggregation_models.rs RENAMED
@@ -3,7 +3,7 @@
 
 use serde::{Deserialize, Serialize};
 
-use crate::{config::parser_models::Style, engines::engine_models::EngineError};
+use super::{engine_models::EngineError, parser_models::Style};
 
 /// A named struct to store the raw scraped search results scraped search results from the
 /// upstream search engines before aggregating it.It derives the Clone trait which is needed
src/{engines → models}/engine_models.rs RENAMED
@@ -1,7 +1,7 @@
 //! This module provides the error enum to handle different errors associated while requesting data from
 //! the upstream search engines with the search query provided by the user.
 
-use crate::results::aggregation_models::SearchResult;
+use super::aggregation_models::SearchResult;
 use error_stack::{IntoReport, Result, ResultExt};
 use std::{collections::HashMap, fmt, time::Duration};
 
@@ -138,8 +138,11 @@ impl EngineHandler {
     pub fn new(engine_name: &str) -> Option<Self> {
         let engine: (&'static str, Box<dyn SearchEngine>) =
             match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
-                "searx" => ("searx", Box::new(super::searx::Searx)),
+                "duckduckgo" => (
+                    "duckduckgo",
+                    Box::new(crate::engines::duckduckgo::DuckDuckGo),
+                ),
+                "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
                 _ => return None,
             };
 
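The path changes in this rename follow from where the file now sits: inside `src/models`, `super::` resolves to `crate::models`, so the sibling `aggregation_models` import shortens while references back to the engines must become absolute. A toy layout illustrating the resolution (names hypothetical except where they match the diff):

```rust
mod engines {
    pub mod duckduckgo {
        pub struct DuckDuckGo;
    }
}

mod models {
    pub mod aggregation_models {
        pub struct SearchResult;
    }

    pub mod engine_models {
        // `super::` here means `crate::models`, so siblings are within easy reach...
        use super::aggregation_models::SearchResult;

        // ...but the engines stayed in `crate::engines` and need an absolute path.
        pub fn duckduckgo() -> (crate::engines::duckduckgo::DuckDuckGo, Option<SearchResult>) {
            (crate::engines::duckduckgo::DuckDuckGo, None)
        }
    }
}

fn main() {
    let (_engine, _result) = models::engine_models::duckduckgo();
}
```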
src/models/mod.rs ADDED
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provides various models for aggregrating search
+//! results, parsing config file, providing trait to standardize search engine handling code,
+//! custom engine error for the search engine, etc.
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
src/{config → models}/parser_models.rs RENAMED
File without changes
src/models/server_models.rs ADDED
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
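
Since `SearchParams` derives `Deserialize`, actix-web can extract it straight from the query string. A hypothetical handler sketch (route path and the crate name `websurfx` are assumptions; the real search handler lives in `routes/search.rs` below):

```rust
use actix_web::{get, web, App, HttpResponse, HttpServer};
use websurfx::models::server_models::SearchParams;

// e.g. GET /search?q=rust&page=2 deserializes into SearchParams automatically.
#[get("/search")]
async fn search(params: web::Query<SearchParams>) -> HttpResponse {
    let query = params.q.clone().unwrap_or_default();
    let page = params.page.unwrap_or(1); // an absent `page` defaults to the first page
    HttpResponse::Ok().body(format!("q = {query}, page = {page}"))
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    HttpServer::new(|| App::new().service(search))
        .bind(("127.0.0.1", 8080))?
        .run()
        .await
}
```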
src/results/aggregator.rs CHANGED
@@ -1,27 +1,23 @@
 //! This module provides the functionality to scrape and gathers all the results from the upstream
 //! search engines and then removes duplicate results.
 
-use std::{
-    collections::HashMap,
-    io::{BufReader, Read},
-    time::Duration,
-};
-
-use super::{
+use super::user_agent::random_user_agent;
+use crate::handler::paths::{file_path, FileType};
+use crate::models::{
     aggregation_models::{EngineErrorInfo, SearchResult, SearchResults},
-    user_agent::random_user_agent,
+    engine_models::{EngineError, EngineHandler},
 };
 use error_stack::Report;
 use rand::Rng;
 use regex::Regex;
+use std::{
+    collections::HashMap,
+    io::{BufReader, Read},
+    time::Duration,
+};
 use std::{fs::File, io::BufRead};
 use tokio::task::JoinHandle;
 
-use crate::{
-    engines::engine_models::{EngineError, EngineHandler},
-    handler::paths::{file_path, FileType},
-};
-
 /// Aliases for long type annotations
 type FutureVec = Vec<JoinHandle<Result<HashMap<String, SearchResult>, Report<EngineError>>>>;
 
src/results/mod.rs CHANGED
@@ -2,6 +2,5 @@
 //! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also,
 //! provides various models to aggregate search results into a standardized form.
 
-pub mod aggregation_models;
 pub mod aggregator;
 pub mod user_agent;
src/server/mod.rs CHANGED
@@ -3,4 +3,5 @@
 //! the search route. Also, caches the next, current and previous search results in the search
 //! routes with the help of the redis server.
 
+pub mod router;
 pub mod routes;
src/server/router.rs ADDED
@@ -0,0 +1,64 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate response to each route/page
+//! when requested.
+
+use crate::{
+    config::parser::Config,
+    handler::paths::{file_path, FileType},
+};
+use actix_web::{get, web, HttpRequest, HttpResponse};
+use handlebars::Handlebars;
+use std::fs::read_to_string;
+
+/// Handles the route of index page or main page of the `websurfx` meta search engine website.
+#[get("/")]
+pub async fn index(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("index", &config.style).unwrap();
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website essentially the 404 error page.
+pub async fn not_found(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("404", &config.style)?;
+
+    Ok(HttpResponse::Ok()
+        .content_type("text/html; charset=utf-8")
+        .body(page_content))
+}
+
+/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
+#[get("/robots.txt")]
+pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String =
+        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
+    Ok(HttpResponse::Ok()
+        .content_type("text/plain; charset=ascii")
+        .body(page_content))
+}
+
+/// Handles the route of about page of the `websurfx` meta search engine website.
+#[get("/about")]
+pub async fn about(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("about", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
+
+/// Handles the route of settings page of the `websurfx` meta search engine website.
+#[get("/settings")]
+pub async fn settings(
+    hbs: web::Data<Handlebars<'_>>,
+    config: web::Data<Config>,
+) -> Result<HttpResponse, Box<dyn std::error::Error>> {
+    let page_content: String = hbs.render("settings", &config.style)?;
+    Ok(HttpResponse::Ok().body(page_content))
+}
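
As the `src/lib.rs` hunk above shows, the `#[get(...)]` handlers register via `.service(...)`, while `not_found` (which has no route attribute) attaches as the default service. A stripped-down wiring sketch under stated assumptions — inline template registration stands in for the real theme files, and the `Config` app data the handlers also extract is omitted for brevity:

```rust
use actix_web::{web, App, HttpServer};
use handlebars::Handlebars;
use websurfx::server::router;

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    let mut handlebars = Handlebars::new();
    // Inline template stands in for the theme files websurfx resolves via handler::paths.
    handlebars
        .register_template_string("404", "<h1>page not found</h1>")
        .expect("valid template");
    let hbs = web::Data::new(handlebars);

    HttpServer::new(move || {
        App::new()
            .app_data(hbs.clone())
            // NOTE: the real run() also registers web::Data<Config>, static file
            // services, and CORS; without Config these handlers error at runtime.
            .service(router::robots_data) // #[get("/robots.txt")]
            .default_service(web::route().to(router::not_found)) // plain async fn
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}
```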
src/server/routes/mod.rs ADDED
@@ -0,0 +1,3 @@
+//! This module provides modules to handle various routes in the search engine website.
+
+pub mod search;
src/server/{routes.rs → routes/search.rs} RENAMED
@@ -1,67 +1,19 @@
-//! This module provides the functionality to handle different routes of the `websurfx`
-//! meta search engine website and provide appropriate response to each route/page
-//! when requested.
-
-use std::fs::read_to_string;
+//! This module handles the search route of the search engine website.
 
 use crate::{
     cache::cacher::RedisCache,
     config::parser::Config,
-    engines::engine_models::EngineHandler,
-    handler::paths::{file_path, FileType},
-    results::{aggregation_models::SearchResults, aggregator::aggregate},
+    models::{
+        aggregation_models::SearchResults,
+        engine_models::EngineHandler,
+        server_models::{Cookie, SearchParams},
+    },
+    results::aggregator::aggregate,
 };
 use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
-use serde::Deserialize;
 use tokio::join;
 
-/// A named struct which deserializes all the user provided search parameters and stores them.
-#[derive(Deserialize)]
-struct SearchParams {
-    /// It stores the search parameter option `q` (or query in simple words)
-    /// of the search url.
-    q: Option<String>,
-    /// It stores the search parameter `page` (or pageno in simple words)
-    /// of the search url.
-    page: Option<u32>,
-}
-
-/// Handles the route of index page or main page of the `websurfx` meta search engine website.
-#[get("/")]
-pub async fn index(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("index", &config.style).unwrap();
-    Ok(HttpResponse::Ok().body(page_content))
-}
-
-/// Handles the route of any other accessed route/page which is not provided by the
-/// website essentially the 404 error page.
-pub async fn not_found(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("404", &config.style)?;
-
-    Ok(HttpResponse::Ok()
-        .content_type("text/html; charset=utf-8")
-        .body(page_content))
-}
-
-/// A named struct which is used to deserialize the cookies fetched from the client side.
-#[allow(dead_code)]
-#[derive(Deserialize)]
-struct Cookie {
-    /// It stores the theme name used in the website.
-    theme: String,
-    /// It stores the colorscheme name used for the website theme.
-    colorscheme: String,
-    /// It stores the user selected upstream search engines selected from the UI.
-    engines: Vec<String>,
-}
-
 /// Handles the route of search page of the `websurfx` meta search engine website and it takes
 /// two search url parameters `q` and `page` where `page` parameter is optional.
 ///
@@ -178,9 +130,7 @@ async fn results(
     // default selected upstream search engines from the config file otherwise
     // parse the non-empty cookie and grab the user selected engines from the
     // UI and use that.
-    let mut results: crate::results::aggregation_models::SearchResults = match req
-        .cookie("appCookie")
-    {
+    let mut results: SearchResults = match req.cookie("appCookie") {
         Some(cookie_value) => {
             let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
 
@@ -218,33 +168,3 @@ async fn results(
         }
     }
 }
-
-/// Handles the route of robots.txt page of the `websurfx` meta search engine website.
-#[get("/robots.txt")]
-pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String =
-        read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?;
-    Ok(HttpResponse::Ok()
-        .content_type("text/plain; charset=ascii")
-        .body(page_content))
-}
-
-/// Handles the route of about page of the `websurfx` meta search engine website.
-#[get("/about")]
-pub async fn about(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("about", &config.style)?;
-    Ok(HttpResponse::Ok().body(page_content))
-}
-
-/// Handles the route of settings page of the `websurfx` meta search engine website.
-#[get("/settings")]
-pub async fn settings(
-    hbs: web::Data<Handlebars<'_>>,
-    config: web::Data<Config>,
-) -> Result<HttpResponse, Box<dyn std::error::Error>> {
-    let page_content: String = hbs.render("settings", &config.style)?;
-    Ok(HttpResponse::Ok().body(page_content))
-}
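
For reference, a standalone sketch of the `appCookie` deserialization the search route performs above. The struct is redefined locally so the snippet runs with only `serde`/`serde_json`; it mirrors the `Cookie` model from `server_models.rs`, and the field values are illustrative.

```rust
use serde::Deserialize;

// Local mirror of websurfx's models::server_models::Cookie, for illustration only.
#[derive(Deserialize)]
struct Cookie {
    theme: String,
    colorscheme: String,
    engines: Vec<String>,
}

fn main() -> Result<(), serde_json::Error> {
    // Illustrative payload; the real cookie is set by the website's settings UI.
    let payload =
        r#"{"theme":"simple","colorscheme":"catppuccin-mocha","engines":["duckduckgo","searx"]}"#;
    let cookie: Cookie = serde_json::from_str(payload)?;
    assert_eq!(cookie.engines, vec!["duckduckgo", "searx"]);
    println!("theme = {}, colorscheme = {}", cookie.theme, cookie.colorscheme);
    Ok(())
}
```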