neon_arch committed
Commit fc69ace
Parent: ed13a16

Improving source code documentation.

src/bin/websurfx.rs CHANGED
@@ -1,3 +1,8 @@
+//! Main module of the application
+//!
+//! This module contains the main function which handles the logging of the application to
+//! stdout, handles the command line arguments provided and launches the `websurfx` server.
+
 use std::ops::RangeInclusive;
 
 use websurfx::server::routes;
@@ -8,6 +13,7 @@ use clap::{command, Parser};
 use env_logger::Env;
 use handlebars::Handlebars;
 
+/// A commandline arguments struct.
 #[derive(Parser, Debug, Default)]
 #[clap(author = "neon_arch", version, about = "Websurfx server application")]
 #[command(propagate_version = true)]
@@ -19,8 +25,18 @@ struct CliArgs {
 
 const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;
 
-// A function to check whether port is valid u32 number or is in range
-// between [1024-65536] otherwise display an appropriate error message.
+/// A function to check whether the port is a valid u16 number and is in the range
+/// [1024-65535], otherwise displaying an appropriate error message.
+///
+/// # Arguments
+///
+/// * `s` - Takes a commandline argument port as a string.
+///
+/// # Error
+///
+/// Checks whether the argument provided to the `--port` commandline option is a valid
+/// u16 value and returns it as a u16, otherwise returns an error with an
+/// appropriate error message.
 fn is_port_in_range(s: &str) -> Result<u16, String> {
     let port: usize = s
         .parse()
@@ -36,7 +52,12 @@ fn is_port_in_range(s: &str) -> Result<u16, String> {
     }
 }
 
-// The function that launches the main server and handle routing functionality
+/// The function that launches the main server and registers all the routes of the website.
+///
+/// # Error
+///
+/// Returns an error if the port is already being used by something else on the system and is
+/// therefore not available for this application.
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
     let args = CliArgs::parse();
@@ -68,7 +89,7 @@ async fn main() -> std::io::Result<()> {
         .service(routes::settings) // settings page
         .default_service(web::route().to(routes::not_found)) // error page
     })
-    // Start server on 127.0.0.1:8080
+    // Start the server on 127.0.0.1 with the user-provided port number, for example 127.0.0.1:8080.
    .bind(("127.0.0.1", args.port))?
    .run()
    .await
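The hunks above only show the first few lines of `is_port_in_range`; a minimal sketch of how such a range check could look, assuming the error messages (which this diff does not show) are purely illustrative:

```rust
use std::ops::RangeInclusive;

const PORT_RANGE: RangeInclusive<usize> = 1024..=65535;

// Hypothetical completion of the function shown in the hunk above; only the
// signature, the parse call and PORT_RANGE are taken from the diff itself.
fn is_port_in_range(s: &str) -> Result<u16, String> {
    let port: usize = s
        .parse()
        .map_err(|_| format!("`{s}` is not a valid port number"))?;
    if PORT_RANGE.contains(&port) {
        Ok(port as u16)
    } else {
        Err(format!(
            "port not in range {}-{}",
            PORT_RANGE.start(),
            PORT_RANGE.end()
        ))
    }
}
```

With clap's derive API such a check is usually attached to the `port` field via a `value_parser` attribute (for example `#[clap(value_parser = is_port_in_range)]`); the exact attribute used by `CliArgs` falls outside the hunks shown here.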
src/engines/duckduckgo.rs CHANGED
@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
-/// reach the above **upstream search engine** page and also returns error if the scraping
+/// reach the above `upstream search engine` page and also returns error if the scraping
 /// selector fails to initialize"
 pub async fn results(
     query: &str,
src/engines/searx.rs CHANGED
@@ -23,7 +23,7 @@ use crate::search_results_handler::aggregation_models::RawSearchResult;
 /// # Errors
 ///
 /// Returns a reqwest error if the user is not connected to the internet or if their is failure to
-/// reach the above **upstream search engine** page and also returns error if the scraping
+/// reach the above `upstream search engine` page and also returns error if the scraping
 /// selector fails to initialize"
 pub async fn results(
     query: &str,
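Both engine modules document the same two failure modes: a `reqwest` error when the upstream page cannot be reached, and an error when the scraping selector fails to initialize. A minimal sketch of that pattern, assuming the `scraper` crate and an illustrative CSS selector (the real selectors and function bodies used by `results` are not part of this diff):

```rust
use scraper::{Html, Selector};

// Sketch of the two failure modes the doc comments describe; the function name
// and selector string here are illustrative, not the engines' real ones.
async fn fetch_result_titles(url: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
    // reqwest error: no internet connection or the upstream page is unreachable
    let html = reqwest::get(url).await?.text().await?;
    let document = Html::parse_document(&html);
    // selector error: the CSS selector string itself is invalid
    let title_selector = Selector::parse(".result__title")
        .map_err(|_| "failed to initialize the scraping selector")?;
    Ok(document
        .select(&title_selector)
        .map(|element| element.text().collect::<String>())
        .collect())
}
```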
src/search_results_handler/aggregation_models.rs CHANGED
@@ -1,5 +1,19 @@
+//! This module provides public models for handling, storing and serializing search results
+//! data scraped from the upstream search engines.
+
 use serde::Serialize;
 
+/// A named struct to store and serialize the individual search result from all the scraped
+/// and aggregated search results from the upstream search engines.
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when clicked on it (href url in html in simple
+/// words).
+/// * `url` - The url to be displayed below the search result title in html.
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines from which these results were provided.
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResult {
@@ -11,6 +25,16 @@ pub struct SearchResult {
 }
 
 impl SearchResult {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    /// * `url` - The url to be displayed below the search result title in html.
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines from which these results were provided.
     pub fn new(
         title: String,
         visiting_url: String,
@@ -28,6 +52,17 @@ impl SearchResult {
     }
 }
 
+/// A named struct to store the raw scraped search results from the upstream search engines
+/// before aggregating them. It derives the Clone trait which is needed to write idiomatic
+/// rust using `Iterators`.
+///
+/// # Fields
+///
+/// * `title` - The title of the search result.
+/// * `visiting_url` - The url which is accessed when clicked on it
+/// (href url in html in simple words).
+/// * `description` - The description of the search result.
+/// * `engine` - The names of the upstream engines from which these results were provided.
 #[derive(Clone)]
 pub struct RawSearchResult {
     pub title: String,
@@ -37,6 +72,15 @@ pub struct RawSearchResult {
 }
 
 impl RawSearchResult {
+    /// Constructs a new `RawSearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `visiting_url` - The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines from which these results were provided.
     pub fn new(
         title: String,
         visiting_url: String,
@@ -50,6 +94,12 @@ impl RawSearchResult {
             engine,
         }
     }
+
+    /// A function which adds the engine name provided as a string into a vector of strings.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine` - Takes an engine name provided as a String.
     pub fn add_engines(&mut self, engine: String) {
         self.engine.push(engine)
     }
@@ -59,6 +109,14 @@ impl RawSearchResult {
     }
 }
 
+/// A named struct to store and serialize all the search results scraped and aggregated
+/// from the upstream search engines.
+///
+/// # Fields
+///
+/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of
+/// `SearchResult` structs.
+/// * `page_query` - Stores the current page's search query `q` provided in the search url.
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SearchResults {
@@ -67,6 +125,14 @@ pub struct SearchResults {
 }
 
 impl SearchResults {
+    /// Constructs a new `SearchResults` struct with the given arguments needed for it.
+    ///
+    /// # Arguments
+    ///
+    /// * `results` - Takes an argument of individual serializable `SearchResult` structs
+    /// and stores them into a vector of `SearchResult` structs.
+    /// * `page_query` - Takes an argument of the current page's search query `q` provided in
+    /// the search url.
     pub fn new(results: Vec<SearchResult>, page_query: String) -> Self {
         SearchResults {
             results,
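A short usage sketch of the constructors and `add_engines` documented above, with illustrative values; it assumes the `engine` field is public like the `title` field shown in the hunk:

```rust
use websurfx::search_results_handler::aggregation_models::RawSearchResult;

fn main() {
    // illustrative values only; the argument order follows the `# Arguments` list above
    let mut raw_result = RawSearchResult::new(
        "Example Domain".to_string(),
        "https://example.org/".to_string(),
        "A placeholder description for the result.".to_string(),
        vec!["duckduckgo".to_string()],
    );

    // the same page was also returned by another upstream engine
    raw_result.add_engines("searx".to_string());
    assert_eq!(raw_result.engine, vec!["duckduckgo", "searx"]);
}
```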
src/search_results_handler/aggregator.rs CHANGED
@@ -1,3 +1,6 @@
+//! This module provides the functionality to scrape and gather all the results from the upstream
+//! search engines and then removes duplicate results.
+
 use std::collections::HashMap;
 
 use super::{
@@ -7,17 +10,28 @@ use super::{
 
 use crate::engines::{duckduckgo, searx};
 
-// A function that aggregates all the scraped results from the above upstream engines and
-// then removes duplicate results and if two results are found to be from two or more engines
-// then puts their names together to show the results are fetched from these upstream engines
-// and then removes all data from the HashMap and puts into a struct of all results aggregated
-// into a vector and also adds the query used into the struct this is neccessory because otherwise
-// the search bar in search remains empty if searched from the query url
-//
-// For Example:
-//
-// If you search from the url like *https://127.0.0.1/search?q=huston* then the search bar should
-// contain the word huston and not remain empty.
+/// A function that aggregates all the scraped results from the above upstream engines and
+/// then removes duplicate results. If two results are found to be from two or more engines,
+/// it puts their names together to show that the results are fetched from these upstream engines
+/// and then moves all data from the HashMap into a struct of all results aggregated
+/// into a vector. It also adds the query used into the struct; this is necessary because
+/// otherwise the search bar on the search page remains empty if searched from the query url.
+///
+/// # Example:
+///
+/// If you search from the url like `https://127.0.0.1/search?q=huston` then the search bar should
+/// contain the word huston and not remain empty.
+///
+/// # Arguments
+///
+/// * `query` - Accepts a string to query with the above upstream search engines.
+/// * `page` - Accepts an `Option<u32>` which could either be a None or a valid page number.
+///
+/// # Error
+///
+/// Returns a reqwest or scraping selector error if any error occurs in the results
+/// function of either `searx` or `duckduckgo` or both, otherwise returns a `SearchResults` struct
+/// containing appropriate values.
 pub async fn aggregate(
     query: &str,
     page: Option<u32>,
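The doc comment describes deduplicating results across engines by merging the engine names; a minimal sketch of that idea, assuming results are keyed by their `visiting_url` (the actual key and helper used by `aggregate` are not shown in this diff) and that the `RawSearchResult` fields are public:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;

use websurfx::search_results_handler::aggregation_models::RawSearchResult;

// Hypothetical helper: merge two engines' result lists, recording every engine
// that returned the same page instead of keeping duplicate entries.
fn merge_results(
    first: Vec<RawSearchResult>,
    second: Vec<RawSearchResult>,
) -> Vec<RawSearchResult> {
    let mut merged: HashMap<String, RawSearchResult> = HashMap::new();
    for result in first.into_iter().chain(second) {
        match merged.entry(result.visiting_url.clone()) {
            // the same page was already seen: remember the additional engine names
            Entry::Occupied(mut existing) => {
                for engine in result.engine {
                    existing.get_mut().add_engines(engine);
                }
            }
            // first time this page shows up: keep it as-is
            Entry::Vacant(slot) => {
                slot.insert(result);
            }
        }
    }
    merged.into_values().collect()
}
```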
src/search_results_handler/user_agent.rs CHANGED
@@ -1,6 +1,12 @@
+//! This module provides the functionality to generate a random user agent string.
+
 use fake_useragent::{Browsers, UserAgentsBuilder};
 
-// A function to generate random user agent to improve privacy of the user.
+/// A function to generate a random user agent to improve the privacy of the user.
+///
+/// # Returns
+///
+/// A randomly generated user agent string.
 pub fn random_user_agent() -> String {
     UserAgentsBuilder::new()
         .cache(false)
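A trivial usage sketch of the documented function; the module path is inferred from the repository layout and the `use crate::search_results_handler::...` imports seen elsewhere in this commit:

```rust
use websurfx::search_results_handler::user_agent::random_user_agent;

fn main() {
    // each call yields a randomly chosen, realistic browser user agent string
    let user_agent: String = random_user_agent();
    println!("sending upstream requests as: {user_agent}");
}
```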
src/server/routes.rs CHANGED
@@ -1,3 +1,7 @@
+//! This module provides the functionality to handle different routes of the `websurfx`
+//! meta search engine website and provide appropriate responses to each route/page
+//! when requested.
+
 use std::fs::read_to_string;
 
 use crate::search_results_handler::aggregator::aggregate;
@@ -5,12 +9,21 @@ use actix_web::{get, web, HttpRequest, HttpResponse};
 use handlebars::Handlebars;
 use serde::Deserialize;
 
+/// A named struct which deserializes all the user-provided search parameters and stores them.
+///
+/// # Fields
+///
+/// * `q` - It stores the search parameter option `q` (or query in simple words)
+/// of the search url.
+/// * `page` - It stores the search parameter `page` (or pageno in simple words)
+/// of the search url.
 #[derive(Debug, Deserialize)]
 struct SearchParams {
     q: Option<String>,
     page: Option<u32>,
 }
 
+/// Handles the route of the index page or main page of the `websurfx` meta search engine website.
 #[get("/")]
 pub async fn index(
     hbs: web::Data<Handlebars<'_>>,
@@ -19,6 +32,8 @@ pub async fn index(
     Ok(HttpResponse::Ok().body(page_content))
 }
 
+/// Handles the route of any other accessed route/page which is not provided by the
+/// website, essentially the 404 error page.
 pub async fn not_found(
     hbs: web::Data<Handlebars<'_>>,
 ) -> Result<HttpResponse, Box<dyn std::error::Error>> {
@@ -29,6 +44,20 @@ pub async fn not_found(
         .body(page_content))
 }
 
+/// Handles the route of the search page of the `websurfx` meta search engine website and it takes
+/// two search url parameters `q` and `page` where the `page` parameter is optional.
+///
+/// # Example
+///
+/// ```bash
+/// curl "http://127.0.0.1:8080/search?q=sweden&page=1"
+/// ```
+///
+/// Or
+///
+/// ```bash
+/// curl "http://127.0.0.1:8080/search?q=sweden"
+/// ```
 #[get("/search")]
 pub async fn search(
     hbs: web::Data<Handlebars<'_>>,
@@ -54,6 +83,7 @@ pub async fn search(
     }
 }
 
+/// Handles the route of the robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
     let page_content: String = read_to_string("./public/robots.txt")?;
@@ -62,6 +92,7 @@ pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
         .body(page_content))
 }
 
+/// Handles the route of the about page of the `websurfx` meta search engine website.
 #[get("/about")]
 pub async fn about(
     hbs: web::Data<Handlebars<'_>>,
@@ -70,6 +101,7 @@ pub async fn about(
     Ok(HttpResponse::Ok().body(page_content))
 }
 
+/// Handles the route of the settings page of the `websurfx` meta search engine website.
 #[get("/settings")]
 pub async fn settings(
     hbs: web::Data<Handlebars<'_>>,
@@ -77,3 +109,6 @@ pub async fn settings(
     let page_content: String = hbs.render("settings", &"")?;
     Ok(HttpResponse::Ok().body(page_content))
 }
+
+// TODO: Write tests for testing the parameters of the search function so that if provided with
+// something other than u32 like alphabets and special characters then it should panic
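The closing TODO asks for tests around the search parameters. One way such a test could start, assuming the handler extracts them with `web::Query<SearchParams>` (in which case a non-numeric `page` is rejected during deserialization rather than reaching the handler):

```rust
use actix_web::web;
use serde::Deserialize;

// Local copy of the struct for this sketch; the real definition lives in routes.rs.
#[derive(Debug, Deserialize)]
struct SearchParams {
    q: Option<String>,
    page: Option<u32>,
}

#[test]
fn non_numeric_page_is_rejected() {
    // a well-formed query string deserializes into the struct
    let ok = web::Query::<SearchParams>::from_query("q=sweden&page=1");
    assert!(ok.is_ok());

    // alphabets and special characters in `page` fail to parse as a u32
    let err = web::Query::<SearchParams>::from_query("q=sweden&page=abc!");
    assert!(err.is_err());
}
```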