alamin655 committed on
Commit
867753a
·
unverified ·
2 Parent(s): d3a7435 6c94b92

Merge pull request #202 from neon-mmd/feat-disallow-user-to-search-via-lists

Browse files
Cargo.lock CHANGED
@@ -532,18 +532,18 @@ dependencies = [
532
 
533
  [[package]]
534
  name = "clap"
535
- version = "4.4.1"
536
  source = "registry+https://github.com/rust-lang/crates.io-index"
537
- checksum = "7c8d502cbaec4595d2e7d5f61e318f05417bd2b66fdc3809498f0d3fdf0bea27"
538
  dependencies = [
539
  "clap_builder",
540
  ]
541
 
542
  [[package]]
543
  name = "clap_builder"
544
- version = "4.4.1"
545
  source = "registry+https://github.com/rust-lang/crates.io-index"
546
- checksum = "5891c7bc0edb3e1c2204fc5e94009affabeb1821c9e5fdc3959536c5c0bb984d"
547
  dependencies = [
548
  "anstyle",
549
  "clap_lex",
@@ -1270,9 +1270,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
1270
 
1271
  [[package]]
1272
  name = "handlebars"
1273
- version = "4.3.7"
1274
  source = "registry+https://github.com/rust-lang/crates.io-index"
1275
- checksum = "83c3372087601b532857d332f5957cbae686da52bb7810bf038c3e3c3cc2fa0d"
1276
  dependencies = [
1277
  "log",
1278
  "pest",
@@ -2494,9 +2494,9 @@ dependencies = [
2494
 
2495
  [[package]]
2496
  name = "redis"
2497
- version = "0.23.2"
2498
  source = "registry+https://github.com/rust-lang/crates.io-index"
2499
- checksum = "ffd6543a7bc6428396845f6854ccf3d1ae8823816592e2cbe74f20f50f209d02"
2500
  dependencies = [
2501
  "arc-swap",
2502
  "async-trait",
@@ -2663,9 +2663,9 @@ dependencies = [
2663
 
2664
  [[package]]
2665
  name = "rustix"
2666
- version = "0.38.10"
2667
  source = "registry+https://github.com/rust-lang/crates.io-index"
2668
- checksum = "ed6248e1caa625eb708e266e06159f135e8c26f2bb7ceb72dc4b2766d0340964"
2669
  dependencies = [
2670
  "bitflags 2.4.0",
2671
  "errno",
@@ -3697,7 +3697,7 @@ dependencies = [
3697
 
3698
  [[package]]
3699
  name = "websurfx"
3700
- version = "0.18.6"
3701
  dependencies = [
3702
  "actix-cors",
3703
  "actix-files",
 
532
 
533
  [[package]]
534
  name = "clap"
535
+ version = "4.4.2"
536
  source = "registry+https://github.com/rust-lang/crates.io-index"
537
+ checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6"
538
  dependencies = [
539
  "clap_builder",
540
  ]
541
 
542
  [[package]]
543
  name = "clap_builder"
544
+ version = "4.4.2"
545
  source = "registry+https://github.com/rust-lang/crates.io-index"
546
+ checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08"
547
  dependencies = [
548
  "anstyle",
549
  "clap_lex",
 
1270
 
1271
  [[package]]
1272
  name = "handlebars"
1273
+ version = "4.4.0"
1274
  source = "registry+https://github.com/rust-lang/crates.io-index"
1275
+ checksum = "c39b3bc2a8f715298032cf5087e58573809374b08160aa7d750582bdb82d2683"
1276
  dependencies = [
1277
  "log",
1278
  "pest",
 
2494
 
2495
  [[package]]
2496
  name = "redis"
2497
+ version = "0.23.3"
2498
  source = "registry+https://github.com/rust-lang/crates.io-index"
2499
+ checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba"
2500
  dependencies = [
2501
  "arc-swap",
2502
  "async-trait",
 
2663
 
2664
  [[package]]
2665
  name = "rustix"
2666
+ version = "0.38.11"
2667
  source = "registry+https://github.com/rust-lang/crates.io-index"
2668
+ checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453"
2669
  dependencies = [
2670
  "bitflags 2.4.0",
2671
  "errno",
 
3697
 
3698
  [[package]]
3699
  name = "websurfx"
3700
+ version = "0.19.0"
3701
  dependencies = [
3702
  "actix-cors",
3703
  "actix-files",
Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
  [package]
2
  name = "websurfx"
3
- version = "0.18.6"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
@@ -10,7 +10,7 @@ license = "AGPL-3.0"
10
  reqwest = {version="0.11.20",features=["json"]}
11
  tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
12
  serde = {version="1.0.188",features=["derive"]}
13
- handlebars = { version = "4.3.7", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
15
  actix-web = {version="4.4.0", features = ["cookies"]}
16
  actix-files = {version="0.6.2"}
@@ -19,8 +19,8 @@ serde_json = {version="1.0.105"}
19
  fake-useragent = {version="0.1.3"}
20
  env_logger = {version="0.10.0"}
21
  log = {version="0.4.20"}
22
- mlua = {version="0.8.10",features=["luajit"]}
23
- redis = {version="0.23.2",features=["tokio-comp","connection-manager"]}
24
  md5 = {version="0.7.0"}
25
  rand={version="0.8.5"}
26
  once_cell = {version="1.18.0"}
 
1
  [package]
2
  name = "websurfx"
3
+ version = "0.19.0"
4
  edition = "2021"
5
  description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind."
6
  repository = "https://github.com/neon-mmd/websurfx"
 
10
  reqwest = {version="0.11.20",features=["json"]}
11
  tokio = {version="1.32.0",features=["rt-multi-thread","macros"]}
12
  serde = {version="1.0.188",features=["derive"]}
13
+ handlebars = { version = "4.4.0", features = ["dir_source"] }
14
  scraper = {version="0.17.1"}
15
  actix-web = {version="4.4.0", features = ["cookies"]}
16
  actix-files = {version="0.6.2"}
 
19
  fake-useragent = {version="0.1.3"}
20
  env_logger = {version="0.10.0"}
21
  log = {version="0.4.20"}
22
+ mlua = {version="0.8.10", features=["luajit"]}
23
+ redis = {version="0.23.3", features=["tokio-comp","connection-manager"]}
24
  md5 = {version="0.7.0"}
25
  rand={version="0.8.5"}
26
  once_cell = {version="1.18.0"}
public/images/barricade.png ADDED
public/images/filter.png ADDED
public/static/themes/simple.css CHANGED
@@ -132,6 +132,35 @@ body {
132
  width: 1.2rem;
133
  height: 1.2rem;
134
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  /* styles for the footer and header */
137
 
 
132
  width: 1.2rem;
133
  height: 1.2rem;
134
  }
135
+ .results .result_disallowed,
136
+ .results .result_filtered {
137
+ display: flex;
138
+ justify-content: center;
139
+ align-items: center;
140
+ gap: 10rem;
141
+ font-size: 2rem;
142
+ color: var(--foreground-color);
143
+ margin: 0rem 7rem;
144
+ }
145
+
146
+ .results .result_disallowed .user_query,
147
+ .results .result_filtered .user_query {
148
+ color: var(--background-color);
149
+ font-weight: 300;
150
+ }
151
+
152
+ .results .result_disallowed img,
153
+ .results .result_filtered img {
154
+ width: 30rem;
155
+ }
156
+
157
+ .results .result_disallowed div,
158
+ .results .result_filtered div {
159
+ display: flex;
160
+ flex-direction: column;
161
+ gap: 1rem;
162
+ line-break: strict;
163
+ }
164
 
165
  /* styles for the footer and header */
166
 
public/templates/search.html CHANGED
@@ -1,37 +1,69 @@
1
  {{>header this.style}}
2
  <main class="results">
3
- {{>search_bar this}}
4
- <div class="results_aggregated">
5
- {{#if results}} {{#each results}}
6
- <div class="result">
7
- <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
8
- <small>{{{this.url}}}</small>
9
- <p>{{{this.description}}}</p>
10
- <div class="upstream_engines">
11
- {{#each engine}}
12
- <span>{{{this}}}</span>
13
- {{/each}}
14
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  </div>
16
- {{/each}} {{else}}
17
- <div class="result_not_found">
18
- <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
19
- <p class="suggestions">Suggestions:</p>
20
- <ul>
21
- <li>Make sure that all words are spelled correctly.</li>
22
- <li>Try different keywords.</li>
23
- <li>Try more general keywords.</li>
24
- </ul>
25
- <img src="./images/no_results.gif" alt="Man fishing gif" />
26
  </div>
27
- {{/if}}
28
- </div>
29
- <div class="page_navigation">
30
- <button type="button" onclick="navigate_backward()">
31
- &#8592; previous
32
- </button>
33
- <button type="button" onclick="navigate_forward()">next &#8594;</button>
34
- </div>
35
  </main>
36
  <script src="static/index.js"></script>
37
  <script src="static/pagination.js"></script>
 
1
  {{>header this.style}}
2
  <main class="results">
3
+ {{>search_bar this}}
4
+ <div class="results_aggregated">
5
+ {{#if results}} {{#each results}}
6
+ <div class="result">
7
+ <h1><a href="{{{this.url}}}">{{{this.title}}}</a></h1>
8
+ <small>{{{this.url}}}</small>
9
+ <p>{{{this.description}}}</p>
10
+ <div class="upstream_engines">
11
+ {{#each engine}}
12
+ <span>{{{this}}}</span>
13
+ {{/each}}
14
+ </div>
15
+ </div>
16
+ {{/each}} {{else}} {{#if disallowed}}
17
+ <div class="result_disallowed">
18
+ <div class="description">
19
+ <p>
20
+ Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
21
+ has been disallowed.
22
+ </p>
23
+ <p class="description_paragraph">Dear user,</p>
24
+ <p class="description_paragraph">
25
+ The query - <span class="user_query">{{{this.pageQuery}}}</span> - has
26
+ been blacklisted via server configuration and hence disallowed by the
27
+ server. Henceforth no results could be displayed for your query.
28
+ </p>
29
+ </div>
30
+ <img src="./images/barricade.png" alt="Image of a Barricade" />
31
+ </div>
32
+ {{else}} {{#if filtered}}
33
+ <div class="result_filtered">
34
+ <div class="description">
35
+ <p>
36
+ Your search - <span class="user_query">{{{this.pageQuery}}}</span> -
37
+ has been filtered.
38
+ </p>
39
+ <p class="description_paragraph">Dear user,</p>
40
+ <p class="description_paragraph">
41
+ All the search results contain results that have been configured to be
42
+ filtered out via server configuration and hence have been
43
+ completely filtered out.
44
+ </p>
45
+ </div>
46
+ <img src="./images/filter.png" alt="Image of a paper inside a funnel" />
47
+ </div>
48
+ {{else}}
49
+ <div class="result_not_found">
50
+ <p>Your search - {{{this.pageQuery}}} - did not match any documents.</p>
51
+ <p class="suggestions">Suggestions:</p>
52
+ <ul>
53
+ <li>Make sure that all words are spelled correctly.</li>
54
+ <li>Try different keywords.</li>
55
+ <li>Try more general keywords.</li>
56
+ </ul>
57
+ <img src="./images/no_results.gif" alt="Man fishing gif" />
58
+ </div>
59
+ {{/if}} {{/if}} {{/if}}
60
  </div>
61
+ <div class="page_navigation">
62
+ <button type="button" onclick="navigate_backward()">
63
+ &#8592; previous
64
+ </button>
65
+ <button type="button" onclick="navigate_forward()">next &#8594;</button>
 
 
 
 
 
66
  </div>
 
 
 
 
 
 
 
 
67
  </main>
68
  <script src="static/index.js"></script>
69
  <script src="static/pagination.js"></script>
src/config/parser.rs CHANGED
@@ -35,6 +35,7 @@ pub struct Config {
35
  pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
36
  pub request_timeout: u8,
37
  pub threads: u8,
 
38
  }
39
 
40
  /// Configuration options for the aggregator.
@@ -89,6 +90,16 @@ impl Config {
89
  parsed_threads
90
  };
91
 
 
 
 
 
 
 
 
 
 
 
92
  Ok(Config {
93
  port: globals.get::<_, u16>("port")?,
94
  binding_ip: globals.get::<_, String>("binding_ip")?,
@@ -110,6 +121,7 @@ impl Config {
110
  .collect(),
111
  request_timeout: globals.get::<_, u8>("request_timeout")?,
112
  threads,
 
113
  })
114
  }
115
  }
 
35
  pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
36
  pub request_timeout: u8,
37
  pub threads: u8,
38
+ pub safe_search: u8,
39
  }
40
 
41
  /// Configuration options for the aggregator.
 
90
  parsed_threads
91
  };
92
 
93
+ let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
94
+ let safe_search: u8 = match parsed_safe_search {
95
+ 0..=4 => parsed_safe_search,
96
+ _ => {
97
+ log::error!("Config Error: The value of `safe_search` option should be a non zero positive integer from 0 to 4.");
98
+ log::error!("Falling back to using the value `1` for the option");
99
+ 1
100
+ }
101
+ };
102
+
103
  Ok(Config {
104
  port: globals.get::<_, u16>("port")?,
105
  binding_ip: globals.get::<_, String>("binding_ip")?,
 
121
  .collect(),
122
  request_timeout: globals.get::<_, u8>("request_timeout")?,
123
  threads,
124
+ safe_search,
125
  })
126
  }
127
  }
src/engines/duckduckgo.rs CHANGED
@@ -43,6 +43,7 @@ impl SearchEngine for DuckDuckGo {
43
  page: u32,
44
  user_agent: &str,
45
  request_timeout: u8,
 
46
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
47
  // Page number can be missing or empty string and so appropriate handling is required
48
  // so that upstream server receives valid page number.
 
43
  page: u32,
44
  user_agent: &str,
45
  request_timeout: u8,
46
+ _safe_search: u8,
47
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
48
  // Page number can be missing or empty string and so appropriate handling is required
49
  // so that upstream server recieves valid page number.
src/engines/engine_models.rs CHANGED
@@ -71,6 +71,7 @@ pub trait SearchEngine: Sync + Send {
71
  page: u32,
72
  user_agent: &str,
73
  request_timeout: u8,
 
74
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
75
  }
76
 
 
71
  page: u32,
72
  user_agent: &str,
73
  request_timeout: u8,
74
+ safe_search: u8,
75
  ) -> Result<HashMap<String, SearchResult>, EngineError>;
76
  }
77
 
src/engines/searx.rs CHANGED
@@ -42,12 +42,21 @@ impl SearchEngine for Searx {
42
  page: u32,
43
  user_agent: &str,
44
  request_timeout: u8,
 
45
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
46
  // Page number can be missing or empty string and so appropriate handling is required
47
  // so that upstream server receives valid page number.
 
 
 
 
48
  let url: String = match page {
49
- 0 | 1 => format!("https://searx.work/search?q={query}&pageno=1"),
50
- _ => format!("https://searx.work/search?q={query}&pageno={page}"),
 
 
 
 
51
  };
52
 
53
  // initializing headers and adding appropriate headers.
 
42
  page: u32,
43
  user_agent: &str,
44
  request_timeout: u8,
45
+ mut safe_search: u8,
46
  ) -> Result<HashMap<String, SearchResult>, EngineError> {
47
  // Page number can be missing or empty string and so appropriate handling is required
48
  // so that upstream server receives valid page number.
49
+ if safe_search == 3 {
50
+ safe_search = 2;
51
+ };
52
+
53
  let url: String = match page {
54
+ 0 | 1 => {
55
+ format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}")
56
+ }
57
+ _ => format!(
58
+ "https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}"
59
+ ),
60
  };
61
 
62
  // initializing headers and adding appropriate headers.
src/results/aggregation_models.rs CHANGED
@@ -102,13 +102,15 @@ impl EngineErrorInfo {
102
  /// and the type of error that caused it.
103
  /// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
104
  /// given search query.
105
- #[derive(Serialize, Deserialize)]
106
  #[serde(rename_all = "camelCase")]
107
  pub struct SearchResults {
108
  pub results: Vec<SearchResult>,
109
  pub page_query: String,
110
  pub style: Style,
111
- pub engine_errors_info: SmallVec<[EngineErrorInfo; 0]>,
 
 
112
  }
113
 
114
  impl SearchResults {
@@ -122,6 +124,7 @@ impl SearchResults {
122
  /// the search url.
123
  /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
124
  /// given search query.
 
125
  pub fn new(
126
  results: Vec<SearchResult>,
127
  page_query: &str,
@@ -131,12 +134,38 @@ impl SearchResults {
131
  results,
132
  page_query: page_query.to_owned(),
133
  style: Style::default(),
134
- engine_errors_info: SmallVec::from(engine_errors_info),
 
 
135
  }
136
  }
137
 
138
  /// A setter function to add website style to the return search results.
139
  pub fn add_style(&mut self, style: &Style) {
140
- self.style = style.to_owned();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  }
142
  }
 
102
  /// and the type of error that caused it.
103
  /// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the
104
  /// given search query.
105
+ #[derive(Serialize, Deserialize, Default)]
106
  #[serde(rename_all = "camelCase")]
107
  pub struct SearchResults {
108
  pub results: Vec<SearchResult>,
109
  pub page_query: String,
110
  pub style: Style,
111
+ pub engine_errors_info: Vec<EngineErrorInfo>,
112
+ pub disallowed: bool,
113
+ pub filtered: bool,
114
  }
115
 
116
  impl SearchResults {
 
124
  /// the search url.
125
  /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the
126
  /// given search query.
127
+ /// * ``
128
  pub fn new(
129
  results: Vec<SearchResult>,
130
  page_query: &str,
 
134
  results,
135
  page_query: page_query.to_owned(),
136
  style: Style::default(),
137
+ engine_errors_info: engine_errors_info.to_owned(),
138
+ disallowed: Default::default(),
139
+ filtered: Default::default(),
140
  }
141
  }
142
 
143
  /// A setter function to add website style to the return search results.
144
  pub fn add_style(&mut self, style: &Style) {
145
+ self.style = style.clone();
146
+ }
147
+
148
+ /// A setter function that sets disallowed to true.
149
+ pub fn set_disallowed(&mut self) {
150
+ self.disallowed = true;
151
+ }
152
+
153
+ /// A setter function to set the current page search query.
154
+ pub fn set_page_query(&mut self, page: &str) {
155
+ self.page_query = page.to_owned();
156
+ }
157
+
158
+ /// A setter function that sets `filtered` to true.
159
+ pub fn set_filtered(&mut self) {
160
+ self.filtered = true;
161
+ }
162
+
163
+ /// A getter function that takes the value of `engine_errors_info` out of the struct (via `std::mem::take`), leaving an empty vector in its place.
164
+ pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
165
+ std::mem::take(&mut self.engine_errors_info)
166
+ }
167
+ /// A getter function that gets the value of `results`.
168
+ pub fn results(&mut self) -> Vec<SearchResult> {
169
+ self.results.clone()
170
  }
171
  }
src/results/aggregator.rs CHANGED
@@ -70,6 +70,7 @@ pub async fn aggregate(
70
  debug: bool,
71
  upstream_search_engines: &[EngineHandler],
72
  request_timeout: u8,
 
73
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
74
  let user_agent: &str = random_user_agent();
75
 
@@ -91,7 +92,13 @@ pub async fn aggregate(
91
  let query: String = query.to_owned();
92
  tasks.push(tokio::spawn(async move {
93
  search_engine
94
- .results(&query, page, user_agent, request_timeout)
 
 
 
 
 
 
95
  .await
96
  }));
97
  }
@@ -150,20 +157,22 @@ pub async fn aggregate(
150
  }
151
  }
152
 
153
- let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
154
- filter_with_lists(
155
- &mut result_map,
156
- &mut blacklist_map,
157
- file_path(FileType::BlockList)?,
158
- )?;
 
159
 
160
- filter_with_lists(
161
- &mut blacklist_map,
162
- &mut result_map,
163
- file_path(FileType::AllowList)?,
164
- )?;
165
 
166
- drop(blacklist_map);
 
167
 
168
  let results: Vec<SearchResult> = result_map.into_values().collect();
169
 
@@ -189,7 +198,7 @@ pub fn filter_with_lists(
189
  let mut reader = BufReader::new(File::open(file_path)?);
190
 
191
  for line in reader.by_ref().lines() {
192
- let re = Regex::new(&line?)?;
193
 
194
  // Iterate over each search result in the map and check if it matches the regex pattern
195
  for (url, search_result) in map_to_be_filtered.clone().into_iter() {
 
70
  debug: bool,
71
  upstream_search_engines: &[EngineHandler],
72
  request_timeout: u8,
73
+ safe_search: u8,
74
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
75
  let user_agent: &str = random_user_agent();
76
 
 
92
  let query: String = query.to_owned();
93
  tasks.push(tokio::spawn(async move {
94
  search_engine
95
+ .results(
96
+ &query,
97
+ page,
98
+ user_agent.clone(),
99
+ request_timeout,
100
+ safe_search,
101
+ )
102
  .await
103
  }));
104
  }
 
157
  }
158
  }
159
 
160
+ if safe_search >= 3 {
161
+ let mut blacklist_map: HashMap<String, SearchResult> = HashMap::new();
162
+ filter_with_lists(
163
+ &mut result_map,
164
+ &mut blacklist_map,
165
+ file_path(FileType::BlockList)?,
166
+ )?;
167
 
168
+ filter_with_lists(
169
+ &mut blacklist_map,
170
+ &mut result_map,
171
+ file_path(FileType::AllowList)?,
172
+ )?;
173
 
174
+ drop(blacklist_map);
175
+ }
176
 
177
  let results: Vec<SearchResult> = result_map.into_values().collect();
178
 
 
198
  let mut reader = BufReader::new(File::open(file_path)?);
199
 
200
  for line in reader.by_ref().lines() {
201
+ let re = Regex::new(line?.trim())?;
202
 
203
  // Iterate over each search result in the map and check if it matches the regex pattern
204
  for (url, search_result) in map_to_be_filtered.clone().into_iter() {
src/server/routes.rs CHANGED
@@ -2,7 +2,10 @@
2
  //! meta search engine website and provide appropriate response to each route/page
3
  //! when requested.
4
 
5
- use std::fs::read_to_string;
 
 
 
6
 
7
  use crate::{
8
  cache::cacher::RedisCache,
@@ -13,12 +16,13 @@ use crate::{
13
  };
14
  use actix_web::{get, web, HttpRequest, HttpResponse};
15
  use handlebars::Handlebars;
 
16
  use serde::Deserialize;
17
  use tokio::join;
18
 
19
  // ---- Constants ----
20
  /// Initialize redis cache connection once and store it on the heap.
21
- const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
22
 
23
  /// A named struct which deserializes all the user provided search parameters and stores them.
24
  ///
@@ -32,6 +36,7 @@ const REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::Once
32
  struct SearchParams {
33
  q: Option<String>,
34
  page: Option<u32>,
 
35
  }
36
 
37
  /// Handles the route of index page or main page of the `websurfx` meta search engine website.
@@ -105,42 +110,58 @@ pub async fn search(
105
  None => 1,
106
  };
107
 
 
 
 
 
 
 
 
 
 
 
 
108
  let (_, results, _) = join!(
109
  results(
110
  format!(
111
- "http://{}:{}/search?q={}&page={}",
112
  config.binding_ip,
113
  config.port,
114
  query,
115
- page - 1
 
116
  ),
117
  &config,
118
  query,
119
  page - 1,
120
- &req,
 
121
  ),
122
  results(
123
  format!(
124
- "http://{}:{}/search?q={}&page={}",
125
- config.binding_ip, config.port, query, page
126
  ),
127
  &config,
128
  query,
129
  page,
130
- &req,
 
131
  ),
132
  results(
133
  format!(
134
- "http://{}:{}/search?q={}&page={}",
135
  config.binding_ip,
136
  config.port,
137
  query,
138
- page + 1
 
139
  ),
140
  &config,
141
  query,
142
  page + 1,
143
- &req,
 
144
  )
145
  );
146
 
@@ -160,9 +181,10 @@ async fn results(
160
  config: &Config,
161
  query: &str,
162
  page: u32,
163
- req: &HttpRequest,
 
164
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
165
- let redis_cache: RedisCache = REDIS_CACHE
166
  .get_or_init(async {
167
  // Initialize redis cache connection pool only once and store it in the heap.
168
  RedisCache::new(&config.redis_url, 5).await.unwrap()
@@ -178,6 +200,23 @@ async fn results(
178
  match cached_results_json {
179
  Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
180
  Err(_) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  // check if the cookie value is empty or not if it is empty then use the
182
  // default selected upstream search engines from the config file otherwise
183
  // parse the non-empty cookie and grab the user selected engines from the
@@ -199,6 +238,7 @@ async fn results(
199
  config.debug,
200
  &engines,
201
  config.request_timeout,
 
202
  )
203
  .await?
204
  }
@@ -210,14 +250,16 @@ async fn results(
210
  config.debug,
211
  &config.upstream_search_engines,
212
  config.request_timeout,
 
213
  )
214
  .await?
215
  }
216
  };
217
-
 
 
218
  results.add_style(&config.style);
219
  redis_cache
220
- .clone()
221
  .cache_results(&serde_json::to_string(&results)?, &url)
222
  .await?;
223
  Ok(results)
@@ -225,6 +267,22 @@ async fn results(
225
  }
226
  }
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
229
  #[get("/robots.txt")]
230
  pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
 
2
  //! meta search engine website and provide appropriate response to each route/page
3
  //! when requested.
4
 
5
+ use std::{
6
+ fs::{read_to_string, File},
7
+ io::{BufRead, BufReader, Read},
8
+ };
9
 
10
  use crate::{
11
  cache::cacher::RedisCache,
 
16
  };
17
  use actix_web::{get, web, HttpRequest, HttpResponse};
18
  use handlebars::Handlebars;
19
+ use regex::Regex;
20
  use serde::Deserialize;
21
  use tokio::join;
22
 
23
  // ---- Constants ----
24
  /// Initialize redis cache connection once and store it on the heap.
25
+ static REDIS_CACHE: async_once_cell::OnceCell<RedisCache> = async_once_cell::OnceCell::new();
26
 
27
  /// A named struct which deserializes all the user provided search parameters and stores them.
28
  ///
 
36
  struct SearchParams {
37
  q: Option<String>,
38
  page: Option<u32>,
39
+ safesearch: Option<u8>,
40
  }
41
 
42
  /// Handles the route of index page or main page of the `websurfx` meta search engine website.
 
110
  None => 1,
111
  };
112
 
113
+ let safe_search: u8 = match config.safe_search {
114
+ 3..=4 => config.safe_search,
115
+ _ => match &params.safesearch {
116
+ Some(safesearch) => match safesearch {
117
+ 0..=2 => *safesearch,
118
+ _ => 1,
119
+ },
120
+ None => config.safe_search,
121
+ },
122
+ };
123
+
124
  let (_, results, _) = join!(
125
  results(
126
  format!(
127
+ "http://{}:{}/search?q={}&page={}&safesearch={}",
128
  config.binding_ip,
129
  config.port,
130
  query,
131
+ page - 1,
132
+ safe_search
133
  ),
134
  &config,
135
  query,
136
  page - 1,
137
+ req.clone(),
138
+ safe_search
139
  ),
140
  results(
141
  format!(
142
+ "http://{}:{}/search?q={}&page={}&safesearch={}",
143
+ config.binding_ip, config.port, query, page, safe_search
144
  ),
145
  &config,
146
  query,
147
  page,
148
+ req.clone(),
149
+ safe_search
150
  ),
151
  results(
152
  format!(
153
+ "http://{}:{}/search?q={}&page={}&safesearch={}",
154
  config.binding_ip,
155
  config.port,
156
  query,
157
+ page + 1,
158
+ safe_search
159
  ),
160
  &config,
161
  query,
162
  page + 1,
163
+ req.clone(),
164
+ safe_search
165
  )
166
  );
167
 
 
181
  config: &Config,
182
  query: &str,
183
  page: u32,
184
+ req: HttpRequest,
185
+ safe_search: u8,
186
  ) -> Result<SearchResults, Box<dyn std::error::Error>> {
187
+ let mut redis_cache: RedisCache = REDIS_CACHE
188
  .get_or_init(async {
189
  // Initialize redis cache connection pool only once and store it in the heap.
190
  RedisCache::new(&config.redis_url, 5).await.unwrap()
 
200
  match cached_results_json {
201
  Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
202
  Err(_) => {
203
+ if safe_search == 4 {
204
+ let mut results: SearchResults = SearchResults::default();
205
+ let mut _flag: bool =
206
+ is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
207
+ _flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
208
+
209
+ if _flag {
210
+ results.set_disallowed();
211
+ results.add_style(&config.style);
212
+ results.set_page_query(query);
213
+ redis_cache
214
+ .cache_results(&serde_json::to_string(&results)?, &url)
215
+ .await?;
216
+ return Ok(results);
217
+ }
218
+ }
219
+
220
  // check if the cookie value is empty or not if it is empty then use the
221
  // default selected upstream search engines from the config file otherwise
222
  // parse the non-empty cookie and grab the user selected engines from the
 
238
  config.debug,
239
  &engines,
240
  config.request_timeout,
241
+ safe_search,
242
  )
243
  .await?
244
  }
 
250
  config.debug,
251
  &config.upstream_search_engines,
252
  config.request_timeout,
253
+ safe_search,
254
  )
255
  .await?
256
  }
257
  };
258
+ if results.engine_errors_info().is_empty() && results.results().is_empty() {
259
+ results.set_filtered();
260
+ }
261
  results.add_style(&config.style);
262
  redis_cache
 
263
  .cache_results(&serde_json::to_string(&results)?, &url)
264
  .await?;
265
  Ok(results)
 
267
  }
268
  }
269
 
270
+ fn is_match_from_filter_list(
271
+ file_path: &str,
272
+ query: &str,
273
+ ) -> Result<bool, Box<dyn std::error::Error>> {
274
+ let mut flag = false;
275
+ let mut reader = BufReader::new(File::open(file_path)?);
276
+ for line in reader.by_ref().lines() {
277
+ let re = Regex::new(&line?)?;
278
+ if re.is_match(query) {
279
+ flag = true;
280
+ break;
281
+ }
282
+ }
283
+ Ok(flag)
284
+ }
285
+
286
  /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
287
  #[get("/robots.txt")]
288
  pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
websurfx/config.lua CHANGED
@@ -11,6 +11,17 @@ production_use = false -- whether to use production mode or not (in other words
11
  -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
12
  request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
13
 
 
 
 
 
 
 
 
 
 
 
 
14
  -- ### Website ###
15
  -- The different colorschemes provided are:
16
  -- {{
 
11
  -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
12
  request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
13
 
14
+ -- ### Search ###
15
+ -- Filter results based on different levels. The levels provided are:
16
+ -- {{
17
+ -- 0 - None
18
+ -- 1 - Low
19
+ -- 2 - Moderate
20
+ -- 3 - High
21
+ -- 4 - Aggressive
22
+ -- }}
23
+ safe_search = 2
24
+
25
  -- ### Website ###
26
  -- The different colorschemes provided are:
27
  -- {{