File size: 6,362 Bytes
5962cca
 
 
493c56b
2a68081
b72af01
5962cca
 
019b332
9a4cf94
049b1c1
 
5962cca
049b1c1
 
9a4cf94
049b1c1
 
 
 
9a4cf94
5962cca
 
9a4cf94
 
5962cca
9a4cf94
5962cca
 
9a4cf94
ebb9e9e
 
 
 
 
 
 
c2280b7
5962cca
 
 
 
b72af01
 
15dfda6
b72af01
5aca5c0
049b1c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b72af01
 
2a68081
b72af01
2d47e8d
b72af01
 
 
 
2d47e8d
15dfda6
b72af01
 
 
 
 
 
 
 
049b1c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b72af01
 
2a68081
b72af01
2a68081
2d47e8d
cbad560
5aca5c0
 
 
049b1c1
5aca5c0
049b1c1
 
5aca5c0
049b1c1
5aca5c0
 
 
 
 
 
 
 
 
 
049b1c1
 
 
 
 
 
 
 
 
5aca5c0
 
 
493c56b
 
 
 
 
5aca5c0
 
 
 
 
 
 
 
 
049b1c1
 
5aca5c0
 
 
b72af01
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
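//! This module provides the error enum used to handle the different errors that can occur while
//! requesting data from the upstream search engines with the search query provided by the user,
//! together with the `SearchEngine` trait and the `EngineHandler` type built on top of it.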

use super::aggregation_models::SearchResult;
use error_stack::{Result, ResultExt};
use std::{collections::HashMap, fmt, time::Duration};

/// A custom error type used to handle engine associated errors.
#[derive(Debug)]
pub enum EngineError {
    /// This variant handles the no results found error provided by the upstream
    /// search engines.
    EmptyResultSet,
    /// This variant handles all request related errors like forbidden, not found,
    /// etc.
    RequestError,
    /// This variant handles all the errors which are unexpected or occur rarely
    /// and are errors mostly related to failure in initialization of HeaderMap,
    /// Selector errors and all other errors occurring within the code handling
    /// the `upstream search engines`.
    UnexpectedError,
}

impl fmt::Display for EngineError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            EngineError::EmptyResultSet => {
                write!(f, "The upstream search engine returned an empty result set")
            }
            EngineError::RequestError => {
                write!(
                    f,
                    "Error occurred while requesting data from upstream search engine"
                )
            }
            EngineError::UnexpectedError => {
                write!(f, "An unexpected error occurred while processing the data")
            }
        }
    }
}

// Implementing `error_stack::Context` (with no items overridden) is what allows `EngineError`
// values to be handed to `change_context`, as `fetch_html_from_upstream` does in this file.
impl error_stack::Context for EngineError {}

/// A trait to define common behavior for all search engines.
#[async_trait::async_trait]
pub trait SearchEngine: Sync + Send {
    /// This helper function fetches/requests the search results from the upstream search engine in
    /// an html form.
    ///
    /// # Arguments
    ///
    /// * `url` - It takes the url of the upstream search engine with the user requested search
    /// query appended in the search parameters.
    /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
    /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
    /// * `request_timeout` - It takes the request timeout value in seconds which is used to limit
    /// the amount of time each request may take until the results are provided by the upstream
    /// engine.
    ///
    /// # Errors
    ///
    /// It returns the html data as a string if the upstream engine provides the data as expected
    /// otherwise it returns an `EngineError::RequestError`, both when sending the request fails
    /// and when reading the response body as text fails.
    async fn fetch_html_from_upstream(
        &self,
        url: &str,
        header_map: reqwest::header::HeaderMap,
        request_timeout: u8,
    ) -> Result<String, EngineError> {
        // Fetch the html from the upstream search engine. The final `change_context` already
        // yields the function's return type, so no `Ok(..?)` re-wrapping is needed.
        reqwest::Client::new()
            .get(url)
            // Add a timeout to the request to avoid DDOSing the server; `u64::from` is a
            // lossless widening of the `u8` seconds value.
            .timeout(Duration::from_secs(u64::from(request_timeout)))
            .headers(header_map) // add spoofed headers to emulate human behavior
            .send()
            .await
            .change_context(EngineError::RequestError)?
            .text()
            .await
            .change_context(EngineError::RequestError)
    }

    /// This function scrapes results from the upstream engine and puts all the scraped results like
    /// title, visiting_url (href in html), engine (from which engine it was fetched from) and
    /// description in a `SearchResult` and then adds that to a HashMap whose keys are url and
    /// values are `SearchResult` structs and then returns it within a Result enum.
    ///
    /// # Arguments
    ///
    /// * `query` - Takes the user provided query to query to the upstream search engine with.
    /// * `page` - Takes the results page to request as a u32.
    /// * `user_agent` - Takes a random user agent string as an argument.
    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
    /// * `safe_search` - Takes the safe search setting as a u8. NOTE(review): the meaning of each
    /// value is decided by the implementing engines and is not visible in this file - confirm
    /// against the implementors.
    ///
    /// # Errors
    ///
    /// Returns an `EngineError` if the user is not connected to the internet or if there is failure to
    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
    /// provide results for the requested search query and also returns error if the scraping selector
    /// or HeaderMap fails to initialize.
    async fn results(
        &self,
        query: &str,
        page: u32,
        user_agent: &str,
        request_timeout: u8,
        safe_search: u8,
    ) -> Result<HashMap<String, SearchResult>, EngineError>;
}

/// A named struct which stores the engine struct together with the name of the associated engine.
pub struct EngineHandler {
    /// It stores the engine struct wrapped in a box smart pointer as a `SearchEngine` trait
    /// object, so any engine struct implementing the `SearchEngine` trait can be held here.
    engine: Box<dyn SearchEngine>,
    /// It stores the name of the engine with which the struct is associated.
    name: &'static str,
}

impl Clone for EngineHandler {
    /// Builds a fresh handler for the same engine by looking the stored name up again through
    /// `EngineHandler::new`, since the boxed `dyn SearchEngine` itself is not cloned.
    ///
    /// # Panics
    ///
    /// Panics if the stored `name` is not recognised by `EngineHandler::new`. Within this file
    /// `name` is only ever assigned by `EngineHandler::new` from one of the names it accepts, so
    /// this is an invariant violation rather than an expected failure.
    fn clone(&self) -> Self {
        Self::new(self.name)
            .expect("`name` is only set by `EngineHandler::new`, so it must name a known engine")
    }
}

impl EngineHandler {
    /// Builds an engine handler from the name of an engine.
    ///
    /// # Arguments
    ///
    /// * `engine_name` - The name of the engine to create the handler for. The name is
    /// lowercased before it is compared against the known engine names.
    ///
    /// # Returns
    ///
    /// `Some` handler holding the matching engine struct and its canonical lowercase name, or
    /// `None` if the engine name is unknown.
    pub fn new(engine_name: &str) -> Option<Self> {
        let normalized = engine_name.to_lowercase();

        // The annotation coerces each concrete engine into the shared trait object type.
        let (name, engine): (&'static str, Box<dyn SearchEngine>) = match normalized.as_str() {
            "duckduckgo" => (
                "duckduckgo",
                Box::new(crate::engines::duckduckgo::DuckDuckGo),
            ),
            "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
            _ => return None,
        };

        Some(Self { engine, name })
    }

    /// Consumes the handler and splits it into a tuple of the engine name followed by the
    /// boxed engine struct.
    pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
        let Self { engine, name } = self;
        (name, engine)
    }
}