Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -31,6 +31,7 @@ import PyPDF2 
     | 
|
| 31 | 
         
             
            import io
         
     | 
| 32 | 
         
             
            import requests
         
     | 
| 33 | 
         
             
            from duckduckgo_search import DDGS
         
     | 
| 
         | 
|
| 34 | 
         | 
| 35 | 
         
             
            # Load environment variables from a .env file
         
     | 
| 36 | 
         
             
            load_dotenv()
         
     | 
| 
         @@ -54,6 +55,20 @@ client = InferenceClient( 
     | 
|
| 54 | 
         
             
            similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
         
     | 
| 55 | 
         | 
| 56 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 57 | 
         
             
            def duckduckgo_search(query, num_results=10, time_range="", language="", safesearch=2):
         
     | 
| 58 | 
         
             
                try:
         
     | 
| 59 | 
         
             
                    ddgs = DDGS()
         
     | 
| 
         @@ -78,13 +93,18 @@ def duckduckgo_search(query, num_results=10, time_range="", language="", safesea 
     | 
|
| 78 | 
         
             
                    else:
         
     | 
| 79 | 
         
             
                        safesearch_setting = "strict"
         
     | 
| 80 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 81 | 
         
             
                    results = ddgs.text(
         
     | 
| 82 | 
         
             
                        query,
         
     | 
| 83 | 
         
             
                        region='wt-wt',
         
     | 
| 84 | 
         
             
                        safesearch=safesearch_setting,
         
     | 
| 85 | 
         
             
                        timelimit=timelimit,
         
     | 
| 86 | 
         
            -
                        max_results=num_results 
     | 
| 87 | 
         
            -
                        backend: str = "html"
         
     | 
| 88 | 
         
             
                    )
         
     | 
| 89 | 
         | 
| 90 | 
         
             
                    return [{"url": result["href"], "title": result["title"]} for result in results]
         
     | 
| 
         | 
|
| 31 | 
         
             
            import io
         
     | 
| 32 | 
         
             
            import requests
         
     | 
| 33 | 
         
             
            from duckduckgo_search import DDGS
         
     | 
| 34 | 
         
            +
            import random
         
     | 
| 35 | 
         | 
| 36 | 
         
             
            # Load environment variables from a .env file
         
     | 
| 37 | 
         
             
            load_dotenv()
         
     | 
| 
         | 
|
| 55 | 
         
             
            similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
         
     | 
| 56 | 
         | 
| 57 | 
         | 
| 58 | 
         
            +
            # Pool of browser User-Agent strings; get_random_user_agent() picks one at random.
         
     | 
| 59 | 
         
            +
            USER_AGENTS = [
                # Chrome on Windows 10
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
                # Firefox on Windows 10
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
                # Safari on macOS
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
                # Chrome on Linux
                "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
                # Safari on iPhone
                "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
                # Chrome on iPad
                "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/91.0.4472.80 Mobile/15E148 Safari/604.1",
                # Firefox on Android
                "Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/88.0",
            ]
         
     | 
| 68 | 
         
            +
             
     | 
| 69 | 
         
            +
            def get_random_user_agent():
                """Pick one User-Agent string at random from USER_AGENTS."""
                selected_agent = random.choice(USER_AGENTS)
                return selected_agent
         
     | 
| 71 | 
         
            +
             
     | 
| 72 | 
         
             
            def duckduckgo_search(query, num_results=10, time_range="", language="", safesearch=2):
         
     | 
| 73 | 
         
             
                try:
         
     | 
| 74 | 
         
             
                    ddgs = DDGS()
         
     | 
| 
         | 
|
| 93 | 
         
             
                    else:
         
     | 
| 94 | 
         
             
                        safesearch_setting = "strict"
         
     | 
| 95 | 
         | 
| 96 | 
         
            +
                    # Get a random user agent
         
     | 
| 97 | 
         
            +
                    user_agent = get_random_user_agent()
         
     | 
| 98 | 
         
            +
                    
         
     | 
| 99 | 
         
            +
                    # Set the user agent for the DDGS instance
         
     | 
| 100 | 
         
            +
                    ddgs.session.headers.update({'User-Agent': user_agent})
         
     | 
| 101 | 
         
            +
                    
         
     | 
| 102 | 
         
             
                    results = ddgs.text(
         
     | 
| 103 | 
         
             
                        query,
         
     | 
| 104 | 
         
             
                        region='wt-wt',
         
     | 
| 105 | 
         
             
                        safesearch=safesearch_setting,
         
     | 
| 106 | 
         
             
                        timelimit=timelimit,
         
     | 
| 107 | 
         
            +
                        max_results=num_results
         
     | 
| 
         | 
|
| 108 | 
         
             
                    )
         
     | 
| 109 | 
         | 
| 110 | 
         
             
                    return [{"url": result["href"], "title": result["title"]} for result in results]
         
     |