Final_Assignment_Project

Running

App Files Community

wt002 committed 11 days ago

Commit

25e901d

verified ·

1 Parent(s): df67b8a

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -54

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ from dotenv import load_dotenv
 import gradio as gr
 import requests
-import os
-import requests
 from typing import List, Dict, Union
 import pandas as pd
 import wikipediaapi
-from serpapi import GoogleSearch
-from typing import List, Dict, Optional
 load_dotenv()
@@ -21,76 +21,74 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 class BasicAgent:
-    def __init__(self, api_key: str = None):
-        self.api_key = api_key or os.getenv("SERP_API_KEY")
-        if not self.api_key:
-            raise ValueError("Missing SERPAPI_API_KEY. Get one at https://serpapi.com/")
-        print("SerpAPI Agent initialized")
-    def search(self, query: str, num_results: int = 3) -> List[Dict]:
-        """Execute search and return structured results"""
-        params = {
-            "q": query,
-            "api_key": self.api_key,
-            "num": num_results,
-            "hl": "en",  # Language: English
-            "gl": "us"   # Country: United States
         }
         try:
-            search = GoogleSearch(params)
-            results = search.get_dict()
-            return self._format_results(results)
         except Exception as e:
             print(f"Search failed: {str(e)}")
             return []
-    def _format_results(self, raw_results: Dict) -> List[Dict]:
-        """Extract and format organic results"""
-        formatted = []
-        for result in raw_results.get("organic_results", []):
-            formatted.append({
-                "position": result.get("position"),
-                "title": result.get("title"),
-                "link": result.get("link"),
-                "snippet": result.get("snippet"),
-                "source": result.get("source")
-            })
-        return formatted
-    def __call__(self, query: str) -> str:
-        """Callable interface that returns a string"""
-        results = self.search(query)
-        if not results:
-            return "No results found"
         output = []
         for res in results:
             output.append(
                 f"{res['position']}. {res['title']}\n"
                 f"   {res['link']}\n"
-                f"   {res['snippet']}\n"
-                f"   Source: {res['source']}"
             )
-        return "\n\n".join(output)
 # Usage Example
 if __name__ == "__main__":
-    # Initialize with API key (or set SERPAPI_API_KEY environment variable)
-    agent = BasicAgent()
-    # Perform search
-    query = "What is Python programming language?"
-    print(f"Searching for: {query}")
-    # Option 1: Get structured data
-    structured_results = agent.search(query)
-    print("\nStructured Results:", structured_results[0])  # Print first result
-    # Option 2: Get printable string
-    printable_results = agent(query)
-    print("\nFormatted Results:\n", printable_results)
 def run_and_submit_all( profile: gr.OAuthProfile | None):

 import gradio as gr
 import requests
 from typing import List, Dict, Union
 import pandas as pd
 import wikipediaapi
+from bs4 import BeautifulSoup
+import urllib.parse
+from typing import List, Dict
+import fake_useragent  # For realistic user-agent rotation
 load_dotenv()
 # --- Basic Agent Definition ---
 class BasicAgent:
+    def __init__(self):
+        self.user_agent = fake_useragent.UserAgent().random
+        self.headers = {
+            'User-Agent': self.user_agent,
+            'Accept-Language': 'en-US,en;q=0.5',
         }
+        print("GoogleScraper initialized with User-Agent:", self.user_agent[:50] + "...")
+    def search(self, query: str, num_results: int = 3) -> List[Dict]:
+        """Perform Google search and return structured results"""
+        encoded_query = urllib.parse.quote_plus(query)
+        url = f"https://www.google.com/search?q={encoded_query}&num={num_results + 2}"  # +2 for buffer
         try:
+            response = requests.get(url, headers=self.headers, timeout=10)
+            response.raise_for_status()
+            return self._parse_results(response.text, num_results)
         except Exception as e:
             print(f"Search failed: {str(e)}")
             return []
+    def _parse_results(self, html: str, max_results: int) -> List[Dict]:
+        """Parse HTML and extract search results"""
+        soup = BeautifulSoup(html, 'html.parser')
+        results = []
+        # Main result blocks (class names may change - this works as of July 2024)
+        for i, result in enumerate(soup.select('.tF2Cxc')[:max_results]):
+            title = result.select_one('h3')
+            link = result.find('a')['href']
+            snippet = result.select_one('.IsZvec')
+            if title and link:
+                results.append({
+                    'position': i + 1,
+                    'title': title.get_text(),
+                    'link': link,
+                    'snippet': snippet.get_text() if snippet else None
+                })
+        return results
+    def pretty_print(self, results: List[Dict]) -> str:
+        """Format results for human-readable output"""
         output = []
         for res in results:
             output.append(
                 f"{res['position']}. {res['title']}\n"
                 f"   {res['link']}\n"
+                f"   {res['snippet'] or 'No description available'}\n"
             )
+        return "\n".join(output)
 # Usage Example
 if __name__ == "__main__":
+    scraper = BasicAgent()
+    # Search for Python programming
+    query = "Python programming language"
+    print(f"Searching Google for: '{query}'")
+    results = scraper.search(query)
+    if results:
+        print("\nTop Results:")
+        print(scraper.pretty_print(results))
+    else:
+        print("No results found or search failed")
 def run_and_submit_all( profile: gr.OAuthProfile | None):