shubhendu-ghosh-DS committed
Commit 193bf75
Parent: 74c6795

summarized the google searches

app.py CHANGED
@@ -1,43 +1,25 @@
 import gradio as gr
-from googlesearch import search
-from bs4 import BeautifulSoup
-import requests
+from google_search_service.search_google import GoogleSearchService
+from summerizer_service.summarize_text import Summarizer
+from web_scraping_service.beautiful_scrape import WebScrapingService
 
 
-def google_search(query, num_results=5):
-    search_results = search(query, num_results=num_results)
-    return search_results
-
-def scrape_text_from_url(url):
-    try:
-        response = requests.get(url)
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-        # Remove specific elements (customize as needed)
-        unwanted_elements = ['footer', 'script', 'style', 'noscript']
-        for tag in unwanted_elements:
-            for el in soup.find_all(tag):
-                el.extract()
-
-        # Extract text from remaining paragraphs
-        text = ' '.join([p.text for p in soup.find_all('p')])
-
-        return text.strip()  # Strip leading and trailing whitespaces
-    except Exception as e:
-        print(f"Error scraping {url}: {e}")
-        return None
 
 def get_google_data(search_term):
-    whole_result = ''
-    search_results = google_search(search_term)
+    summaries = ''
+    search_results = GoogleSearchService().google_search(search_term)
     for i, result in enumerate(search_results, start=1):
-        text = scrape_text_from_url(result)
+        text = WebScrapingService().scrape_text_from_url(result)
+
         if text:
-            whole_result += text
+            summary = Summarizer().summarize_text(text)
+            print("summary is -----", summary)
+            summaries += summary
+    search_result = Summarizer().summarize_text(summaries)
 
-    return whole_result
+    return search_result
 
 
 iface = gr.Interface(fn=get_google_data, inputs="text", outputs="text")
 
-iface.launch(share=True)
+iface.launch()
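
The refactor keeps the search → scrape → summarize flow in get_google_data but delegates each step to a service class, then runs one more summarization pass over the concatenated per-page summaries. A minimal sketch of exercising the pipeline without the Gradio UI (the query string is illustrative; assumes the service packages from this commit are on the import path):

    from google_search_service.search_google import GoogleSearchService
    from summerizer_service.summarize_text import Summarizer
    from web_scraping_service.beautiful_scrape import WebScrapingService

    # Same flow as get_google_data, minus the Gradio wrapper.
    urls = GoogleSearchService().google_search("what is a transformer model")
    summaries = ''
    for url in urls:
        text = WebScrapingService().scrape_text_from_url(url)
        if text:
            summaries += Summarizer().summarize_text(text)
    # Second-stage summary over the concatenated page summaries.
    print(Summarizer().summarize_text(summaries))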
google_search_service/__pycache__/search_google.cpython-38.pyc ADDED
Binary file (737 Bytes).
google_search_service/search_google.py ADDED
@@ -0,0 +1,10 @@
+from googlesearch import search
+
+class GoogleSearchService:
+
+    def __init__(self):
+        pass
+
+    def google_search(self, query, num_results=5):
+        search_results = search(query, num_results=num_results)
+        return search_results
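
With googlesearch-python 1.2.3, search() yields plain result URLs, so the service can be used on its own. A small usage sketch (query and result count are illustrative):

    from google_search_service.search_google import GoogleSearchService

    service = GoogleSearchService()
    for url in service.google_search("gradio interface tutorial", num_results=3):
        print(url)  # each result is a URL string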
requirements.txt CHANGED
@@ -1,9 +1,12 @@
+absl-py==2.0.0
 aiofiles==23.2.1
 altair==5.2.0
 annotated-types==0.6.0
 anyio==4.2.0
+astunparse==1.6.3
 attrs==23.1.0
 beautifulsoup4==4.12.2
+cachetools==5.3.2
 certifi==2023.11.17
 charset-normalizer==3.3.2
 click==8.1.7
@@ -14,31 +17,47 @@ exceptiongroup==1.2.0
 fastapi==0.108.0
 ffmpy==0.3.1
 filelock==3.13.1
+flatbuffers==23.5.26
 fonttools==4.47.0
 fsspec==2023.12.2
+gast==0.4.0
+google-auth==2.26.1
+google-auth-oauthlib==1.0.0
+google-pasta==0.2.0
 googlesearch-python==1.2.3
 gradio==4.12.0
 gradio-client==0.8.0
+grpcio==1.60.0
 h11==0.14.0
+h5py==3.10.0
 httpcore==1.0.2
 httpx==0.26.0
 huggingface-hub==0.20.1
 idna==3.6
+importlib-metadata==7.0.1
 importlib-resources==6.1.1
 Jinja2==3.1.2
 jsonschema==4.20.0
 jsonschema-specifications==2023.12.1
+keras==2.13.1
 kiwisolver==1.4.5
+libclang==16.0.6
+Markdown==3.5.1
 markdown-it-py==3.0.0
 MarkupSafe==2.1.3
 matplotlib==3.7.4
 mdurl==0.1.2
-numpy==1.24.4
+numpy==1.24.3
+oauthlib==3.2.2
+opt-einsum==3.3.0
 orjson==3.9.10
 packaging==23.2
 pandas==2.0.3
 Pillow==10.1.0
 pkgutil-resolve-name==1.3.10
+protobuf==4.25.1
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
 pydantic==2.5.3
 pydantic-core==2.14.6
 pydub==0.25.1
@@ -49,22 +68,37 @@ python-multipart==0.0.6
 pytz==2023.3.post1
 PyYAML==6.0.1
 referencing==0.32.0
+regex==2023.12.25
 requests==2.31.0
+requests-oauthlib==1.3.1
 rich==13.7.0
 rpds-py==0.16.2
+rsa==4.9
+safetensors==0.4.1
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.0
 soupsieve==2.5
 starlette==0.32.0.post1
+tensorboard==2.13.0
+tensorboard-data-server==0.7.2
+tensorflow==2.13.0
+tensorflow-estimator==2.13.0
+tensorflow-intel==2.13.0
+tensorflow-io-gcs-filesystem==0.31.0
+termcolor==2.4.0
+tokenizers==0.15.0
 tomlkit==0.12.0
 toolz==0.12.0
 tqdm==4.66.1
+transformers==4.36.2
 typer==0.9.0
 typing-extensions==4.9.0
 tzdata==2023.4
 urllib3==2.1.0
 uvicorn==0.25.0
 websockets==11.0.3
+werkzeug==3.0.1
+wrapt==1.16.0
 zipp==3.17.0
summerizer_service/__pycache__/summarize_text.cpython-38.pyc ADDED
Binary file (824 Bytes).
summerizer_service/summarize_text.py ADDED
@@ -0,0 +1,13 @@
+from transformers import pipeline
+
+
+
+class Summarizer:
+
+    def __init__(self):
+        pass
+
+    def summarize_text(self, text):
+        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+        summary = summarizer(text, max_length=500, min_length=100, do_sample=False)
+        return summary[0]['summary_text']
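
Two caveats here: pipeline(...) is rebuilt on every summarize_text call, so the BART weights are reloaded for each scraped page, and bart-large-cnn accepts only about 1024 tokens of input, so long pages can overflow it. A hedged sketch of a variant (not part of this commit) that loads the model once and truncates long input:

    from transformers import pipeline

    class Summarizer:

        # Assumption: load the model once per process, not once per call.
        _summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

        def summarize_text(self, text):
            # truncation=True keeps input within the model's max length.
            summary = self._summarizer(text, max_length=500, min_length=100,
                                       do_sample=False, truncation=True)
            return summary[0]['summary_text']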
web_scraping_service/__pycache__/beautiful_scrape.cpython-38.pyc ADDED
Binary file (1.27 kB).
web_scraping_service/beautiful_scrape.py ADDED
@@ -0,0 +1,24 @@
+from bs4 import BeautifulSoup
+import requests
+from requests.exceptions import HTTPError
+
+class WebScrapingService:
+
+    def __init__(self):
+        pass
+
+    def scrape_text_from_url(self, url):
+        try:
+            response = requests.get(url)
+            soup = BeautifulSoup(response.text, 'html.parser')
+
+            unwanted_elements = ['footer', 'script', 'style', 'noscript']
+            for tag in unwanted_elements:
+                for el in soup.find_all(tag):
+                    el.extract()
+
+            text = ' '.join([p.text for p in soup.find_all('p')])
+
+            return text.strip()  # Strip leading and trailing whitespaces
+        except Exception as e:
+            raise HTTPError(e)
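
Unlike the old inline scraper, which printed the error and returned None, this version re-raises every failure as requests.exceptions.HTTPError, and get_google_data does not catch it, so a single unreachable URL fails the whole search. A sketch of a more forgiving scrape_text_from_url (the timeout value is an assumption):

    def scrape_text_from_url(self, url):
        try:
            # A timeout keeps one slow site from stalling the whole request.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error scraping {url}: {e}")
            return None  # the caller's `if text:` check then skips this page
        soup = BeautifulSoup(response.text, 'html.parser')
        for tag in ['footer', 'script', 'style', 'noscript']:
            for el in soup.find_all(tag):
                el.extract()
        return ' '.join(p.text for p in soup.find_all('p')).strip()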