shubhendu-ghosh-DS committed
Commit: 74c6795
Parent: d1db85e

added search results and scrape them

Files changed (2):
  1. app.py +40 -4
  2. requirements.txt +70 -0
app.py CHANGED
@@ -1,7 +1,43 @@
 import gradio as gr
+from googlesearch import search
+from bs4 import BeautifulSoup
+import requests
 
-def greet(name):
-    return "Hello there! " + name + "!!"
-
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+
+def google_search(query, num_results=5):
+    search_results = search(query, num_results=num_results)
+    return search_results
+
+def scrape_text_from_url(url):
+    try:
+        response = requests.get(url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Remove specific elements (customize as needed)
+        unwanted_elements = ['footer', 'script', 'style', 'noscript']
+        for tag in unwanted_elements:
+            for el in soup.find_all(tag):
+                el.extract()
+
+        # Extract text from remaining paragraphs
+        text = ' '.join([p.text for p in soup.find_all('p')])
+
+        return text.strip()  # Strip leading and trailing whitespace
+    except Exception as e:
+        print(f"Error scraping {url}: {e}")
+        return None
+
+def get_google_data(search_term):
+    whole_result = ''
+    search_results = google_search(search_term)
+    for i, result in enumerate(search_results, start=1):
+        text = scrape_text_from_url(result)
+        if text:
+            whole_result += text
+
+    return whole_result
+
+
+iface = gr.Interface(fn=get_google_data, inputs="text", outputs="text")
+
+iface.launch(share=True)
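
Note: as committed, scrape_text_from_url calls requests.get with no timeout, User-Agent header, or HTTP status check, so a slow or bot-wary site can stall the request or feed error-page text into the output, and get_google_data concatenates page texts with no separator. Below is a minimal hardened sketch; it is not part of this commit, and the timeout value and User-Agent string are illustrative assumptions:

import requests
from bs4 import BeautifulSoup

def scrape_text_from_url_hardened(url, timeout=10):
    # Illustrative header and timeout values, not from the commit.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; demo-scraper/0.1)"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # fail on 4xx/5xx instead of parsing an error page
        soup = BeautifulSoup(response.text, "html.parser")
        # Same element filtering as the committed version
        for tag in ("footer", "script", "style", "noscript"):
            for el in soup.find_all(tag):
                el.extract()
        # Join paragraph texts with a space so words don't fuse across tags
        return " ".join(p.get_text() for p in soup.find_all("p")).strip()
    except requests.RequestException as e:
        print(f"Error scraping {url}: {e}")
        return None

Swapping this in for the committed function (and joining results with a separator inside get_google_data) would leave the Gradio interface unchanged.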
requirements.txt ADDED
@@ -0,0 +1,70 @@
+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+attrs==23.1.0
+beautifulsoup4==4.12.2
+certifi==2023.11.17
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.1.1
+cycler==0.12.1
+exceptiongroup==1.2.0
+fastapi==0.108.0
+ffmpy==0.3.1
+filelock==3.13.1
+fonttools==4.47.0
+fsspec==2023.12.2
+googlesearch-python==1.2.3
+gradio==4.12.0
+gradio-client==0.8.0
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+huggingface-hub==0.20.1
+idna==3.6
+importlib-resources==6.1.1
+Jinja2==3.1.2
+jsonschema==4.20.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+matplotlib==3.7.4
+mdurl==0.1.2
+numpy==1.24.4
+orjson==3.9.10
+packaging==23.2
+pandas==2.0.3
+Pillow==10.1.0
+pkgutil-resolve-name==1.3.10
+pydantic==2.5.3
+pydantic-core==2.14.6
+pydub==0.25.1
+pygments==2.17.2
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.3.post1
+PyYAML==6.0.1
+referencing==0.32.0
+requests==2.31.0
+rich==13.7.0
+rpds-py==0.16.2
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.5
+starlette==0.32.0.post1
+tomlkit==0.12.0
+toolz==0.12.0
+tqdm==4.66.1
+typer==0.9.0
+typing-extensions==4.9.0
+tzdata==2023.4
+urllib3==2.1.0
+uvicorn==0.25.0
+websockets==11.0.3
+zipp==3.17.0
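
After installing these pins, a quick sanity check (a sketch, assuming each package exposes __version__ as these releases do) confirms the scraping stack resolved to the pinned versions:

import bs4, gradio, requests
print(bs4.__version__, gradio.__version__, requests.__version__)
# Expected with the pins above: 4.12.2 4.12.0 2.31.0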