|
import gradio as gr |
|
import urllib.request |
|
import requests |
|
import bs4 |
|
import lxml |
|
|
|
def find_it1(url):
    """Fetch *url*, print a summary of its HTML, and return that summary.

    The page is parsed with BeautifulSoup's ``lxml`` parser. The summary
    contains, one item per line: the ``<title>`` tag, its tag name, its
    string and its parent's tag name (the last three are skipped when the
    page has no title), the first ``<p>`` tag, the list of all ``<p>`` tags,
    the ``.string`` and ``.text`` of every paragraph, the ``href`` of every
    ``<a>`` tag, and finally the full text of the page.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The summary as a single string (the same text that is printed), so
        a caller such as a UI callback can display it.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
    """
    # NOTE(security): urlopen also accepts non-HTTP schemes such as file://;
    # validate ``url`` before exposing this function to untrusted users.
    with urllib.request.urlopen(url) as response:  # close the connection when done
        source = response.read()

    # The file imports the package as ``bs4``, so reference it by that name.
    soup = bs4.BeautifulSoup(source, 'lxml')

    title = soup.title
    lines = [str(title)]
    if title is not None:
        # Only dereference the title when the page actually has one.
        lines.extend([
            str(title.name),
            str(title.string),
            str(title.parent.name),
        ])

    paragraphs = soup.find_all('p')
    lines.append(str(soup.p))
    lines.append(str(paragraphs))
    for paragraph in paragraphs:
        # ``.string`` is None when a paragraph has nested markup; ``.text``
        # always yields the concatenated text, so both are reported.
        lines.append(str(paragraph.string))
        lines.append(str(paragraph.text))

    lines.extend(str(link.get('href')) for link in soup.find_all('a'))
    lines.append(soup.get_text())

    report = "\n".join(lines)
    print(report)
    return report
|
|
|
|
|
|
|
def find_it2(url):
    """Return the text of every ``<a>`` tag found on the page at *url*.

    Args:
        url: Address to fetch with ``requests.get``.

    Returns:
        A string that starts with the header line ``"URL Links:"`` followed
        by the text of each anchor, one per line. If the request fails or
        the server answers with an error status, the error is printed and
        its message is returned as a string instead.
    """
    try:
        # Keep the request inside the ``try`` so connection problems are
        # reported the same way as HTTP error statuses.
        response = requests.get(url)
        response.raise_for_status()
    except requests.RequestException as e:
        print(e)
        return str(e)

    # The file imports the package as ``bs4``, so reference it by that name.
    soup = bs4.BeautifulSoup(response.content, 'lxml')
    # The header is a prefix, not the separator between entries.
    return 'URL Links:\n' + '\n'.join(p.text for p in soup.find_all('a'))
|
|
|
|
|
# Gradio UI: a text input, a button and a text output. Clicking the button
# calls find_it1 with the input's value and sends the result to the output.
# NOTE(review): the original indentation was lost, so which components sit
# inside the Row is a best guess; confirm the intended layout.
with gr.Blocks() as app:
    with gr.Row():
        inp = gr.Textbox()
        btn = gr.Button()
    outp = gr.Textbox()
    btn.click(fn=find_it1, inputs=inp, outputs=outp)

app.launch()
|
|
|
|