|
import gradio as gr |
|
import requests |
|
import bs4 |
|
|
|
def sitemap(url):
    """Download *url* and return its raw text plus one entry per ``<a>`` tag.

    Args:
        url: Address of the page to fetch. ``None`` or an empty string
            short-circuits without touching the network.

    Returns:
        A list intended for the JSON output component:

        * ``[]`` when *url* is empty or ``None``;
        * a single ``"REQUEST FAILED: ..."`` string when the request raises
          or the server answers with a status other than 200;
        * otherwise the page text (prefixed with ``"RAW TEXT RETURNED: "``),
          the marker string ``"HTML fragments: "``, and then one dict per
          anchor with the keys ``"LINK TITLE"``, ``"URL"`` and ``"STRING"``.
    """
    if not url:
        return []

    try:
        # Without a timeout a stalled server would block the callback forever.
        source = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        # Report the failure in the output instead of crashing the callback.
        return [f"REQUEST FAILED: {exc}"]

    if source.status_code != 200:
        return [f"REQUEST FAILED: HTTP {source.status_code}"]

    soup = bs4.BeautifulSoup(source.content, "html.parser")

    out = [f"RAW TEXT RETURNED: {soup.text}", "HTML fragments: "]
    out.extend(
        {
            "LINK TITLE": anchor.get("title"),
            "URL": anchor.get("href"),
            # ``.string`` is None when the anchor has more than one child node.
            "STRING": anchor.string,
        }
        for anchor in soup.find_all("a")
    )
    # The original built this list but never returned it, so the UI always
    # received None.
    return out
|
# Minimal UI: a URL text box, a trigger button, and a JSON viewer for the result.
with gr.Blocks() as app:
    inp = gr.Textbox()
    btn = gr.Button()
    outp = gr.JSON()

    # Clicking the button passes the text box value to sitemap() and renders
    # whatever it returns in the JSON component.
    btn.click(fn=sitemap, inputs=inp, outputs=outp)

# Start the Gradio server.
app.launch()