Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from bs4 import BeautifulSoup as bs | |
| #import html5lib | |
| #import copy | |
| import requests | |
| #from IPython.display import IFrame | |
def scrape(instring):
    """Return a gradio HTML update that embeds the given URL in an <embed> tag.

    Parameters
    ----------
    instring : str
        URL (or path) to use as the embed source.

    Returns
    -------
    dict
        ``gr.HTML.update`` payload whose value is the embed markup.
    """
    # Quote the src attribute: an unquoted attribute value terminates at the
    # first space and mis-parses on other HTML delimiter characters.
    return gr.HTML.update(f'''<embed src="{instring}" width= "500" height= "375">words</embed>''')
def scrape1(instring):
    """Fetch the URL, then return HTML that renders it as an embedded PDF.

    Parameters
    ----------
    instring : str
        URL to fetch and to reference from the returned <object> element.

    Returns
    -------
    dict
        ``gr.HTML.update`` payload containing the <object> markup.
    """
    # set the url to perform the get request
    URL = f'{instring}'
    # The fetched body is not used in the returned markup; the request is kept
    # so error behaviour for unreachable URLs is unchanged.  The original also
    # built an unused BeautifulSoup/prettify() of the page — dead code, removed.
    requests.get(URL)
    # Quote the data attribute and close the <object> element; both were
    # missing, producing malformed HTML.
    return gr.HTML.update(f'''<object data="{instring}" type="application/pdf" width="100%" height="500px"></object>''')
def scrape0(instring):
    """Download the URL, save the body to /tmp/metadata.pdf, and return its HTML.

    Parameters
    ----------
    instring : str
        URL to download.

    Returns
    -------
    dict
        ``gr.HTML.update`` payload whose value is the parsed document
        (BeautifulSoup's string form).
    """
    url = f'{instring}'
    # Single GET instead of the original double fetch (one streamed for the
    # file, one plain for parsing): the same body is written to disk and
    # parsed, so the two outputs can no longer disagree.
    r = requests.get(url)
    body = r.content
    with open('/tmp/metadata.pdf', 'wb') as fd:
        fd.write(body)
    # NOTE: the original wrapped `out = r.content` in try/except after
    # consuming the response via iter_content(), which would have raised
    # RuntimeError ("content ... already consumed") — and `out` was unused.
    soup = bs(body, "html.parser")
    return gr.HTML.update(f'''{soup}''')
# Minimal UI: a textbox for the URL, a button, and an HTML panel that shows
# the <embed> markup produced by scrape().  (scrape1/scrape0 are defined but
# not wired to the button.)
with gr.Blocks() as app:
    inp=gr.Textbox()
    go_btn = gr.Button()
    outp = gr.HTML()
    # Clicking the button runs scrape(inp) and writes the result into outp.
    go_btn.click(scrape,inp,outp)
# queue() enables request queueing so up to 10 events are processed
# concurrently; launch() starts the server (blocking).
app.queue(concurrency_count=10).launch()