|
|
|
|
|
|
|
import spacy |
|
from urllib.request import urlopen, Request |
|
from bs4 import BeautifulSoup |
|
import gradio as gr |
|
|
|
|
|
# Name of the spaCy pipeline package used for named-entity recognition.
_SPACY_MODEL = "en_core_web_sm"

# Load the pipeline once at import time.  spacy.load raises OSError when the
# model package is not installed; in that case fetch it with spaCy's own
# downloader and load it again.
try:
    nlp = spacy.load(_SPACY_MODEL)
except OSError:
    from spacy.cli import download

    download(_SPACY_MODEL)
    nlp = spacy.load(_SPACY_MODEL)
|
|
|
def extract_text(url, timeout=10.0):
    """Download *url* and return the text of the page as a single string.

    Args:
        url: Address of the page to fetch.  Only ``http`` and ``https`` URLs
            are accepted; urllib would otherwise also open ``file://`` and
            ``ftp://`` URLs, which must not be reachable from user input.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        Every non-empty string found by the HTML parser, whitespace-stripped
        and joined with single spaces.

    Raises:
        ValueError: If *url* has no scheme (raised by ``Request``) or a
            scheme other than http/https.
        urllib.error.URLError: If the request fails (raised by ``urlopen``).
    """
    # A browser-like User-Agent is sent instead of urllib's default one.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    # Request has already parsed the URL; reuse its (lower-cased) scheme so
    # the check agrees exactly with what urlopen would act on.
    if req.type not in ("http", "https"):
        raise ValueError(f"unsupported URL scheme: {req.type!r}")

    # The context manager closes the connection even if read() fails.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()

    return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
|
|
|
# Placeholder shown in any table cell for which nothing was extracted.
_NOT_FOUND = "Not Found"


def _relations_in(sentence):
    """Return the relationship keywords occurring in a lower-cased sentence.

    Longer keywords are blanked out before the shorter ones are searched,
    because "great-grandchild" contains "grandchild", which in turn contains
    "child"; without this a great-grandchild sentence would match all three.
    The plural forms need no separate check: each contains its singular.
    """
    found = set()
    if "husband" in sentence:
        found.add("husband")
    if "marriage" in sentence:
        found.add("marriage")
    if "great-grandchild" in sentence:
        found.add("great-grandchild")
    sentence = sentence.replace("great-grandchild", " ")
    if "grandchild" in sentence:
        found.add("grandchild")
    sentence = sentence.replace("grandchild", " ")
    if "child" in sentence:
        found.add("child")
    return found


def _cell(values):
    """Format *values* as one Markdown table cell.

    Whitespace runs (including newlines) are collapsed and ``|`` is escaped
    so a value cannot break the row; duplicates are dropped keeping first
    occurrence order.  An empty result is rendered as "Not Found".
    """
    cleaned = [" ".join(v.split()).replace("|", "\\|") for v in values if v]
    unique = [v for v in dict.fromkeys(cleaned) if v]
    return ", ".join(unique) if unique else _NOT_FOUND


def extract_details(text):
    """Build a one-row Markdown table of family details found in *text*.

    The text is run through the module-level spaCy pipeline ``nlp``.  The
    first PERSON entity is taken as the subject and the first DATE entity as
    the birthday.  Every other PERSON/DATE entity is assigned to a column
    only when the matching keyword ("husband", "child", "grandchild",
    "great-grandchild", "marriage") occurs in the *same sentence* as the
    entity.  This is a co-occurrence heuristic, not a relation parser.

    Args:
        text: Plain text to analyse.

    Returns:
        A string holding a Markdown table (header, separator, one data row)
        with a leading and a trailing newline.
    """
    doc = nlp(text)

    names = []
    dates = []
    husband_name = None
    marriage_date = None
    children = []
    grandchildren = []
    greatgrandchildren = []

    for ent in doc.ents:
        if ent.label_ == "DATE":
            dates.append(ent.text)
            if marriage_date is None and "marriage" in _relations_in(ent.sent.text.lower()):
                marriage_date = ent.text
        elif ent.label_ == "PERSON":
            names.append(ent.text)
            # The subject (first person found) cannot be her own relative,
            # so repeated mentions of that name are never classified.
            if ent.text == names[0]:
                continue
            relations = _relations_in(ent.sent.text.lower())
            if husband_name is None and "husband" in relations:
                husband_name = ent.text
            if "child" in relations:
                children.append(ent.text)
            if "grandchild" in relations:
                grandchildren.append(ent.text)
            if "great-grandchild" in relations:
                greatgrandchildren.append(ent.text)

    cells = [
        _cell(names[:1]),
        _cell(dates[:1]),
        _cell([husband_name]),
        _cell(children),
        _cell([marriage_date]),
        _cell(grandchildren),
        _cell(greatgrandchildren),
    ]
    rows = [
        "| Name | Birthday | Husband Name | Children | Marriage Date | Grandchildren | Great-grandchildren |",
        "|-----------------|---------------|----------------|------------------|-----------------|-----------------------|-----------------------|",
        "| " + " | ".join(cells) + " |",
    ]
    return "\n" + "\n".join(rows) + "\n"
|
|
|
def create_table(url):
    """Fetch the page at *url* and return the details table built from it.

    The page text comes from ``extract_text`` and is handed unchanged to
    ``extract_details``; whatever that returns is the result.  Exceptions
    raised by either step propagate to the caller.
    """
    return extract_details(extract_text(url))
|
|
|
# Gradio UI: one text input (the URL) wired to create_table, whose returned
# string is shown in a text output.
demo = gr.Interface(
    fn=create_table,
    inputs="text",
    outputs="text",
)


# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch(show_api=False)
|
|