#!/usr/bin/env python
# coding: utf-8
"""Gradio app: fetch a web page and tabulate family details found in its text.

The page text is run through spaCy named-entity recognition; PERSON and DATE
entities are then assigned to table columns with simple keyword-proximity
heuristics (see ``extract_details``).
"""

import bisect
import re
from urllib.parse import urlsplit
from urllib.request import Request, urlopen

import gradio as gr
import spacy
from bs4 import BeautifulSoup

# Ensure spaCy model is available
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Loading failed: download the model once, then load it again.
    from spacy.cli import download

    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Only plain web URLs are fetched. urlopen() also understands schemes such as
# file:, which must not be reachable from a user-supplied text box.
_ALLOWED_SCHEMES = frozenset({"http", "https"})
_FETCH_TIMEOUT_SECONDS = 30

# A PERSON is attributed to the closest relation keyword that ends at most
# this many characters before the name starts.
_RELATION_WINDOW = 200
# A DATE counts as the marriage date when a marriage word occurs within this
# many characters on either side of it.
_MARRIAGE_WINDOW = 60

_NOT_FOUND = "Not Found"

# Group names double as relation keys. The most specific alternative comes
# first because "child" is a substring of "grandchild", which is in turn a
# substring of "great-grandchild".
_RELATION_RE = re.compile(
    r"\b(?:"
    r"(?P<husband>husband)"
    r"|(?P<greatgrandchildren>great[-\s]?grandchild(?:ren)?)"
    r"|(?P<grandchildren>grandchild(?:ren)?)"
    r"|(?P<children>child(?:ren)?)"
    r")\b",
    re.IGNORECASE,
)
_MARRIAGE_RE = re.compile(r"\bmarri(?:age|ed)\b", re.IGNORECASE)


def extract_text(url):
    """Download *url* and return its visible text as one space-joined string.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.

    Returns:
        The page's text fragments, each stripped and joined by single spaces.

    Raises:
        ValueError: If the URL scheme is not ``http`` or ``https``.
        urllib.error.URLError: If the request fails (propagated from urlopen).
    """
    url = url.strip()
    scheme = urlsplit(url).scheme.lower()
    if scheme not in _ALLOWED_SCHEMES:
        raise ValueError(
            f"Unsupported URL scheme {scheme!r}; only http and https are allowed"
        )

    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection even if read() fails.
    with urlopen(req, timeout=_FETCH_TIMEOUT_SECONDS) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")
    # Drop script/style elements so their contents are never treated as page
    # text, regardless of how the installed BeautifulSoup version handles them.
    for tag in soup(["script", "style"]):
        tag.decompose()
    return ' '.join(soup.stripped_strings)


def _format_cell(values):
    """Render *values* as one table cell: cleaned, de-duplicated, comma-joined.

    Whitespace runs (including newlines) are collapsed and ``|`` is escaped so
    a value cannot break the table row. Returns "Not Found" when nothing
    remains.
    """
    cleaned = (" ".join(value.split()).replace("|", "\\|") for value in values)
    # dict.fromkeys keeps first-seen order while removing repeats.
    unique = list(dict.fromkeys(value for value in cleaned if value))
    return ", ".join(unique) if unique else _NOT_FOUND


def _group_people_by_relation(source, people):
    """Map each relation key to the PERSON names that follow its keyword.

    Args:
        source: The text the entity offsets refer to.
        people: Entity spans exposing ``text`` and ``start_char``.

    Returns:
        Dict with keys ``husband``, ``children``, ``grandchildren`` and
        ``greatgrandchildren``; each value is a list of names in text order.
    """
    markers = [(m.end(), m.lastgroup) for m in _RELATION_RE.finditer(source)]
    marker_ends = [end for end, _ in markers]  # ascending: finditer scans left to right

    grouped = {
        "husband": [],
        "children": [],
        "grandchildren": [],
        "greatgrandchildren": [],
    }
    for person in people:
        # Index of the last keyword that ends at or before the name starts.
        idx = bisect.bisect_right(marker_ends, person.start_char) - 1
        if idx < 0:
            continue
        end, relation = markers[idx]
        if person.start_char - end <= _RELATION_WINDOW:
            grouped[relation].append(person.text)
    return grouped


def _find_marriage_date(source, dates):
    """Return the text of the first DATE near a marriage word, or ``None``."""
    for date in dates:
        lo = max(0, date.start_char - _MARRIAGE_WINDOW)
        hi = date.end_char + _MARRIAGE_WINDOW
        if _MARRIAGE_RE.search(source[lo:hi]):
            return date.text
    return None


def extract_details(text):
    """Build a one-row Markdown table of family details found in *text*.

    The text is processed with the module-level spaCy pipeline. Columns are
    filled as follows:

    * Name / Birthday: the first PERSON / first DATE entity in the text.
    * Husband Name, Children, Grandchildren, Great-grandchildren: PERSON
      entities whose closest preceding relation keyword (within
      ``_RELATION_WINDOW`` characters) is the matching word.
    * Marriage Date: the first DATE entity with "marriage"/"married" within
      ``_MARRIAGE_WINDOW`` characters.

    These are heuristics: a name that appears *before* its keyword
    ("John, her husband") is not attributed.

    Args:
        text: Plain text to analyse.

    Returns:
        A Markdown table (header, separator, one data row) as a string.
        Cells with no match contain "Not Found".
    """
    # Process text with spaCy
    doc = nlp(text)
    # Entity character offsets index into doc.text, so search that string.
    source = doc.text

    # Extract potential entities
    people = [ent for ent in doc.ents if ent.label_ == "PERSON"]
    dates = [ent for ent in doc.ents if ent.label_ == "DATE"]

    relations = _group_people_by_relation(source, people)
    marriage_date = _find_marriage_date(source, dates)

    cells = [
        _format_cell(ent.text for ent in people[:1]),
        _format_cell(ent.text for ent in dates[:1]),
        _format_cell(relations["husband"][:1]),
        _format_cell(relations["children"]),
        _format_cell([marriage_date] if marriage_date else []),
        _format_cell(relations["grandchildren"]),
        _format_cell(relations["greatgrandchildren"]),
    ]

    # Construct the table
    rows = [
        "| Name | Birthday | Husband Name | Children | Marriage Date | Grandchildren | Great-grandchildren |",
        "|-----------------|---------------|----------------|------------------|-----------------|-----------------------|-----------------------|",
        "| " + " | ".join(cells) + " |",
    ]
    return "\n".join(rows)


def create_table(url):
    """Fetch *url* and return the family-details table for its text.

    This is the Gradio handler; errors from ``extract_text`` propagate.
    """
    text = extract_text(url)
    return extract_details(text)


demo = gr.Interface(fn=create_table, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch(show_api=False)