File size: 2,138 Bytes
f892196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python
# coding: utf-8

import spacy
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import gradio as gr

# Make sure the spaCy English pipeline "en_core_web_sm" is loaded into the
# module-level `nlp` object used by extract_details(). If loading raises
# OSError (presumably because the model package is not installed yet --
# confirm against the spaCy docs), download it and load it again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Imported here so the CLI helper is only pulled in on the fallback path.
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

def extract_text(url):
    """Download a web page and return its visible text as a single string.

    Args:
        url: Address of the page to fetch. Only ``http://`` and ``https://``
            URLs are accepted; surrounding whitespace is ignored.

    Returns:
        Every text fragment of the parsed HTML, stripped of surrounding
        whitespace and joined with single spaces.

    Raises:
        ValueError: If ``url`` does not start with ``http://`` or
            ``https://``.
        urllib.error.URLError: If the request itself fails.
    """
    url = url.strip()
    # urlopen() also understands file:// and ftp:// URLs. The URL is supplied
    # by the caller unchecked, so restrict it to web schemes to keep this
    # function from being used to read local files.
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError(
            f"Unsupported URL (expected http:// or https://): {url!r}"
        )

    # A browser-like User-Agent is sent instead of urllib's default one
    # (presumably because some sites reject the default agent).
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the HTTP response even if read() fails.
    with urlopen(req) as response:
        html = response.read()
    return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)

def extract_details(text):
    """Build a one-row, pipe-delimited table of family details found in ``text``.

    The text is run through the module-level spaCy pipeline ``nlp``; entities
    labelled ``PERSON`` become candidate names and entities labelled ``DATE``
    become candidate dates.

    NOTE: the categorisation is a document-wide keyword heuristic, not a
    per-entity relation extraction. For example, if the word "husband"
    appears anywhere in ``text``, the first PERSON entity is reported as the
    husband; if "child" appears anywhere, *all* PERSON entities are listed as
    children (likewise for grandchildren and great-grandchildren).

    Args:
        text: Plain text to analyse.

    Returns:
        A multi-line string containing a header row, a separator row and one
        data row. Cells with no match are either empty or "Not Found".
    """
    # Process text with spaCy
    doc = nlp(text)

    # Extract potential entities
    names = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
    dates = [ent.text for ent in doc.ents if ent.label_ == "DATE"]

    # The keyword checks do not depend on the individual entity, so lowercase
    # the text once and evaluate each keyword once instead of per entity.
    # Checking the singular form is enough: "child" is a substring of
    # "children", "grandchild" of "grandchildren", and so on.
    lowered = text.lower()
    mentions_husband = "husband" in lowered
    mentions_children = "child" in lowered
    mentions_marriage = "marriage" in lowered
    mentions_grandchildren = "grandchild" in lowered
    mentions_greatgrandchildren = "great-grandchild" in lowered

    # Simple heuristic for categorizing information
    husband_name = names[0] if names and mentions_husband else "Not Found"
    children = list(names) if mentions_children else []
    marriage_date = dates[0] if dates and mentions_marriage else "Not Found"
    grandchildren = list(names) if mentions_grandchildren else []
    greatgrandchildren = list(names) if mentions_greatgrandchildren else []

    # Construct the table
    table = f"""
    | Name            | Birthday      | Husband Name   | Children         | Marriage Date   | Grandchildren         | Great-grandchildren    |
    |-----------------|---------------|----------------|------------------|-----------------|-----------------------|-----------------------|
    | {', '.join(names[:1])} | {', '.join(dates[:1])} | {husband_name}   | {', '.join(children)} | {marriage_date} | {', '.join(grandchildren)} | {', '.join(greatgrandchildren)} |
    """
    return table

def create_table(url):
    """Fetch the page at ``url`` and return the details table built from its text."""
    return extract_details(extract_text(url))

# Gradio UI: one text input (the page URL) mapped through create_table to one
# text output (the generated table).
demo = gr.Interface(
    fn=create_table,
    inputs="text",
    outputs="text",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(show_api=False)