# KrishanRao's picture
# Create app.py
# f892196 verified
#!/usr/bin/env python
# coding: utf-8
from urllib.parse import urlparse
from urllib.request import urlopen, Request

import gradio as gr
import spacy
from bs4 import BeautifulSoup
# Load the small English spaCy pipeline, downloading it first if the
# initial load fails with OSError.
_MODEL_NAME = "en_core_web_sm"
try:
    nlp = spacy.load(_MODEL_NAME)
except OSError:
    # Imported lazily: the CLI helper is only needed on this fallback path.
    from spacy.cli import download

    download(_MODEL_NAME)
    nlp = spacy.load(_MODEL_NAME)
def extract_text(url, timeout=10):
    """Download a web page and return its visible text as one string.

    Args:
        url: Absolute ``http://`` or ``https://`` address of the page.
            Leading and trailing whitespace is ignored.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        Every non-empty text fragment of the parsed page, stripped and
        joined with single spaces.

    Raises:
        ValueError: If ``url`` is not an absolute http(s) URL.
        OSError: If the page cannot be fetched (``urllib.error.URLError``
            and socket timeouts are both subclasses).
    """
    url = url.strip()
    parsed = urlparse(url)
    # urlopen also understands schemes such as file:// and ftp://.  The URL
    # is user-supplied, so anything other than http(s) is refused before any
    # request is made.
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError(f"Expected an http(s) URL, got: {url!r}")

    # Browser-like User-Agent; presumably some sites refuse urllib's default.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection even if read() fails.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()

    return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
def extract_details(text):
    """Build a one-row Markdown table of family details found in ``text``.

    The text is run through the module-level spaCy pipeline ``nlp``.
    PERSON entities are treated as names and DATE entities as dates.

    NOTE(review): every keyword test below looks at the whole text, not at
    the words around a particular entity.  As soon as "child" occurs
    anywhere, *all* detected names are listed as children (likewise for
    grandchildren and great-grandchildren), and the husband / marriage date
    are simply the first name / first date.  The output is kept exactly as
    before; tightening the heuristic is a separate change.

    Args:
        text: Plain text to analyse.

    Returns:
        A string that starts and ends with a newline and contains the table
        header, the separator row and a single data row.
    """
    doc = nlp(text)
    names = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
    dates = [ent.text for ent in doc.ents if ent.label_ == "DATE"]

    # Lower-case the text once instead of once per entity per category.
    lowered = text.lower()

    # The singular keywords are substrings of their plurals ("child" is in
    # "children", etc.), so one containment test per category is enough.
    husband_name = names[0] if names and "husband" in lowered else "Not Found"
    marriage_date = dates[0] if dates and "marriage" in lowered else "Not Found"
    children = names if "child" in lowered else []
    grandchildren = names if "grandchild" in lowered else []
    greatgrandchildren = names if "great-grandchild" in lowered else []

    header = "| Name | Birthday | Husband Name | Children | Marriage Date | Grandchildren | Great-grandchildren |"
    divider = "|-----------------|---------------|----------------|------------------|-----------------|-----------------------|-----------------------|"
    # First detected name / date fill the "Name" / "Birthday" columns.
    row = (
        f"| {', '.join(names[:1])} | {', '.join(dates[:1])} | {husband_name} "
        f"| {', '.join(children)} | {marriage_date} "
        f"| {', '.join(grandchildren)} | {', '.join(greatgrandchildren)} |"
    )

    # The empty first and last items reproduce the leading and trailing newline.
    return "\n".join(("", header, divider, row, ""))
def create_table(url):
    """Return the details table for the page at ``url``.

    Chains the two helpers: the text produced by ``extract_text`` is handed
    straight to ``extract_details`` and that result is returned unchanged.
    """
    return extract_details(extract_text(url))
# Gradio UI: one text input (the URL) wired to create_table, one text output.
demo = gr.Interface(
    fn=create_table,
    inputs="text",
    outputs="text",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(show_api=False)