Spaces:

KrishanRao
/

Unstructured_to_Structured

Sleeping

App Files Files Community

KrishanRao committed on Jan 18

Commit

f892196

verified ·

1 Parent(s): 980469b

Create app.py

Browse files

Files changed (1) hide show

app.py +53 -0

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+#!/usr/bin/env python
+# coding: utf-8
+import spacy
+from urllib.request import urlopen, Request
+from bs4 import BeautifulSoup
+import gradio as gr
# Load the small English spaCy pipeline into the module-level ``nlp``.
# If loading fails with OSError, download the model once and load it again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download

    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
def extract_text(url, timeout=15):
    """Download *url* and return the text of the page as a single string.

    The URL comes from a user-facing text box, so it is validated before any
    request is made: ``urlopen`` would otherwise also accept ``file://`` and
    ``ftp://`` URLs, letting a caller read local files.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
            Only the ``http`` and ``https`` schemes are accepted.
        timeout: Seconds to wait on the network before giving up, so a
            stalled server cannot block the caller indefinitely.

    Returns:
        Every non-empty text fragment of the parsed HTML, stripped of
        surrounding whitespace and joined with single spaces.

    Raises:
        ValueError: If *url* does not use the ``http`` or ``https`` scheme
            (this includes empty input and URLs without a scheme).
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    from urllib.parse import urlparse  # local import keeps the block self-contained

    url = url.strip()
    scheme = urlparse(url).scheme
    if scheme not in ("http", "https"):
        raise ValueError(
            f"Unsupported URL scheme {scheme!r}; expected 'http' or 'https'"
        )

    # A browser-like User-Agent is sent with the request.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection even if reading fails.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()
    return ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
def extract_details(text):
    """Extract family details from *text* and format them as a Markdown table.

    The text is processed with the module-level spaCy pipeline ``nlp``. Its
    PERSON and DATE entities are assigned to columns by a keyword heuristic:
    an entity is treated as, for example, a child when the *sentence it
    appears in* contains the word "child" or "children". Keywords are matched
    as whole words, so "child" does not fire on "grandchild" and "grandchild"
    does not fire on "great-grandchild".

    This is still a heuristic: when one sentence mentions several kinds of
    relatives, every person in that sentence is listed under each of them.

    Args:
        text: Plain text to analyse.

    Returns:
        A three-line Markdown table (header, divider, one data row). The
        "Name" and "Birthday" columns hold the first PERSON and first DATE
        entity found. "Husband Name" and "Marriage Date" hold "Not Found"
        when nothing matches; the list columns are left empty in that case.
    """
    import re  # local import keeps the block self-contained

    husband_kw = re.compile(r"\bhusband\b", re.IGNORECASE)
    child_kw = re.compile(r"\bchild(?:ren)?\b", re.IGNORECASE)
    # The look-behind rejects the "grandchild" inside "great-grandchild".
    grandchild_kw = re.compile(r"(?<![\w-])grandchild(?:ren)?\b", re.IGNORECASE)
    great_grandchild_kw = re.compile(r"\bgreat-grandchild(?:ren)?\b", re.IGNORECASE)
    marriage_kw = re.compile(r"\bmarri(?:age|ed)\b", re.IGNORECASE)

    doc = nlp(text)
    people = [ent for ent in doc.ents if ent.label_ == "PERSON"]
    dates = [ent for ent in doc.ents if ent.label_ == "DATE"]

    def context(ent):
        """Return the sentence containing *ent*, or the whole text as fallback."""
        try:
            return ent.sent.text
        except ValueError:
            # No sentence boundaries available from the pipeline: fall back to
            # document-wide matching.
            return text

    def matching(ents, keyword):
        """Return unique entity texts, in order, whose sentence matches *keyword*."""
        return list(dict.fromkeys(
            ent.text for ent in ents if keyword.search(context(ent))
        ))

    def cell(value):
        """Collapse whitespace and escape pipes so *value* fits one table cell."""
        return " ".join(value.split()).replace("|", "\\|")

    subject = people[0].text if people else ""
    birthday = dates[0].text if dates else ""
    # The person in the "Name" column cannot also be their own relative.
    relatives = [ent for ent in people if ent.text != subject]

    husband_name = next(iter(matching(relatives, husband_kw)), "Not Found")
    marriage_date = next(iter(matching(dates, marriage_kw)), "Not Found")
    children = matching(relatives, child_kw)
    grandchildren = matching(relatives, grandchild_kw)
    greatgrandchildren = matching(relatives, great_grandchild_kw)

    cells = [
        subject,
        birthday,
        husband_name,
        ", ".join(children),
        marriage_date,
        ", ".join(grandchildren),
        ", ".join(greatgrandchildren),
    ]

    # Rows start at column 0: Markdown renders lines indented by four spaces
    # as a code block instead of a table.
    header = "| Name            | Birthday      | Husband Name   | Children         | Marriage Date   | Grandchildren         | Great-grandchildren    |"
    divider = "|-----------------|---------------|----------------|------------------|-----------------|-----------------------|-----------------------|"
    row = "| " + " | ".join(cell(value) for value in cells) + " |"
    return "\n".join((header, divider, row))
def create_table(url):
    """Fetch the page at *url* and return the details table built from its text."""
    return extract_details(extract_text(url))
# Gradio interface: one text input (the URL) wired to create_table, one text output.
demo = gr.Interface(
    fn=create_table,
    inputs="text",
    outputs="text",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(show_api=False)