KrishanRao committed on
Commit
f892196
·
verified ·
1 Parent(s): 980469b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ import spacy
5
+ from urllib.request import urlopen, Request
6
+ from bs4 import BeautifulSoup
7
+ import gradio as gr
8
+
9
def _load_model(model_name):
    """Return the spaCy pipeline ``model_name``, downloading it if needed.

    The pipeline is loaded directly first. If that fails with ``OSError`` the
    model is downloaded through spaCy's CLI helper and loaded again.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        # Imported lazily: the CLI helper is only needed on the fallback path.
        from spacy.cli import download

        download(model_name)
        return spacy.load(model_name)


# Shared pipeline used by extract_details().
nlp = _load_model("en_core_web_sm")
16
+
17
def extract_text(url, timeout=15):
    """Download a web page and return its visible text as one string.

    Args:
        url: Address of the page to fetch. Only ``http`` and ``https`` URLs
            are accepted; surrounding whitespace is ignored.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        Every text fragment of the parsed page, stripped of surrounding
        whitespace and joined with single spaces.

    Raises:
        ValueError: If ``url`` does not use the http or https scheme.
        OSError: If the page cannot be fetched (includes
            ``urllib.error.URLError`` and timeouts).
    """
    # Function-scope import so the module-level import list stays untouched.
    from urllib.parse import urlsplit

    url = url.strip()
    # urlopen() also understands schemes such as file:// and ftp://. The URL
    # is typed in by the app's user, so restrict it to web schemes to keep
    # local files on the host from being read.
    if urlsplit(url).scheme.lower() not in ("http", "https"):
        raise ValueError(f"Only http(s) URLs are supported, got: {url!r}")

    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # The context manager closes the connection even if read() fails, and the
    # timeout prevents a stalled server from blocking the caller forever.
    with urlopen(req, timeout=timeout) as response:
        html = response.read()

    text = ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
    return text
22
+
23
def extract_details(text):
    """Build a one-row Markdown table of family details found in ``text``.

    The text is run through the module-level spaCy pipeline ``nlp``; PERSON
    entities become candidate names and DATE entities candidate dates. Each
    table column is then filled by a keyword test on the text.

    Args:
        text: Plain text to analyse.

    Returns:
        A string holding a Markdown table (header, separator and one data
        row), starting and ending with a newline.
    """
    doc = nlp(text)

    # Candidate entities, in document order.
    names = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
    dates = [ent.text for ent in doc.ents if ent.label_ == "DATE"]

    # Lowercase the text once instead of once per entity per column.
    lowered = text.lower()

    # Each keyword is a substring of its plural ("child" matches "children",
    # and so on), so a single membership test per category is enough.
    mentions_husband = "husband" in lowered
    mentions_child = "child" in lowered
    mentions_marriage = "marriage" in lowered
    mentions_grandchild = "grandchild" in lowered
    mentions_great_grandchild = "great-grandchild" in lowered

    # NOTE(review): these tests look at the whole text, not at the individual
    # name or date, so every detected person is listed in every category whose
    # keyword occurs anywhere. Kept as-is to preserve the existing output;
    # TODO: tie each entity to the keyword in its own sentence.
    husband_name = names[0] if mentions_husband and names else "Not Found"
    children = list(names) if mentions_child else []
    marriage_date = dates[0] if mentions_marriage and dates else "Not Found"
    grandchildren = list(names) if mentions_grandchild else []
    greatgrandchildren = list(names) if mentions_great_grandchild else []

    # Construct the table; the first detected name and date fill the first
    # two columns (empty when nothing was detected).
    table = f"""
| Name | Birthday | Husband Name | Children | Marriage Date | Grandchildren | Great-grandchildren |
|-----------------|---------------|----------------|------------------|-----------------|-----------------------|-----------------------|
| {', '.join(names[:1])} | {', '.join(dates[:1])} | {husband_name} | {', '.join(children)} | {marriage_date} | {', '.join(grandchildren)} | {', '.join(greatgrandchildren)} |
"""
    return table
45
+
46
def create_table(url):
    """Fetch the page at ``url`` and return the details table built from it.

    Chains ``extract_text`` (download and text extraction) into
    ``extract_details`` (entity extraction and table formatting).
    """
    return extract_details(extract_text(url))
49
+
50
# Gradio UI: one text input (the URL) mapped to one text output (the table).
demo = gr.Interface(
    fn=create_table,
    inputs="text",
    outputs="text",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(show_api=False)