Nikhil Singh committed on
Commit
a02ed2b
·
1 Parent(s): c447545

email upload

Browse files
Files changed (1) hide show
  1. app.py +6 -13
app.py CHANGED
@@ -53,13 +53,8 @@ def parse_query(sentences: List[str], labels: List[str], threshold: float = 0.3,
53
 
54
  for sentence in sentences:
55
  _entities = model.predict_entities(sentence, labels, threshold=threshold)
56
-
57
- entities = []
58
-
59
- for entity in _entities:
60
- entities.append(entity)
61
-
62
- results.append({"sentence": sentence, "entities": entities})
63
 
64
  return results
65
 
@@ -69,17 +64,16 @@ def present(email_file, labels, multilingual=False):
69
  further_cleaned_text = remove_special_characters(cleaned_text)
70
  sentence_list = get_sentences(further_cleaned_text)
71
 
72
- result = parse_query(sentence_list, labels, threshold=0.3, nested_ner=False, model_name="urchade/gliner_base", multilingual=multilingual)
73
 
74
  email_info = {
75
  "Subject": email.subject,
76
  "From": email.from_,
77
  "To": email.to,
78
  "Date": email.date,
79
- "Cleaned Body": further_cleaned_text,
80
- "Extracted Entities": result
81
  }
82
- return [email_info[key] for key in email_info]
83
 
84
  labels = ["PERSON", "PRODUCT", "DEAL", "ORDER", "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY", "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]
85
 
@@ -99,8 +93,7 @@ demo = gr.Interface(
99
  gr.components.Textbox(label="From"),
100
  gr.components.Textbox(label="To"),
101
  gr.components.Textbox(label="Date"),
102
- gr.components.Textbox(label="Cleaned Body"),
103
- gr.components.JSON(label="Extracted Entities")
104
  ],
105
  title="Email Info Extractor",
106
  description="Upload an email file (.eml) to extract its details and detected entities."
 
53
 
54
  for sentence in sentences:
55
  _entities = model.predict_entities(sentence, labels, threshold=threshold)
56
+ entities = [{"text": entity["text"], "label": entity["label"]} for entity in _entities]
57
+ results.extend(entities)
 
 
 
 
 
58
 
59
  return results
60
 
 
64
  further_cleaned_text = remove_special_characters(cleaned_text)
65
  sentence_list = get_sentences(further_cleaned_text)
66
 
67
+ entities = parse_query(sentence_list, labels, threshold=0.3, nested_ner=False, model_name="urchade/gliner_base", multilingual=multilingual)
68
 
69
  email_info = {
70
  "Subject": email.subject,
71
  "From": email.from_,
72
  "To": email.to,
73
  "Date": email.date,
74
+ "Extracted Entities": entities
 
75
  }
76
+ return [email_info[key] for key in ["Subject", "From", "To", "Date"]] + [entities]
77
 
78
  labels = ["PERSON", "PRODUCT", "DEAL", "ORDER", "ORDER PAYMENT METHOD", "STORE", "LEGAL ENTITY", "MERCHANT", "FINANCIAL TRANSACTION", "UNCATEGORIZED", "DATE"]
79
 
 
93
  gr.components.Textbox(label="From"),
94
  gr.components.Textbox(label="To"),
95
  gr.components.Textbox(label="Date"),
96
+ gr.components.Dataframe(label="Extracted Entities")
 
97
  ],
98
  title="Email Info Extractor",
99
  description="Upload an email file (.eml) to extract its details and detected entities."