Mike committed on
Commit
a7c8f02
1 Parent(s): babd965

create two separate highlights

Browse files
Files changed (1) hide show
  1. app.py +28 -10
app.py CHANGED
@@ -1,37 +1,55 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
 
4
# Two token-classification pipelines, loaded once at import time. The model
# ids name a skill-extraction and a knowledge-extraction checkpoint.
token_skill_classifier = pipeline(
    model="jjzha/jobbert_skill_extraction",
    aggregation_strategy="simple",
)
token_knowledge_classifier = pipeline(
    model="jjzha/jobbert_knowledge_extraction",
    aggregation_strategy="simple",
)


# Sample sentences handed to the interface through its `examples` argument.
examples = [
    "Knowing Python is a plus.",
]
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def ner(text):
14
  output_skills = token_skill_classifier(text)
15
  for result in output_skills:
16
  if result.get("entity_group"):
17
- tag = result["entity_group"]
18
- result["entity"] = tag + "-Skill"
19
  del result["entity_group"]
20
 
21
  output_knowledge = token_knowledge_classifier(text)
22
  for result in output_knowledge:
23
  if result.get("entity_group"):
24
- tag = result["entity_group"]
25
- result["entity"] = tag + "-Knowledge"
26
  del result["entity_group"]
 
 
 
27
 
28
- output = output_skills + output_knowledge
29
- return {"text": text, "entities": output}
30
 
31
 
32
  demo = gr.Interface(fn=ner,
33
  inputs=gr.Textbox(placeholder="Enter sentence here..."),
34
- outputs=gr.HighlightedText(),
35
  examples=examples)
36
 
37
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
 
4
# Two token-classification pipelines, loaded once at import time. The model
# ids name a skill-extraction and a knowledge-extraction checkpoint.
token_skill_classifier = pipeline(
    model="jjzha/jobbert_skill_extraction",
    aggregation_strategy="first",
)
token_knowledge_classifier = pipeline(
    model="jjzha/jobbert_knowledge_extraction",
    aggregation_strategy="first",
)


# Sample sentences handed to the interface through its `examples` argument.
examples = [
    "Knowing Python is a plus",
    "Recommend changes, develop and implement processes to ensure compliance with IFRS standards",
]
12
 
13
 
14
def aggregate_span(results):
    """Merge consecutive entity spans that are exactly one character apart.

    Two neighbouring spans are joined when the second one's ``"start"``
    equals the first one's ``"end"`` plus one (i.e. a single separator
    character lies between them).

    Args:
        results: List of entity dicts in scan order. Each dict must have
            ``"start"``, ``"end"`` and ``"word"`` keys; any other keys are
            carried over from the first span of a merged run.

    Returns:
        A new list of dicts. In each merged run the words are joined with a
        single space and ``"end"`` is taken from the last span of the run.
        An empty input yields an empty list.
    """
    if not results:
        # No entities at all: there is no first span to seed the merge with.
        return []

    merged = []
    # Work on copies so the caller's dicts are left untouched.
    current = dict(results[0])

    for span in results[1:]:
        if span["start"] == current["end"] + 1:
            current["word"] += " " + span["word"]
            current["end"] = span["end"]
        else:
            merged.append(current)
            current = dict(span)

    merged.append(current)
    return merged
29
+
30
+
31
  def ner(text):
32
  output_skills = token_skill_classifier(text)
33
  for result in output_skills:
34
  if result.get("entity_group"):
35
+ result["entity"] = "Skill"
 
36
  del result["entity_group"]
37
 
38
  output_knowledge = token_knowledge_classifier(text)
39
  for result in output_knowledge:
40
  if result.get("entity_group"):
41
+ result["entity"] = "Knowledge"
 
42
  del result["entity_group"]
43
+
44
+ output_skills = aggregate_span(output_skills)
45
+ output_knowledge = aggregate_span(output_knowledge)
46
 
47
+ return {"text": text, "entities": output_skills}, {"text": text, "entities": output_knowledge}
 
48
 
49
 
50
  demo = gr.Interface(fn=ner,
51
  inputs=gr.Textbox(placeholder="Enter sentence here..."),
52
+ outputs=["highlight", "highlight"],
53
  examples=examples)
54
 
55
  demo.launch()