File size: 1,803 Bytes
317026d
 
 
61232ab
 
dc453a9
317026d
 
a7c8f02
 
317026d
 
 
a7c8f02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317026d
bb98016
 
babd965
a7c8f02
babd965
bb98016
1da09fe
bb98016
babd965
a7c8f02
babd965
89c7e63
 
 
 
 
bb98016
a7c8f02
317026d
 
bb98016
 
a7c8f02
317026d
 
e72157c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
from transformers import pipeline

# Two Hugging Face pipelines, one per JobBERT checkpoint. Both are built once
# at import time and reused for every request handled by ner().
# aggregation_strategy="first" asks the pipeline for grouped entities; ner()
# below relies on the resulting "entity_group" key.
token_skill_classifier = pipeline(
    model="jjzha/jobbert_skill_extraction",
    aggregation_strategy="first",
)
token_knowledge_classifier = pipeline(
    model="jjzha/jobbert_knowledge_extraction",
    aggregation_strategy="first",
)


# Sample sentences passed to the Gradio interface as its `examples`.
examples = [
    "Knowing Python is a plus",
    "Recommend changes, develop and implement processes to ensure compliance with IFRS standards",
]


def aggregate_span(results):
    """Merge consecutive entity spans that are exactly one character apart.

    Two neighbouring entries are joined when the later one's ``"start"``
    equals the earlier one's ``"end"`` plus one (i.e. a single separating
    character, such as a space, lies between them). The merged entry keeps
    every field of the first span in the run, except that ``"word"`` becomes
    the words joined by a single space and ``"end"`` is moved to the end of
    the last span in the run.

    Args:
        results: Sequence of dicts, each with at least ``"word"``,
            ``"start"`` and ``"end"`` keys, in document order.

    Returns:
        A new list of merged span dicts. An empty input yields an empty
        list. The input dicts are not modified; the returned entries are
        shallow copies.
    """
    merged = []

    for result in results:
        if merged and result["start"] == merged[-1]["end"] + 1:
            # Adjacent to the span currently being built: extend it.
            previous = merged[-1]
            previous["word"] += " " + result["word"]
            previous["end"] = result["end"]
        else:
            # Start a new span; copy so the caller's dict is left untouched.
            merged.append(dict(result))

    return merged

def _tag_and_merge(results, label):
    """Relabel grouped entities as *label*, then merge adjacent spans.

    Each entry with a truthy ``"entity_group"`` has that key removed and an
    ``"entity"`` key set to *label* instead. The dicts in *results* are
    updated in place before being handed to ``aggregate_span``.
    """
    for result in results:
        if result.get("entity_group"):
            # NOTE(review): presumably the Gradio "highlight" output expects
            # the label under "entity" rather than "entity_group" - confirm
            # against the HighlightedText documentation.
            result["entity"] = label
            del result["entity_group"]

    # Leave an empty prediction list as-is rather than passing it on.
    return aggregate_span(results) if results else results


def ner(text):
    """Extract skill and knowledge spans from *text*.

    Runs both module-level classifiers on the same text, labels their
    entities "Skill" and "Knowledge" respectively, and merges spans that
    ``aggregate_span`` considers adjacent.

    Args:
        text: The sentence to analyse.

    Returns:
        A 2-tuple of dicts, ``(skills, knowledge)``, each of the form
        ``{"text": text, "entities": [...]}``.
    """
    skills = _tag_and_merge(token_skill_classifier(text), "Skill")
    knowledge = _tag_and_merge(token_knowledge_classifier(text), "Knowledge")

    return (
        {"text": text, "entities": skills},
        {"text": text, "entities": knowledge},
    )


# Build the web UI: one free-text input, and two highlighted-text outputs that
# receive the two dicts returned by ner(), in that order.
_sentence_input = gr.Textbox(placeholder="Enter sentence here...")
_highlight_outputs = ["highlight", "highlight"]

demo = gr.Interface(
    fn=ner,
    inputs=_sentence_input,
    outputs=_highlight_outputs,
    examples=examples,
)

# Start serving as soon as the module is executed.
demo.launch()