File size: 3,241 Bytes
c7f3d38
 
 
d691e4a
c7f3d38
d691e4a
 
 
 
e733676
c7f3d38
 
d691e4a
 
c7f3d38
 
 
 
d691e4a
 
 
 
 
c7f3d38
 
d691e4a
 
 
c7f3d38
 
 
 
 
d691e4a
c7f3d38
 
 
 
 
d691e4a
 
 
e733676
 
d691e4a
 
 
e733676
d691e4a
 
e733676
 
c7f3d38
d691e4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7f3d38
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import gradio as gr
from transformers import pipeline

# The three Hugging Face pipelines below are built once, at import time, and
# shared by every request handler defined in this module.

# Summarization pipeline used by `summarize_text`.
get_completion = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# Token-level named-entity-recognition pipeline used by `named_entity_recognition`.
get_ner = pipeline("ner", model="dslim/bert-base-NER")
# Zero-shot classification pipeline used by `zero_shot_pred`.
get_zero = pipeline(
    "zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
)


def summarize_text(input):
    """Return the summary text produced for *input*.

    The text is passed to the module-level ``get_completion`` pipeline and the
    ``"summary_text"`` field of its first result is returned.
    """
    first_result = get_completion(input)[0]
    return first_result["summary_text"]


def merge_tokens(tokens):
    """Merge consecutive NER tokens that continue the same entity.

    A token whose ``"entity"`` tag starts with ``"I-"`` is folded into the
    previously collected token when that token's tag ends with the same
    entity type (the part after the two-character prefix). Folding appends
    the word (with any ``"##"`` sub-word marker removed, no separator added),
    extends ``"end"`` to the new token's end, and replaces ``"score"`` with
    the average of the running score and the new token's score.

    Args:
        tokens: Iterable of dicts carrying at least the keys ``"entity"``,
            ``"word"``, ``"end"`` and ``"score"``.

    Returns:
        A new list of merged token dicts (empty for empty input). The input
        dicts are copied, so the caller's data is never modified.
    """
    merged_tokens = []
    for token in tokens:
        previous = merged_tokens[-1] if merged_tokens else None
        if (
            previous is not None
            and token["entity"].startswith("I-")
            and previous["entity"].endswith(token["entity"][2:])
        ):
            # Current token continues the entity of the last one: merge them.
            previous["word"] += token["word"].replace("##", "")
            previous["end"] = token["end"]
            previous["score"] = (previous["score"] + token["score"]) / 2
        else:
            # Start a new entity; copy so later merges don't mutate the input.
            merged_tokens.append(dict(token))
    # Return only after every token has been processed.
    return merged_tokens


def named_entity_recognition(input):
    """Run NER on *input* and return the text with its merged entities.

    Returns:
        A dict with ``"text"`` (the unchanged input) and ``"entities"`` (the
        output of the module-level ``get_ner`` pipeline after
        ``merge_tokens`` has combined multi-token entities).
    """
    raw_tokens = get_ner(input)
    return {"text": input, "entities": merge_tokens(raw_tokens)}


def zero_shot_pred(text, check_labels):
    """Classify *text* against *check_labels* with the zero-shot pipeline.

    Both arguments are forwarded as-is to the module-level ``get_zero``
    pipeline, and its result is returned unchanged.
    """
    return get_zero(text, check_labels)


def label_score_dict(text, check_labels):
    """Return a ``{label: score}`` mapping for a zero-shot prediction.

    Calls ``zero_shot_pred`` and pairs the entries of its ``"labels"`` list
    with the entries of its ``"scores"`` list, position by position. The
    mapping is also printed to stdout before being returned.
    """
    prediction = zero_shot_pred(text, check_labels)
    scores_by_label = dict(zip(prediction["labels"], prediction["scores"]))
    print(scores_by_label)
    return scores_by_label


# Summarization tab: feeds a 5-line text box into `summarize_text` and shows
# the returned summary in a second text box.
interface_summarise = gr.Interface(
    fn=summarize_text,
    inputs=[gr.Textbox(label="Text to summarise", lines=5)],
    outputs=[gr.Textbox(label="Summary")],
    title="Text Summarizer",
    description="Summary of text via `distillBART-CNN` model!",
)

# NER tab: feeds a 2-line text box into `named_entity_recognition` and renders
# the returned {"text", "entities"} dict in a HighlightedText component.
# Flagging is turned off and two example sentences are offered.
interface_ner = gr.Interface(
    fn=named_entity_recognition,
    inputs=[gr.Textbox(label="Text to find entities", lines=2)],
    outputs=[gr.HighlightedText(label="Text with entities")],
    title="NER with dslim/bert-base-NER",
    description="Find entities using the `dslim/bert-base-NER` model under the hood!",
    allow_flagging="never",
    examples=[
        "Tim Cook is the CEO of Apple, stays in California and makes iPhones ",
        "My name is Bose and I am a physicist living in Delhi",
    ],
)

# Zero-shot tab: takes the text to classify plus a second text box holding the
# candidate labels (the example supplies them as one comma-separated string),
# passes both to `label_score_dict`, and shows the resulting label/score
# mapping in a Label component configured with num_top_classes=4.
interface_zero_shot = gr.Interface(
    fn=label_score_dict,
    inputs=[
        gr.Textbox(label="Text to classify", lines=2),
        gr.Textbox(label="Check for labels"),
    ],
    outputs=gr.Label(num_top_classes=4),
    title="Zero-Shot Preds using DeBERTa-v3-base-mnli",
    description="Classify sentence on self defined target vars",
    examples=[
        [
            "Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.",
            "mobile, website, billing, account access",
        ],
        # "My name is Bose and I am a physicist living in Delhi"
    ],
)

# Combine the three interfaces into a single tabbed app; the second list gives
# the tab titles in the same order as the interfaces.
demo = gr.TabbedInterface(
    [interface_summarise, interface_ner, interface_zero_shot],
    ["Text Summary ", "Named Entity Recognition", "Zero Shot Classifications"],
)

if __name__ == "__main__":
    # Enable request queuing through `.queue()` rather than the deprecated
    # `enable_queue` keyword of `launch()`, which newer Gradio releases no
    # longer accept. `.queue()` returns the app, so the calls can be chained.
    demo.queue().launch()