File size: 4,325 Bytes
8dcfc9b
 
 
 
 
 
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46da71c
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8dcfc9b
 
 
 
 
31bcbcd
8dcfc9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46da71c
42935e3
 
 
 
8dcfc9b
 
 
 
31bcbcd
8dcfc9b
 
 
 
42935e3
8dcfc9b
 
 
 
31bcbcd
8dcfc9b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import streamlit as st
import openai
import os
import re
import ast

st.title("Named Entity Recognition (NER) with GPT-3")

# Seed the per-session guideline list the first time the script runs in a
# session; later reruns keep whatever is already stored under "guidelines".
if "guidelines" not in st.session_state:
    st.session_state["guidelines"] = [
        {"entity": label, "definition": meaning, "color": shade}
        for label, meaning, shade in (
            (
                "PERSON",
                "Short name or full name of a person from any geographic regions.",
                "red",
            ),
            (
                "DATE",
                "Any format of dates. Dates can also be in natural language.",
                "green",
            ),
            (
                "LOC",
                "Name of any geographic location, like cities, countries, continents, districts etc.",
                "blue",
            ),
        )
    ]


st.header("Guidelines")

# Reserve the table's position now and fill it in after the "Add Guideline"
# button has been handled, so a guideline added during this run is visible
# immediately instead of only on the next rerun.
guidelines_table = st.empty()

st.write("You can add new guidelines here.")
new_entity = st.text_input("Entity")
new_definition = st.text_input("Definition")
color = st.text_input("Color")
if st.button("Add Guideline"):
    entity_name = new_entity.strip()
    definition_text = new_definition.strip()
    if entity_name and definition_text:
        st.session_state["guidelines"].append(
            {
                "entity": entity_name,
                "definition": definition_text,
                "color": color.strip(),
            }
        )
    else:
        # A blank entity or definition would put an unusable line into the
        # prompt built from these guidelines, so refuse it here.
        st.warning("Both Entity and Definition are required to add a guideline.")

# display guidelines in a table
guidelines_table.table(st.session_state["guidelines"])


# Few-shot examples inserted into the prompt template. The braces in each
# output are doubled because the finished template is later passed through
# str.format(), which turns "{{" / "}}" back into single braces.
examples = [
    {"sentence": text, "output": labelled}
    for text, labelled in (
        (
            "Mr. Jacob lives in Madrid since 12th January 2015.",
            "{{'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}}",
        ),
        (
            "Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.",
            "{{'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}}",
        ),
    )
]


def generate_guidelines_prompt(guidelines, example_list=None):
    """Build the NER prompt template from the entity guidelines.

    The result is a ``str.format`` template containing exactly one ``{}``
    placeholder, which the caller fills with the sentence to analyse. Every
    other brace in the template is doubled so that ``format`` leaves it as a
    literal brace.

    Args:
        guidelines: Iterable of dicts with at least the keys ``"entity"`` and
            ``"definition"``.
        example_list: Optional list of dicts with the keys ``"sentence"`` and
            ``"output"``. Defaults to the module-level ``examples``. These
            strings are inserted verbatim, so any literal braces in them must
            already be doubled.

    Returns:
        The prompt template as a string, ending with ``"Output: "``.
    """
    if example_list is None:
        example_list = examples

    def _escape_braces(text):
        # Guideline text is typed by the user; a stray "{" or "}" would
        # otherwise make the later str.format() call fail.
        return str(text).replace("{", "{{").replace("}", "}}")

    parts = ["Entity Definition:\n"]
    entity_names = []
    for guideline in guidelines:
        entity = _escape_braces(guideline["entity"])
        definition = _escape_braces(guideline["definition"])
        entity_names.append(entity)
        parts.append(f"{entity}: {definition}\n")

    # Derive the output schema from the actual guidelines (first occurrence
    # of each entity name wins) so entity types added at runtime are
    # requested from the model too.
    output_format = ", ".join(
        f"'{name}': [list of entities present]"
        for name in dict.fromkeys(entity_names)
    )
    parts.append("\nOutput Format:\n")
    parts.append("{{" + output_format + "}}\n")
    parts.append("If no entities are presented in any categories keep it None\n")

    parts.append("\nExamples:\n\n")
    for number, example in enumerate(example_list, start=1):
        parts.append(f"{number}. Sentence: {example['sentence']}\n")
        parts.append(f"Output: {example['output']}\n\n")

    # The single "{}" below is the placeholder for the user's sentence.
    parts.append(f"{len(example_list) + 1}. Sentence: {{}}\n")
    parts.append("Output: ")
    return "".join(parts)


# The API key is read from the environment; it is None when the variable
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Fixed opening turns of the conversation sent with every request.
SYSTEM_PROMPT = (
    "You are a smart and intelligent Named Entity Recognition (NER) system. "
    "I will provide you the definition of the entities you need to extract, "
    "the sentence from where your extract the entities "
    "and the output format with examples."
)
USER_PROMPT_1 = "Are you clear about your role?"
ASSISTANT_PROMPT_1 = (
    "Sure, I'm ready to help you with your NER task. "
    "Please provide me with the necessary information to get started."
)

# Prompt template built from the guidelines currently stored in the session.
GUIDELINES_PROMPT = generate_guidelines_prompt(st.session_state["guidelines"])


def openai_chat_completion_response(final_prompt):
    """Send the prompt to the chat model and return its reply text.

    The request consists of the fixed system / user / assistant opening turns
    followed by ``final_prompt`` as the last user message.

    Args:
        final_prompt: The fully formatted prompt for the final user turn.

    Returns:
        The content of the first returned choice, with leading and trailing
        spaces and newlines removed.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_1},
        {"role": "assistant", "content": ASSISTANT_PROMPT_1},
        {"role": "user", "content": final_prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    reply_text = completion["choices"][0]["message"]["content"]
    return reply_text.strip(" \n")


# Map each entity name to its display colour; if an entity name occurs more
# than once, the last guideline's colour is the one kept.
colors = {
    entry["entity"]: entry["color"] for entry in st.session_state["guidelines"]
}


def _parse_ner_response(raw_reply):
    """Parse the model reply into ``{entity_type: [entity, ...]}``.

    Returns None when the reply is not a Python dict literal. Values that are
    None, the placeholder string "None", empty strings, or non-strings are
    dropped; a bare string value is treated as a one-element list.
    """
    try:
        parsed = ast.literal_eval(raw_reply)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return None
    if not isinstance(parsed, dict):
        return None

    cleaned = {}
    for entity_type, entities in parsed.items():
        if isinstance(entities, str):
            entities = [entities]
        elif not isinstance(entities, (list, tuple, set)):
            # e.g. the model answered None for a category with no entities.
            entities = []
        cleaned[str(entity_type)] = [
            ent for ent in entities if isinstance(ent, str) and ent and ent != "None"
        ]
    return cleaned


def _highlight_entities(sentence, entities_by_type, color_by_type):
    """Return ``sentence`` with every entity wrapped in coloured markdown.

    Each occurrence of an entity becomes ``:color[entity\\[TYPE\\]]``; when no
    colour is known for the type, only the ``entity\\[TYPE\\]`` tag is used.
    """
    type_of = {}
    for entity_type, entities in entities_by_type.items():
        for ent in entities:
            if ent:
                # If the same text is listed under several types, keep the first.
                type_of.setdefault(ent, entity_type)
    if not type_of:
        return sentence

    # One combined, escaped pattern applied in a single pass: entities are
    # matched literally, longer entities win over their substrings, and
    # markup that has just been inserted is never matched again.
    pattern = re.compile(
        "|".join(re.escape(ent) for ent in sorted(type_of, key=len, reverse=True))
    )

    def _mark(match):
        ent = match.group(0)
        entity_type = type_of[ent]
        tagged = ent + "\\[" + entity_type + "\\]"
        shade = color_by_type.get(entity_type)
        if not shade:
            return tagged
        return ":" + shade + "[" + tagged + "]"

    return pattern.sub(_mark, sentence)


my_sentence = st.text_input("Your Sentence")
if st.button("Submit"):
    if not my_sentence.strip():
        st.warning("Please enter a sentence first.")
    else:
        # Fill the template into a local name so the template itself is not
        # overwritten by its formatted result.
        final_prompt = GUIDELINES_PROMPT.format(my_sentence)
        print(final_prompt)
        ners = openai_chat_completion_response(final_prompt)
        ners_dictionary = _parse_ner_response(ners)
        if ners_dictionary is None:
            st.error("Could not parse the model response as an entity dictionary.")
            st.code(ners)
        else:
            st.markdown(_highlight_entities(my_sentence, ners_dictionary, colors))