File size: 4,081 Bytes
8dcfc9b
 
 
 
 
c813793
8dcfc9b
c813793
31bcbcd
c813793
 
 
 
 
 
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c813793
 
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c813793
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
8dcfc9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46da71c
8dcfc9b
 
ba54a85
 
 
 
 
8dcfc9b
ba54a85
8dcfc9b
 
35f56c1
8dcfc9b
 
42935e3
8dcfc9b
 
 
 
31bcbcd
8dcfc9b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st
import openai
import os
import re
import ast
import pandas as pd

# Page header.
st.title(":rocket: Named Entity Recognition (NER) with GPT-3")

# Short instruction rendered above the editable guidelines table.
st.markdown(
    "You can edit the guidelines here. Press  `Delete` to remove a row after selecting it."
)

# Default guidelines, one (entity, definition, color) row per entity type.
# The "color" value is later used to colour the matching entities in the
# rendered sentence.
df = pd.DataFrame(
    data=[
        (
            "PERSON",
            "Short name or full name of a person from any geographic regions.",
            "red",
        ),
        (
            "DATE",
            "Any format of dates. Dates can also be in natural language.",
            "green",
        ),
        (
            "LOC",
            "Name of any geographic location, like cities, countries, continents, districts etc.",
            "blue",
        ),
    ],
    columns=["entity", "definition", "color"],
)

# Editable copy of the table; num_rows="dynamic" lets the user add/remove rows.
edited_df = st.experimental_data_editor(df, num_rows="dynamic")

# Few-shot examples appended to the prompt. The outputs use doubled braces
# because the assembled prompt is later passed through str.format().
examples = [
    {"sentence": sentence, "output": output}
    for sentence, output in (
        (
            "Mr. Jacob lives in Madrid since 12th January 2015.",
            "{{'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}}",
        ),
        (
            "Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.",
            "{{'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}}",
        ),
    )
]


def generate_guidelines_prompt(guidelines, few_shot_examples=None):
    """Build the few-shot NER prompt template for a set of entity guidelines.

    Args:
        guidelines: Mapping whose values are row dicts providing the keys
            "entity" and "definition" (the shape produced by
            ``DataFrame.to_dict(orient="index")``).
        few_shot_examples: Optional list of dicts with "sentence" and
            "output" keys. Defaults to the module-level ``examples``. These
            strings are inserted verbatim into the template, so any literal
            braces in them must already be doubled.

    Returns:
        A ``str.format`` template: entity definitions, the expected output
        format (one key per entity in ``guidelines``), the numbered examples
        and a final ``{}`` placeholder for the sentence to analyse.
    """
    if few_shot_examples is None:
        few_shot_examples = examples

    def _escape(value):
        # The caller runs str.format() on the returned template, so braces
        # coming from user-edited guideline text must be doubled to survive
        # as literal characters.
        return str(value).replace("{", "{{").replace("}", "}}")

    rows = list(guidelines.values())

    # Derive the output format from the actual entity names instead of a
    # fixed PERSON/DATE/LOC list, so edited guidelines are reflected.
    output_format = ", ".join(
        f"'{_escape(row['entity'])}': [list of entities present]" for row in rows
    )

    parts = ["Entity Definition:\n"]
    parts.extend(
        f"{_escape(row['entity'])}: {_escape(row['definition'])}\n" for row in rows
    )
    parts.append("\nOutput Format:\n")
    parts.append("{{" + output_format + "}}\n")
    parts.append("If no entities are presented in any categories keep it None\n")
    parts.append("\nExamples:\n\n")
    for number, example in enumerate(few_shot_examples, start=1):
        parts.append(f"{number}. Sentence: {example['sentence']}\n")
        parts.append(f"Output: {example['output']}\n\n")
    # The trailing "{}" is the slot the caller fills with the user's sentence.
    parts.append(f"{len(few_shot_examples) + 1}. Sentence: {{}}\n")
    parts.append("Output: ")
    return "".join(parts)


# The OpenAI API key is read from the environment (None when it is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Fixed opening turns sent before every request: a system instruction plus a
# one-round user/assistant exchange.
SYSTEM_PROMPT = (
    "You are a smart and intelligent Named Entity Recognition (NER) system. "
    "I will provide you the definition of the entities you need to extract, "
    "the sentence from where your extract the entities and the output format "
    "with examples."
)
USER_PROMPT_1 = "Are you clear about your role?"
ASSISTANT_PROMPT_1 = (
    "Sure, I'm ready to help you with your NER task. "
    "Please provide me with the necessary information to get started."
)


def openai_chat_completion_response(final_prompt):
    """Send the prompt to gpt-3.5-turbo and return the reply text.

    The request consists of the fixed system / user / assistant opening turns
    followed by ``final_prompt`` as the last user message.

    Args:
        final_prompt: Content of the final user message.

    Returns:
        The message content of the first returned choice, with leading and
        trailing spaces and newlines removed.
    """
    conversation = [
        dict(role="system", content=SYSTEM_PROMPT),
        dict(role="user", content=USER_PROMPT_1),
        dict(role="assistant", content=ASSISTANT_PROMPT_1),
        dict(role="user", content=final_prompt),
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion["choices"][0]
    reply_text = first_choice["message"]["content"]
    return reply_text.strip(" \n")


def _highlight_entities(sentence, entities_by_type, colors):
    """Return ``sentence`` with each recognised entity wrapped in markup.

    Each occurrence of an entity becomes ``:color[entity\\[TYPE\\]]``; when no
    usable color is known for the type it becomes ``entity\\[TYPE\\]``.

    Args:
        sentence: The original text.
        entities_by_type: Dict mapping an entity type to a list of entity
            strings (a single string is also accepted).
        colors: Dict mapping an entity type to a color name.

    Returns:
        The annotated sentence; unchanged if there is nothing to highlight.
    """
    markup_by_entity = {}
    for entity_type, entity_list in entities_by_type.items():
        if isinstance(entity_list, str):
            entity_list = [entity_list]
        elif not isinstance(entity_list, (list, tuple, set)):
            # e.g. a bare None returned for "no entities of this type".
            continue
        color = colors.get(entity_type)
        for ent in entity_list:
            # Skip the "None" marker, non-text values and empty strings (an
            # empty pattern would match between every character).
            if not isinstance(ent, str) or not ent or ent == "None":
                continue
            label = f"{ent}\\[{entity_type}\\]"
            if isinstance(color, str) and color:
                label = f":{color}[{label}]"
            markup_by_entity.setdefault(ent, label)

    if not markup_by_entity:
        return sentence

    # One pass over the sentence with literal (escaped) alternatives, longest
    # first, so entity text is never treated as a regex and markup that was
    # already inserted is never matched again.
    pattern = re.compile(
        "|".join(
            re.escape(ent)
            for ent in sorted(markup_by_entity, key=len, reverse=True)
        )
    )
    return pattern.sub(lambda match: markup_by_entity[match.group(0)], sentence)


my_sentence = st.text_input("Your Sentence")
if st.button("Submit"):
    if not my_sentence.strip():
        # Nothing to analyse; avoid a pointless API call.
        st.warning("Please enter a sentence first.")
    else:
        guidelines = edited_df.to_dict(orient="index")
        colors = {row["entity"]: row["color"] for row in guidelines.values()}

        GUIDELINES_PROMPT = generate_guidelines_prompt(guidelines)
        GUIDELINES_PROMPT = GUIDELINES_PROMPT.format(my_sentence)

        ners = openai_chat_completion_response(GUIDELINES_PROMPT)
        # The model is asked for a Python-dict literal; literal_eval only
        # evaluates literals, but the reply may still be malformed.
        try:
            ners_dictionary = ast.literal_eval(ners)
        except (ValueError, SyntaxError, TypeError):
            ners_dictionary = None

        if isinstance(ners_dictionary, dict):
            st.json(ners_dictionary)
            st.markdown(_highlight_entities(my_sentence, ners_dictionary, colors))
        else:
            st.error("Could not parse the model response as a dictionary of entities.")
            st.text(ners)