|
import streamlit as st |
|
import openai |
|
import os |
|
import re |
|
import ast |
|
|
|
st.title("Named Entity Recognition (NER) with GPT-3")

# Seed the guideline list only when this session has no "guidelines" entry yet,
# so rows appended later in the session are not overwritten on script reruns.
if "guidelines" not in st.session_state:
    st.session_state["guidelines"] = [
        {"entity": entity, "definition": definition, "color": color}
        for entity, definition, color in (
            (
                "PERSON",
                "Short name or full name of a person from any geographic regions.",
                "red",
            ),
            (
                "DATE",
                "Any format of dates. Dates can also be in natural language.",
                "green",
            ),
            (
                "LOC",
                "Name of any geographic location, like cities, countries, continents, districts etc.",
                "blue",
            ),
        )
    ]
|
|
|
|
|
st.header("Guidelines")

# Reserve the table's slot now and fill it only after the "Add Guideline"
# button has been handled, so a freshly appended row is visible in the same
# script run instead of only after the next rerun.
guidelines_table = st.empty()

st.write("You can add new guidelines here.")
new_entity = st.text_input("Entity")
new_definition = st.text_input("Definition")
color = st.text_input("Color")
if st.button("Add Guideline"):
    entity_name = new_entity.strip()
    definition_text = new_definition.strip()
    # Lower-cased because the color is later spliced into ":<color>[...]" markdown.
    color_name = color.strip().lower()
    known_entities = {row["entity"] for row in st.session_state["guidelines"]}

    if not (entity_name and definition_text and color_name):
        # A blank entity or definition would only add noise to the prompt.
        st.warning("Entity, definition and color are all required.")
    elif entity_name in known_entities:
        # A second row for the same entity would put two definitions in the
        # prompt and silently override the first row's color.
        st.warning(f"A guideline for '{entity_name}' already exists.")
    else:
        st.session_state["guidelines"].append(
            {
                "entity": entity_name,
                "definition": definition_text,
                "color": color_name,
            }
        )

guidelines_table.table(st.session_state["guidelines"])
|
|
|
|
|
# Few-shot examples appended to the prompt. The "output" strings use doubled
# braces because the assembled prompt is later passed through str.format().
examples = [
    {"sentence": sentence, "output": output}
    for sentence, output in (
        (
            "Mr. Jacob lives in Madrid since 12th January 2015.",
            "{{'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}}",
        ),
        (
            "Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.",
            "{{'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}}",
        ),
    )
]
|
|
|
|
|
def generate_guidelines_prompt(guidelines, example_list=None):
    """Build the few-shot NER prompt template for the given guidelines.

    The returned text is a ``str.format`` template: it ends with a numbered
    ``Sentence: {}`` slot followed by ``Output: ``, and every other brace in
    it is doubled so that ``template.format(sentence)`` renders it literally.

    Args:
        guidelines: Iterable of dicts with ``"entity"`` and ``"definition"``
            keys. The "Output Format" line lists exactly these entities.
        example_list: Optional sequence of dicts with ``"sentence"`` and
            ``"output"`` keys used as few-shot examples. Their text is
            inserted verbatim, so literal braces must already be doubled.
            Defaults to the module-level ``examples``.

    Returns:
        The prompt template string.
    """
    if example_list is None:
        example_list = examples

    def _escape(text):
        # Guideline text is user-entered; a stray brace would otherwise make
        # the later ``.format(sentence)`` call raise.
        return str(text).replace("{", "{{").replace("}", "}}")

    guidelines = list(guidelines)  # iterated twice below

    parts = ["Entity Definition:\n"]
    parts.extend(
        f"{_escape(row['entity'])}: {_escape(row['definition'])}\n"
        for row in guidelines
    )

    # Derive the expected output shape from the actual guidelines so that
    # entities added at runtime are requested from the model too.
    output_shape = ", ".join(
        f"'{_escape(row['entity'])}': [list of entities present]"
        for row in guidelines
    )
    parts.append("\nOutput Format:\n")
    parts.append("{{" + output_shape + "}}\n")
    parts.append("If no entities are presented in any categories keep it None\n")

    parts.append("\nExamples:\n\n")
    for number, example in enumerate(example_list, start=1):
        parts.append(f"{number}. Sentence: {example['sentence']}\n")
        parts.append(f"Output: {example['output']}\n\n")

    # The single un-doubled "{}" is the slot the caller fills with the sentence.
    parts.append(f"{len(example_list) + 1}. Sentence: {{}}\n")
    parts.append("Output: ")
    return "".join(parts)
|
|
|
|
|
# The API key is read from the environment; it is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Fixed opening turns of the conversation sent ahead of the actual NER request.
SYSTEM_PROMPT = (
    "You are a smart and intelligent Named Entity Recognition (NER) system. "
    "I will provide you the definition of the entities you need to extract, "
    "the sentence from where your extract the entities and the output format "
    "with examples."
)
USER_PROMPT_1 = "Are you clear about your role?"
ASSISTANT_PROMPT_1 = (
    "Sure, I'm ready to help you with your NER task. "
    "Please provide me with the necessary information to get started."
)

# Prompt template built from the session's current guidelines; it still
# contains the "{}" slot for the sentence.
GUIDELINES_PROMPT = generate_guidelines_prompt(st.session_state["guidelines"])
|
|
|
|
|
def openai_chat_completion_response(final_prompt):
    """Send the NER prompt to ``gpt-3.5-turbo`` and return the reply text.

    The request is prefixed with the fixed system/user/assistant turns defined
    at module level; ``final_prompt`` is sent as the last user message.

    Args:
        final_prompt: The fully formatted prompt, including the sentence.

    Returns:
        The content of the first choice, with leading and trailing spaces and
        newlines removed.
    """
    conversation = (
        ("system", SYSTEM_PROMPT),
        ("user", USER_PROMPT_1),
        ("assistant", ASSISTANT_PROMPT_1),
        ("user", final_prompt),
    )
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": role, "content": content} for role, content in conversation
        ],
    )
    reply = completion["choices"][0]["message"]["content"]
    return reply.strip(" \n")
|
|
|
|
|
# Map each entity label to the display color configured in its guideline.
colors = {
    guideline["entity"]: guideline["color"]
    for guideline in st.session_state["guidelines"]
}
|
|
|
|
|
def _parse_entities(raw_response):
    """Parse the model's reply into ``{entity_type: [entity, ...]}``.

    Returns ``None`` when the reply is not a Python dict literal. Within a
    dict, a value of ``None`` or a bare string is tolerated, and the ``"None"``
    placeholder, empty strings and non-string items are dropped.
    """
    try:
        # literal_eval only accepts literals, so the reply is never executed.
        parsed = ast.literal_eval(raw_response)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return None
    if not isinstance(parsed, dict):
        return None

    entities_by_type = {}
    for entity_type, found in parsed.items():
        if found is None:
            found = []
        elif isinstance(found, str):
            found = [found]
        elif not isinstance(found, (list, tuple, set)):
            continue
        entities_by_type[str(entity_type)] = [
            ent for ent in found if isinstance(ent, str) and ent and ent != "None"
        ]
    return entities_by_type


def _highlight_entities(sentence, entities_by_type, entity_colors):
    """Return ``sentence`` with each entity wrapped as ``:color[text\\[TYPE\\]]``.

    All entities are substituted in a single regex pass so that markup
    inserted for one entity can never be matched and rewritten by another.
    Entity types without a configured color are labelled but not colored.
    """
    type_of = {}
    for entity_type, found in entities_by_type.items():
        for ent in found:
            type_of.setdefault(ent, entity_type)
    if not type_of:
        return sentence

    # Longest first, so "Mr. Jacob" wins over a shorter entity such as "Jacob".
    # re.escape keeps characters like "." or "(" in entity text literal.
    pattern = re.compile(
        "|".join(re.escape(ent) for ent in sorted(type_of, key=len, reverse=True))
    )

    def _wrap(match):
        text = match.group(0)
        entity_type = type_of[text]
        labelled = f"{text}\\[{entity_type}\\]"
        color = entity_colors.get(entity_type)
        return f":{color}[{labelled}]" if color else labelled

    # A callable replacement is inserted verbatim (no backslash processing).
    return pattern.sub(_wrap, sentence)


my_sentence = st.text_input("Your Sentence")
if st.button("Submit"):
    if not my_sentence.strip():
        # Avoid a pointless API call for an empty sentence.
        st.warning("Please enter a sentence first.")
    else:
        # Use a local name so the module-level template is not overwritten.
        final_prompt = GUIDELINES_PROMPT.format(my_sentence)
        print(final_prompt)
        ners = openai_chat_completion_response(final_prompt)
        ners_dictionary = _parse_entities(ners)
        if ners_dictionary is None:
            st.error("The model returned a response that could not be parsed.")
            st.text(ners)
        else:
            st.markdown(_highlight_entities(my_sentence, ners_dictionary, colors))
|
|