# NERGPT / app.py
# Last commit 35f56c1 by chandralegend: "fixed color issue"
# (Hugging Face file-viewer residue: raw | history | blame | contribute | delete | No virus | 4.08 kB)
import streamlit as st
import openai
import os
import re
import ast
import pandas as pd
# Page heading, followed by a one-line hint on how to use the guideline table.
PAGE_TITLE = ":rocket: Named Entity Recognition (NER) with GPT-3"
EDITOR_HINT = "You can edit the guidelines here. Press `Delete` to remove a row after selecting it."

st.title(PAGE_TITLE)
st.markdown(EDITOR_HINT)
# Default entity guidelines shown in the editable table: one row per entity
# type with its definition and the colour used to highlight matches.
df = pd.DataFrame(
    [
        {
            "entity": "PERSON",
            "definition": "Short name or full name of a person from any geographic regions.",
            "color": "red",
        },
        {
            "entity": "DATE",
            "definition": "Any format of dates. Dates can also be in natural language.",
            "color": "green",
        },
        {
            "entity": "LOC",
            "definition": "Name of any geographic location, like cities, countries, continents, districts etc.",
            "color": "blue",
        },
    ]
)

# `st.experimental_data_editor` is the deprecated name of `st.data_editor`.
# Prefer the stable name when the installed Streamlit provides it and fall
# back to the experimental alias on older releases, so the app runs on both.
_data_editor = getattr(st, "data_editor", None) or st.experimental_data_editor

# num_rows="dynamic" lets the user add and delete guideline rows.
edited_df = _data_editor(df, num_rows="dynamic")
# Few-shot examples appended to every prompt. The outputs use doubled curly
# braces because the finished prompt is later passed through ``str.format``,
# which collapses ``{{`` / ``}}`` into single literal braces.
examples = [
    {
        "sentence": "Mr. Jacob lives in Madrid since 12th January 2015.",
        "output": "{{'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}}",
    },
    {
        "sentence": "Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.",
        "output": "{{'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}}",
    },
]


def _escape_braces(text):
    """Double every curly brace so a later ``str.format`` call keeps it literal."""
    return str(text).replace("{", "{{").replace("}", "}}")


def generate_guidelines_prompt(guidelines):
    """Build the few-shot NER prompt template for the given guidelines.

    Args:
        guidelines: Mapping of row index to a row dict holding at least the
            keys ``"entity"`` and ``"definition"`` (the shape produced by
            ``DataFrame.to_dict(orient="index")``).

    Returns:
        A ``str.format`` template with exactly one ``{}`` placeholder, to be
        filled with the sentence to analyse. It lists the entity definitions,
        the expected output format, the module-level ``examples`` and a final
        numbered slot for the new sentence.

    Rows whose entity is not a non-empty string (for example blank rows added
    in the table editor) are skipped. The output-format line is derived from
    the remaining rows, so renamed or added entity types are reflected in it.
    NOTE: the few-shot ``examples`` are fixed and still use PERSON/DATE/LOC.
    """
    rows = []
    for guideline in guidelines.values():
        entity = guideline.get("entity")
        if not isinstance(entity, str) or not entity.strip():
            continue
        definition = guideline.get("definition")
        if not isinstance(definition, str):
            definition = ""
        # User-edited text may contain braces; escape them so the later
        # ``.format(sentence)`` call neither fails nor substitutes into them.
        rows.append((_escape_braces(entity), _escape_braces(definition)))

    parts = ["Entity Definition:\n"]
    parts.extend(f"{entity}: {definition}\n" for entity, definition in rows)

    parts.append("\nOutput Format:\n")
    output_fields = ", ".join(
        f"'{entity}': [list of entities present]" for entity, _ in rows
    )
    # Plain (non-f) string: the doubled braces must survive until ``.format``.
    parts.append("{{" + output_fields + "}}\n")
    parts.append("If no entities are presented in any categories keep it None\n")

    parts.append("\nExamples:\n\n")
    for number, example in enumerate(examples, start=1):
        parts.append(f"{number}. Sentence: {example['sentence']}\n")
        parts.append(f"Output: {example['output']}\n\n")

    # The only unescaped placeholder: the slot for the user's sentence.
    parts.append(f"{len(examples) + 1}. Sentence: {{}}\n")
    parts.append("Output: ")
    return "".join(parts)
# Read the OpenAI credential from the environment; the value is None when
# the OPENAI_API_KEY variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Fixed opening turns of every conversation: a system instruction plus one
# user/assistant exchange that precede the actual task prompt.
SYSTEM_PROMPT = (
    "You are a smart and intelligent Named Entity Recognition (NER) system. "
    "I will provide you the definition of the entities you need to extract, "
    "the sentence from where your extract the entities "
    "and the output format with examples."
)
USER_PROMPT_1 = "Are you clear about your role?"
ASSISTANT_PROMPT_1 = (
    "Sure, I'm ready to help you with your NER task. "
    "Please provide me with the necessary information to get started."
)
def openai_chat_completion_response(final_prompt):
    """Send the task prompt to the chat model and return its reply text.

    The request carries the fixed system / user / assistant opening turns
    followed by ``final_prompt`` as the last user message. The content of the
    first returned choice is returned with leading and trailing spaces and
    newlines removed.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_1},
        {"role": "assistant", "content": ASSISTANT_PROMPT_1},
        {"role": "user", "content": final_prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    reply_text = completion["choices"][0]["message"]["content"]
    return reply_text.strip(" \n")
def _parse_ner_response(raw_reply):
    """Parse the model's reply into a dict, or return None if that fails.

    ``ast.literal_eval`` only evaluates Python literals, so it is safe to run
    on model output. If the whole reply is not a literal (for example when the
    model wraps its answer in extra text), the outermost ``{...}`` span is
    tried as well.
    """
    candidates = [raw_reply]
    braced = re.search(r"\{.*\}", raw_reply, re.DOTALL)
    if braced and braced.group(0) != raw_reply:
        candidates.append(braced.group(0))
    for candidate in candidates:
        try:
            parsed = ast.literal_eval(candidate)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def _highlight_entities(sentence, ners_dictionary, colors):
    """Return ``sentence`` with every recognised entity wrapped in markup.

    Each entity becomes ``:color[entity\\[TYPE\\]]`` (Streamlit coloured-text
    markdown with the entity type as an escaped bracketed suffix). Entity
    types without a usable colour are annotated with the type only.
    """
    labels = {}
    for entity_type, entity_list in ners_dictionary.items():
        # The model may answer None or a bare string instead of a list.
        if not isinstance(entity_list, (list, tuple, set)):
            continue
        color = colors.get(entity_type)
        for ent in entity_list:
            if isinstance(ent, str) and ent and ent != "None":
                labels.setdefault(ent, (str(entity_type), color))
    if not labels:
        return sentence

    # One pass over the original sentence with all entities as escaped
    # literals, longest first: regex metacharacters in entity text are inert,
    # overlapping entities prefer the longest match, and markup inserted for
    # one entity can never be rewritten by a later one.
    pattern = re.compile(
        "|".join(re.escape(ent) for ent in sorted(labels, key=len, reverse=True))
    )

    def _mark(match):
        ent = match.group(0)
        entity_type, color = labels[ent]
        tagged = ent + "\\[" + entity_type + "\\]"
        if isinstance(color, str) and color:
            return ":" + color + "[" + tagged + "]"
        return tagged

    # A callable replacement avoids template processing of backslashes.
    return pattern.sub(_mark, sentence)


my_sentence = st.text_input("Your Sentence")
if st.button("Submit"):
    if not my_sentence.strip():
        # Nothing to analyse; avoid a pointless API call.
        st.warning("Please enter a sentence first.")
    else:
        guidelines = edited_df.to_dict(orient="index")
        # Map each entity type to the highlight colour chosen in the table.
        colors = {row["entity"]: row["color"] for row in guidelines.values()}

        GUIDELINES_PROMPT = generate_guidelines_prompt(guidelines)
        GUIDELINES_PROMPT = GUIDELINES_PROMPT.format(my_sentence)
        ners = openai_chat_completion_response(GUIDELINES_PROMPT)

        ners_dictionary = _parse_ner_response(ners)
        if ners_dictionary is None:
            st.error("The model's reply could not be parsed as a dictionary of entities.")
            st.code(ners)
        else:
            st.json(ners_dictionary)
            st.markdown(_highlight_entities(my_sentence, ners_dictionary, colors))