"""Streamlit demo: few-shot Named Entity Recognition (NER) via the OpenAI chat API.

The page shows an editable table of entity guidelines (entity name, definition,
highlight colour) and a text input.  On submit, the guidelines and two fixed
examples are assembled into a prompt and sent to ``gpt-3.5-turbo``.  The reply
is expected to be a Python-literal dict mapping entity type to a list of
strings; it is displayed as JSON and as the input sentence with every
recognised entity highlighted.
"""
import ast
import os
import re

import openai
import pandas as pd
import streamlit as st

st.title(":rocket: Named Entity Recognition (NER) with GPT-3")

# st.header("Guidelines")
st.markdown(
    "You can edit the guidelines here. Press `Delete` to remove a row after selecting it."
)

df = pd.DataFrame(
    [
        {
            "entity": "PERSON",
            "definition": "Short name or full name of a person from any geographic regions.",
            "color": "red",
        },
        {
            "entity": "DATE",
            "definition": "Any format of dates. Dates can also be in natural language.",
            "color": "green",
        },
        {
            "entity": "LOC",
            "definition": "Name of any geographic location, like cities, countries, continents, districts etc.",
            "color": "blue",
        },
    ]
)

# Newer Streamlit releases expose the editor as `st.data_editor`; fall back to
# the experimental name so the script keeps working on older installations.
_data_editor = getattr(st, "data_editor", None) or st.experimental_data_editor
edited_df = _data_editor(df, num_rows="dynamic")

# Few-shot examples.  The braces in "output" are doubled on purpose: the prompt
# built from them is later passed through `str.format`, which turns `{{`/`}}`
# back into single braces.
examples = [
    {
        "sentence": "Mr. Jacob lives in Madrid since 12th January 2015.",
        "output": "{{'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}}",
    },
    {
        "sentence": "Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.",
        "output": "{{'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}}",
    },
]


def _cell_text(value):
    """Return a table cell as stripped text, or ``""`` when the cell is empty.

    Empty cells may arrive as ``None`` or NaN (e.g. a row that was added in the
    editor but not filled in), so both are treated as "no value".
    """
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value).strip()


def _escape_braces(text):
    """Double curly braces so *text* survives a later ``str.format`` call."""
    return text.replace("{", "{{").replace("}", "}}")


def generate_guidelines_prompt(guidelines):
    """Build the few-shot prompt template for the configured entity guidelines.

    Args:
        guidelines: Mapping of row index to a row dict holding the keys
            ``"entity"`` and ``"definition"`` (the shape produced by
            ``DataFrame.to_dict(orient="index")``).  Rows without an entity
            name are ignored.

    Returns:
        A ``str.format`` template with exactly one ``{}`` placeholder for the
        sentence to analyse.  All other braces are doubled, so
        ``template.format(sentence)`` yields the final prompt text.
    """
    rows = []
    for guideline in guidelines.values():
        entity = _cell_text(guideline.get("entity"))
        if not entity:
            continue
        definition = _cell_text(guideline.get("definition"))
        # User-edited text must not introduce stray format fields.
        rows.append((_escape_braces(entity), _escape_braces(definition)))

    # Derive the requested output keys from the table instead of hard-coding
    # PERSON/DATE/LOC, so edits to the guidelines are reflected in the prompt.
    # NOTE: the few-shot `examples` are fixed and still use the default types.
    output_fields = ", ".join(
        f"'{entity}': [list of entities present]" for entity, _ in rows
    )

    parts = ["Entity Definition:\n"]
    parts.extend(f"{entity}: {definition}\n" for entity, definition in rows)
    parts.append("\nOutput Format:\n")
    parts.append("{{" + output_fields + "}}\n")
    parts.append("If no entities are presented in any categories keep it None\n")
    parts.append("\nExamples:\n\n")
    for number, example in enumerate(examples, start=1):
        parts.append(f"{number}. Sentence: {example['sentence']}\n")
        parts.append(f"Output: {example['output']}\n\n")
    # The single real placeholder: filled with the user's sentence by the caller.
    parts.append(f"{len(examples) + 1}. Sentence: {{}}\n")
    parts.append("Output: ")
    return "".join(parts)


openai.api_key = os.getenv("OPENAI_API_KEY")

SYSTEM_PROMPT = "You are a smart and intelligent Named Entity Recognition (NER) system. I will provide you the definition of the entities you need to extract, the sentence from where your extract the entities and the output format with examples."

USER_PROMPT_1 = "Are you clear about your role?"

ASSISTANT_PROMPT_1 = "Sure, I'm ready to help you with your NER task. Please provide me with the necessary information to get started."


def openai_chat_completion_response(final_prompt):
    """Send *final_prompt* to ``gpt-3.5-turbo`` and return the reply text.

    The request is preceded by a fixed system message and a short priming
    user/assistant exchange.

    Args:
        final_prompt: The fully formatted NER prompt.

    Returns:
        The content of the first choice, with surrounding spaces and newlines
        stripped.
    """
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT_1},
            {"role": "assistant", "content": ASSISTANT_PROMPT_1},
            {"role": "user", "content": final_prompt},
        ],
    )
    return response["choices"][0]["message"]["content"].strip(" \n")


def parse_ner_response(raw):
    """Parse the model reply into a dict of entity type -> entities.

    Args:
        raw: Reply text from the model.

    Returns:
        The parsed dictionary.

    Raises:
        ValueError: If the reply does not contain a Python-literal dict.
    """
    # Tolerate text around the dict by parsing only the outermost {...} span.
    start, end = raw.find("{"), raw.rfind("}")
    candidate = raw[start : end + 1] if 0 <= start < end else raw
    try:
        # literal_eval only accepts literals, so it is safe on model output.
        parsed = ast.literal_eval(candidate)
    except (ValueError, SyntaxError, TypeError) as err:
        raise ValueError(f"model reply is not a Python literal: {raw!r}") from err
    if not isinstance(parsed, dict):
        raise ValueError(f"model reply is not a dictionary: {raw!r}")
    return parsed


def highlight_entities(sentence, ners_dictionary, colors):
    """Return *sentence* with each recognised entity wrapped in colour markup.

    Every occurrence of an entity becomes ``:<color>[<entity>\\[<TYPE>\\]]``.
    When no colour is configured for the type, the entity is only tagged as
    ``<entity>\\[<TYPE>\\]``.

    Args:
        sentence: The original input sentence.
        ners_dictionary: Mapping of entity type to a list of entity strings.
            A value of ``None``, a bare string, or the placeholder ``"None"``
            is tolerated.
        colors: Mapping of entity type to colour name.

    Returns:
        The marked-up sentence; unchanged when there is nothing to highlight.
    """
    labels = {}
    for entity_type, entity_list in ners_dictionary.items():
        if isinstance(entity_list, str):
            entity_list = [entity_list]
        elif not isinstance(entity_list, (list, tuple, set)):
            continue  # e.g. None when the model found nothing for this type
        for ent in entity_list:
            if isinstance(ent, str) and ent and ent != "None":
                # First type wins if the same text is reported more than once.
                labels.setdefault(ent, str(entity_type))

    if not labels:
        return sentence

    # One pass over the ORIGINAL sentence: entities are matched literally
    # (re.escape) and markup inserted for one entity is never re-scanned for
    # another.  Longer entities come first so they win over their substrings.
    pattern = re.compile(
        "|".join(re.escape(ent) for ent in sorted(labels, key=len, reverse=True))
    )

    def _mark(match):
        ent = match.group(0)
        entity_type = labels[ent]
        tagged = f"{ent}\\[{entity_type}\\]"
        color = colors.get(entity_type)
        return f":{color}[{tagged}]" if color else tagged

    return pattern.sub(_mark, sentence)


def run_ner(sentence, guidelines):
    """Run NER for *sentence* and render the JSON result and highlighted text.

    Args:
        sentence: Text entered by the user.
        guidelines: Guideline rows as returned by
            ``edited_df.to_dict(orient="index")``.
    """
    if not sentence.strip():
        st.warning("Please enter a sentence first.")
        return

    colors = {}
    for guideline in guidelines.values():
        entity = _cell_text(guideline.get("entity"))
        if entity:
            colors[entity] = _cell_text(guideline.get("color"))

    prompt = generate_guidelines_prompt(guidelines).format(sentence)
    ners = openai_chat_completion_response(prompt)
    try:
        ners_dictionary = parse_ner_response(ners)
    except ValueError:
        st.error("Could not parse the model output as a dictionary of entities.")
        st.code(ners)
        return

    st.json(ners_dictionary)
    st.markdown(highlight_entities(sentence, ners_dictionary, colors))


my_sentence = st.text_input("Your Sentence")

if st.button("Submit"):
    run_ner(my_sentence, edited_df.to_dict(orient="index"))