Spaces:

jaseci
/

NERGPT

Runtime error

File size: 4,081 Bytes

8dcfc9b
 
 
 
 
c813793
8dcfc9b
c813793
31bcbcd
c813793
 
 
 
 
 
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c813793
 
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c813793
31bcbcd
 
 
 
 
 
 
 
 
 
 
 
 
8dcfc9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46da71c
8dcfc9b
 
ba54a85
 
 
 
 
8dcfc9b
ba54a85
8dcfc9b
 
35f56c1
8dcfc9b
 
42935e3
8dcfc9b
 
 
 
31bcbcd
8dcfc9b

import streamlit as st
import openai
import os
import re
import ast
import pandas as pd

st.title(":rocket: Named Entity Recognition (NER) with GPT-3")

st.markdown(
    "You can edit the guidelines here. Press  `Delete` to remove a row after selecting it."
)

# Default guidelines shown in the editable table: one row per entity type,
# with the definition sent to the model and the color used for highlighting.
df = pd.DataFrame(
    [
        {
            "entity": "PERSON",
            "definition": "Short name or full name of a person from any geographic regions.",
            "color": "red",
        },
        {
            "entity": "DATE",
            "definition": "Any format of dates. Dates can also be in natural language.",
            "color": "green",
        },
        {
            "entity": "LOC",
            "definition": "Name of any geographic location, like cities, countries, continents, districts etc.",
            "color": "blue",
        },
    ]
)

# ``st.experimental_data_editor`` was renamed to ``st.data_editor`` and the
# experimental alias was dropped in later Streamlit releases. Prefer the
# stable name and fall back to the experimental one on older installs so the
# app starts on either.
_data_editor = getattr(st, "data_editor", None) or st.experimental_data_editor
edited_df = _data_editor(df, num_rows="dynamic")

# Few-shot examples embedded in the prompt. The braces in each "output" are
# doubled because the assembled prompt is later passed through ``str.format``,
# which turns ``{{``/``}}`` back into single literal braces.
examples = [
    {"sentence": sentence, "output": output}
    for sentence, output in (
        (
            "Mr. Jacob lives in Madrid since 12th January 2015.",
            "{{'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}}",
        ),
        (
            "Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.",
            "{{'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}}",
        ),
    )
]


def generate_guidelines_prompt(guidelines, few_shot_examples=None):
    """Build the few-shot NER prompt template for the given entity guidelines.

    The returned string is a ``str.format`` template containing exactly one
    ``{}`` placeholder, into which the caller inserts the sentence to analyse.
    Literal braces coming from the guidelines are doubled so that the later
    ``.format`` call cannot misread them as placeholders.

    Args:
        guidelines: Mapping whose values are row dicts with the keys
            ``"entity"`` and ``"definition"``. Rows whose entity is missing
            or blank are skipped.
        few_shot_examples: Optional list of dicts with ``"sentence"`` and
            ``"output"`` keys. Defaults to the module-level ``examples``.
            Their text is inserted verbatim, so literal braces in it must
            already be doubled.

    Returns:
        The prompt template string, ending in ``"Output: "``.
    """
    if few_shot_examples is None:
        few_shot_examples = examples

    def _escape(text):
        # Double the braces so ``str.format`` renders them literally.
        return str(text).replace("{", "{{").replace("}", "}}")

    # Skip incomplete rows (e.g. a freshly added, still-empty table row).
    rows = [
        row
        for row in guidelines.values()
        if isinstance(row.get("entity"), str) and row["entity"].strip()
    ]

    parts = ["Entity Definition:\n"]
    for row in rows:
        definition = row.get("definition")
        definition = "" if definition is None else definition
        parts.append(f"{_escape(row['entity'])}: {_escape(definition)}\n")

    # Derive the output format from the configured entities instead of
    # hard-coding PERSON/DATE/LOC, so edits to the guidelines are honoured.
    fields = ", ".join(
        f"'{_escape(row['entity'])}': [list of entities present]" for row in rows
    )
    parts.append("\nOutput Format:\n")
    parts.append("{{" + fields + "}}\n")
    parts.append("If no entities are presented in any categories keep it None\n")

    parts.append("\nExamples:\n\n")
    for number, example in enumerate(few_shot_examples, start=1):
        parts.append(f"{number}. Sentence: {example['sentence']}\n")
        parts.append(f"Output: {example['output']}\n\n")

    # The single ``{}`` left unescaped is the slot for the user's sentence.
    parts.append(f"{len(few_shot_examples) + 1}. Sentence: {{}}\n")
    parts.append("Output: ")
    return "".join(parts)


# The API key is read from the environment; it is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Fixed opening turns of the chat that precede the actual NER request.
SYSTEM_PROMPT = (
    "You are a smart and intelligent Named Entity Recognition (NER) system. "
    "I will provide you the definition of the entities you need to extract, "
    "the sentence from where your extract the entities and the output format "
    "with examples."
)
USER_PROMPT_1 = "Are you clear about your role?"
ASSISTANT_PROMPT_1 = (
    "Sure, I'm ready to help you with your NER task. "
    "Please provide me with the necessary information to get started."
)


def openai_chat_completion_response(final_prompt):
    """Ask gpt-3.5-turbo to answer ``final_prompt`` and return the reply text.

    The request consists of the fixed priming turns (system prompt, user
    question, assistant acknowledgement) followed by ``final_prompt`` as the
    last user message.

    Args:
        final_prompt: The fully formatted prompt to send as the last user turn.

    Returns:
        The content of the first returned choice, with leading and trailing
        spaces and newlines removed.
    """
    turns = (
        ("system", SYSTEM_PROMPT),
        ("user", USER_PROMPT_1),
        ("assistant", ASSISTANT_PROMPT_1),
        ("user", final_prompt),
    )
    conversation = [{"role": role, "content": content} for role, content in turns]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )

    first_choice = completion["choices"][0]
    reply_text = first_choice["message"]["content"]
    return reply_text.strip(" \n")


def _highlight_entities(sentence, entities_by_type, colors):
    """Return ``sentence`` with each recognised entity wrapped in markup.

    Every occurrence of an entity is replaced by ``:color[entity\\[TYPE\\]]``
    (Streamlit colored-text markdown). When no usable color is configured for
    the entity type, the entity is only suffixed with ``\\[TYPE\\]``.

    All entities are substituted in a single pass over the original sentence,
    so text inserted for one entity is never re-matched by another, and the
    entity strings are matched literally rather than as regular expressions.

    Args:
        sentence: The original input sentence.
        entities_by_type: Mapping of entity type to a list of entity strings.
            A value of ``None`` or a bare string is tolerated; the placeholder
            entity ``"None"`` and non-string or empty entities are ignored.
        colors: Mapping of entity type to color name.

    Returns:
        The annotated sentence.
    """
    type_of = {}
    for entity_type, entity_list in entities_by_type.items():
        if isinstance(entity_list, str):
            # A bare string would otherwise be iterated character by character.
            entity_list = [entity_list]
        for ent in entity_list or []:
            if isinstance(ent, str) and ent and ent != "None":
                # First type wins if the model lists an entity under several types.
                type_of.setdefault(ent, entity_type)

    if not type_of:
        return sentence

    # Longest first so "Mr. Jacob" is preferred over a nested "Jacob".
    ordered = sorted(type_of, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(ent) for ent in ordered))

    def _mark(match):
        ent = match.group(0)
        entity_type = type_of[ent]
        label = f"{ent}\\[{entity_type}\\]"
        color = colors.get(entity_type)
        if isinstance(color, str) and color:
            return f":{color}[{label}]"
        return label

    return pattern.sub(_mark, sentence)


my_sentence = st.text_input("Your Sentence")
if st.button("Submit"):
    if not my_sentence.strip():
        # Avoid spending an API call on an empty request.
        st.warning("Please enter a sentence first.")
    else:
        guidelines = edited_df.to_dict(orient="index")
        colors = {row["entity"]: row["color"] for row in guidelines.values()}

        GUIDELINES_PROMPT = generate_guidelines_prompt(guidelines)
        GUIDELINES_PROMPT = GUIDELINES_PROMPT.format(my_sentence)

        ners = openai_chat_completion_response(GUIDELINES_PROMPT)
        # The reply is free-form model text; it is only *expected* to be a
        # Python dict literal, so parse defensively instead of crashing.
        try:
            ners_dictionary = ast.literal_eval(ners)
        except (ValueError, SyntaxError):
            ners_dictionary = None

        if not isinstance(ners_dictionary, dict):
            st.error("The model did not return a valid entity dictionary.")
            st.code(ners)
        else:
            st.json(ners_dictionary)
            st.markdown(_highlight_entities(my_sentence, ners_dictionary, colors))