Spaces:

chkla
/

PromptCardsPlayground

Sleeping

App Files Files Community

chkla commited on Apr 13, 2023

Commit

20e2dd0

1 Parent(s): 4def8e0

Upload 2 files

Browse files

Files changed (2) hide show

app_anno.py +146 -0
requirements.txt +5 -0

app_anno.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import pandas as pd
+import streamlit as st
+from langchain import PromptTemplate, HuggingFaceHub, LLMChain
+from langchain.llms import OpenAI
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import os
+import re
+def extract_positive_negative(text):
+    pattern = r'\b(?:positive|negative)\b'
+    result = re.findall(pattern, text)
+    return result
+def classify_text(text, llm_chain, api):
+    if api == "HuggingFace":
+        classification = llm_chain.run(str(text))
+    elif api == "OpenAI":
+        classification = llm_chain.run(str(text))
+        classification = re.sub(r'\s', '', classification)
+    return classification.lower()
+def classify_csv(df, llm_chain, api):
+    df["label_gold"] = df["label"]
+    del df["label"]
+    df["label_pred"] = df["text"].apply(classify_text, llm_chain=llm_chain, api=api)
+    return df
+def classify_csv_zero(zero_file, llm_chain, api):
+    df = pd.read_csv(zero_file, sep=';')
+    df["label"] = df["text"].apply(classify_text, llm_chain=llm_chain, api=api)
+    return df
+def evaluate_performance(df):
+    merged_df = df
+    correct_preds = sum(merged_df["label_gold"] == merged_df["label_pred"])
+    total_preds = len(merged_df)
+    percentage_overlap = correct_preds / total_preds * 100
+    return percentage_overlap
+def display_home():
+    st.write("Please select an API and a model to classify the text. We currently support HuggingFace and OpenAI.")
+    api = st.selectbox("Select an API", ["HuggingFace", "OpenAI"])
+    if api == "HuggingFace":
+        model = st.selectbox("Select a model", ["google/flan-t5-xl", "databricks/dolly-v1-6b"])
+        api_key_hug = st.text_input("HuggingFace API Key")
+    elif api == "OpenAI":
+        model = None
+        api_key_openai = st.text_input("OpenAI API Key")
+    st.write("Please select a temperature for the model. The higher the temperature, the more creative the model will be.")
+    temperature = st.slider("Set the temperature", min_value=0.0, max_value=1.0, value=0.0, step=0.01)
+    st.write("We provide two different setups for the annotation task. In the first setup (**Test**), you can upload a CSV file with gold labels and evaluate the performance of the model. In the second setup (**Zero-Shot**), you can upload a CSV file without gold labels and use the model to classify the text.")
+    setup = st.selectbox("Setup", ["Test", "Zero-Shot"])
+    if setup == "Test":
+        gold_file = st.file_uploader("Upload Gold Labels CSV file with a text and a label column", type=["csv"])
+    elif setup == "Zero-Shot":
+        gold_file = None
+        zero_file = st.file_uploader("Upload CSV file with a text column", type=["csv"])
+    st.write("Please enter the prompt template below. You can use the following variables: {text} (text to classify).")
+    prompt_template = st.text_area("Enter your task description", """Instruction: Identify the sentiment of a text. Please read the text and provide one of these responses: "positive" or "negative".\nText to classify in "positive" or "negative": {text}\nAnswer:""", height=200)
+    classify_button = st.button("Run Classification/ Annotation")
+    if classify_button:
+        if prompt_template:
+            prompt = PromptTemplate(
+                template=prompt_template,
+                input_variables=["text"]
+            )
+            if api == "HuggingFace":
+                if api_key_hug:
+                    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key_hug
+                    llm_chain = LLMChain(prompt=prompt, llm=HuggingFaceHub(repo_id=model, model_kwargs={"temperature": temperature, "max_length": 128}))
+                elif not api_key_hug:
+                    st.warning("Please enter your HuggingFace API key to classify the text.")
+            elif api == "OpenAI":
+                if api_key_openai:
+                    os.environ["OPENAI_API_KEY"] = api_key_openai
+                    llm_chain = LLMChain(prompt=prompt, llm=OpenAI(temperature=temperature))
+                elif not api_key_openai:
+                    st.warning("Please enter your OpenAI API key to classify the text.")
+            if setup == "Zero-Shot":
+                if zero_file is not None:
+                    df_predicted = classify_csv_zero(zero_file, llm_chain, api)
+                    st.write(df_predicted)
+                    st.download_button(
+                        label="Download CSV",
+                        data=df_predicted.to_csv(index=False),
+                        file_name="classified_zero-shot_data.csv",
+                        mime="text/csv"
+                    )
+            elif setup == "Test":
+                if gold_file is not None:
+                    df = pd.read_csv(gold_file, sep=';')
+                    if "label" not in df.columns:
+                        st.warning("Please make sure that the gold labels CSV file contains a column named 'label'.")
+                    else:
+                        df = classify_csv(df, llm_chain, api)
+                        st.write(df)
+                        st.download_button(
+                            label="Download CSV",
+                            data=df.to_csv(index=False),
+                            file_name="classified_test_data.csv",
+                            mime="text/csv"
+                        )
+                        percentage_overlap = evaluate_performance(df)
+                        st.write("**Performance Evaluation**")
+                        st.write(f"Percentage overlap between gold labels and predicted labels: {percentage_overlap:.2f}%")
+                elif gold_file is None:
+                    st.warning("Please upload a gold labels CSV file to evaluate the performance of the model.")
+        elif not prompt:
+            st.warning("Please enter a prompt question to classify the text.")
+def main():
+    st.set_page_config(page_title="PromptCards Playground", page_icon=":pencil2:")
+    st.title("AInnotator")
+    # add a menu to the sidebar
+    if "current_page" not in st.session_state:
+        st.session_state.current_page = "homepage"
+    # Initialize selected_prompt in session_state if not set
+    if "selected_prompt" not in st.session_state:
+        st.session_state.selected_prompt = ""
+    # Add a menu
+    menu = ["Homepage", "Playground", "Prompt Archive", "Annotator", "About"]
+    st.sidebar.title("About")
+    st.sidebar.write("AInnotator 🤖🏷️ is a tool for creating artificial labels/ annotations. It is based on the concept of PromptCards, which are small, self-contained descriptions of a task that can be used to generate labels for a wide range of NLP tasks. Check out the GitHub repository and the PromptCards Archive for more information.")
+    st.sidebar.write("---")
+    st.sidebar.write("Check out the [PromptCards archive]() to find a wide range of prompts for different NLP tasks.")
+    st.sidebar.write("---")
+    st.sidebar.write("Made with ❤️ and 🤖.")
+    display_home()
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+langchain
+pandas
+streamlit
+transformers
+sklearn