Data_Generation_LabelingCopy

Sleeping

App Files Files

Wedyan2023 committed on Oct 25, 2024

Commit

de05ae1

verified ·

1 Parent(s): 5ad0f33

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -119

app.py CHANGED Viewed

@@ -6,134 +6,133 @@
 @author: Wedyan2023
 @email: w.s.alskaran2@gmail.com
 """
 import streamlit as st
-#from openai import OpenAI
-from huggingface import transformers
-# Initialize session state
 if 'messages' not in st.session_state:
     st.session_state.messages = []
-# Function to generate system prompt based on user inputs
-def create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels):
-    system_prompt = f"You are a professional {classification_type.lower()} expert. Your role is to generate exactly {num_to_generate} data examples for {domain}. "
-    system_prompt += f"Each example should consist of between {min_words} and {max_words} words. "
-    system_prompt += "Use the following labels: " + ", ".join(labels) + ". Please do not add any extra commentary or explanation. "
-    system_prompt += "Format each example like this: \nExample: <text>, Label: <label>\n"
-    return system_prompt
-# OpenAI client setup (replace with your OpenAI API credentials)
-#client = OpenAI(api_key='YOUR_API_KEY')
-client = Huggingface(api_key='YOUR_API_KEY')
-# App title
-st.title("Data Generation for Classification")
-# Choice between Data Generation or Data Labeling
-mode = st.radio("Choose Task:", ["Data Generation", "Data Labeling"])
-if mode == "Data Generation":
-    # Step 1: Choose Classification Type
-    classification_type = st.radio(
-        "Select Classification Type:",
-        ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
-    )
-    # Step 2: Choose labels based on classification type
     if classification_type == "Sentiment Analysis":
-        labels = ["Positive", "Negative", "Neutral"]
     elif classification_type == "Binary Classification":
-        class1 = st.text_input("Enter First Class for Binary Classification")
-        class2 = st.text_input("Enter Second Class for Binary Classification")
-        labels = [class1, class2]
     elif classification_type == "Multi-Class Classification":
-        num_classes = st.slider("Number of Classes (Max 10):", 2, 10, 3)
-        labels = [st.text_input(f"Enter Class {i+1}") for i in range(num_classes)]
-    # Step 3: Choose the domain
-    domain = st.radio(
-        "Select Domain:",
-        ["Restaurant reviews", "E-commerce reviews", "Custom"]
-    )
     if domain == "Custom":
-        domain = st.text_input("Enter Custom Domain")
-    # Step 4: Specify example length (min and max words)
-    min_words = st.slider("Minimum Words per Example", 10, 90, 20)
-    max_words = st.slider("Maximum Words per Example", 10, 90, 40)
-    # Step 5: Ask if user wants few-shot examples
-    use_few_shot = st.checkbox("Use Few-Shot Examples?")
-    few_shot_examples = []
-    if use_few_shot:
-        num_few_shots = st.slider("Number of Few-Shot Examples (Max 5):", 1, 5, 2)
-        for i in range(num_few_shots):
-            example_text = st.text_area(f"Enter Example {i+1} Text")
-            example_label = st.selectbox(f"Select Label for Example {i+1}", labels)
-            few_shot_examples.append(f"Example: {example_text}, Label: {example_label}")
-    # Step 6: Specify the number of examples to generate
-    num_to_generate = st.number_input("Number of Examples to Generate", min_value=1, max_value=50, value=10)
-    # Step 7: Generate system prompt based on the inputs
-    system_prompt = create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels)
-    if st.button("Generate Examples"):
-        all_generated_examples = []
-        remaining_examples = num_to_generate
-        with st.spinner("Generating..."):
-            while remaining_examples > 0:
-                chunk_size = min(remaining_examples, 5)
-                try:
-                    # Add system and user messages to session state
-                    st.session_state.messages.append({"role": "system", "content": system_prompt})
-                    # Add few-shot examples to the conversation as separate user messages
-                    if few_shot_examples:
-                        for example in few_shot_examples:
-                            st.session_state.messages.append({"role": "user", "content": example})
-                    # Stream API request to generate examples
-                    stream = client.chat.completions.create(
-                        model="gpt-3.5-turbo",
-                        messages=[
-                            {"role": m["role"], "content": m["content"]}
-                            for m in st.session_state.messages
-                        ],
-                        temperature=0.7,
-                        stream=True,
-                        max_tokens=3000,
-                    )
-                    # Capture streamed response
-                    response = ""
-                    for chunk in stream:
-                        if 'content' in chunk['choices'][0]['delta']:
-                            response += chunk['choices'][0]['delta']['content']
-                    # Split response into individual examples by "Example: "
-                    generated_examples = response.split("Example: ")[1:chunk_size+1]  # Extract up to the chunk size
-                    # Clean up the extracted examples
-                    cleaned_examples = [f"Example {i+1}: {ex.strip()}" for i, ex in enumerate(generated_examples)]
-                    # Store the new examples
-                    all_generated_examples.extend(cleaned_examples)
-                    remaining_examples -= chunk_size
-                except Exception as e:
-                    st.error("Error during generation.")
-                    st.write(e)
-                    break
-        # Display all generated examples properly formatted
-        for idx, example in enumerate(all_generated_examples):
-            st.write(f"Example {idx+1}: {example.strip()}")
-        # Clear session state to avoid repetition of old prompts
-        st.session_state.messages = []  # Reset after each generation

 @author: Wedyan2023
 @email: w.s.alskaran2@gmail.com
 """
+import numpy as np
 import streamlit as st
+from openai import OpenAI
+import os
+from dotenv import load_dotenv
+load_dotenv()
+# Initialize the client
+client = OpenAI(
+    base_url="https://api-inference.huggingface.co/v1",
+    api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN')  # Token is read from the environment (.env is loaded above); do not hard-code it
+)
+# Function to reset conversation
+def reset_conversation():
+    st.session_state.conversation = []
+    st.session_state.messages = []
+    return None
+# Initialize session state for 'messages' if it doesn't exist
 if 'messages' not in st.session_state:
     st.session_state.messages = []
+# Define classification options
+classification_types = ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
+# Start with a selection between data generation or labeling
+st.sidebar.write("Choose Task:")
+task = st.sidebar.radio("Do you want to generate data or label data?", ("Data Generation", "Data Labeling"))
+# If the user selects Data Labeling
+if task == "Data Labeling":
+    st.sidebar.write("Choose Classification Type:")
+    classification_type = st.sidebar.radio("Select a classification type:", classification_types)
+    # Handle Sentiment Analysis
     if classification_type == "Sentiment Analysis":
+        st.sidebar.write("Classes: Positive, Negative, Neutral (fixed)")
+        class_labels = ["Positive", "Negative", "Neutral"]
+    # Handle Binary Classification
     elif classification_type == "Binary Classification":
+        class_1 = st.sidebar.text_input("Enter Class 1:")
+        class_2 = st.sidebar.text_input("Enter Class 2:")
+        class_labels = [class_1, class_2]
+    # Handle Multi-Class Classification
     elif classification_type == "Multi-Class Classification":
+        class_labels = []
+        for i in range(1, 11):  # Allow up to 10 classes
+            label = st.sidebar.text_input(f"Enter Class {i} (leave blank to stop):")
+            if label:
+                class_labels.append(label)
+            else:
+                break
+    # Domain selection
+    st.sidebar.write("Specify the Domain:")
+    domain = st.sidebar.radio("Choose a domain:", ("Restaurant Reviews", "E-commerce Reviews", "Custom"))
     if domain == "Custom":
+        domain = st.sidebar.text_input("Enter Custom Domain:")
+    # Specify example length
+    st.sidebar.write("Specify the Length of Examples:")
+    min_words = st.sidebar.number_input("Minimum word count (10 to 90):", 10, 90, 10)
+    max_words = st.sidebar.number_input("Maximum word count (10 to 90):", min_words, 90, 50)
+    # Few-shot examples option
+    use_few_shot = st.sidebar.radio("Do you want to use few-shot examples?", ("Yes", "No"))
+    few_shot_examples = []
+    if use_few_shot == "Yes":
+        num_examples = st.sidebar.number_input("How many few-shot examples? (1 to 5)", 1, 5, 1)
+        for i in range(num_examples):
+            example_text = st.text_area(f"Enter example {i+1}:")
+            example_label = st.selectbox(f"Select the label for example {i+1}:", class_labels)
+            few_shot_examples.append({"text": example_text, "label": example_label})
+    # Generate the system prompt based on classification type
+    if classification_type == "Sentiment Analysis":
+        system_prompt = f"You are a propositional sentiment analysis expert. Your role is to generate sentiment analysis reviews based on the data entered and few-shot examples provided, if any, for the domain '{domain}'."
+    elif classification_type == "Binary Classification":
+        system_prompt = f"You are an expert in binary classification. Your task is to label examples for the domain '{domain}' with either '{class_1}' or '{class_2}', based on the data provided."
+    else:  # Multi-Class Classification
+        system_prompt = f"You are an expert in multi-class classification. Your role is to label examples for the domain '{domain}' using the provided class labels."
+    st.sidebar.write("System Prompt:")
+    st.sidebar.write(system_prompt)
+    # Step-by-step thinking
+    st.sidebar.write("Generated Data:")
+    st.sidebar.write("Think step by step to ensure accuracy in classification.")
+    # Accept user input for generating or labeling data
+    if prompt := st.chat_input(f"Hi, I'm ready to help with {classification_type} for {domain}. Ask me a question or provide data to classify."):
+        # Display user message in chat message container
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        # Display assistant response in chat message container
+        with st.chat_message("assistant"):
+            try:
+                # Stream the response from the model
+                stream = client.chat.completions.create(
+                    model="meta-llama/Meta-Llama-3-8B-Instruct",
+                    messages=[
+                        {"role": m["role"], "content": m["content"]}
+                        for m in st.session_state.messages
+                    ],
+                    temperature=0.5,
+                    stream=True,
+                    max_tokens=3000,
+                )
+                response = st.write_stream(stream)
+            except Exception as e:
+                response = "😵‍💫 Something went wrong. Try again later."
+                st.write(response)
+        st.session_state.messages.append({"role": "assistant", "content": response})
+# If the user selects Data Generation
+else:
+    st.sidebar.write("This feature will allow you to generate new data. Coming soon!")