Wedyan2023 committed on
Commit
de05ae1
·
verified ·
1 Parent(s): 5ad0f33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -119
app.py CHANGED
@@ -6,134 +6,133 @@
6
  @author: Wedyan2023
7
  @email: w.s.alskaran2@gmail.com
8
  """
 
9
  import streamlit as st
10
- #from openai import OpenAI
11
- from huggingface import transformers
 
12
 
13
- # Initialize session state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  if 'messages' not in st.session_state:
15
  st.session_state.messages = []
16
 
17
- # Function to generate system prompt based on user inputs
18
- def create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels):
19
- system_prompt = f"You are a professional {classification_type.lower()} expert. Your role is to generate exactly {num_to_generate} data examples for {domain}. "
20
- system_prompt += f"Each example should consist of between {min_words} and {max_words} words. "
21
- system_prompt += "Use the following labels: " + ", ".join(labels) + ". Please do not add any extra commentary or explanation. "
22
- system_prompt += "Format each example like this: \nExample: <text>, Label: <label>\n"
23
- return system_prompt
24
-
25
- # OpenAI client setup (replace with your OpenAI API credentials)
26
- #client = OpenAI(api_key='YOUR_API_KEY')
27
- client = Huggingface(api_key='YOUR_API_KEY')
28
-
29
- # App title
30
- st.title("Data Generation for Classification")
31
-
32
- # Choice between Data Generation or Data Labeling
33
- mode = st.radio("Choose Task:", ["Data Generation", "Data Labeling"])
34
-
35
- if mode == "Data Generation":
36
- # Step 1: Choose Classification Type
37
- classification_type = st.radio(
38
- "Select Classification Type:",
39
- ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
40
- )
41
-
42
- # Step 2: Choose labels based on classification type
43
  if classification_type == "Sentiment Analysis":
44
- labels = ["Positive", "Negative", "Neutral"]
 
 
 
45
  elif classification_type == "Binary Classification":
46
- class1 = st.text_input("Enter First Class for Binary Classification")
47
- class2 = st.text_input("Enter Second Class for Binary Classification")
48
- labels = [class1, class2]
 
 
49
  elif classification_type == "Multi-Class Classification":
50
- num_classes = st.slider("Number of Classes (Max 10):", 2, 10, 3)
51
- labels = [st.text_input(f"Enter Class {i+1}") for i in range(num_classes)]
52
-
53
- # Step 3: Choose the domain
54
- domain = st.radio(
55
- "Select Domain:",
56
- ["Restaurant reviews", "E-commerce reviews", "Custom"]
57
- )
 
 
 
58
  if domain == "Custom":
59
- domain = st.text_input("Enter Custom Domain")
60
-
61
- # Step 4: Specify example length (min and max words)
62
- min_words = st.slider("Minimum Words per Example", 10, 90, 20)
63
- max_words = st.slider("Maximum Words per Example", 10, 90, 40)
64
-
65
- # Step 5: Ask if user wants few-shot examples
66
- use_few_shot = st.checkbox("Use Few-Shot Examples?")
67
-
68
- few_shot_examples = []
69
- if use_few_shot:
70
- num_few_shots = st.slider("Number of Few-Shot Examples (Max 5):", 1, 5, 2)
71
- for i in range(num_few_shots):
72
- example_text = st.text_area(f"Enter Example {i+1} Text")
73
- example_label = st.selectbox(f"Select Label for Example {i+1}", labels)
74
- few_shot_examples.append(f"Example: {example_text}, Label: {example_label}")
75
-
76
- # Step 6: Specify the number of examples to generate
77
- num_to_generate = st.number_input("Number of Examples to Generate", min_value=1, max_value=50, value=10)
78
-
79
- # Step 7: Generate system prompt based on the inputs
80
- system_prompt = create_system_prompt(classification_type, num_to_generate, domain, min_words, max_words, labels)
81
-
82
- if st.button("Generate Examples"):
83
- all_generated_examples = []
84
- remaining_examples = num_to_generate
85
-
86
- with st.spinner("Generating..."):
87
- while remaining_examples > 0:
88
- chunk_size = min(remaining_examples, 5)
89
- try:
90
- # Add system and user messages to session state
91
- st.session_state.messages.append({"role": "system", "content": system_prompt})
92
-
93
- # Append few-shot examples as user messages following the system prompt
94
- if few_shot_examples:
95
- for example in few_shot_examples:
96
- st.session_state.messages.append({"role": "user", "content": example})
97
-
98
- # Stream API request to generate examples
99
- stream = client.chat.completions.create(
100
- model="gpt-3.5-turbo",
101
- messages=[
102
- {"role": m["role"], "content": m["content"]}
103
- for m in st.session_state.messages
104
- ],
105
- temperature=0.7,
106
- stream=True,
107
- max_tokens=3000,
108
- )
109
-
110
- # Capture streamed response
111
- response = ""
112
- for chunk in stream:
113
- if 'content' in chunk['choices'][0]['delta']:
114
- response += chunk['choices'][0]['delta']['content']
115
-
116
- # Split response into individual examples by "Example: "
117
- generated_examples = response.split("Example: ")[1:chunk_size+1] # Extract up to the chunk size
118
-
119
- # Clean up the extracted examples
120
- cleaned_examples = [f"Example {i+1}: {ex.strip()}" for i, ex in enumerate(generated_examples)]
121
-
122
- # Store the new examples
123
- all_generated_examples.extend(cleaned_examples)
124
- remaining_examples -= chunk_size
125
-
126
- except Exception as e:
127
- st.error("Error during generation.")
128
- st.write(e)
129
- break
130
-
131
- # Display all generated examples properly formatted
132
- for idx, example in enumerate(all_generated_examples):
133
- st.write(f"Example {idx+1}: {example.strip()}")
134
-
135
- # Clear session state to avoid repetition of old prompts
136
- st.session_state.messages = [] # Reset after each generation
137
 
 
 
 
 
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
 
6
  @author: Wedyan2023
7
  @email: w.s.alskaran2@gmail.com
8
  """
9
+ import numpy as np
10
  import streamlit as st
11
+ from openai import OpenAI
12
+ import os
13
+ from dotenv import load_dotenv
14
 
15
+ load_dotenv()
16
+
17
+ # Initialize the client
18
+ client = OpenAI(
19
+ base_url="https://api-inference.huggingface.co/v1",
20
+ api_key=os.environ.get('HUGGINGFACEHUB_API_TOKEN') # Token is read from the environment (.env is loaded by load_dotenv() above)
21
+ )
22
+
23
+ # Function to reset conversation
24
+ def reset_conversation():
25
+ st.session_state.conversation = []
26
+ st.session_state.messages = []
27
+ return None
28
+
29
+ # Initialize session state for 'messages' if it doesn't exist
30
  if 'messages' not in st.session_state:
31
  st.session_state.messages = []
32
 
33
+ # Define classification options
34
+ classification_types = ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
35
+
36
+ # Start with a selection between data generation or labeling
37
+ st.sidebar.write("Choose Task:")
38
+ task = st.sidebar.radio("Do you want to generate data or label data?", ("Data Generation", "Data Labeling"))
39
+
40
+ # If the user selects Data Labeling
41
+ if task == "Data Labeling":
42
+ st.sidebar.write("Choose Classification Type:")
43
+ classification_type = st.sidebar.radio("Select a classification type:", classification_types)
44
+
45
+ # Handle Sentiment Analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  if classification_type == "Sentiment Analysis":
47
+ st.sidebar.write("Classes: Positive, Negative, Neutral (fixed)")
48
+ class_labels = ["Positive", "Negative", "Neutral"]
49
+
50
+ # Handle Binary Classification
51
  elif classification_type == "Binary Classification":
52
+ class_1 = st.sidebar.text_input("Enter Class 1:")
53
+ class_2 = st.sidebar.text_input("Enter Class 2:")
54
+ class_labels = [class_1, class_2]
55
+
56
+ # Handle Multi-Class Classification
57
  elif classification_type == "Multi-Class Classification":
58
+ class_labels = []
59
+ for i in range(1, 11): # Allow up to 10 classes
60
+ label = st.sidebar.text_input(f"Enter Class {i} (leave blank to stop):")
61
+ if label:
62
+ class_labels.append(label)
63
+ else:
64
+ break
65
+
66
+ # Domain selection
67
+ st.sidebar.write("Specify the Domain:")
68
+ domain = st.sidebar.radio("Choose a domain:", ("Restaurant Reviews", "E-commerce Reviews", "Custom"))
69
  if domain == "Custom":
70
+ domain = st.sidebar.text_input("Enter Custom Domain:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ # Specify example length
73
+ st.sidebar.write("Specify the Length of Examples:")
74
+ min_words = st.sidebar.number_input("Minimum word count (10 to 90):", 10, 90, 10)
75
+ max_words = st.sidebar.number_input("Maximum word count (10 to 90):", min_words, 90, 50)
76
 
77
+ # Few-shot examples option
78
+ use_few_shot = st.sidebar.radio("Do you want to use few-shot examples?", ("Yes", "No"))
79
+ few_shot_examples = []
80
+ if use_few_shot == "Yes":
81
+ num_examples = st.sidebar.number_input("How many few-shot examples? (1 to 5)", 1, 5, 1)
82
+ for i in range(num_examples):
83
+ example_text = st.text_area(f"Enter example {i+1}:")
84
+ example_label = st.selectbox(f"Select the label for example {i+1}:", class_labels)
85
+ few_shot_examples.append({"text": example_text, "label": example_label})
86
+
87
+ # Generate the system prompt based on classification type
88
+ if classification_type == "Sentiment Analysis":
89
+ system_prompt = f"You are a propositional sentiment analysis expert. Your role is to generate sentiment analysis reviews based on the data entered and few-shot examples provided, if any, for the domain '{domain}'."
90
+ elif classification_type == "Binary Classification":
91
+ system_prompt = f"You are an expert in binary classification. Your task is to label examples for the domain '{domain}' with either '{class_1}' or '{class_2}', based on the data provided."
92
+ else: # Multi-Class Classification
93
+ system_prompt = f"You are an expert in multi-class classification. Your role is to label examples for the domain '{domain}' using the provided class labels."
94
+
95
+ st.sidebar.write("System Prompt:")
96
+ st.sidebar.write(system_prompt)
97
+
98
+ # Step-by-step thinking
99
+ st.sidebar.write("Generated Data:")
100
+ st.sidebar.write("Think step by step to ensure accuracy in classification.")
101
+
102
+ # Accept user input for generating or labeling data
103
+ if prompt := st.chat_input(f"Hi, I'm ready to help with {classification_type} for {domain}. Ask me a question or provide data to classify."):
104
+
105
+ # Display user message in chat message container
106
+ with st.chat_message("user"):
107
+ st.markdown(prompt)
108
+ # Add user message to chat history
109
+ st.session_state.messages.append({"role": "user", "content": prompt})
110
+
111
+ # Display assistant response in chat message container
112
+ with st.chat_message("assistant"):
113
+
114
+ try:
115
+ # Stream the response from the model
116
+ stream = client.chat.completions.create(
117
+ model="meta-llama/Meta-Llama-3-8B-Instruct",
118
+ messages=[
119
+ {"role": m["role"], "content": m["content"]}
120
+ for m in st.session_state.messages
121
+ ],
122
+ temperature=0.5,
123
+ stream=True,
124
+ max_tokens=3000,
125
+ )
126
+
127
+ response = st.write_stream(stream)
128
+
129
+ except Exception as e:
130
+ response = "😵‍💫 Something went wrong. Try again later."
131
+ st.write(response)
132
+
133
+ st.session_state.messages.append({"role": "assistant", "content": response})
134
+
135
+ # If the user selects Data Generation
136
+ else:
137
+ st.sidebar.write("This feature will allow you to generate new data. Coming soon!")
138