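# NOTE: the original first three lines ("Spaces:", "Sleeping", "Sleeping") were a
# Hugging Face Spaces status banner captured when this file was scraped from the
# web page; they are not part of the program.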
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
from groq import Groq | |
import json | |
import time | |
import re | |
from concurrent.futures import ThreadPoolExecutor | |
from io import StringIO | |
class CustomConversationIntentClassifier:
    """Rule-based intent classifier for HR/Employee conversation messages.

    Intents are organised as ``{main_category: {subcategory: [pattern, ...]}}``.
    Each pattern is a regular-expression fragment that is matched as a whole
    word/phrase against the normalised message text.  The hierarchy is kept in
    ``st.session_state['custom_intents']`` so edits survive Streamlit reruns.
    """

    def __init__(self):
        # Seed the session with the built-in hierarchy once; afterwards reuse
        # (and share) whatever the session currently holds.
        if 'custom_intents' not in st.session_state:
            st.session_state['custom_intents'] = self._default_hierarchy()
        self.intent_hierarchy = st.session_state['custom_intents']

    @staticmethod
    def _default_hierarchy():
        """Return a fresh copy of the built-in intent hierarchy."""
        return {
            "A. Communication & Response Intent": {
                "Information-Seeking": [
                    r"what", r"how", r"why", r"when", r"where", r"who",
                    r"want to know", r"tell me about", r"can you explain"
                ],
                "Clarification": [
                    r"explain", r"clarify", r"what do you mean", r"repeat",
                    r"didn't understand", r"could you elaborate"
                ],
                "Agreement": [
                    r"yes", r"agree", r"makes sense", r"exactly",
                    r"that's right", r"correct"
                ],
                "Disagreement": [
                    r"no", r"don't agree", r"incorrect", r"that's wrong",
                    r"i disagree", r"not correct"
                ],
                "Acknowledgment": [
                    r"got it", r"i see", r"understood", r"noted",
                    r"alright", r"okay"
                ],
                "Apology": [
                    r"sorry", r"apologize", r"my mistake", r"my fault",
                    r"i apologize", r"regret"
                ],
                "Appreciation": [
                    r"thank you", r"thanks", r"appreciate", r"grateful",
                    r"thank you for your help"
                ],
                "Urgency": [
                    r"asap", r"urgent", r"immediately", r"right away",
                    r"emergency", r"as soon as possible"
                ]
            },
            "B. Decision-Making Intent": {
                "Exploration": [
                    r"consider", r"explore", r"what if", r"options",
                    r"alternatives", r"possibilities"
                ],
                "Commitment": [
                    r"decided", r"will do", r"i've made my decision",
                    r"going to", r"i will", r"definitely"
                ],
                "Indecision": [
                    r"not sure", r"unsure", r"undecided", r"can't decide",
                    r"torn between", r"haven't decided"
                ],
                "Delegation": [
                    r"can you handle", r"take care of", r"assign",
                    r"please handle", r"can you manage"
                ],
                "Evaluation": [
                    r"compare", r"evaluate", r"assess", r"weigh",
                    r"pros and cons", r"better option"
                ]
            },
            "C. Emotional & Psychological Intent": {
                "Seeking Validation": [
                    r"am i right", r"is this correct", r"does this make sense",
                    r"what do you think", r"how did i do"
                ],
                "Seeking Support": [
                    r"need help", r"support", r"assist", r"guide",
                    r"can you help", r"struggling with"
                ],
                "Expressing Frustration": [
                    r"annoying", r"frustrated", r"irritating", r"fed up",
                    r"this is difficult", r"getting nowhere"
                ],
                "Venting": [
                    r"just need to", r"off my chest", r"let me tell you",
                    r"you won't believe", r"so tired of"
                ],
                "Seeking Comfort": [
                    r"feeling down", r"upset", r"worried", r"anxious",
                    r"stressed", r"not feeling great"
                ]
            },
            "D. Social & Relationship Intent": {
                "Social Bonding": [
                    r"coffee", r"lunch", r"catch up", r"get together",
                    r"hang out", r"meet up"
                ],
                "Networking": [
                    r"connect", r"network", r"introduction", r"link up",
                    r"get in touch", r"reach out"
                ],
                "Collaboration": [
                    r"work together", r"collaborate", r"team up",
                    r"join forces", r"partner"
                ],
                "Teaching": [
                    r"let me show", r"teach", r"explain how",
                    r"guide you through", r"help you understand"
                ],
                "Testing Boundaries": [
                    r"be honest", r"frank", r"between us",
                    r"confidential", r"keep this private"
                ]
            },
            "E. Action-Oriented Intent": {
                "Requesting Action": [
                    r"can you", r"please", r"would you", r"need you to",
                    r"send", r"do this"
                ],
                "Offering Help": [
                    r"can i help", r"let me help", r"assistance",
                    r"i can do", r"happy to help"
                ],
                "Providing Feedback": [
                    r"feedback", r"suggestion", r"think about",
                    r"my opinion", r"recommend"
                ],
                "Expressing Intent to Quit": [
                    r"quit", r"give up", r"stop", r"abandon",
                    r"no longer want", r"discontinue"
                ],
                "Confirming Action": [
                    r"is this done", r"completed", r"finished",
                    r"status", r"update"
                ]
            }
        }

    def add_intent_category(self, main_category, subcategory, patterns):
        """Add (or overwrite) ``subcategory`` under ``main_category``.

        ``patterns`` is the list of regex fragments for the subcategory.  The
        updated hierarchy is written back to the Streamlit session state.
        """
        self.intent_hierarchy.setdefault(main_category, {})[subcategory] = patterns
        st.session_state['custom_intents'] = self.intent_hierarchy

    def preprocess_text(self, text):
        """Normalise a message for matching.

        Missing values (``None``/NaN) become ``""``.  Otherwise the text is
        lower-cased and punctuation is replaced by spaces.  Apostrophes are
        kept (typographic ones are folded to ``'``) because many patterns are
        contractions such as ``didn't understand``; stripping them made those
        patterns impossible to match.
        """
        if pd.isna(text):
            return ""
        text = str(text).lower().replace("\u2019", "'")
        return re.sub(r"[^\w\s']", ' ', text)

    @staticmethod
    def _pattern_matches(pattern, text):
        """Return True if ``pattern`` occurs in ``text`` on word boundaries.

        A pattern that is not a valid regular expression (possible for
        user-supplied or imported patterns) is retried as a literal phrase
        instead of raising and aborting the whole classification.
        """
        pattern = str(pattern)
        try:
            found = re.search(r'\b' + pattern + r'\b', text, re.IGNORECASE)
        except re.error:
            found = re.search(r'\b' + re.escape(pattern) + r'\b', text, re.IGNORECASE)
        return found is not None

    def classify_intent(self, text):
        """Classify one message.

        Returns a list of ``{'main_category', 'subcategory'}`` dicts holding,
        for every main category with a hit, the first subcategory (in
        hierarchy order) whose patterns match.  When nothing matches the list
        contains a single ``Unclassified`` / ``Other`` entry.
        """
        text = self.preprocess_text(text)
        results = []
        for main_category, subcategories in self.intent_hierarchy.items():
            for subcategory, patterns in subcategories.items():
                if any(self._pattern_matches(pattern, text) for pattern in patterns):
                    results.append({
                        'main_category': main_category,
                        'subcategory': subcategory
                    })
                    # Only the first matching subcategory per main category is
                    # reported.  Breaking here (rather than comparing names in
                    # ``results[-1]``) avoids stopping early when an earlier
                    # main category matched a same-named subcategory.
                    break
        if not results:
            return [{'main_category': 'Unclassified', 'subcategory': 'Other'}]
        return results

    def process_conversation(self, df):
        """Classify the ``HR`` and ``Employee`` columns of ``df``.

        Returns a new DataFrame with the original messages plus the main
        category and subcategory of each message's first detected intent.
        """
        hr_primary = [self.classify_intent(msg)[0] for msg in df['HR']]
        employee_primary = [self.classify_intent(msg)[0] for msg in df['Employee']]
        return pd.DataFrame({
            'HR_Message': df['HR'],
            'HR_Main_Category': [intent['main_category'] for intent in hr_primary],
            'HR_Subcategory': [intent['subcategory'] for intent in hr_primary],
            'Employee_Message': df['Employee'],
            'Employee_Main_Category': [intent['main_category'] for intent in employee_primary],
            'Employee_Subcategory': [intent['subcategory'] for intent in employee_primary]
        })
class EnhancedConversationAnalyzer:
    """LLM-backed sentiment / psychological analysis of conversation messages.

    Each message is sent to a Groq chat-completion model that is asked to
    answer with a JSON object; the answer is parsed, validated and clamped to
    the documented ranges before use.
    """

    def __init__(self, groq_api_key):
        self.client = Groq(api_key=groq_api_key)
        # System prompt for consistent analysis
        self.system_prompt = (
            "You are an expert conversation analyzer focusing on workplace communications.\n"
            "Analyze conversations for sentiment, psychological aspects, and satisfaction levels.\n"
            "Always respond with valid JSON containing numerical scores and brief explanations."
        )

    def clean_json_response(self, response_text):
        """Extract the JSON object embedded in ``response_text``.

        Takes the span from the first ``{`` to the last ``}`` and parses it.
        Returns the parsed dict, or the empty analysis structure when the text
        is missing, contains no such span, or is not valid JSON.
        """
        if not isinstance(response_text, str):
            return self.get_empty_analysis()
        start = response_text.find('{')
        end = response_text.rfind('}')
        if start == -1 or end < start:
            return self.get_empty_analysis()
        try:
            parsed = json.loads(response_text[start:end + 1])
        except json.JSONDecodeError:
            return self.get_empty_analysis()
        return parsed if isinstance(parsed, dict) else self.get_empty_analysis()

    def analyze_message(self, message, role):
        """Analyze a single message using the Groq LLM.

        ``role`` is inserted into the prompt (the callers in this class pass
        ``'HR'`` or ``'Employee'``).  Missing or blank messages are not sent to
        the API.  Any API failure is reported with ``st.error`` and yields the
        empty analysis structure.
        """
        if pd.isna(message) or not str(message).strip():
            return self.get_empty_analysis()
        prompt = (
            f"Analyze this {role} message and respond ONLY with a JSON object:\n"
            f'Message: "{message}"\n'
            "Required JSON format:\n"
            "{\n"
            '"sentiment": {\n'
            '"compound": <float between -1 and 1>,\n'
            '"positive": <float between 0 and 1>,\n'
            '"negative": <float between 0 and 1>\n'
            "},\n"
            '"psychological": {\n'
            '"stress": <integer between 0 and 10>,\n'
            '"confidence": <integer between 0 and 10>,\n'
            '"frustration": <integer between 0 and 10>\n'
            "},\n"
            '"satisfaction": <integer between 0 and 100>,\n'
            '"explanation": "<brief analysis, max 50 words>"\n'
            "}\n"
            "Ensure the response is ONLY the JSON object with no additional text."
        )
        try:
            completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
                temperature=0.1,
            )
            # Get, clean and sanitize the response
            response_text = completion.choices[0].message.content
            return self.validate_analysis(self.clean_json_response(response_text))
        except Exception as e:
            # Boundary with an external service: report and degrade gracefully.
            st.error(f"Error analyzing message: {str(e)}")
            return self.get_empty_analysis()

    @staticmethod
    def _coerce(value, low, high, cast, default):
        """Convert ``value`` to a number clamped to ``[low, high]``.

        Returns ``default`` when the value is missing, non-numeric or NaN, so
        a single malformed field cannot invalidate the others.
        """
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        if number != number:  # NaN is the only value not equal to itself
            return default
        return cast(max(low, min(high, number)))

    def validate_analysis(self, analysis):
        """Validate and sanitize analysis values.

        Every field is coerced independently and clamped to its range
        (sentiment compound -1..1, positive/negative 0..1, psychological
        levels 0..10, satisfaction 0..100).  The explanation is limited to 50
        words, matching the limit requested in the prompt.  A non-dict
        ``analysis`` yields the empty analysis structure.
        """
        template = self.get_empty_analysis()
        if not isinstance(analysis, dict):
            return template

        sentiment = analysis.get('sentiment')
        if not isinstance(sentiment, dict):
            sentiment = {}
        template['sentiment']['compound'] = self._coerce(sentiment.get('compound'), -1, 1, float, 0.0)
        template['sentiment']['positive'] = self._coerce(sentiment.get('positive'), 0, 1, float, 0.0)
        template['sentiment']['negative'] = self._coerce(sentiment.get('negative'), 0, 1, float, 0.0)

        psychological = analysis.get('psychological')
        if not isinstance(psychological, dict):
            psychological = {}
        for field in ('stress', 'confidence', 'frustration'):
            template['psychological'][field] = self._coerce(psychological.get(field), 0, 10, int, 0)

        template['satisfaction'] = self._coerce(analysis.get('satisfaction'), 0, 100, int, 0)

        explanation = analysis.get('explanation')
        words = str(explanation).split() if explanation is not None else []
        template['explanation'] = " ".join(words[:50])
        return template

    def get_empty_analysis(self):
        """Return empty analysis structure"""
        return {
            "sentiment": {"compound": 0.0, "positive": 0.0, "negative": 0.0},
            "psychological": {"stress": 0, "confidence": 0, "frustration": 0},
            "satisfaction": 0,
            "explanation": "No message to analyze"
        }

    @staticmethod
    def _flatten_analysis(prefix, message, analysis):
        """Flatten one analysis dict into ``<prefix>_*`` result columns."""
        return {
            f'{prefix}_Message': message,
            f'{prefix}_Sentiment_Compound': analysis['sentiment']['compound'],
            f'{prefix}_Sentiment_Positive': analysis['sentiment']['positive'],
            f'{prefix}_Sentiment_Negative': analysis['sentiment']['negative'],
            f'{prefix}_Satisfaction_Score': analysis['satisfaction'],
            f'{prefix}_Stress_Level': analysis['psychological']['stress'],
            f'{prefix}_Confidence_Level': analysis['psychological']['confidence'],
            f'{prefix}_Frustration_Level': analysis['psychological']['frustration'],
            f'{prefix}_Analysis': analysis['explanation'],
        }

    def process_conversation(self, df):
        """Process conversation with LLM analysis.

        Analyzes the ``HR`` and ``Employee`` message of each row (the two
        requests of a row run concurrently) while updating a Streamlit
        progress bar, and returns one result row per input row.
        """
        results = []
        total_rows = len(df)
        progress_bar = st.progress(0)
        with ThreadPoolExecutor(max_workers=4) as executor:
            # Count rows by position: the DataFrame index labels need not be
            # 0..n-1, and st.progress only accepts values within [0.0, 1.0].
            for position, (_, row) in enumerate(df.iterrows(), start=1):
                progress_bar.progress(position / total_rows)
                hr_future = executor.submit(self.analyze_message, row['HR'], 'HR')
                emp_future = executor.submit(self.analyze_message, row['Employee'], 'Employee')
                results.append({
                    **self._flatten_analysis('HR', row['HR'], hr_future.result()),
                    **self._flatten_analysis('Employee', row['Employee'], emp_future.result()),
                })
                # Add a small delay to avoid rate limits
                time.sleep(0.1)
        progress_bar.empty()
        if not results:
            # Keep the column layout even when there were no rows to analyze.
            empty = self.get_empty_analysis()
            columns = (list(self._flatten_analysis('HR', None, empty))
                       + list(self._flatten_analysis('Employee', None, empty)))
            return pd.DataFrame(columns=columns)
        return pd.DataFrame(results)
def create_intent_distribution_plot(df, role):
    """Build a bar chart of main-intent-category frequencies for ``role``.

    Reads the ``<role>_Main_Category`` column of ``df`` and returns a Plotly
    Express bar figure with one bar per category.
    """
    category_column = f'{role}_Main_Category'
    counts = df[category_column].value_counts()
    return px.bar(
        x=counts.index,
        y=counts.values,
        title=f'Intent Distribution for {role}',
        labels={'x': 'Intent Category', 'y': 'Count'},
    )
def _is_valid_intent_hierarchy(candidate):
    """Return True if ``candidate`` is ``{main: {subcategory: [str, ...]}}``."""
    if not isinstance(candidate, dict):
        return False
    for subcategories in candidate.values():
        if not isinstance(subcategories, dict):
            return False
        for patterns in subcategories.values():
            if not isinstance(patterns, list):
                return False
            if not all(isinstance(pattern, str) for pattern in patterns):
                return False
    return True


def _invalid_regex_patterns(patterns):
    """Return the patterns that do not compile as word-bounded regexes."""
    rejected = []
    for pattern in patterns:
        try:
            re.compile(r'\b' + pattern + r'\b')
        except re.error:
            rejected.append(pattern)
    return rejected


def intent_management_ui():
    """Render the sidebar controls for adding, viewing, exporting and importing intents.

    Operates on the classifier stored in ``st.session_state.classifier`` and
    mirrors every change into ``st.session_state['custom_intents']``.
    """
    st.sidebar.header("Custom Intent Management")
    classifier = st.session_state.classifier

    # Add new intent category
    with st.sidebar.expander("Add New Intent Category"):
        main_category = st.text_input("Main Category (e.g., F. Custom Intent)")
        subcategory = st.text_input("Subcategory (e.g., Custom Type)")
        patterns = st.text_area("Patterns (one per line)")
        if st.button("Add Intent"):
            pattern_list = [p.strip() for p in patterns.split('\n') if p.strip()]
            rejected = _invalid_regex_patterns(pattern_list)
            if not (main_category and subcategory and pattern_list):
                # A whitespace-only pattern box would otherwise create a
                # subcategory that can never match anything.
                st.warning("Please provide a main category, a subcategory and at least one pattern.")
            elif rejected:
                st.error(f"Invalid regular expression(s): {', '.join(rejected)}")
            else:
                classifier.add_intent_category(main_category, subcategory, pattern_list)
                st.success(f"Added new intent: {main_category} - {subcategory}")

    # View current intents
    with st.sidebar.expander("View Current Intents"):
        st.json(classifier.intent_hierarchy)

    # Export/Import intents
    with st.sidebar.expander("Export/Import Intents"):
        if st.button("Export Intents"):
            json_str = json.dumps(classifier.intent_hierarchy, indent=2)
            st.download_button(
                label="Download Intents JSON",
                data=json_str,
                file_name="custom_intents.json",
                mime="application/json"
            )

        uploaded_json = st.file_uploader("Import Intents JSON", type="json")
        if uploaded_json is None:
            # Uploader cleared: allow the same file to be imported again later.
            st.session_state.pop('_imported_intents_raw', None)
        else:
            raw = uploaded_json.getvalue()
            # The uploader keeps returning the file on every rerun.  Import it
            # only once; re-importing each time would silently discard intents
            # added after the upload.
            if st.session_state.get('_imported_intents_raw') != raw:
                try:
                    new_intents = json.loads(raw)
                    if not _is_valid_intent_hierarchy(new_intents):
                        raise ValueError(
                            "expected an object of the form "
                            "{main category: {subcategory: [patterns]}}"
                        )
                    classifier.intent_hierarchy = new_intents
                    st.session_state['custom_intents'] = new_intents
                    st.session_state['_imported_intents_raw'] = raw
                    st.success("Successfully imported intents")
                except ValueError as e:
                    # Covers malformed JSON, undecodable bytes and wrong shape.
                    st.error(f"Error importing intents: {str(e)}")
def _render_intent_section(df):
    """Classify intents for ``df``, render table, charts and download; return the results."""
    with st.expander("Intent Classification Results"):
        with st.spinner("Classifying intents..."):
            intent_results = st.session_state.classifier.process_conversation(df)
        st.dataframe(intent_results)
        st.subheader("Intent Distribution")
        for role in ('HR', 'Employee'):
            st.plotly_chart(create_intent_distribution_plot(intent_results, role))
        # Download intent results
        st.download_button(
            label="Download intent classification results as CSV",
            data=intent_results.to_csv(index=False),
            file_name="classified_conversations.csv",
            mime="text/csv"
        )
    return intent_results


def _render_sentiment_section(df, groq_api_key, cache_key):
    """Run (or reuse) the LLM sentiment analysis for ``df`` and render it; return the results."""
    with st.expander("Sentiment Analysis Results"):
        rerun_requested = st.button("Re-run sentiment analysis")
        cached = st.session_state.get('sentiment_cache')
        if cached is not None and cached['key'] == cache_key and not rerun_requested:
            # Streamlit re-executes the script on every widget interaction
            # (including download clicks); reuse the previous results instead
            # of repeating every LLM request.
            sentiment_results = cached['results']
        else:
            analyzer = EnhancedConversationAnalyzer(groq_api_key)
            with st.spinner("Analyzing sentiments using AI... This may take a few minutes."):
                sentiment_results = analyzer.process_conversation(df)
            st.session_state['sentiment_cache'] = {'key': cache_key, 'results': sentiment_results}

        # Display sentiment summary metrics
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric(
                "Average HR Satisfaction",
                f"{sentiment_results['HR_Satisfaction_Score'].mean():.1f}%"
            )
        with col2:
            st.metric(
                "Average Employee Satisfaction",
                f"{sentiment_results['Employee_Satisfaction_Score'].mean():.1f}%"
            )
        with col3:
            st.metric(
                "Overall Sentiment",
                f"{sentiment_results['Employee_Sentiment_Compound'].mean():.2f}"
            )

        # Display sentiment visualizations
        sentiment_fig = px.line(
            sentiment_results,
            y=['HR_Sentiment_Compound', 'Employee_Sentiment_Compound'],
            title='Sentiment Trends',
            labels={'value': 'Sentiment Score', 'index': 'Message Number'}
        )
        st.plotly_chart(sentiment_fig)
        satisfaction_fig = px.line(
            sentiment_results,
            y=['HR_Satisfaction_Score', 'Employee_Satisfaction_Score'],
            title='Satisfaction Score Trends',
            labels={'value': 'Satisfaction Score', 'index': 'Message Number'}
        )
        st.plotly_chart(satisfaction_fig)

        # Display detailed sentiment results
        st.subheader("Detailed Sentiment Analysis")
        st.dataframe(sentiment_results)

        # Download sentiment results
        st.download_button(
            label="Download sentiment analysis results as CSV",
            data=sentiment_results.to_csv(index=False),
            file_name="sentiment_analysis.csv",
            mime="text/csv"
        )
    return sentiment_results


def _render_combined_section(intent_results, sentiment_results):
    """Render the merged intent + sentiment table, or a hint when sentiment is missing."""
    st.subheader("Combined Analysis Results")
    if sentiment_results is None:
        st.info("Add your Groq API key and run sentiment analysis to get combined results.")
        return
    # Keep only one copy of the messages: start from the intent table and add
    # every sentiment column except the duplicated message columns.
    combined_results = intent_results.copy()
    for col in sentiment_results.columns:
        if col not in ('HR_Message', 'Employee_Message'):
            combined_results[col] = sentiment_results[col]
    st.write("Preview of combined results:")
    st.dataframe(combined_results.head())
    # Download combined results
    st.download_button(
        label="Download combined analysis results as CSV",
        data=combined_results.to_csv(index=False),
        file_name="combined_analysis.csv",
        mime="text/csv",
        key="combined_download"
    )


def main():
    """Streamlit entry point: upload a CSV, classify intents, optionally run LLM sentiment analysis.

    The CSV must contain ``HR`` and ``Employee`` columns.  Sentiment analysis
    runs only when a Groq API key is entered; its results are cached in the
    session per (file content, API key) so that reruns do not repeat the LLM
    calls.
    """
    st.title("Comprehensive Conversation Analyzer")
    st.write("Upload a CSV file to analyze conversations using intent classification and sentiment analysis.")

    # Initialize intent classifier
    if 'classifier' not in st.session_state:
        st.session_state.classifier = CustomConversationIntentClassifier()

    # Show intent management UI in sidebar
    intent_management_ui()

    # Groq API key input for sentiment analysis
    groq_api_key = st.text_input("Enter your Groq API key for sentiment analysis", type="password")

    # File upload
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    try:
        # Only a hash is kept as the cache key, so neither the file content
        # nor the API key is duplicated into session state.
        cache_key = hash((uploaded_file.name, uploaded_file.getvalue(), groq_api_key))
        df = pd.read_csv(uploaded_file)
        if 'HR' not in df.columns or 'Employee' not in df.columns:
            st.error("CSV file must contain 'HR' and 'Employee' columns!")
            return

        st.subheader("Sample of Original Data")
        st.dataframe(df.head())

        intent_results = _render_intent_section(df)

        sentiment_results = None
        if groq_api_key:
            sentiment_results = _render_sentiment_section(df, groq_api_key, cache_key)
        else:
            st.warning("Please enter your Groq API key to perform sentiment analysis.")

        if intent_results is not None:
            _render_combined_section(intent_results, sentiment_results)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user instead of
        # showing a raw traceback.
        st.error(f"An error occurred: {str(e)}")
# Script entry point: run the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()