# SentimentAnalysis / chatbot.py
# Author: KrSharangrav
# Last commit: da72d89 — "more emojis added"
import os
import re
import streamlit as st
import google.generativeai as genai
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from db import get_entry_by_index, get_dataset_summary
# Configure the Gemini API key (expected in the "gemini_api" environment
# variable, e.g. via Hugging Face Secrets).
GEMINI_API_KEY = os.getenv("gemini_api")
if not GEMINI_API_KEY:
    # Surface the misconfiguration in the Streamlit UI instead of crashing.
    st.error("⚠️ Google API key is missing! Set it in Hugging Face Secrets.")
else:
    genai.configure(api_key=GEMINI_API_KEY)
# Load the pre-trained Twitter RoBERTa sentiment model once at startup.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    # Wrap model + tokenizer in a reusable HF pipeline.
    sentiment_pipeline = pipeline(
        "sentiment-analysis", model=model, tokenizer=tokenizer
    )
except Exception as e:
    # Report load failures in the UI; downstream calls will raise NameError,
    # which the per-call try/except blocks convert to error strings.
    st.error(f"❌ Error loading sentiment model: {e}")
# Load the zero-shot topic-classification model once at startup.
try:
    topic_pipeline = pipeline(
        "zero-shot-classification", model="facebook/bart-large-mnli"
    )
except Exception as e:
    # Report load failures in the UI rather than aborting the app.
    st.error(f"❌ Error loading topic extraction model: {e}")
# Candidate labels for zero-shot topic classification.
# NOTE: several labels previously contained mojibake (UTF-8 emoji bytes
# mis-decoded through a legacy codepage, e.g. "πŸ’»"); restored to the
# intended emoji so the label text fed to the classifier is clean.
TOPIC_LABELS = [
    "Technology 💻", "Politics 🏛️", "Business 💼", "Sports ⚽",
    "Entertainment 🎭", "Health 🩺", "Science 🔬", "Education 🎓",
    "Finance 💰", "Travel ✈️", "Food 🍔", "Environment 🌱",
    "Culture 🎎", "History 🏺", "Art 🎨", "Literature 📚",
    "Automotive 🚗", "Law ⚖️", "Music 🎡", "Movies 🎬",
]
def analyze_sentiment(text):
    """Classify *text* with the RoBERTa sentiment pipeline.

    Returns a ``(label, confidence)`` tuple where ``label`` is a
    human-readable sentiment string and ``confidence`` is the model score.
    On any failure the first element is an error message and the second
    is None (mirrors extract_topic's error contract).
    """
    try:
        result = sentiment_pipeline(text)[0]
        # Map the model's raw class names to display strings.
        sentiment_mapping = {
            "LABEL_0": "😞 Negative",
            "LABEL_1": "😐 Neutral",
            # Was mojibake ("πŸ˜„ Positive"); restored the intended emoji.
            "LABEL_2": "😄 Positive",
        }
        return sentiment_mapping.get(result["label"], "Unknown"), result["score"]
    except Exception as e:
        return f"Error analyzing sentiment: {e}", None
def extract_topic(text):
    """Zero-shot classify *text* against TOPIC_LABELS.

    Returns ``(top_label, confidence)`` for the highest-scoring label;
    on failure returns an error message and None.
    """
    try:
        outcome = topic_pipeline(text, TOPIC_LABELS)
        # The pipeline returns labels/scores sorted best-first.
        return outcome["labels"][0], outcome["scores"][0]
    except Exception as e:
        return f"Error extracting topic: {e}", None
# Matches prompts such as "data entry 1" or "entry 12" (case-insensitive);
# compiled once since it runs on every chatbot query.
_ENTRY_PATTERN = re.compile(r'(data entry|entry)\s+(\d+)', re.IGNORECASE)


def extract_entry_index(prompt):
    """Return the 0-based dataset index requested in *prompt*, or None.

    Prompts use 1-based numbering, so "data entry 1" yields 0.  "entry 0"
    now returns None instead of -1 — previously the -1 could silently
    address the LAST document via negative indexing in the DB lookup.
    """
    match = _ENTRY_PATTERN.search(prompt)
    if not match:
        return None
    index = int(match.group(2)) - 1  # convert to 0-based index
    return index if index >= 0 else None
# Helper: Detect if the query is asking for a specific dataset entry.
def is_entry_query(prompt):
    """Return ``(True, index)`` when *prompt* names a dataset entry,
    otherwise ``(False, None)``."""
    index = extract_entry_index(prompt)
    return (True, index) if index is not None else (False, None)
# Phrases that mark a question about the dataset as a whole.
_DATASET_KEYWORDS = (
    "dataset summary",
    "total tweets",
    "sentiment distribution",
    "overall dataset",
    "data overview",
    "data summary",
)


def is_basic_dataset_question(prompt):
    """Return True when *prompt* asks a general dataset-overview question."""
    lowered = prompt.lower()
    return any(keyword in lowered for keyword in _DATASET_KEYWORDS)
def chatbot_response(user_prompt):
    """Answer *user_prompt* and return a 5-tuple:
    ``(response_text, sentiment_label, sentiment_confidence,
    topic_label, topic_confidence)``.

    Empty prompts return all Nones.  On any exception the first element
    is an error message and the remaining four are None.
    """
    if not user_prompt:
        return None, None, None, None, None
    try:
        # Branch 1: dataset-level questions are answered from the MongoDB summary.
        if is_basic_dataset_question(user_prompt):
            summary = get_dataset_summary()
            reply = "Dataset Summary:\n" + summary
            sent_label, sent_conf = analyze_sentiment(summary)
            top_label, top_conf = extract_topic(summary)
            return reply, sent_label, sent_conf, top_label, top_conf

        # Branch 2: questions about a specific entry are looked up by index.
        wants_entry, index = is_entry_query(user_prompt)
        if wants_entry:
            entry = get_entry_by_index(index)
            if entry is None:
                return "❌ No entry found for the requested index.", None, None, None, None
            entry_text = entry.get("text", "No text available.")
            # Static breakdown message, one field per paragraph.
            reply = (
                "Let's break down this MongoDB entry:\n\n"
                f"**Tweet:** {entry_text}\n\n"
                f"**User:** {entry.get('user', 'Unknown')}\n\n"
                f"**Date:** {entry.get('date', 'Unknown')}"
            )
            sent_label, sent_conf = analyze_sentiment(entry_text)
            top_label, top_conf = extract_topic(entry_text)
            return reply, sent_label, sent_conf, top_label, top_conf

        # Branch 3: everything else goes to the Gemini generative model;
        # sentiment/topic are computed on the user's prompt itself.
        generative = genai.GenerativeModel("gemini-1.5-pro")
        reply = generative.generate_content(user_prompt).text
        sent_label, sent_conf = analyze_sentiment(user_prompt)
        top_label, top_conf = extract_topic(user_prompt)
        return reply, sent_label, sent_conf, top_label, top_conf
    except Exception as e:
        return f"❌ Error: {e}", None, None, None, None