Spaces:

KrizTech100
/

Sentiment_Analysis

Running

App Files Files Community

Sentiment_Analysis / app.py

KrizTech100

Update app.py

92972fa verified 9 days ago

raw

history blame contribute delete

8.88 kB

	# Patch missing HfFolder before gradio imports it
	import sys
	from unittest.mock import MagicMock
	try:
	    from huggingface_hub import HfFolder  # noqa: F401 - probe only
	except ImportError:
	    # HfFolder cannot be imported from the installed huggingface_hub, so
	    # install a stand-in attribute before gradio is imported and looks it up.
	    import huggingface_hub

	    # `huggingface_hub` and `sys.modules["huggingface_hub"]` are the same
	    # module object, so a single assignment is sufficient.
	    huggingface_hub.HfFolder = MagicMock()

	import gradio as gr
	import assemblyai as aai
	import librosa
	import soundfile as sf
	import torch
	import json
	import csv
	import os
	import tempfile
	import warnings
	from datetime import datetime
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch.nn.functional as F
	from docx import Document
	from reportlab.platypus import SimpleDocTemplate, Paragraph
	from reportlab.lib.styles import getSampleStyleSheet

	# Hide Future/User/Runtime warnings for the whole process.
	for _category in (FutureWarning, UserWarning, RuntimeWarning):
	    warnings.filterwarnings("ignore", category=_category)

	# =========================
	# CONFIG
	# =========================
	# The AssemblyAI key comes from the environment (None when the variable is unset).
	aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")

	# Run on the GPU when torch reports one, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# A single checkpoint id feeds both the tokenizer and the classifier.
	_EMOTION_MODEL_ID = "j-hartmann/emotion-english-distilroberta-base"

	tokenizer = AutoTokenizer.from_pretrained(_EMOTION_MODEL_ID)
	sentiment_model = AutoModelForSequenceClassification.from_pretrained(_EMOTION_MODEL_ID)
	sentiment_model.to(device)
	sentiment_model.eval()  # inference only

	# (emoji, label) pairs reused by the lookup table below.
	_NEGATIVE = ("🔴", "Negative")
	_POSITIVE = ("🟢", "Positive")
	_NEUTRAL = ("🟡", "Neutral")

	# Maps the model's 7 emotion class indices to business-friendly labels.
	EMOTION_LABELS = dict(enumerate((
	    _NEGATIVE,  # 0: Anger
	    _NEGATIVE,  # 1: Disgust
	    _NEGATIVE,  # 2: Fear
	    _POSITIVE,  # 3: Joy
	    _NEUTRAL,   # 4: Neutral
	    _NEGATIVE,  # 5: Sadness
	    _POSITIVE,  # 6: Surprise
	)))

	# =========================
	# HELPERS
	# =========================
	def format_time(ms):
	    """Render a millisecond offset as a zero-padded ``MM:SS`` string.

	    Minutes are not folded into hours, so one hour is shown as ``60:00``.
	    Fractions of a second are dropped.
	    """
	    minutes, seconds = divmod(ms / 1000, 60)
	    return f"{int(minutes):02d}:{int(seconds):02d}"


	def split_into_chunks(text, chunk_size=200):
	    """Split text into chunks of at most ``chunk_size`` characters.

	    Each chunk ends at the last space found inside the size window so that
	    words stay intact. When the window contains no space, the text is cut at
	    exactly ``chunk_size`` characters. Whitespace around every chunk is
	    stripped.

	    Args:
	        text: The text to split.
	        chunk_size: Maximum number of characters per chunk; must be >= 1.

	    Returns:
	        A list of chunks in their original order. Text that already fits
	        (including an empty string) comes back as a one-element list.

	    Raises:
	        ValueError: If ``chunk_size`` is smaller than 1. Such a value could
	            never consume the text and would loop forever.
	    """
	    if chunk_size < 1:
	        raise ValueError(f"chunk_size must be >= 1, got {chunk_size!r}")

	    remaining = text.strip()
	    if len(remaining) <= chunk_size:
	        return [remaining]

	    chunks = []
	    while len(remaining) > chunk_size:
	        # Prefer the last space inside the window; fall back to a hard cut.
	        split_at = remaining.rfind(" ", 0, chunk_size)
	        if split_at == -1:
	            split_at = chunk_size
	        chunks.append(remaining[:split_at].strip())
	        remaining = remaining[split_at:].strip()

	    if remaining:
	        chunks.append(remaining)

	    return chunks


	def analyze_sentiment(text):
	    """Classify ``text`` and return the index of its most probable class.

	    The text is tokenized (truncated to 512 tokens), moved to ``device`` and
	    run through the module-level ``sentiment_model`` with gradient tracking
	    disabled. The arg-max of the softmax scores is returned as a plain
	    Python number.
	    """
	    encoded = tokenizer(
	        text,
	        return_tensors="pt",
	        truncation=True,
	        max_length=512,
	        padding=True,
	    )
	    encoded = encoded.to(device)

	    with torch.no_grad():
	        output = sentiment_model(**encoded)
	        # Only one text is passed in, so take the first (and only) row.
	        class_probs = F.softmax(output.logits, dim=-1)[0]

	    return class_probs.argmax().item()


	def build_segments(transcript):
	    """Convert a transcript's utterances into a list of plain dict segments.

	    Speakers are renumbered 1, 2, 3, ... in order of first appearance, keyed
	    by the string form of each utterance's ``speaker`` attribute.

	    Args:
	        transcript: Object exposing ``utterances``; each utterance provides
	            ``speaker``, ``start``, ``end`` (milliseconds, may be falsy) and
	            ``text``.

	    Returns:
	        A list of dicts with the keys ``speaker`` (int), ``start`` and ``end``
	        (formatted by ``format_time``) and ``text``. The list is empty when
	        the transcript has no utterances.
	    """
	    speaker_ids = {}
	    segments = []

	    # `utterances` may be None rather than an empty list; treat both alike
	    # instead of failing with "NoneType is not iterable".
	    for utterance in transcript.utterances or []:
	        raw = str(utterance.speaker)
	        speaker_id = speaker_ids.setdefault(raw, len(speaker_ids) + 1)
	        segments.append({
	            "speaker": speaker_id,
	            "start": format_time(utterance.start or 0),
	            "end": format_time(utterance.end or 0),
	            "text": utterance.text,
	        })

	    return segments


	# =========================
	# MAIN PROCESS
	# =========================
	def process_audio(file, speakers, language, state):
	    """Transcribe an audio file and label every text chunk with a sentiment.

	    The audio is re-encoded to a temporary mono WAV, transcribed with speaker
	    labels, split into chunks per utterance, and each chunk is classified.

	    Args:
	        file: Path of the audio file, or None when nothing was provided.
	        speakers: Expected number of speakers. 0, a negative value or None
	            leaves the count to the transcription service.
	        language: Language code for the transcriber, or "auto" to leave it
	            unset.
	        state: Current export state; handed back unchanged on any failure.

	    Returns:
	        A 4-tuple ``(status, conversation, info, state)``. On success the
	        state is a new dict with the keys ``segments`` and ``conversation``.
	        On failure ``conversation`` and ``info`` are empty strings and
	        ``status`` carries the error text.
	    """
	    if file is None:
	        return "❌ No audio provided", "", "", state

	    temp_wav = None
	    try:
	        audio, sr = librosa.load(file, sr=None, mono=True)

	        # Record the path before writing so the `finally` clause removes the
	        # file even when the write itself fails.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	            temp_wav = tmp.name
	        sf.write(temp_wav, audio, sr)

	        # The value may arrive as None (e.g. an emptied number box) or as a
	        # fraction; anything that does not yield a positive integer means
	        # "let the service decide".
	        expected_speakers = int(speakers) if speakers else 0
	        config = aai.TranscriptionConfig(
	            speaker_labels=True,
	            speakers_expected=expected_speakers if expected_speakers > 0 else None,
	            language_code=None if language == "auto" else language,
	            speech_model=aai.SpeechModel.best,
	        )
	        transcript = aai.Transcriber().transcribe(temp_wav, config)

	        if transcript.error:
	            return f"❌ {transcript.error}", "", "", state

	        segments = build_segments(transcript)
	        speaker_count = len({seg["speaker"] for seg in segments})

	        entries = []
	        export_segments = []

	        for i, seg in enumerate(segments, start=1):
	            chunks = split_into_chunks(seg["text"])
	            multiple_chunks = len(chunks) > 1

	            for c_idx, chunk in enumerate(chunks, start=1):
	                emotion_idx = analyze_sentiment(chunk)
	                emoji, label = EMOTION_LABELS.get(emotion_idx, ("⚪", "Unknown"))

	                # Only number the chunks when an utterance was actually split.
	                chunk_label = f" | Chunk {c_idx}" if multiple_chunks else ""

	                entries.append(
	                    f"Speaker {seg['speaker']} | Utterance {i}{chunk_label}\n"
	                    f"({seg['start']} - {seg['end']})\n"
	                    f"{emoji} {label}: {chunk}\n\n"
	                )

	                export_segments.append({
	                    "speaker": seg["speaker"],
	                    "start": seg["start"],
	                    "end": seg["end"],
	                    "chunk": c_idx,
	                    "text": chunk,
	                    "sentiment": label,
	                })

	        conversation = "".join(entries)
	        new_state = {"segments": export_segments, "conversation": conversation}
	        return (
	            "✅ Done",
	            conversation,
	            f"Speakers: {speaker_count} | Utterances: {len(segments)}",
	            new_state,
	        )

	    except Exception as e:
	        # UI boundary: report the failure in the status box instead of raising.
	        return f"❌ Error: {e}", "", "", state

	    finally:
	        if temp_wav and os.path.exists(temp_wav):
	            os.remove(temp_wav)


	# =========================
	# EXPORT
	# =========================
	def export_file(format_type, state):
	    """Write the analysed conversation to a file and return its path.

	    Args:
	        format_type: One of "TXT", "JSON", "CSV", "WORD" or "PDF".
	        state: Dict with the keys ``segments`` (list of row dicts) and
	            ``conversation`` (formatted text). None is treated as empty.

	    Returns:
	        The path of the written file, or None when there is nothing to export
	        or the format is not recognised.

	    Raises:
	        Exception: Whatever the underlying writer raises; the partially
	            written file is removed first.
	    """
	    from xml.sax.saxutils import escape

	    state = state or {}
	    segments = state.get("segments", [])
	    conversation = state.get("conversation", "")
	    if not conversation and not segments:
	        return None

	    suffix = {
	        "TXT": ".txt",
	        "JSON": ".json",
	        "CSV": ".csv",
	        "WORD": ".docx",
	        "PDF": ".pdf",
	    }.get(format_type)
	    if suffix is None:
	        return None

	    # A timestamp alone collides when two exports happen within the same
	    # second; mkstemp guarantees a unique file in the system temp directory.
	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	    fd, path = tempfile.mkstemp(prefix=f"conversation_{timestamp}_", suffix=suffix)
	    os.close(fd)

	    try:
	        if format_type == "TXT":
	            with open(path, "w", encoding="utf-8") as f:
	                f.write(conversation)
	        elif format_type == "JSON":
	            with open(path, "w", encoding="utf-8") as f:
	                # Keep accented characters readable; the file is UTF-8 anyway.
	                json.dump(segments, f, indent=4, ensure_ascii=False)
	        elif format_type == "CSV":
	            with open(path, "w", newline="", encoding="utf-8") as f:
	                writer = csv.DictWriter(
	                    f,
	                    fieldnames=["speaker", "start", "end", "chunk", "text", "sentiment"],
	                )
	                writer.writeheader()
	                writer.writerows(segments)
	        elif format_type == "WORD":
	            doc = Document()
	            doc.add_heading("Conversation Transcript", 0)
	            doc.add_paragraph(conversation)
	            doc.save(path)
	        else:  # "PDF"
	            doc = SimpleDocTemplate(path)
	            styles = getSampleStyleSheet()
	            # Paragraph text is parsed as markup, so escape &, < and > from
	            # the transcript before inserting the line-break tags.
	            markup = escape(conversation).replace("\n", "<br/>")
	            doc.build([Paragraph(markup, styles["Normal"])])
	    except Exception:
	        # Do not leave a partial export behind.
	        if os.path.exists(path):
	            os.remove(path)
	        raise

	    return path


	# =========================
	# UI
	# =========================
	with gr.Blocks(title="AI Conversation Sentiment Analyzer", theme=gr.themes.Soft()) as app:
	    # NOTE(review): the source arrived with its indentation flattened; the
	    # nesting of groups and rows below is reconstructed - confirm the layout.
	    gr.Markdown("# 🎙 AI Conversation Sentiment Analyzer")

	    # Holds the latest analysis so the export handler can reuse it.
	    state = gr.State({"segments": [], "conversation": ""})

	    with gr.Group():
	        gr.Markdown("### 🎙 Input Audio")
	        audio = gr.Audio(sources=["upload", "microphone"], type="filepath")

	    with gr.Group():
	        gr.Markdown("### ⚙ Settings")
	        with gr.Row():
	            speakers = gr.Number(value=0, label="Speakers (0 = auto-detect)")
	            language = gr.Dropdown(
	                choices=["auto", "en", "fr", "es", "de"],
	                value="auto",
	                label="Language",
	            )

	    analyze_btn = gr.Button(value="🚀 Analyze", variant="primary")

	    with gr.Group():
	        gr.Markdown("### 💬 Conversation Output")
	        status = gr.Textbox(label="Status")
	        conversation_box = gr.Textbox(lines=18, label="Conversation + Sentiment")
	        info = gr.Textbox(label="Info")

	    with gr.Group():
	        gr.Markdown("### 📁 Export")
	        with gr.Row():
	            export_format = gr.Dropdown(
	                choices=["TXT", "JSON", "CSV", "WORD", "PDF"],
	                value="TXT",
	                label="Format",
	            )
	            export_btn = gr.Button(value="⬇ Export")
	        download = gr.File()

	    # Analyze: audio + settings in, status/conversation/info and new state out.
	    analyze_btn.click(
	        fn=process_audio,
	        inputs=[audio, speakers, language, state],
	        outputs=[status, conversation_box, info, state],
	    )
	    # Export: chosen format + stored state in, downloadable file out.
	    export_btn.click(
	        fn=export_file,
	        inputs=[export_format, state],
	        outputs=[download],
	    )

	if __name__ == "__main__":
	    # Serve on every network interface at port 7860 when run as a script.
	    app.launch(server_port=7860, server_name="0.0.0.0")