import sys
import os
import types
import logging
import re


# The stdlib ``audioop`` module was removed in Python 3.13.  Register an
# empty stand-in before importing gradio so transitive imports that still
# do ``import audioop`` (presumably pydub via gradio — TODO confirm) don't
# crash at startup.
if 'audioop' not in sys.modules:
    sys.modules['audioop'] = types.ModuleType('audioop')


import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Headless rendering backend: figures are only saved to files, never shown.
# NOTE(review): use() is conventionally called before importing pyplot;
# modern matplotlib switches backends fine here, but confirm the version.
matplotlib.use('Agg')


# Module-level logger; basicConfig enables INFO-level output.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| def _patched_get_transcript_from_audio(wav_filename, language="english"): |
| """CPU-safe replacement that creates word events from audio duration. |
| |
| When the audio was generated from known text (gTTS), the global |
| CURRENT_SCRIPT_TEXT will contain that text. Otherwise we create |
| a minimal placeholder so TRIBE's pipeline doesn't crash. |
| """ |
| import pandas as pd |
| import soundfile as sf |
| from pathlib import Path |
|
|
| wav_filename = Path(wav_filename) |
|
|
| |
| try: |
| info = sf.info(str(wav_filename)) |
| duration = info.duration |
| except Exception: |
| duration = 30.0 |
|
|
| |
| text = _CURRENT_SCRIPT_TEXT or "audio content placeholder" |
|
|
| |
| raw_words = text.split() |
| if not raw_words: |
| return pd.DataFrame(columns=["text", "start", "duration", "sequence_id", "sentence"]) |
|
|
| |
| sentences = re.split(r'(?<=[.!?])\s+', text) |
| sentences = [s.strip() for s in sentences if s.strip()] |
| if not sentences: |
| sentences = [text] |
|
|
| |
| word_duration = duration / len(raw_words) |
| words = [] |
| word_idx = 0 |
| for sent_idx, sentence in enumerate(sentences): |
| sent_words = sentence.split() |
| for w in sent_words: |
| if word_idx >= len(raw_words): |
| break |
| words.append({ |
| "text": w.replace('"', ''), |
| "start": word_idx * word_duration, |
| "duration": word_duration * 0.9, |
| "sequence_id": sent_idx, |
| "sentence": sentence.replace('"', ''), |
| }) |
| word_idx += 1 |
|
|
| return pd.DataFrame(words) |
|
|
|
|
| |
| _CURRENT_SCRIPT_TEXT = None |
|
|
|
|
def apply_patches():
    """Monkey-patch TRIBE so transcription never touches whisperx/CUDA.

    Swaps ExtractWordsFromAudio._get_transcript_from_audio for the
    CPU-safe _patched_get_transcript_from_audio.  Any failure (e.g.
    tribev2 not installed) is logged as a warning rather than raised,
    so importing this module never crashes.
    """
    try:
        from tribev2.eventstransforms import ExtractWordsFromAudio

        replacement = staticmethod(_patched_get_transcript_from_audio)
        ExtractWordsFromAudio._get_transcript_from_audio = replacement
        logger.info("Patched ExtractWordsFromAudio (CPU-safe, no whisperx)")
    except Exception as exc:
        logger.warning(f"Could not patch ExtractWordsFromAudio: {exc}")
|
|
| |
# Install the CPU-safe patch at import time, before any model is built.
apply_patches()
|
|
| |
| |
| |
# Lazily-loaded TribeModel singleton; populated by load_model().
model = None
|
|
def load_model():
    """Load the TRIBE v2 model into the module-level ``model`` singleton.

    Idempotent: returns immediately when the model is already loaded.
    Re-applies the whisperx patch first so a freshly imported tribev2
    class is patched even if the import-time attempt failed.

    Returns:
        A human-readable status string.  Failure messages contain the
        word "Error" — the marker analyze() keys off.
    """
    global model
    if model is not None:
        # Fix: this status string (and the success one below) was broken
        # across physical source lines, which is a SyntaxError.
        return "β Already loaded!"
    try:
        apply_patches()
        from tribev2 import TribeModel
        model = TribeModel.from_pretrained("facebook/tribev2", cache_folder="/tmp/tribe_cache")
        return "β Model loaded!"
    except Exception as e:
        import traceback
        traceback.print_exc()
        # Keep "Error" in the message — analyze() checks for it.
        return f"β Error loading model: {str(e)}"
|
|
| |
| |
| |
# (name, start fraction, end fraction, bar color) — fractions index into
# one half of the averaged prediction vector (see score_predictions).
REGIONS = [
    ("Visual cortex", 0.00, 0.15, "#378ADD"),
    ("Auditory cortex", 0.15, 0.30, "#D85A30"),
    ("Language (Broca's area)", 0.30, 0.45, "#7F77DD"),
    ("Prefrontal (attention)", 0.45, 0.62, "#1D9E75"),
    ("Temporal (memory)", 0.62, 0.78, "#BA7517"),
    ("Emotion (limbic)", 0.78, 1.00, "#D4537E"),
]


def score_predictions(preds):
    """Score each REGIONS entry from raw model predictions.

    Averages |preds| over the time axis, then rates each region's slice
    of the first half of that vector against the strongest feature.

    Args:
        preds: 2-D array-like, time steps x features.

    Returns:
        (scores, overall): dict of region name -> 0-100 score (rounded
        to one decimal) and the mean of those scores, also rounded.
    """
    magnitude = np.abs(preds).mean(axis=0)
    peak = magnitude.max() + 1e-8  # epsilon guards against divide-by-zero
    half = len(magnitude) // 2  # regions span one hemisphere's worth of features
    scores = {
        name: round(float(magnitude[int(half * lo):int(half * hi)].mean() / peak * 100), 1)
        for name, lo, hi, _color in REGIONS
    }
    overall = round(sum(scores.values()) / len(scores), 1)
    return scores, overall
|
|
def make_brain_plot(preds):
    """Render left/right fsaverage surface maps of |preds| to a PNG.

    Normalizes the time-averaged absolute predictions to [0, 1] and
    paints each half on one hemisphere of the inflated fsaverage5 mesh.

    Returns:
        Path of the saved PNG, or None when nilearn (or its fsaverage
        download) is unavailable — the app degrades gracefully.
    """
    try:
        from nilearn import datasets, plotting

        activation = np.abs(preds).mean(axis=0)
        span = activation.max() - activation.min() + 1e-8
        normed = (activation - activation.min()) / span
        mid = len(normed) // 2  # first half -> left hemisphere, rest -> right
        surf = datasets.fetch_surf_fsaverage("fsaverage5")

        fig, (ax_left, ax_right) = plt.subplots(
            1, 2, figsize=(14, 5), subplot_kw={"projection": "3d"}
        )
        fig.patch.set_facecolor("#111111")
        plotting.plot_surf_stat_map(
            surf.infl_left, normed[:mid], hemi="left", view="lateral",
            colorbar=True, cmap="hot", title="Left hemisphere",
            axes=ax_left, figure=fig,
        )
        plotting.plot_surf_stat_map(
            surf.infl_right, normed[mid:], hemi="right", view="lateral",
            colorbar=True, cmap="hot", title="Right hemisphere",
            axes=ax_right, figure=fig,
        )
        plt.tight_layout()
        plt.savefig("/tmp/brain_map.png", dpi=130, bbox_inches="tight",
                    facecolor="#111111")
        plt.close()
        return "/tmp/brain_map.png"
    except Exception as e:
        print(f"Brain plot error: {e}")
        return None
|
|
def make_score_chart(scores, overall):
    """Draw a dark-themed horizontal bar chart of region scores to a PNG.

    Args:
        scores: region-name -> 0-100 score mapping (see score_predictions).
        overall: aggregate score displayed in the title.

    Returns:
        Path of the saved chart image ("/tmp/score_chart.png").
    """
    fig, ax = plt.subplots(figsize=(9, 4))
    fig.patch.set_facecolor("#1a1a1a")
    ax.set_facecolor("#1a1a1a")

    names = [region[0] for region in REGIONS]
    colors = [region[3] for region in REGIONS]
    values = [scores.get(name, 0) for name in names]

    bars = ax.barh(names, values, color=colors, height=0.55)
    ax.set_xlim(0, 100)
    # Dashed guide at 70 — the same threshold generate_suggestions() uses.
    ax.axvline(70, color="#888", linestyle="--", linewidth=1, alpha=0.6)
    ax.set_xlabel("Activation score", color="#ccc", fontsize=11)
    ax.set_title(f"Brain region activation | Overall: {overall}/100",
                 color="white", fontsize=13, fontweight="bold", pad=12)
    ax.tick_params(colors="#ccc")
    for spine in ax.spines.values():
        spine.set_edgecolor("#333")

    # Numeric label just to the right of each bar.
    for bar, value in zip(bars, values):
        ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height() / 2,
                f"{value}", va="center", color="white", fontsize=10,
                fontweight="bold")

    plt.tight_layout()
    plt.savefig("/tmp/score_chart.png", dpi=130, bbox_inches="tight",
                facecolor="#1a1a1a")
    plt.close()
    return "/tmp/score_chart.png"
|
|
def generate_suggestions(scores, overall):
    """Turn region scores into actionable writing tips plus a status line.

    Each region scoring below 70 contributes one fixed tip, in a fixed
    order; a region missing from ``scores`` defaults to 100 (no tip).
    A perfect script still gets one encouraging tip.

    Returns:
        A Markdown string: bold overall/status header, blank line, then
        one tip per line.
    """
    rules = [
        ("Prefrontal (attention)",
         "β Open with a bold question or surprising fact to boost attention"),
        ("Emotion (limbic)",
         "β Add emotional language β 'imagine', 'feel', personal stories"),
        ("Temporal (memory)",
         "β Include specific numbers or data points to improve memorability"),
        ("Visual cortex",
         "β Use more visual language β describe what viewers will 'see'"),
        ("Language (Broca's area)",
         "β Break long sentences into shorter, punchier ones"),
        ("Auditory cortex",
         "β Add rhythm and repetition β the brain responds to sound patterns"),
    ]
    tips = [tip for region, tip in rules if scores.get(region, 100) < 70]
    if not tips:
        tips = ["β Excellent! Consider adding a strong call-to-action at the end"]

    if overall >= 75:
        status = "π’ Strong"
    elif overall >= 55:
        status = "π‘ Good, needs polish"
    else:
        status = "π΄ Needs work"

    return f"**Overall: {overall}/100 β {status}**\n\n" + "\n".join(tips)
|
|
| |
| |
| |
def analyze(input_mode, script_text, audio_file, progress=gr.Progress()):
    """Run the full pipeline: input -> audio -> TRIBE events -> scores -> visuals.

    Args:
        input_mode: "Text" (script synthesized to speech via gTTS) or
            "Audio" (an uploaded file is analyzed directly).
        script_text: Script contents; only used in "Text" mode.
        audio_file: Uploaded audio path; only used in "Audio" mode.
        progress: Gradio progress tracker.  The gr.Progress() default is
            the standard Gradio idiom (inspected by the framework), not a
            mutable-default bug.

    Returns:
        Tuple of (brain_map_path, score_chart_path, suggestions_markdown,
        predictions_npy_path); the non-message slots are None on error.
    """
    global _CURRENT_SCRIPT_TEXT

    # Validate input before any expensive model work.
    if input_mode == "Text" and (not script_text or not script_text.strip()):
        return None, None, "β οΈ Please paste your script text first.", None
    if input_mode == "Audio" and audio_file is None:
        return None, None, "β οΈ Please upload an audio file first.", None

    # Lazy first-use load of the module-level model.
    if model is None:
        progress(0.1, desc="Loading TRIBE v2 model (first time ~5 mins)...")
        msg = load_model()
        # load_model() embeds the word "Error" in failure messages.
        if "Error" in msg:
            return None, None, msg, None

    try:
        if input_mode == "Text":
            progress(0.2, desc="Converting text to speech...")

            from gtts import gTTS
            from langdetect import detect

            text = script_text.strip()
            lang = detect(text)  # auto-pick the TTS language from the script
            audio_path = "/tmp/script_audio.mp3"
            tts = gTTS(text=text, lang=lang)
            tts.save(audio_path)

            # Hand the exact script text to the patched transcript
            # function (see _patched_get_transcript_from_audio).
            _CURRENT_SCRIPT_TEXT = text

            progress(0.4, desc="Running TRIBE v2 on generated audio...")
            df = model.get_events_dataframe(audio_path=audio_path)

        else:
            import shutil
            progress(0.2, desc="Loading audio file...")
            ext = os.path.splitext(audio_file)[1] or ".mp3"
            audio_path = f"/tmp/input_audio{ext}"
            shutil.copy(audio_file, audio_path)

            # No known transcript for uploaded audio; the patched
            # transcript function falls back to a placeholder.
            _CURRENT_SCRIPT_TEXT = None

            progress(0.4, desc="Running TRIBE v2 on audio...")
            df = model.get_events_dataframe(audio_path=audio_path)

        progress(0.6, desc="Predicting brain response...")
        preds, segments = model.predict(events=df)

        progress(0.75, desc="Scoring regions...")
        scores, overall = score_predictions(preds)

        progress(0.85, desc="Rendering maps...")
        brain_img = make_brain_plot(preds)  # may be None if nilearn is unavailable
        score_img = make_score_chart(scores, overall)
        suggestions = generate_suggestions(scores, overall)

        # Persist raw predictions so the UI can offer them for download.
        np.save("/tmp/brain_predictions.npy", preds)
        progress(1.0, desc="Done!")
        return brain_img, score_img, suggestions, "/tmp/brain_predictions.npy"

    except Exception as e:
        import traceback
        full_error = traceback.format_exc()
        print(full_error)
        return None, None, f"β Error:\n{str(e)}\n\nFull traceback:\n{full_error}", None
    finally:
        # Always clear the script-text handoff so a stale script can't
        # leak into the next request.
        _CURRENT_SCRIPT_TEXT = None
|
|
| |
| |
| |
# Minimal CSS: center the title and subtitle banners.
css = "#title{text-align:center} #subtitle{text-align:center;color:#888;font-size:14px}"


# Declarative Gradio UI: input column on the left, result images on the
# right; event wiring at the bottom.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), css=css) as demo:
    gr.Markdown("# π§ Script Brain Optimizer", elem_id="title")
    gr.Markdown("Analyze your script or audio β real fMRI predictions via **TRIBE v2** β iterate", elem_id="subtitle")

    with gr.Row():
        with gr.Column(scale=1):
            # Mode switch: exactly one of script_input / audio_input is
            # visible at a time (see toggle_mode below).
            input_mode = gr.Radio(
                choices=["Text", "Audio"], value="Text",
                label="Input type",
                info="Text: paste your script | Audio: upload MP3/WAV"
            )
            script_input = gr.Textbox(
                label="Your script",
                placeholder="Paste your content script here...",
                lines=10, max_lines=20, visible=True
            )
            audio_input = gr.Audio(
                label="Upload audio file (MP3, WAV, M4A, FLAC)",
                type="filepath", sources=["upload"], visible=False
            )
            with gr.Row():
                clear_btn = gr.Button("Clear", variant="secondary", scale=1)
                analyze_btn = gr.Button("π§ Analyze", variant="primary", scale=3)
            suggestions_out = gr.Markdown(value="*Add your content and click Analyze...*")
            download_out = gr.File(label="Download predictions (.npy)")

        with gr.Column(scale=2):
            brain_img_out = gr.Image(label="Brain activation map", height=320)
            score_img_out = gr.Image(label="Region scores", height=280)

    def toggle_mode(mode):
        # Show the text box in "Text" mode, the audio uploader otherwise.
        return gr.update(visible=mode=="Text"), gr.update(visible=mode=="Audio")

    input_mode.change(fn=toggle_mode, inputs=[input_mode],
                      outputs=[script_input, audio_input])

    analyze_btn.click(fn=analyze, inputs=[input_mode, script_input, audio_input],
                      outputs=[brain_img_out, score_img_out, suggestions_out, download_out])

    # Reset every input and output back to its initial state.
    clear_btn.click(
        fn=lambda: ("", None, None, None, "*Add your content and click Analyze...*", None),
        outputs=[script_input, audio_input, brain_img_out, score_img_out, suggestions_out, download_out]
    )

    gr.Markdown("---\n*Powered by [TRIBE v2](https://github.com/facebookresearch/tribev2) by Meta FAIR*")
|
|
if __name__ == "__main__":
    # Bind to all interfaces on 7860 (the conventional HF Spaces / Docker port).
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
|