Spaces:

GoshawkVortexAI
/

Goshawk-Tiktok

Running

App Files Files Community

Goshawk-Tiktok / app.py

GoshawkVortexAI

Create app.py

9864a36 verified 20 days ago

raw

history blame contribute delete

12.3 kB

	import gradio as gr
	import pandas as pd
	import torch
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	import tempfile
	import os

	# ── Model loading ──────────────────────────────────────────────────────────────

	# Hub ids are named once so the log line, the model and the tokenizer
	# arguments of each pipeline cannot drift apart.
	_SENTIMENT_MODEL = "savasy/bert-base-turkish-sentiment-cased"
	_TOXICITY_MODEL = "unitary/toxic-bert"

	# Both pipelines are created with truncation enabled and max_length=512.
	_TRUNCATION_KWARGS = {"truncation": True, "max_length": 512}

	print(f"Loading sentiment model ({_SENTIMENT_MODEL})...")
	sentiment_pipeline = pipeline(
	    "sentiment-analysis",
	    model=_SENTIMENT_MODEL,
	    tokenizer=_SENTIMENT_MODEL,
	    **_TRUNCATION_KWARGS,
	)

	print(f"Loading toxicity model ({_TOXICITY_MODEL})...")
	toxicity_pipeline = pipeline(
	    "text-classification",
	    model=_TOXICITY_MODEL,
	    tokenizer=_TOXICITY_MODEL,
	    **_TRUNCATION_KWARGS,
	)

	# ── Keyword extraction (simple TF-based, no external API) ─────────────────────

	import re
	from collections import Counter

	# Common Turkish function words that are excluded from keyword extraction.
	STOPWORDS_TR = {
	    "bir", "bu", "ve", "ile", "da", "de", "mi", "mu", "mü", "mı", "ki",
	    "ne", "için", "ama", "fakat", "çok", "daha", "en", "gibi", "kadar",
	    "ben", "sen", "o", "biz", "siz", "onlar", "şu", "her",
	    "hiç", "bazı", "tüm", "bütün", "var", "yok", "olan", "olarak",
	}

	# Common English function words that are excluded from keyword extraction.
	STOPWORDS_EN = {
	    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
	    "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
	    "have", "has", "had", "do", "does", "did", "will", "would", "could",
	    "should", "may", "might", "this", "that", "these", "those", "it", "its",
	    "i", "you", "he", "she", "we", "they", "not", "no", "so", "as", "if",
	}

	# Combined lookup set: the union of both language lists.
	STOPWORDS = STOPWORDS_TR | STOPWORDS_EN


	def extract_keywords(text: str, top_n: int = 5, stopwords: "set[str] | None" = None) -> str:
	    """Return the most frequent non-stopword words of ``text``.

	    Words are runs of at least three ASCII or Turkish letters, compared
	    case-insensitively.

	    Args:
	        text: Text to scan; empty or ``None`` yields an empty string.
	        top_n: Maximum number of keywords to return.
	        stopwords: Words to ignore. Defaults to the module-level
	            ``STOPWORDS`` set.

	    Returns:
	        The keywords joined with ", ", most frequent first, or "" when no
	        candidate word remains.
	    """
	    if not text:
	        return ""
	    if stopwords is None:
	        stopwords = STOPWORDS

	    # str.lower() turns "İ" (U+0130) into "i" followed by a combining dot
	    # (U+0307). The combining mark is not matched by the pattern below, so
	    # "İstanbul" would be tokenised as "stanbul". Map it to a plain "i" first.
	    normalized = text.replace("İ", "i").lower()

	    words = re.findall(r"\b[a-zA-ZçğışöüÇĞİŞÖÜ]{3,}\b", normalized)
	    counts = Counter(word for word in words if word not in stopwords)
	    return ", ".join(word for word, _ in counts.most_common(top_n))


	# ── Core analysis ──────────────────────────────────────────────────────────────

	def analyze_text(text: str, toxicity_threshold: float = 0.5) -> dict:
	    """Run sentiment, toxicity and keyword analysis on one piece of text.

	    Args:
	        text: Raw text to analyze; surrounding whitespace is stripped.
	        toxicity_threshold: Minimum toxicity probability (0-1) at which the
	            text is reported as toxic.

	    Returns:
	        A dict with the keys ``Text`` (truncated to 120 characters),
	        ``Sentiment``, ``Sentiment Score``, ``Toxicity``, ``Toxicity Score``
	        and ``Keywords``; an empty dict when ``text`` is empty or ``None``.
	    """
	    text = (text or "").strip()
	    if not text:
	        return {}

	    # Sentiment: top label and its confidence from the sentiment pipeline.
	    sent_result = sentiment_pipeline(text)[0]
	    sentiment_label = sent_result["label"]  # e.g. "positive" / "negative"
	    sentiment_score = round(sent_result["score"], 4)

	    # Toxicity: only the top-scoring label is read here, so the label alone
	    # cannot decide the verdict. A top label of "toxic" with a score near 0
	    # must not be flagged, and a confident "insult"/"threat" must not be
	    # reported as safe.
	    # NOTE(review): label names follow the unitary/toxic-bert model card,
	    # where every label is a toxicity category - confirm if the model changes.
	    toxic_labels = {
	        "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate",
	    }
	    tox_result = toxicity_pipeline(text)[0]
	    tox_score = tox_result["score"]
	    if tox_result["label"].lower() in toxic_labels:
	        toxicity_probability = tox_score
	    else:
	        # Any other label is treated as the "not toxic" class.
	        toxicity_probability = 1 - tox_score
	    is_toxic = toxicity_probability >= toxicity_threshold
	    toxicity_label = "Toxic 🚨" if is_toxic else "Safe ✅"

	    return {
	        "Text": text[:120] + ("…" if len(text) > 120 else ""),
	        "Sentiment": sentiment_label.capitalize(),
	        "Sentiment Score": sentiment_score,
	        "Toxicity": toxicity_label,
	        "Toxicity Score": round(toxicity_probability, 4),
	        "Keywords": extract_keywords(text),
	    }


	# ── Gradio handlers ────────────────────────────────────────────────────────────

	def run_analysis(text_input: str, csv_file):
	    """Analyze either an uploaded CSV or the text box and build the outputs.

	    An uploaded CSV takes priority over the text box. The first text-like
	    column is analyzed row by row; if there is none, the first column is used.

	    Args:
	        text_input: Contents of the single-text box (may be empty or ``None``).
	        csv_file: Uploaded file, given either as an object with a ``name``
	            attribute holding its path or as the path itself, or ``None``.

	    Returns:
	        A ``(dataframe, csv_path)`` tuple. On success ``dataframe`` holds one
	        row per analyzed text and ``csv_path`` points to a temporary CSV copy
	        of it. Otherwise ``dataframe`` is a one-row "Error"/"Info" message
	        and ``csv_path`` is ``None``.
	    """
	    texts = []

	    # CSV path takes priority; fall back to text box
	    if csv_file is not None:
	        csv_path = csv_file.name if hasattr(csv_file, "name") else csv_file
	        try:
	            df_in = pd.read_csv(csv_path)
	        except Exception as e:
	            # UI boundary: surface any read/parse failure in the table
	            # instead of crashing the request.
	            return pd.DataFrame([{"Error": f"Could not read CSV: {e}"}]), None

	        # Use the first text-like column. Both the classic ``object`` dtype
	        # and pandas' dedicated string dtype count as text.
	        text_col = next(
	            (
	                col
	                for col in df_in.columns
	                if pd.api.types.is_object_dtype(df_in[col].dtype)
	                or pd.api.types.is_string_dtype(df_in[col].dtype)
	            ),
	            df_in.columns[0],
	        )

	        # dropna() removes missing cells directly, so no string comparison
	        # against "nan" is needed.
	        for value in df_in[text_col].dropna():
	            cell = str(value).strip()
	            if cell:
	                texts.append(cell)

	    elif text_input and text_input.strip():
	        texts.append(text_input.strip())
	    else:
	        return pd.DataFrame([{"Info": "Please enter text or upload a CSV file."}]), None

	    rows = []
	    for text in texts:
	        result = analyze_text(text)
	        if result:
	            rows.append(result)

	    if not rows:
	        return pd.DataFrame([{"Info": "No valid text found to analyze."}]), None

	    df_out = pd.DataFrame(rows)

	    # Save CSV for download. The file must outlive this call so it can be
	    # served, hence delete=False. Writing through the handle inside ``with``
	    # closes it even if the write fails and avoids reopening the path while
	    # it is still open.
	    with tempfile.NamedTemporaryFile(
	        delete=False, suffix=".csv", mode="w", encoding="utf-8", newline=""
	    ) as tmp:
	        df_out.to_csv(tmp, index=False)

	    return df_out, tmp.name


	# ── UI ─────────────────────────────────────────────────────────────────────────

	# Custom stylesheet handed to gr.Blocks(css=CSS).
	# It declares a dark palette as CSS variables in :root and then overrides
	# the page base, the #app-header banner, panels, labels, text inputs,
	# primary/secondary buttons, the results table, file widgets, dividers and
	# scrollbars. The 'Space Mono' and 'DM Sans' families it references are
	# requested by the <link> tags in HEADER_HTML.
	CSS = """
	/* ── Palette ─────────────────────────────── */
	:root {
	--bg: #0d0f14;
	--surface: #161a22;
	--border: #252a35;
	--accent: #5b8dee;
	--accent2: #e05b8d;
	--text: #e8ecf4;
	--muted: #7a8399;
	--safe: #3ecf78;
	--toxic: #ff4f64;
	--font-head: 'Space Mono', monospace;
	--font-body: 'DM Sans', sans-serif;
	--radius: 10px;
	}

	/* ── Reset & base ─────────────────────────── */
	body, .gradio-container {
	background: var(--bg) !important;
	color: var(--text) !important;
	font-family: var(--font-body) !important;
	}

	/* ── Header ───────────────────────────────── */
	#app-header {
	text-align: center;
	padding: 36px 20px 20px;
	background: linear-gradient(135deg, #0d0f14 0%, #161a22 100%);
	border-bottom: 1px solid var(--border);
	margin-bottom: 28px;
	}
	#app-header h1 {
	font-family: var(--font-head);
	font-size: clamp(1.4rem, 4vw, 2.2rem);
	letter-spacing: -0.5px;
	background: linear-gradient(90deg, var(--accent), var(--accent2));
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	margin: 0 0 8px;
	}
	#app-header p {
	color: var(--muted);
	font-size: 0.9rem;
	margin: 0;
	}

	/* ── Panels ───────────────────────────────── */
	.gr-group, .gr-box, .gr-form {
	background: var(--surface) !important;
	border: 1px solid var(--border) !important;
	border-radius: var(--radius) !important;
	}

	/* ── Labels ───────────────────────────────── */
	label span, .gr-form label {
	color: var(--muted) !important;
	font-size: 0.78rem !important;
	letter-spacing: 0.06em !important;
	text-transform: uppercase !important;
	font-family: var(--font-head) !important;
	}

	/* ── Textbox ──────────────────────────────── */
	textarea, input[type="text"] {
	background: #1c2130 !important;
	border: 1px solid var(--border) !important;
	border-radius: 8px !important;
	color: var(--text) !important;
	font-family: var(--font-body) !important;
	font-size: 0.95rem !important;
	}
	textarea:focus, input[type="text"]:focus {
	border-color: var(--accent) !important;
	box-shadow: 0 0 0 3px rgba(91,141,238,0.15) !important;
	}

	/* ── Buttons ──────────────────────────────── */
	button.primary {
	background: linear-gradient(135deg, var(--accent), #3d6dcf) !important;
	border: none !important;
	border-radius: 8px !important;
	color: #fff !important;
	font-family: var(--font-head) !important;
	font-size: 0.85rem !important;
	letter-spacing: 0.08em !important;
	padding: 10px 28px !important;
	cursor: pointer !important;
	transition: opacity 0.2s, transform 0.1s !important;
	}
	button.primary:hover { opacity: 0.88 !important; transform: translateY(-1px) !important; }
	button.primary:active { transform: translateY(0) !important; }

	button.secondary {
	background: var(--surface) !important;
	border: 1px solid var(--border) !important;
	border-radius: 8px !important;
	color: var(--muted) !important;
	font-family: var(--font-head) !important;
	font-size: 0.78rem !important;
	}

	/* ── Dataframe ────────────────────────────── */
	.gr-dataframe, .dataframe-container {
	background: var(--surface) !important;
	border-radius: var(--radius) !important;
	overflow: hidden !important;
	}
	table { width: 100%; border-collapse: collapse; }
	thead tr { background: #1c2130 !important; }
	thead th {
	color: var(--accent) !important;
	font-family: var(--font-head) !important;
	font-size: 0.72rem !important;
	letter-spacing: 0.07em !important;
	text-transform: uppercase !important;
	padding: 10px 14px !important;
	border-bottom: 1px solid var(--border) !important;
	}
	tbody tr:nth-child(even) { background: rgba(255,255,255,0.02) !important; }
	tbody tr:hover { background: rgba(91,141,238,0.06) !important; }
	tbody td {
	color: var(--text) !important;
	font-size: 0.87rem !important;
	padding: 9px 14px !important;
	border-bottom: 1px solid var(--border) !important;
	}

	/* ── File upload ──────────────────────────── */
	.gr-file-upload {
	border: 2px dashed var(--border) !important;
	border-radius: var(--radius) !important;
	background: #13171f !important;
	}
	.gr-file-upload:hover { border-color: var(--accent) !important; }

	/* ── Download file ────────────────────────── */
	.gr-file { background: var(--surface) !important; border-color: var(--border) !important; }

	/* ── Divider ──────────────────────────────── */
	hr { border-color: var(--border) !important; margin: 20px 0 !important; }

	/* ── Scrollbar ────────────────────────────── */
	::-webkit-scrollbar { width: 6px; height: 6px; }
	::-webkit-scrollbar-track { background: var(--bg); }
	::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
	"""

	# Banner markup rendered at the top of the page. It also requests the two
	# Google Fonts families that the stylesheet's --font-head / --font-body
	# variables refer to. The separator in the subtitle is a plain "|"; an
	# escaped "\|" would be an invalid string escape and show a stray backslash.
	HEADER_HTML = """
	<link rel="preconnect" href="https://fonts.googleapis.com">
	<link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;600&display=swap" rel="stylesheet">
	<div id="app-header">
	<h1>⚡ Social Media Analytics</h1>
	<p>Sentiment · Toxicity · Keywords  |  Turkish & English supported</p>
	</div>
	"""

	# Page layout: banner, then a two-column row (inputs | results), then a
	# footer naming the models.
	with gr.Blocks(title="Social Media Analytics", css=CSS) as demo:
	    gr.HTML(HEADER_HTML)

	    with gr.Row():
	        # Left column: the two alternative inputs and the trigger button.
	        with gr.Column(scale=1):
	            text_input = gr.Textbox(
	                lines=5,
	                label="Single Text Analysis",
	                placeholder="Paste a tweet, comment, or any social media post here…",
	            )
	            csv_file = gr.File(
	                file_types=[".csv"],
	                label="Bulk CSV Upload (first text column is used)",
	            )
	            analyze_btn = gr.Button("🔍 Analyze", variant="primary")

	        # Right column: read-only results table and the downloadable CSV.
	        with gr.Column(scale=2):
	            result_table = gr.Dataframe(
	                wrap=True,
	                interactive=False,
	                label="Results",
	            )
	            download_btn = gr.File(interactive=False, label="⬇ Download CSV")

	    # run_analysis returns (dataframe, csv_path), matching the two outputs.
	    analyze_btn.click(
	        run_analysis,
	        inputs=[text_input, csv_file],
	        outputs=[result_table, download_btn],
	    )

	    gr.HTML("""
	<div style="text-align:center;padding:20px 0 10px;color:#4a5268;font-size:0.78rem;font-family:'Space Mono',monospace;">
	Models: savasy/bert-base-turkish-sentiment-cased · unitary/toxic-bert
	</div>
	""")

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()