Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import torch | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
| import tempfile | |
| import os | |
# ── Model loading ─────────────────────────────────────────────────────────────
# Both pipelines are built once at import time so every request reuses them.
_SENTIMENT_CHECKPOINT = "savasy/bert-base-turkish-sentiment-cased"
_TOXICITY_CHECKPOINT = "unitary/toxic-bert"

# Tokenizer settings shared by both pipelines: clip inputs at 512 tokens.
_TRUNCATION_KWARGS = {"truncation": True, "max_length": 512}

print(f"Loading sentiment model ({_SENTIMENT_CHECKPOINT})...")
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model=_SENTIMENT_CHECKPOINT,
    tokenizer=_SENTIMENT_CHECKPOINT,
    **_TRUNCATION_KWARGS,
)

print(f"Loading toxicity model ({_TOXICITY_CHECKPOINT})...")
toxicity_pipeline = pipeline(
    "text-classification",
    model=_TOXICITY_CHECKPOINT,
    tokenizer=_TOXICITY_CHECKPOINT,
    **_TRUNCATION_KWARGS,
)
# ── Keyword extraction (simple TF-based, no external API) ─────────────────────
import re
from collections import Counter

# Common Turkish function words that carry no topical meaning.
STOPWORDS_TR = {
    "bir", "bu", "ve", "ile", "da", "de", "mi", "mu", "mü", "mı", "ki",
    "ne", "için", "ama", "fakat", "çok", "daha", "en", "gibi", "kadar",
    "ben", "sen", "o", "biz", "siz", "onlar", "şu", "her",
    "hiç", "bazı", "tüm", "bütün", "var", "yok", "olan", "olarak",
}

# Common English function words that carry no topical meaning.
STOPWORDS_EN = {
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "are", "was", "were", "be", "been",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "this", "that", "these", "those", "it", "its",
    "i", "you", "he", "she", "we", "they", "not", "no", "so", "as", "if",
}

STOPWORDS = STOPWORDS_TR | STOPWORDS_EN

# Words of three or more ASCII/Turkish letters. Only lowercase letters are
# listed because the text is lowercased before matching.
_WORD_RE = re.compile(r"\b[a-zçğışöü]{3,}\b")


def extract_keywords(text: str, top_n: int = 5) -> str:
    """Return the most frequent non-stop-words of *text* as a comma-separated string.

    Args:
        text: Free text (Turkish or English).
        top_n: Maximum number of keywords to return.

    Returns:
        Up to ``top_n`` lowercase words joined by ``", "``, most frequent
        first (ties keep first-seen order), or ``""`` when no candidate word
        survives the stop-word filter.
    """
    # str.lower() turns "İ" into "i" + U+0307 (combining dot), which would split
    # the word at the combining mark; map it to a plain "i" first.
    normalized = text.replace("İ", "i").lower()
    candidates = [w for w in _WORD_RE.findall(normalized) if w not in STOPWORDS]
    if not candidates:
        return ""
    return ", ".join(word for word, _ in Counter(candidates).most_common(top_n))
# ── Core analysis ─────────────────────────────────────────────────────────────
def analyze_text(text: str, *, toxicity_threshold: float = 0.5) -> dict:
    """Run sentiment, toxicity and keyword analysis on a single text.

    Args:
        text: The text to analyze; surrounding whitespace is ignored.
        toxicity_threshold: Minimum score of the top toxicity label for the
            text to be reported as toxic.

    Returns:
        A dict with the keys ``Text`` (truncated to 120 characters),
        ``Sentiment``, ``Sentiment Score``, ``Toxicity``, ``Toxicity Score``
        and ``Keywords``; an empty dict when *text* is empty or blank.
    """
    text = (text or "").strip()
    if not text:
        return {}

    # Sentiment: the pipeline returns one {"label", "score"} dict per input.
    sent_result = sentiment_pipeline(text)[0]
    sentiment_label = sent_result["label"]  # e.g. "positive" / "negative"
    sentiment_score = round(sent_result["score"], 4)

    # Toxicity.
    # NOTE(review): per its model card, unitary/toxic-bert is a multi-label
    # model whose labels (toxic, severe_toxic, obscene, threat, insult,
    # identity_hate) are all toxicity categories — there is no "safe" label,
    # so the top label is reported even for benign text (with a tiny score).
    # The decision therefore has to come from the score, not the label name.
    tox_result = toxicity_pipeline(text)[0]
    is_toxic = tox_result["score"] >= toxicity_threshold
    toxicity_score = round(tox_result["score"], 4)
    toxicity_label = "Toxic 🚨" if is_toxic else "Safe ✅"

    # Keywords
    keywords = extract_keywords(text)

    preview = text[:120] + ("…" if len(text) > 120 else "")
    return {
        "Text": preview,
        "Sentiment": sentiment_label.capitalize(),
        "Sentiment Score": sentiment_score,
        "Toxicity": toxicity_label,
        "Toxicity Score": toxicity_score,
        "Keywords": keywords,
    }
| # ββ Gradio handlers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def run_analysis(text_input: str, csv_file): | |
| rows = [] | |
| # CSV path takes priority; fall back to text box | |
| if csv_file is not None: | |
| try: | |
| df_in = pd.read_csv(csv_file.name if hasattr(csv_file, "name") else csv_file) | |
| except Exception as e: | |
| return pd.DataFrame([{"Error": f"Could not read CSV: {e}"}]), None | |
| # Use first column that looks like text | |
| text_col = df_in.columns[0] | |
| for col in df_in.columns: | |
| if df_in[col].dtype == object: | |
| text_col = col | |
| break | |
| for _, row in df_in.iterrows(): | |
| cell = str(row[text_col]).strip() | |
| if cell and cell.lower() != "nan": | |
| result = analyze_text(cell) | |
| if result: | |
| rows.append(result) | |
| elif text_input and text_input.strip(): | |
| result = analyze_text(text_input.strip()) | |
| if result: | |
| rows.append(result) | |
| else: | |
| return pd.DataFrame([{"Info": "Please enter text or upload a CSV file."}]), None | |
| if not rows: | |
| return pd.DataFrame([{"Info": "No valid text found to analyze."}]), None | |
| df_out = pd.DataFrame(rows) | |
| # Save CSV for download | |
| tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8") | |
| df_out.to_csv(tmp.name, index=False) | |
| tmp.close() | |
| return df_out, tmp.name | |
# ── UI ────────────────────────────────────────────────────────────────────────
# Dark theme stylesheet passed to gr.Blocks(css=...).
CSS = """
/* ── Palette ─────────────────────────────── */
:root {
    --bg: #0d0f14;
    --surface: #161a22;
    --border: #252a35;
    --accent: #5b8dee;
    --accent2: #e05b8d;
    --text: #e8ecf4;
    --muted: #7a8399;
    --safe: #3ecf78;
    --toxic: #ff4f64;
    --font-head: 'Space Mono', monospace;
    --font-body: 'DM Sans', sans-serif;
    --radius: 10px;
}
/* ── Reset & base ─────────────────────────── */
body, .gradio-container {
    background: var(--bg) !important;
    color: var(--text) !important;
    font-family: var(--font-body) !important;
}
/* ── Header ───────────────────────────────── */
#app-header {
    text-align: center;
    padding: 36px 20px 20px;
    background: linear-gradient(135deg, #0d0f14 0%, #161a22 100%);
    border-bottom: 1px solid var(--border);
    margin-bottom: 28px;
}
#app-header h1 {
    font-family: var(--font-head);
    font-size: clamp(1.4rem, 4vw, 2.2rem);
    letter-spacing: -0.5px;
    background: linear-gradient(90deg, var(--accent), var(--accent2));
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    margin: 0 0 8px;
}
#app-header p {
    color: var(--muted);
    font-size: 0.9rem;
    margin: 0;
}
/* ── Panels ───────────────────────────────── */
.gr-group, .gr-box, .gr-form {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: var(--radius) !important;
}
/* ── Labels ───────────────────────────────── */
label span, .gr-form label {
    color: var(--muted) !important;
    font-size: 0.78rem !important;
    letter-spacing: 0.06em !important;
    text-transform: uppercase !important;
    font-family: var(--font-head) !important;
}
/* ── Textbox ──────────────────────────────── */
textarea, input[type="text"] {
    background: #1c2130 !important;
    border: 1px solid var(--border) !important;
    border-radius: 8px !important;
    color: var(--text) !important;
    font-family: var(--font-body) !important;
    font-size: 0.95rem !important;
}
textarea:focus, input[type="text"]:focus {
    border-color: var(--accent) !important;
    box-shadow: 0 0 0 3px rgba(91,141,238,0.15) !important;
}
/* ── Buttons ──────────────────────────────── */
button.primary {
    background: linear-gradient(135deg, var(--accent), #3d6dcf) !important;
    border: none !important;
    border-radius: 8px !important;
    color: #fff !important;
    font-family: var(--font-head) !important;
    font-size: 0.85rem !important;
    letter-spacing: 0.08em !important;
    padding: 10px 28px !important;
    cursor: pointer !important;
    transition: opacity 0.2s, transform 0.1s !important;
}
button.primary:hover { opacity: 0.88 !important; transform: translateY(-1px) !important; }
button.primary:active { transform: translateY(0) !important; }
button.secondary {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 8px !important;
    color: var(--muted) !important;
    font-family: var(--font-head) !important;
    font-size: 0.78rem !important;
}
/* ── Dataframe ────────────────────────────── */
.gr-dataframe, .dataframe-container {
    background: var(--surface) !important;
    border-radius: var(--radius) !important;
    overflow: hidden !important;
}
table { width: 100%; border-collapse: collapse; }
thead tr { background: #1c2130 !important; }
thead th {
    color: var(--accent) !important;
    font-family: var(--font-head) !important;
    font-size: 0.72rem !important;
    letter-spacing: 0.07em !important;
    text-transform: uppercase !important;
    padding: 10px 14px !important;
    border-bottom: 1px solid var(--border) !important;
}
tbody tr:nth-child(even) { background: rgba(255,255,255,0.02) !important; }
tbody tr:hover { background: rgba(91,141,238,0.06) !important; }
tbody td {
    color: var(--text) !important;
    font-size: 0.87rem !important;
    padding: 9px 14px !important;
    border-bottom: 1px solid var(--border) !important;
}
/* ── File upload ──────────────────────────── */
.gr-file-upload {
    border: 2px dashed var(--border) !important;
    border-radius: var(--radius) !important;
    background: #13171f !important;
}
.gr-file-upload:hover { border-color: var(--accent) !important; }
/* ── Download file ────────────────────────── */
.gr-file { background: var(--surface) !important; border-color: var(--border) !important; }
/* ── Divider ──────────────────────────────── */
hr { border-color: var(--border) !important; margin: 20px 0 !important; }
/* ── Scrollbar ────────────────────────────── */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: var(--bg); }
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
"""
# Page header markup rendered via gr.HTML: loads the two web fonts referenced
# by the stylesheet and shows the app title and tagline.
HEADER_HTML = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;600&display=swap" rel="stylesheet">
<div id="app-header">
    <h1>⚡ Social Media Analytics</h1>
    <p>Sentiment · Toxicity · Keywords | Turkish &amp; English supported</p>
</div>
"""
# Page layout: inputs on the left, results table and CSV download on the right.
with gr.Blocks(css=CSS, title="Social Media Analytics") as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Single Text Analysis",
                placeholder="Paste a tweet, comment, or any social media post here…",
                lines=5,
            )
            csv_file = gr.File(
                label="Bulk CSV Upload (first text column is used)",
                file_types=[".csv"],
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary")

        with gr.Column(scale=2):
            result_table = gr.Dataframe(
                label="Results",
                interactive=False,
                wrap=True,
            )
            download_btn = gr.File(label="⬇ Download CSV", interactive=False)

    # One click analyzes the CSV (if uploaded) or the text box and fills both
    # the results table and the download slot.
    analyze_btn.click(
        fn=run_analysis,
        inputs=[text_input, csv_file],
        outputs=[result_table, download_btn],
    )

    gr.HTML("""
    <div style="text-align:center;padding:20px 0 10px;color:#4a5268;font-size:0.78rem;font-family:'Space Mono',monospace;">
    Models: savasy/bert-base-turkish-sentiment-cased · unitary/toxic-bert
    </div>
    """)

if __name__ == "__main__":
    demo.launch()