Spaces:

Sigdev
/

comment_validator

Paused

App Files Files Community

aurelien committed on Oct 31, 2025

Commit

845c5fd

1 Parent(s): f63cd93

Edit script for GPU

Browse files

Files changed (1) hide show

app.py +78 -4

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ import numpy as np
 from sentence_transformers import SentenceTransformer
 from transformers import pipeline
 import torch
-from validate_comment_sentiment_tags import analyze_comment  # ton code ci-dessus, tu peux aussi le copier ici
 app = FastAPI(title="Comment Validator API")
@@ -15,14 +14,89 @@ app = FastAPI(title="Comment Validator API")
 # 🔹 Chargement des modèles
 # =====================================
-device = "mps" if torch.backends.mps.is_available() else "cpu"
 print(f"🧠 Using device: {device}")
 text_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2", device=device)
 clf = joblib.load("models/classifier.joblib")
 encoder = joblib.load("models/encoder.joblib")
-sentiment_analyzer = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment", device=-1)
-toxicity_analyzer = pipeline("text-classification", model="unitary/toxic-bert", return_all_scores=True, device=-1)
 # =====================================
 # 🔸 Modèles de requête/réponse

 from sentence_transformers import SentenceTransformer
 from transformers import pipeline
 import torch
 app = FastAPI(title="Comment Validator API")
 # 🔹 Chargement des modèles
 # =====================================
+# Pick the compute device: CUDA when available, otherwise Apple MPS,
+# otherwise fall back to the CPU.
+if torch.cuda.is_available():
+    device = "cuda"
+elif torch.backends.mps.is_available():
+    device = "mps"  # for your local Mac
+else:
+    device = "cpu"
 print(f"🧠 Using device: {device}")
+# Sentence-embedding model used to vectorize the comment text.
+print("Loading model embedding")
 text_model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2", device=device)
+# NOTE(review): joblib.load unpickles the file; only load trusted artifacts.
+print("Loading model classifier")
 clf = joblib.load("models/classifier.joblib")
+print("Loading model encoder")
 encoder = joblib.load("models/encoder.joblib")
+# Both Hugging Face pipelines are placed on the same device selected above.
+print("Loading model sentiment-analysis")
+sentiment_analyzer = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment", device=device)
+# NOTE(review): return_all_scores appears to be deprecated in recent
+# transformers releases in favour of top_k=None — confirm before upgrading,
+# since the output nesting differs.
+print("Loading model toxicity")
+toxicity_analyzer = pipeline("text-classification", model="unitary/toxic-bert", return_all_scores=True, device=device)
+def analyze_comment(comment: str, category: str, country: str) -> dict:
+    """Score a comment for validity, sentiment and toxicity.
+
+    Args:
+        comment: Raw comment text.
+        category: Category value handed to the fitted ``encoder``.
+        country: Country value handed to the fitted ``encoder``.
+
+    Returns:
+        A dict with ``is_valid``, ``confidence``, ``sentiment`` (the label
+        returned by the sentiment pipeline, or ``"unknown"`` on failure),
+        ``sentiment_score`` (the pipeline's confidence for that label, not
+        the internal -1/0/1 polarity), ``reasons`` (a ``"; "``-joined
+        string) and one ``tag_<label>`` entry per toxicity label.
+    """
+    reasons = []
+
+    # --- Sentiment analysis ---
+    # comment[:512] truncates by characters, not by tokens.
+    try:
+        sentiment = sentiment_analyzer(comment[:512])[0]
+        label = sentiment["label"]
+        score = sentiment["score"]
+    except Exception as exc:
+        # Best effort: keep serving the request, but do not hide the failure.
+        print(f"Sentiment analysis failed: {exc}")
+        label, score = "unknown", 0.0
+
+    # Map the label to a coarse polarity by looking for the digits it contains.
+    if "1" in label or "2" in label:
+        sentiment_score = -1
+        reasons.append("Le ton semble négatif ou insatisfait.")
+    elif "4" in label or "5" in label:
+        sentiment_score = 1
+    else:
+        sentiment_score = 0
+
+    # --- Text embedding ---
+    X_text = text_model.encode([comment])
+
+    # --- Category/country encoding ---
+    df_cat = pd.DataFrame([[category, country]], columns=["category", "country"])
+    try:
+        X_cat = encoder.transform(df_cat)
+    except ValueError:
+        reasons.append(f"Catégorie ou pays inconnus : {category}, {country}")
+        # Fall back to an all-zero row, one column per known category value.
+        # NOTE(review): assumes the encoder emits one column per entry of
+        # categories_ — confirm against how the encoder was fitted.
+        n_features = sum(len(cats) for cats in encoder.categories_)
+        X_cat = np.zeros((1, n_features))
+
+    # --- Feature concatenation ---
+    X = np.concatenate([X_text, X_cat], axis=1)
+
+    # --- Validity prediction ---
+    proba = clf.predict_proba(X)[0][1]
+    prediction = proba >= 0.5
+
+    if len(comment.split()) < 3:
+        reasons.append("Le commentaire est trop court.")
+    # NOTE(review): this adds a second negative-tone reason on top of the one
+    # appended above for the same condition; kept to preserve the output.
+    if sentiment_score < 0:
+        reasons.append("Le ton global est négatif.")
+    if proba < 0.4:
+        reasons.append("Le modèle estime une faible probabilité de validité.")
+
+    # --- Toxicity analysis ---
+    try:
+        tox_scores = toxicity_analyzer(comment[:512])[0]  # truncated as a safety measure
+        tags = {f"tag_{item['label']}": round(item['score'], 3) for item in tox_scores}
+    except Exception as exc:
+        print(f"Toxicity analysis failed: {exc}")
+        # Use the label names emitted by unitary/toxic-bert so the response
+        # keeps the same keys whether or not the pipeline succeeded.
+        fallback_labels = ("toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate")
+        tags = {f"tag_{name}": 0.0 for name in fallback_labels}
+
+    # --- Final result ---
+    result = {
+        "is_valid": bool(prediction),
+        "confidence": round(float(proba), 3),
+        "sentiment": label,
+        "sentiment_score": round(float(score), 3),
+        "reasons": "; ".join(reasons) if reasons else "Aucune anomalie détectée."
+    }
+    result.update(tags)
+    return result
 # =====================================
 # 🔸 Modèles de requête/réponse