import hashlib
import html
import os
import tempfile

import freesound
import gradio as gr
import joblib
import numpy as np
import opensmile
import pandas as pd
import soundfile as sf
import xgboost as xgb
from pydub import AudioSegment

# gensim is optional: when it cannot be imported the description embedding
# falls back to a zero vector (see the `glove_model` handling further down).
try:
    import gensim.downloader as api
    _GENSIM_OK = True
except Exception:
    _GENSIM_OK = False


# Accepted clip durations in seconds. A clip is routed to the "effect" models
# when it falls in the first range and to the "music" models in the second.
MIN_EFFECT, MAX_EFFECT = 0.5, 3.0
MIN_MUSIC, MAX_MUSIC = 10.0, 60.0
# Sample rate (Hz) uploads are converted to before openSMILE extraction.
SR_TARGET = 16000


def load_artifact(*candidate_paths: str):
    """Load a joblib/pickle artifact from the first candidate path that exists.

    Args:
        *candidate_paths: Paths to try, in order. Empty or ``None`` entries
            are ignored.

    Returns:
        Whatever object ``joblib.load`` returns for the first existing path.

    Raises:
        FileNotFoundError: If none of the candidates exists; the message lists
            every path that was tried.
    """
    candidates = [path for path in candidate_paths if path]
    for path in candidates:
        if os.path.exists(path):
            # NOTE(review): joblib.load unpickles arbitrary objects, so these
            # files must come from a trusted source.
            return joblib.load(path)

    tried = "\n".join(f"- {path}" for path in candidates)
    raise FileNotFoundError(
        "Artifact introuvable. J'ai essayé :\n" + (tried or "(aucun chemin)")
    )


# Stylesheet for the result/error cards rendered by html_result()/html_error()
# and for the page header; passed to gr.Blocks(css=...).
CSS = """
.card {
  border: 1px solid #e5e7eb;
  background: #ffffff;
  padding: 16px;
  border-radius: 16px;
}
.card-error{
  border-color: #fca5a5;
  background: #fff1f2;
}
.card-title{
  font-weight: 950;
  margin-bottom: 8px;
}
.badges{
  display:flex;
  gap:10px;
  flex-wrap:wrap;
  margin-bottom:12px;
}
.badge{
  padding:6px 10px;
  border-radius:999px;
  font-weight:900;
  font-size: 13px;
  border: 1px solid #e5e7eb;
}
.badge-type{ background:#eef2ff; color:#3730a3;}
.badge-time{ background:#ecfeff; color:#155e75;}

.grid{
  display:grid;
  grid-template-columns: 1fr;
  gap:10px;
}
.box{
  border:1px solid #e5e7eb;
  border-radius:14px;
  padding:12px;
  background:#fafafa;
}
.box-title{ font-weight:900; margin-bottom:4px; }
.box-value{ font-size:18px; font-weight:800; }

.hint{
  margin-top:10px;
  color:#6b7280;
  font-size:12px;
}

#header-title { font-size: 28px; font-weight: 950; margin-bottom: 6px; }
#header-sub { color:#6b7280; margin-top:0px; line-height:1.45; }
"""


def html_error(title, body_html):
    """Render an error card as an HTML string.

    Args:
        title: Short heading shown after the cross mark.
        body_html: Body of the card. It is inserted as-is (not escaped), so
            callers must escape any untrusted text they embed in it.

    Returns:
        The HTML snippet, stripped of leading/trailing whitespace.
    """
    return f"""
<div class="card card-error">
  <div class="card-title">❌ {title}</div>
  <div>{body_html}</div>
</div>
""".strip()


def html_result(badge_text, duration, rating_text, downloads_text, extra_html=""):
    """Render a prediction card as an HTML string.

    Args:
        badge_text: Label of the content-type badge.
        duration: Clip duration in seconds; shown with two decimals.
        rating_text: Text for the average-rating box.
        downloads_text: Text for the downloads box.
        extra_html: Optional HTML inserted (unescaped) below the two boxes.

    Returns:
        The HTML snippet, stripped of leading/trailing whitespace.
    """
    return f"""
<div class="card">
  <div class="badges">
    <span class="badge badge-type">{badge_text}</span>
    <span class="badge badge-time">⏱️ {duration:.2f} s</span>
  </div>

  <div class="grid">
    <div class="box">
      <div class="box-title">📈 Popularité de la note moyenne</div>
      <div class="box-value">{rating_text}</div>
    </div>
    <div class="box">
      <div class="box-title">⬇️ Popularité des téléchargements</div>
      <div class="box-value">{downloads_text}</div>
    </div>
  </div>

  {extra_html}

  <div class="hint">
    Résultats affichés en <b>niveaux</b> (faible / moyen / élevé), pas en valeurs exactes.
  </div>
</div>
""".strip()


def normalize_avg_rating_label_fr(label) -> str:
    """Map any avg_rating label (English/French/variants) to a stable French label.

    Matching is done by case-insensitive substring, in this priority order:
    missing, high, medium, low. Anything unrecognised is reported as missing.

    Returns:
        One of "Informations manquantes", "Faible", "Moyen", "Élevé".
    """
    missing = "Informations manquantes"
    if label is None:
        return missing

    s = str(label).strip().lower()

    # "miss" also covers "missing"/"MissedInfo"; "no" also covers "none".
    if any(marker in s for marker in ("miss", "no", "nan")):
        return missing
    if "info" in s and "manq" in s:
        return missing

    if any(marker in s for marker in ("high", "élev", "eleve")):
        return "Élevé"
    if any(marker in s for marker in ("medium", "moy")):
        return "Moyen"
    if any(marker in s for marker in ("low", "faibl")):
        return "Faible"

    return missing


def avg_fr_to_class(avg_fr: str) -> int:
    """Convert a French rating label into the 0..3 class used by interpret_results().

    Returns:
        0 for missing (or unrecognised), 1 for low, 2 for medium, 3 for high.
    """
    s = str(avg_fr).strip().lower()
    # Checked in order; the first matching fragment wins.
    fragments = (
        (("manqu",), 0),
        (("faibl",), 1),
        (("moy",), 2),
        (("élev", "eleve"), 3),
    )
    for needles, klass in fragments:
        if any(needle in s for needle in needles):
            return klass
    return 0


def interpret_results(avg_class: int, dl_class: int) -> str:
    """Build the HTML interpretation sentence for a pair of predicted classes.

    Args:
        avg_class: 0=missing info, 1=low, 2=medium, 3=high.
        dl_class: 0=low, 1=medium, 2=high.

    Returns:
        An HTML fragment. When ``avg_class`` is 0 a fixed "no evaluation"
        message is returned; any combination not listed below falls back to
        the "très faible" wording.
    """
    if avg_class == 0:
        return (
            "ℹ️ <b>Interprétation</b> :<br>"
            "Aucune évaluation possible (rating manquant)."
        )

    # (avg_class, dl_class) -> (potential, detail)
    verdicts = {
        (3, 2): ("très fort", "contenu de haute qualité et très populaire."),
        (3, 1): ("fort", "contenu bien apprécié, en croissance."),
        (3, 0): ("prometteur", "bonne qualité mais faible visibilité (peut gagner en popularité)."),
        (2, 2): ("modéré à fort", "populaire mais qualité perçue moyenne."),
        (2, 1): ("modéré", "profil standard, popularité stable."),
        (2, 0): ("limité", "engagement faible, diffusion limitée."),
        (1, 2): ("contradictoire", "très téléchargé mais peu apprécié (usage pratique possible)."),
        (1, 1): ("faible", "peu attractif pour les utilisateurs."),
    }
    potentiel, detail = verdicts.get(
        (avg_class, dl_class), ("très faible", "faible intérêt global.")
    )

    return (
        "<b>Interprétation</b> :<br>"
        f"Potentiel estimé : <b>{potentiel}</b> — {detail}"
    )


def avg_label_to_class(avg_label: str) -> int:
    """Convert a text label (e.g. from a LabelEncoder) into a 0..3 class.

    Matching is by case-insensitive substring, in this priority order:
    missing, high, medium, low.

    Returns:
        0 for missing (or unrecognised), 1 for low, 2 for medium, 3 for high.
    """
    if avg_label is None:
        return 0
    s = str(avg_label).strip().lower()
    # "miss" also covers "missing"; "no" also covers "none".
    rules = (
        (("miss", "no"), 0),
        (("high", "élev", "eleve"), 3),
        (("medium", "moy"), 2),
        (("low", "faibl"), 1),
    )
    for needles, klass in rules:
        if any(needle in s for needle in needles):
            return klass
    return 0


# Freesound API client. The token is read from the environment; when it is
# absent the client is still created but left unauthenticated, and the URL
# tabs report the missing token instead of calling the API.
API_TOKEN = os.getenv("FREESOUND_TOKEN", "").strip()
fs_client = freesound.FreesoundClient()
if API_TOKEN:
    fs_client.set_token(API_TOKEN, "token")


# Models used by the "upload" tab, one per content type.
MODEL_EFFECT = load_artifact("xgb_model_EffectSound.pkl")
MODEL_MUSIC = load_artifact("xgb_model_Music.pkl")

# Display text for the predicted classes of the upload tab.
RATING_DISPLAY_AUDIO = {
    0: "❌ Informations manquantes",
    1: "⭐ Faible",
    2: "⭐⭐ Moyen",
    3: "⭐⭐⭐ Élevé",
}
DOWNLOADS_DISPLAY_AUDIO = {
    0: "⭐ Faible",
    1: "⭐⭐ Moyen",
    2: "⭐⭐⭐ Élevé",
}

# openSMILE extractor: eGeMAPSv02 functionals computed over the whole file.
SMILE = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)


def get_duration_seconds(filepath):
    """Return the duration of an audio file in seconds.

    ``.mp3`` files are decoded with pydub. Every other extension is first
    opened with soundfile; if soundfile cannot read the file, pydub is tried
    as a fallback so that formats only pydub can decode are still measured
    (the conversion step in to_wav_16k_mono() uses pydub as well).

    Raises:
        Whatever pydub raises when it cannot decode the file either.
    """
    ext = os.path.splitext(filepath)[1].lower()
    if ext != ".mp3":
        try:
            with sf.SoundFile(filepath) as f:
                return len(f) / f.samplerate
        except RuntimeError:
            # soundfile reports unreadable/unsupported files through
            # RuntimeError subclasses; fall through to pydub.
            pass

    audio = AudioSegment.from_file(filepath)
    # len() of an AudioSegment is its length in milliseconds.
    return len(audio) / 1000.0


def to_wav_16k_mono(filepath):
    """Return the path of a mono WAV at SR_TARGET Hz equivalent to ``filepath``.

    If the input is already a mono WAV at the target rate, ``filepath`` itself
    is returned. Otherwise the audio is converted with pydub into a new
    temporary ``.wav`` file and that path is returned; the caller owns the
    temporary file and is responsible for deleting it.
    """
    ext = os.path.splitext(filepath)[1].lower()
    if ext == ".wav":
        try:
            with sf.SoundFile(filepath) as f:
                if f.samplerate == SR_TARGET and f.channels == 1:
                    return filepath
        except RuntimeError:
            # Best effort: if soundfile cannot inspect the WAV, let pydub
            # attempt the conversion below instead of failing here.
            pass

    audio = AudioSegment.from_file(filepath)
    audio = audio.set_channels(1).set_frame_rate(SR_TARGET)

    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        audio.export(tmp.name, format="wav")
    except Exception:
        # Do not leave an orphan temp file behind when the export fails.
        try:
            os.remove(tmp.name)
        except OSError:
            pass
        raise
    return tmp.name


def extract_opensmile_features(filepath):
    """Extract openSMILE features for one audio file.

    The file is first normalised with to_wav_16k_mono(). Only numeric columns
    of the openSMILE output are kept and the index is reset.

    Returns:
        A DataFrame of numeric features with a fresh integer index.
    """
    wav_path = to_wav_16k_mono(filepath)
    try:
        feats = SMILE.process_file(wav_path)
    finally:
        # to_wav_16k_mono() returns either the input path or a temp file it
        # created; remove the latter so uploads do not accumulate on disk.
        if wav_path != filepath:
            try:
                os.remove(wav_path)
            except OSError:
                pass
    feats = feats.select_dtypes(include=[np.number]).reset_index(drop=True)
    return feats


def predict_upload_with_dmatrix(model, X_df: pd.DataFrame):
    """Predict through the underlying Booster with an explicitly named DMatrix.

    Going through ``Booster`` + ``DMatrix(feature_names=...)`` avoids the
    "data did not contain feature names" error.

    Args:
        model: Either a wrapper exposing ``estimators_`` (one estimator per
            target) or a single model/Booster.
        X_df: Feature rows; column names are used as feature names.

    Returns:
        With ``estimators_``: array of shape (n_rows, n_outputs), one column
        per estimator. Without: the raw booster output reshaped to (1, -1).
    """
    n_rows = X_df.shape[0]
    # The DMatrix does not depend on the estimator, so build it once.
    dm = xgb.DMatrix(X_df.values, feature_names=list(X_df.columns))

    def _raw_predict(estimator):
        booster = estimator.get_booster() if hasattr(estimator, "get_booster") else estimator
        return np.asarray(booster.predict(dm))

    if hasattr(model, "estimators_"):
        columns = []
        for est in model.estimators_:
            raw = _raw_predict(est).reshape(n_rows, -1)
            if raw.shape[1] > 1:
                # Several values per row from a single-target estimator means
                # per-class scores (e.g. multi:softprob): keep the best class.
                columns.append(raw.argmax(axis=1))
            else:
                columns.append(raw[:, 0])
        return np.column_stack(columns)

    return _raw_predict(model).reshape(1, -1)


def predict_from_uploaded_audio(audio_file):
    """Handle the "upload" tab: predict rating/download classes for a file.

    Args:
        audio_file: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        An HTML string: a result card on success, an error card otherwise.
        Exception messages are HTML-escaped before being displayed because
        they can contain text derived from the uploaded file.
    """
    if audio_file is None:
        return html_error("Aucun fichier", "Veuillez importer un fichier audio (wav, mp3, flac…).")

    def _detail(exc):
        return f"Détail : <code>{html.escape(str(exc))}</code>"

    # 1) Duration
    try:
        duration = get_duration_seconds(audio_file)
    except Exception as e:
        return html_error(
            "Audio illisible",
            f"Impossible de lire l'audio.<br>{_detail(e)}",
        )

    # 2) Duration must fall in one of the two supported ranges.
    ranges_html = (
        f"Durée détectée : <b>{duration:.2f} s</b><br><br>"
        f"Plages acceptées :<br>"
        f"• Effet sonore : <b>{MIN_EFFECT}–{MAX_EFFECT} s</b><br>"
        f"• Musique : <b>{MIN_MUSIC}–{MAX_MUSIC} s</b>"
    )
    if duration < MIN_EFFECT:
        return html_error("Audio trop court", ranges_html)
    if (MAX_EFFECT < duration < MIN_MUSIC) or duration > MAX_MUSIC:
        return html_error("Audio hors plage", ranges_html)

    # 3) Pick the model matching the content type.
    if duration <= MAX_EFFECT:
        badge = "🔊 Effet sonore (upload)"
        model = MODEL_EFFECT
    else:
        badge = "🎵 Musique (upload)"
        model = MODEL_MUSIC

    # 4) Feature extraction
    try:
        X = extract_opensmile_features(audio_file)
    except Exception as e:
        return html_error("Extraction openSMILE échouée", _detail(e))

    # 5) Align columns with what the model was trained on (missing -> 0).
    try:
        if hasattr(model, "estimators_"):
            expected = model.estimators_[0].feature_names_in_
        else:
            expected = model.feature_names_in_
        X = X.reindex(columns=list(expected), fill_value=0)
    except Exception as e:
        return html_error("Alignement des features échoué", _detail(e))

    # 6) Prediction; reading the two classes is guarded too, so an output of
    #    unexpected shape is reported instead of crashing the handler.
    try:
        y = np.array(predict_upload_with_dmatrix(model, X))
        avg_class = int(y[0, 0])
        dl_class = int(y[0, 1])
    except Exception as e:
        return html_error("Prédiction échouée", _detail(e))

    rating_text = RATING_DISPLAY_AUDIO.get(avg_class, "Inconnu")
    downloads_text = DOWNLOADS_DISPLAY_AUDIO.get(dl_class, "Inconnu")

    conclusion = interpret_results(avg_class, dl_class)
    extra = f"""
<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
  {conclusion}
</div>
"""
    return html_result(badge, duration, rating_text, downloads_text, extra_html=extra)


# Artifacts for the "URL -> Freesound API features" tab: per content type,
# a downloads model and a rating model, each with its feature list, plus the
# label encoder of the rating model.
xgb_music_num = load_artifact("xgb_num_downloads_music_model.pkl")
xgb_music_feat_num = load_artifact("xgb_num_downloads_music_features.pkl")
xgb_music_avg = load_artifact("xgb_avg_rating_music_model.pkl")
xgb_music_feat_avg = load_artifact("xgb_avg_rating_music_features.pkl")
le_music_avg = load_artifact("xgb_avg_rating_music_label_encoder.pkl")

xgb_effect_num = load_artifact("xgb_num_downloads_effectsound_model.pkl")
xgb_effect_feat_num = load_artifact("xgb_num_downloads_effectsound_features.pkl")
xgb_effect_avg = load_artifact("xgb_avg_rating_effectsound_model.pkl")
xgb_effect_feat_avg = load_artifact("xgb_avg_rating_effectsound_features.pkl")
le_effect_avg = load_artifact("xgb_avg_rating_effectsound_label_encoder.pkl")

# Display text for the predicted downloads class.
NUM_DOWNLOADS_MAP_FR = {0: "Faible", 1: "Moyen", 2: "Élevé"}


def safe_float(v):
    """Convert ``v`` to float, returning 0.0 when the conversion is not possible."""
    try:
        return float(v)
    except (TypeError, ValueError, OverflowError):
        return 0.0


def predict_with_model_fs(model, features_dict, feat_list, label_encoder=None):
    """Predict one class from a dict of Freesound API features.

    Args:
        model: Fitted model exposing ``get_booster()``.
        features_dict: Mapping feature name -> value. Missing, ``None``,
            list/dict or non-numeric values are replaced by 0.
        feat_list: Ordered feature names the model expects.
        label_encoder: Optional encoder; when given, the predicted class index
            is converted back with ``inverse_transform``.

    Returns:
        The decoded label when ``label_encoder`` is given, else the class
        index as ``int``.
    """
    feature_names = list(feat_list)
    row = []
    for col in feature_names:
        val = features_dict.get(col, 0)
        if val is None or isinstance(val, (list, dict)):
            val = 0
        row.append(safe_float(val))

    X = pd.DataFrame([row], columns=feature_names)
    dmatrix = xgb.DMatrix(X.values, feature_names=feature_names)

    first_row = np.asarray(model.get_booster().predict(dmatrix)).reshape(1, -1)[0]
    if first_row.size > 1:
        # Per-class scores (e.g. multi:softprob): int() on the vector would
        # raise, so take the best-scoring class instead.
        pred_int = int(np.argmax(first_row))
    else:
        pred_int = int(first_row[0])

    if label_encoder is not None:
        return label_encoder.inverse_transform([pred_int])[0]
    return pred_int


def predict_from_freesound_url(url: str):
    """Handle the "URL -> API features" tab.

    The sound id is taken from the last path segment of ``url``; the features
    required by the models are requested from the Freesound search endpoint,
    and the effect or music models are chosen from the reported duration.

    Returns:
        An HTML string: a result card on success, an error card otherwise.
    """
    if not API_TOKEN:
        return html_error(
            "Token FreeSound manquant",
            "Ajoute la variable d’environnement <code>FREESOUND_TOKEN</code> pour activer cet onglet."
        )

    if not url or not url.strip():
        return html_error("URL vide", "Collez une URL FreeSound du type <code>https://freesound.org/s/123456/</code>")

    # Extract the id; surrounding whitespace, a query string or a fragment
    # must not prevent the last path segment from being read.
    try:
        path_part = url.strip().split("?", 1)[0].split("#", 1)[0]
        sound_id = int(path_part.rstrip("/").rsplit("/", 1)[-1])
    except ValueError:
        return html_error("URL invalide", "Impossible d'extraire l'ID depuis l'URL.")

    # Request every feature any of the four models needs, each name once.
    wanted = dict.fromkeys(
        ["duration"]
        + list(xgb_music_feat_num) + list(xgb_music_feat_avg)
        + list(xgb_effect_feat_num) + list(xgb_effect_feat_avg)
    )
    fields = ",".join(str(name) for name in wanted)

    try:
        results = fs_client.search(query="", filter=f"id:{sound_id}", fields=fields)
    except Exception as e:
        return html_error("Erreur API FreeSound", f"Détail : <code>{html.escape(str(e))}</code>")

    if len(results.results) == 0:
        return html_error("Son introuvable", "Aucun résultat pour cet ID.")

    sound = results.results[0]
    duration = safe_float(sound.get("duration", 0))

    # Choose the model set from the duration.
    if MIN_EFFECT <= duration <= MAX_EFFECT:
        badge = "🔊 Effet sonore (URL → features API)"
        dl_model, dl_feats = xgb_effect_num, xgb_effect_feat_num
        avg_model, avg_feats, avg_encoder = xgb_effect_avg, xgb_effect_feat_avg, le_effect_avg
    elif MIN_MUSIC <= duration <= MAX_MUSIC:
        badge = "🎵 Musique (URL → features API)"
        dl_model, dl_feats = xgb_music_num, xgb_music_feat_num
        avg_model, avg_feats, avg_encoder = xgb_music_avg, xgb_music_feat_avg, le_music_avg
    else:
        return html_error(
            "Durée non supportée",
            f"Durée détectée : <b>{duration:.2f} s</b><br><br>"
            f"Plages acceptées :<br>"
            f"• Effet sonore : <b>{MIN_EFFECT}–{MAX_EFFECT} s</b><br>"
            f"• Musique : <b>{MIN_MUSIC}–{MAX_MUSIC} s</b>"
        )

    # Report model failures as an error card, like the upload tab does.
    try:
        dl_class = int(predict_with_model_fs(dl_model, sound, dl_feats))
        avg_text_raw = str(predict_with_model_fs(avg_model, sound, avg_feats, avg_encoder))
    except Exception as e:
        return html_error("Prédiction échouée", f"Détail : <code>{html.escape(str(e))}</code>")

    avg_text = normalize_avg_rating_label_fr(avg_text_raw)
    avg_class = avg_fr_to_class(avg_text)
    dl_text = NUM_DOWNLOADS_MAP_FR.get(dl_class, str(dl_class))

    conclusion = interpret_results(avg_class, dl_class)

    extra = f"""
<div class="hint">ID FreeSound : <b>{sound_id}</b></div>
<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
  {conclusion}
</div>
"""
    return html_result(badge, duration, avg_text, dl_text, extra_html=extra)


class AvgRatingTransformer:
    """Turn raw average ratings into text classes.

    A rating of 0 (or a missing value) means "no rating" and maps to class 0.
    Every other rating is binned by ``est`` and shifted by one so that the
    bins occupy classes 1 and up. Classes are then translated to labels
    through ``class_mapping``.

    NOTE: the class name and attribute names must stay as they are; the
    serialized transformers loaded elsewhere in this file are presumably
    instances of this class (confirm against the training code).
    """

    def __init__(self, est, class_mapping=None):
        # est: fitted discretizer exposing transform() on an (n, 1) array.
        self.est = est
        if class_mapping is None:
            class_mapping = {0: "MissedInfo", 1: "Low", 2: "Medium", 3: "High"}
        self.class_mapping = class_mapping

    def transform(self, X):
        """Return an array of labels, one per value in ``X``.

        ``None``/NaN entries are treated like 0 (missing) instead of being
        passed on to the discretizer.
        """
        values = np.asarray(X, dtype=float)
        rated = (values != 0) & ~np.isnan(values)
        classes = np.zeros(values.shape, dtype=int)
        if rated.any():
            binned = self.est.transform(values[rated].reshape(-1, 1))
            classes[rated] = np.asarray(binned).flatten() + 1
        return np.array(
            [self.class_mapping.get(int(c), "MissedInfo") for c in classes.ravel()]
        )


# Preprocessing artifacts for the "URL -> metadata" tab. Each one is looked up
# at the repository root first, then in its per-type sub-folder.
scaler_samplerate_music = load_artifact("scaler_music_samplerate.joblib", "music/scaler_music_samplerate.joblib")
scaler_age_days_music = load_artifact("scaler_music_age_days_log.joblib", "music/scaler_music_age_days_log.joblib")
username_freq_music = load_artifact("username_freq_dict_music.joblib", "music/username_freq_dict_music.joblib")
est_num_downloads_music = load_artifact("est_num_downloads_music.joblib", "music/est_num_downloads_music.joblib")
avg_rating_transformer_music = load_artifact("avg_rating_transformer_music.joblib", "music/avg_rating_transformer_music.joblib")
music_subcategory_cols = load_artifact("music_subcategory_cols.joblib", "music/music_subcategory_cols.joblib")
music_onehot_cols = load_artifact("music_onehot_cols.joblib", "music/music_onehot_cols.joblib")
music_onehot_tags = load_artifact("music_onehot_tags.joblib", "music/music_onehot_tags.joblib")

scaler_samplerate_effect = load_artifact("scaler_effectSamplerate.joblib", "effectSound/scaler_effectSamplerate.joblib")
scaler_age_days_effect = load_artifact("scaler_effectSound_age_days_log.joblib", "effectSound/scaler_effectSound_age_days_log.joblib")
username_freq_effect = load_artifact("username_freq_dict_effectSound.joblib", "effectSound/username_freq_dict_effectSound.joblib")
est_num_downloads_effect = load_artifact("est_num_downloads_effectSound.joblib", "effectSound/est_num_downloads_effectSound.joblib")
avg_rating_transformer_effect = load_artifact("avg_rating_transformer_effectSound.joblib", "effectSound/avg_rating_transformer_effectSound.joblib")
effect_subcategory_cols = load_artifact("effectSound_subcategory_cols.joblib", "effectSound/effectSound_subcategory_cols.joblib")
effect_onehot_cols = load_artifact("effectSound_onehot_cols.joblib", "effectSound/effectSound_onehot_cols.joblib")
effect_onehot_tags = load_artifact("effect_onehot_tags.joblib", "effectSound/effect_onehot_tags.joblib")

# Models, label encoders and feature lists for the metadata tab.
music_model_num_downloads = load_artifact("music_model_num_downloads.joblib")
music_model_avg_rating = load_artifact("music_xgb_avg_rating.joblib")
music_avg_rating_le_meta = load_artifact("music_xgb_avg_rating_label_encoder.joblib")
music_model_features = load_artifact("music_model_features_list.joblib")

effect_model_num_downloads = load_artifact("effectSound_model_num_downloads.joblib")
effect_model_avg_rating = load_artifact("effectSound_xgb_avg_rating.joblib")
effect_avg_rating_le_meta = load_artifact("effectSound_xgb_avg_rating_label_encoder.joblib")
effect_model_features = load_artifact("effect_model_features_list.joblib")

# Remove duplicate feature names while keeping their first-seen order.
music_model_features = list(dict.fromkeys(list(music_model_features)))
effect_model_features = list(dict.fromkeys(list(effect_model_features)))

# GloVe vectors for description embeddings. Loading is best effort: if gensim
# is unavailable or the load fails, glove_model stays None and
# description_to_vec() returns zeros.
if _GENSIM_OK:
    try:
        glove_model = api.load("glove-wiki-gigaword-100")
    except Exception:
        glove_model = None
else:
    glove_model = None


def preprocess_name(df, vec_dim=8):
    """Add ``name_len`` and ``name_vec_0..vec_dim-1`` columns derived from ``name_clean``.

    Each ``name_vec_k`` is a 3-bit value (0..7) taken from a digest of the
    name. A SHA-256 digest is used rather than the built-in ``hash()``:
    ``hash()`` of a string is salted per interpreter process, which made these
    features change every time the app restarted.

    Args:
        df: DataFrame with a ``name_clean`` column; missing values count as "".
        vec_dim: Number of ``name_vec_*`` columns to produce.

    Returns:
        A copy of ``df`` with the extra columns; the input is not modified.
    """
    df = df.copy()
    name = df["name_clean"].fillna("").astype(str)
    df["name_len"] = name.str.len()

    vec = np.zeros((len(df), vec_dim), dtype=float)
    for i, text in enumerate(name.tolist()):
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        h = int.from_bytes(digest, "big")
        for k in range(vec_dim):
            # k-th group of 3 bits, starting from the least significant end.
            vec[i, k] = (h >> (k * 3)) & 0x7
    for k in range(vec_dim):
        df[f"name_vec_{k}"] = vec[:, k]
    return df


def description_to_vec(text, model, dim=100):
    """Average the word vectors of ``text``.

    Args:
        text: Description text; split on whitespace after lower-casing.
        model: Mapping-like word-vector model supporting ``word in model`` and
            ``model[word]``, or ``None``.
        dim: Length of the zero vector returned when nothing can be embedded.

    Returns:
        The mean of the vectors of the known words, or ``np.zeros(dim)`` when
        the model is ``None``, the text is empty/missing, or no word is known.
    """
    if model is None:
        return np.zeros(dim)
    # A missing description may arrive as NaN; without this check it would be
    # tokenized as the literal word "nan".
    if not text or (isinstance(text, float) and np.isnan(text)):
        return np.zeros(dim)
    words = str(text).lower().split()
    vecs = [model[w] for w in words if w in model]
    if not vecs:
        return np.zeros(dim)
    return np.mean(vecs, axis=0)


def fetch_sound_metadata(sound_url: str) -> pd.DataFrame:
    """Fetch Freesound metadata for one sound (the audio is not downloaded).

    Args:
        sound_url: Freesound URL whose last path segment is the numeric id.

    Returns:
        A one-row DataFrame with the raw metadata fields.

    Raises:
        RuntimeError: If no API token is configured.
        ValueError: If no numeric id can be read from ``sound_url``.
    """
    if not API_TOKEN:
        raise RuntimeError("Token FreeSound manquant (FREESOUND_TOKEN).")

    # Ignore surrounding whitespace, query string and fragment.
    path_part = str(sound_url).strip().split("?", 1)[0].split("#", 1)[0]
    sound_id = int(path_part.rstrip("/").rsplit("/", 1)[-1])
    sound = fs_client.get_sound(sound_id)

    def _attr(name, default):
        # An attribute that exists but is None would bypass getattr's
        # default, so apply the default in that case as well.
        value = getattr(sound, name, default)
        return default if value is None else value

    data = {
        "id": sound_id,
        "file_path": None,
        "name": _attr("name", ""),
        "num_ratings": _attr("num_ratings", 0),
        "tags": ",".join(_attr("tags", [])),
        "username": _attr("username", ""),
        "description": _attr("description", ""),
        "created": _attr("created", ""),
        "license": _attr("license", ""),
        "num_downloads": _attr("num_downloads", 0),
        "channels": _attr("channels", 0),
        "filesize": _attr("filesize", 0),
        "num_comments": _attr("num_comments", 0),
        "category_is_user_provided": _attr("category_is_user_provided", 0),
        "duration": _attr("duration", 0),
        "avg_rating": _attr("avg_rating", 0),
        "category": _attr("category", "Unknown"),
        "subcategory": _attr("subcategory", "Other"),
        "type": _attr("type", ""),
        "samplerate": _attr("samplerate", 0),
    }
    return pd.DataFrame([data])


def preprocess_sound(df: pd.DataFrame):
    """Turn one row of raw Freesound metadata into model-ready features.

    The duration of the first row selects the effect or music artifacts
    (scalers, frequency table, discretizers, one-hot column lists).

    Args:
        df: One-row DataFrame as produced by fetch_sound_metadata().

    Returns:
        The processed DataFrame, or an error message ``str`` when the duration
        is outside both supported ranges (callers test for ``str``).
    """
    df = df.copy()
    dur = float(df["duration"].iloc[0])

    if MIN_EFFECT <= dur <= MAX_EFFECT:
        scaler_samplerate = scaler_samplerate_effect
        scaler_age = scaler_age_days_effect
        username_freq = username_freq_effect
        est_num_downloads = est_num_downloads_effect
        avg_rating_transformer = avg_rating_transformer_effect
        subcat_cols = effect_subcategory_cols
        onehot_cols = effect_onehot_cols
        onehot_tags = effect_onehot_tags
    elif MIN_MUSIC <= dur <= MAX_MUSIC:
        scaler_samplerate = scaler_samplerate_music
        scaler_age = scaler_age_days_music
        username_freq = username_freq_music
        est_num_downloads = est_num_downloads_music
        avg_rating_transformer = avg_rating_transformer_music
        subcat_cols = music_subcategory_cols
        onehot_cols = music_onehot_cols
        onehot_tags = music_onehot_tags
    else:
        return f"❌ Son trop court ou trop long ({dur} sec)"

    # Boolean flag -> int
    df["category_is_user_provided"] = df["category_is_user_provided"].astype(int)

    # Uploader frequency; unknown users get 0.
    df["username_freq"] = df["username"].map(username_freq).fillna(0)

    # log1p on the count/size-like columns
    for col in ["num_ratings", "num_comments", "filesize", "duration"]:
        df[col] = np.log1p(df[col])

    # Scaled sample rate
    df["samplerate"] = scaler_samplerate.transform(df[["samplerate"]])

    # Age in days since creation -> log1p -> scaled
    df["created"] = pd.to_datetime(df["created"], errors="coerce").dt.tz_localize(None)
    df["age_days"] = (pd.Timestamp.now() - df["created"]).dt.days
    df["age_days_log"] = np.log1p(df["age_days"])
    df["age_days_log_scaled"] = scaler_age.transform(df[["age_days_log"]])
    df = df.drop(columns=["created", "age_days", "age_days_log"])

    # Discretized targets (removed again by the caller before prediction)
    df["num_downloads_class"] = est_num_downloads.transform(df[["num_downloads"]])
    df["avg_rating"] = avg_rating_transformer.transform(df["avg_rating"].to_numpy())

    # One-hot subcategory: 1 for the matching column, 0 for the others.
    subcat_val = df["subcategory"].iloc[0]
    for col in subcat_cols:
        df[col] = 1 if subcat_val == col.replace("subcategory_", "") else 0
    df.drop(columns=["subcategory"], inplace=True)

    # One-hot license / category / type
    for col in onehot_cols:
        if col not in df.columns:
            df[col] = 0

    # Positional access: do not depend on the row being labelled 0.
    license_val = df["license"].iloc[0]
    category_val = df["category"].iloc[0]
    type_val = df["type"].iloc[0]

    for col_name in [f"license_{license_val}", f"category_{category_val}", f"type_{type_val}"]:
        if col_name in df.columns:
            df[col_name] = 1

    # Text columns must exist before being encoded.
    for col in ["name", "tags", "description"]:
        if col not in df.columns:
            df[col] = ""

    # One-hot tags; tag names are compared lower-cased and stripped.
    raw_tags = df["tags"].iloc[0]
    present_tags = {t.strip() for t in str(raw_tags).lower().split(",")} if raw_tags else set()
    for col in onehot_tags:
        if col not in df.columns:
            df[col] = 0
        if col.replace("tag_", "").lower() in present_tags:
            df[col] = 1
    df.drop(columns=["tags"], inplace=True)

    # Name features: extension removed, then length + hashed vector.
    df["name_clean"] = df["name"].astype(str).str.lower().str.rsplit(".", n=1).str[0]
    df = preprocess_name(df, vec_dim=8)
    df.drop(columns=["name", "name_clean"], inplace=True)

    # Description embedding: 100 columns added in one concat rather than 100
    # separate inserts.
    desc_vec = description_to_vec(df["description"].iloc[0], glove_model)
    glove_df = pd.DataFrame(
        {f"description_glove_{i}": float(desc_vec[i]) for i in range(100)},
        index=df.index,
    )
    df = pd.concat([df.drop(columns=["description"]), glove_df], axis=1)

    # Drop the raw columns that are not model features.
    df.drop(
        columns=[
            "license", "category", "type", "subcategory", "id",
            "num_downloads", "file_path", "username", "tags_list"
        ],
        inplace=True,
        errors="ignore"
    )

    return df


def predict_with_model_meta(model, df_input: pd.DataFrame, le=None):
    """Predict the class of the first row of ``df_input``.

    Columns are aligned on the booster's own feature names (missing ones are
    filled with 0.0) before predicting through a named DMatrix.

    Args:
        model: Fitted model exposing ``get_booster()``.
        df_input: Feature rows; only the first row's prediction is returned.
        le: Optional label encoder used to decode the class index.

    Returns:
        The decoded label when ``le`` is given (or ``"Classe inconnue (<n>)"``
        if decoding fails), else the class index as ``int``.
    """
    booster = model.get_booster()
    booster_feats = booster.feature_names
    X_aligned = df_input.reindex(columns=booster_feats, fill_value=0.0).astype(float)
    dmatrix = xgb.DMatrix(X_aligned.values, feature_names=list(booster_feats))

    first_row = np.asarray(booster.predict(dmatrix)).reshape(len(X_aligned), -1)[0]
    if first_row.size > 1:
        # Per-class scores (e.g. multi:softprob): float() on the vector would
        # raise, so take the best-scoring class instead.
        pred_int = int(np.argmax(first_row))
    else:
        pred_int = int(round(float(first_row[0])))

    if le is not None:
        try:
            return le.inverse_transform([pred_int])[0]
        except Exception:
            return f"Classe inconnue ({pred_int})"
    return pred_int


def predict_from_metadata_url(url: str):
    """Handle the "URL -> metadata" tab.

    Fetches the sound's metadata, validates its duration, runs the full
    preprocessing and predicts the downloads and rating classes with the
    effect or music models.

    Returns:
        An HTML string: a result card on success, an error card otherwise.
    """
    if not API_TOKEN:
        return html_error(
            "Token FreeSound manquant",
            "Ajoute la variable d’environnement <code>FREESOUND_TOKEN</code> pour activer cet onglet."
        )

    if not url or not url.strip():
        return html_error(
            "URL vide",
            "Collez une URL FreeSound du type <code>https://freesound.org/s/123456/</code>"
        )

    def _detail(exc):
        # Exception text may echo user input (the URL); escape it.
        return f"Détail : <code>{html.escape(str(exc))}</code>"

    # 1) Metadata
    try:
        df_raw = fetch_sound_metadata(url)
    except Exception as e:
        return html_error("Erreur API FreeSound", _detail(e))

    # 2) Duration check; an unreadable duration counts as 0 (too short).
    try:
        dur = float(df_raw["duration"].iloc[0])
    except Exception:
        dur = 0.0

    ranges_html = (
        f"Durée détectée : <b>{dur:.2f} s</b><br><br>"
        f"Plages acceptées :<br>"
        f"• Effet sonore : <b>{MIN_EFFECT}–{MAX_EFFECT} s</b><br>"
        f"• Musique : <b>{MIN_MUSIC}–{MAX_MUSIC} s</b>"
    )
    if dur < MIN_EFFECT:
        return html_error("Audio trop court", ranges_html)
    if (MAX_EFFECT < dur < MIN_MUSIC) or dur > MAX_MUSIC:
        return html_error("Audio hors plage", ranges_html)

    # 3) Preprocessing; failures become an error card instead of an
    #    unhandled exception.
    try:
        df_processed = preprocess_sound(df_raw)
    except Exception as e:
        return html_error("Preprocessing impossible", _detail(e))
    if isinstance(df_processed, str):
        return html_error("Preprocessing impossible", df_processed)

    # 4) The discretized targets must not be fed to the models.
    cols_to_remove = ["avg_rating", "num_downloads_class"]
    df_for_model = df_processed.drop(
        columns=[c for c in cols_to_remove if c in df_processed.columns],
        errors="ignore"
    )

    # 5) Model set for the content type
    if MIN_EFFECT <= dur <= MAX_EFFECT:
        badge = "🔊 Effet sonore (URL → metadata)"
        model_nd = effect_model_num_downloads
        model_ar = effect_model_avg_rating
        model_features = effect_model_features
        current_le = effect_avg_rating_le_meta
    else:
        badge = "🎵 Musique (URL → metadata)"
        model_nd = music_model_num_downloads
        model_ar = music_model_avg_rating
        model_features = music_model_features
        current_le = music_avg_rating_le_meta

    # 6) Align + predict
    try:
        df_for_model = df_for_model.reindex(columns=model_features, fill_value=0.0).astype(float)
        # With le=None the helper returns the class index as an int.
        dl_class = int(predict_with_model_meta(model_nd, df_for_model, le=None))
        pred_avg_rating_label_raw = predict_with_model_meta(model_ar, df_for_model, le=current_le)
    except Exception as e:
        return html_error("Prédiction échouée", _detail(e))

    downloads_display = NUM_DOWNLOADS_MAP_FR.get(dl_class, str(dl_class))

    pred_avg_rating_label = normalize_avg_rating_label_fr(pred_avg_rating_label_raw)
    avg_class = avg_fr_to_class(pred_avg_rating_label)
    rating_display = str(pred_avg_rating_label)

    conclusion = interpret_results(avg_class, dl_class)
    extra = f"""
<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
  {conclusion}
</div>
"""

    return html_result(badge, dur, rating_display, downloads_display, extra_html=extra)


# Gradio UI: a header followed by three tabs, each wiring one input and one
# button to one of the predict_* handlers above.
theme = gr.themes.Soft()

with gr.Blocks(title="Démo — Popularité Audio", css=CSS) as demo:
    gr.HTML(
        f"""
<div id="header-title">Démo — Prédiction de popularité audio</div>
<p id="header-sub">
  Trois modes : <b>Upload audio</b> (openSMILE), <b>URL FreeSound</b> (features API), <b>URL FreeSound</b> (metadata + preprocessing complet).<br><br>
  <b>Durées acceptées :</b> 🔊 Effet sonore {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 Musique {MIN_MUSIC}–{MAX_MUSIC}s
</p>
"""
    )

    if not API_TOKEN:
        gr.Markdown(
            "⚠️ **FREESOUND_TOKEN non défini** : les onglets URL (2 et 3) ne fonctionneront pas tant que tu ne l’ajoutes pas."
        )

    with gr.Tabs():
        # Tab 1: uploaded file -> openSMILE features
        with gr.Tab("1) Upload audio (openSMILE)"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Importer un fichier")
                    audio_in = gr.Audio(type="filepath", label="Fichier audio")
                    btn_audio = gr.Button("🚀 Prédire (upload)", variant="primary")
                with gr.Column(scale=1):
                    gr.Markdown("### Résultat")
                    out_audio = gr.HTML()
            btn_audio.click(predict_from_uploaded_audio, inputs=audio_in, outputs=out_audio)

        # Tab 2: Freesound URL -> features returned by the API
        with gr.Tab("2) URL FreeSound (features API)"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Coller une URL FreeSound")
                    url_in = gr.Textbox(label="URL FreeSound", placeholder="https://freesound.org/s/123456/")
                    btn_url = gr.Button("🚀 Prédire (URL → features API)", variant="primary")
                with gr.Column(scale=1):
                    gr.Markdown("### Résultat")
                    out_url = gr.HTML()
            btn_url.click(predict_from_freesound_url, inputs=url_in, outputs=out_url)

        # Tab 3: Freesound URL -> metadata + full preprocessing
        with gr.Tab("3) URL FreeSound (metadata)"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Coller une URL FreeSound")
                    url_meta = gr.Textbox(label="URL FreeSound", placeholder="https://freesound.org/s/123456/")
                    btn_meta = gr.Button("🚀 Prédire (URL → metadata)", variant="primary")
                with gr.Column(scale=1):
                    gr.Markdown("### Résultat")
                    out_meta = gr.HTML()
            btn_meta.click(predict_from_metadata_url, inputs=url_meta, outputs=out_meta)

# NOTE(review): `css` is given to gr.Blocks() while `theme` is given to
# launch(). Which of the two accepts these arguments depends on the installed
# Gradio major version — confirm against the pinned version before moving
# either one.
demo.launch(theme=theme)