Spaces:
Runtime error
Runtime error
import os | |
import re | |
from datetime import datetime | |
import gradio as gr | |
import torch | |
import pandas as pd | |
import soundfile as sf | |
import torchaudio | |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC | |
from src.transcription import SpeechEncoder | |
from src.sentiment import TextEncoder | |
# Hugging Face Spaces configuration: the flag is on only when the HF_SPACE
# environment variable is set to "true" (case-insensitive).
HF_SPACE = os.getenv("HF_SPACE", "false").lower() == "true"

# Preload every model once, at import time.
print("Chargement des modèles...")

# Lighter French model.
# NOTE(review): confirm this checkpoint ships a tokenizer and a CTC head;
# both loaders below require them.
_CTC_CHECKPOINT = "LeBenchmark/wav2vec2-FR-2K-small"
# Outside a Space the weights are cached under ./models; on a Space no
# explicit cache directory is passed.
_CTC_CACHE_DIR = None if HF_SPACE else "./models"

processor_ctc = Wav2Vec2Processor.from_pretrained(
    _CTC_CHECKPOINT,
    cache_dir=_CTC_CACHE_DIR,
)
model_ctc = Wav2Vec2ForCTC.from_pretrained(
    _CTC_CHECKPOINT,
    cache_dir=_CTC_CACHE_DIR,
)

speech_enc = SpeechEncoder()
text_enc = TextEncoder()

print("Modèles chargés avec succès!")
# Analysis pipeline
def analyze_audio(audio_path):
    """Transcribe an audio file and score the sentiment of its transcription.

    Args:
        audio_path: Path of the audio file to analyse, or ``None`` when the
            user supplied no audio.

    Returns:
        A 4-tuple ``(transcription, summary_html, segments, history_entry)``:

        * ``transcription`` -- lower-cased text decoded from the CTC model,
          or a user-facing message when there is no audio or analysis fails.
        * ``summary_html`` -- HTML snippet with the dominant sentiment and
          its confidence (empty string on failure).
        * ``segments`` -- ``pandas.DataFrame`` with one row per sentence-like
          segment (empty on failure).
        * ``history_entry`` -- dict describing this analysis (empty on
          failure, which callers use to skip the history update).

    Any exception raised while analysing is caught and reported through the
    first element of the tuple, so the UI callback never raises.
    """
    if audio_path is None:
        return "Aucun audio fourni", "", pd.DataFrame(), {}

    try:
        # Read as float32 and always 2-D, so mono and multi-channel files
        # share one code path (soundfile lays frames out along axis 0).
        samples, sr = sf.read(audio_path, dtype="float32", always_2d=True)

        # Downmix by averaging the channel axis, then restore a leading
        # channel dimension. The previous code added that dimension before
        # checking the channel count, so stereo input was never downmixed.
        wav = torch.from_numpy(samples).mean(dim=1).unsqueeze(0)

        if sr != 16000:
            wav = torchaudio.transforms.Resample(sr, 16000)(wav)
            sr = 16000

        # Transcription. squeeze(0) drops only the channel dimension, so a
        # one-sample clip still reaches the processor as a 1-D array.
        inputs = processor_ctc(
            wav.squeeze(0).numpy(),
            sampling_rate=sr,
            return_tensors="pt",
        )
        with torch.no_grad():
            logits = model_ctc(**inputs).logits
        pred_ids = torch.argmax(logits, dim=-1)
        transcription = processor_ctc.batch_decode(pred_ids)[0].lower()

        # Overall sentiment: keep the label with the highest score.
        # NOTE(review): analyze_sentiment is called on the class, as in the
        # original; presumably a static/class method -- confirm.
        sent_dict = TextEncoder.analyze_sentiment(transcription)
        label, conf = max(sent_dict.items(), key=lambda item: item[1])
        emojis = {"positif": "😊", "neutre": "😐", "négatif": "☹️"}
        emoji = emojis.get(label, "")

        # Per-sentence breakdown: split on sentence punctuation and score
        # every non-empty piece.
        segments = [
            piece.strip()
            for piece in re.split(r'[.?!]', transcription)
            if piece.strip()
        ]
        seg_results = []
        for seg in segments:
            seg_scores = TextEncoder.analyze_sentiment(seg)
            seg_label, seg_conf = max(seg_scores.items(), key=lambda item: item[1])
            seg_results.append({
                "Segment": seg,
                "Sentiment": seg_label.capitalize(),
                "Confiance (%)": round(seg_conf * 100, 1),
            })
        seg_df = pd.DataFrame(seg_results)

        # History entry
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        history_entry = {
            "Horodatage": timestamp,
            "Transcription": transcription,
            "Sentiment": label.capitalize(),
            "Confiance (%)": round(conf * 100, 1),
        }

        # Rendering
        summary_html = (
            f"<div style='display:flex;align-items:center;'>"
            f"<span style='font-size:3rem;margin-right:10px;'>{emoji}</span>"
            f"<h2 style='color:#6a0dad;'>{label.upper()}</h2>"
            f"</div>"
            f"<p><strong>Confiance :</strong> {conf*100:.1f}%</p>"
        )
        return transcription, summary_html, seg_df, history_entry
    except Exception as e:
        # UI boundary: report the failure in the transcription slot rather
        # than letting the callback raise.
        error_msg = f"Erreur lors de l'analyse: {str(e)}"
        return error_msg, "", pd.DataFrame(), {}
# CSV export
def export_history_csv(history):
    """Write the analysis history to a CSV file and return its path.

    Each call writes into its own freshly created temporary directory.
    Writing to a fixed ``history.csv`` in the working directory let
    concurrent sessions overwrite each other's export.

    Args:
        history: List of history-entry dicts (one per analysis). May be
            empty or ``None``.

    Returns:
        Path of the written ``history.csv`` file, or ``None`` when there is
        nothing to export.
    """
    import tempfile  # local import: only this function needs it

    if not history:
        return None

    # A private directory per export keeps the download name "history.csv"
    # while isolating sessions from one another.
    export_dir = tempfile.mkdtemp(prefix="history_")
    path = os.path.join(export_dir, "history.csv")
    pd.DataFrame(history).to_csv(path, index=False)
    return path
# Gradio interface
# NOTE(review): the Row/Column nesting below is reconstructed from statement
# order -- confirm it matches the intended layout.
demo = gr.Blocks(
    theme=gr.themes.Monochrome(primary_hue="purple"),
    title="Analyse de Sentiment Audio - Hugging Face Space"
)

with demo:
    gr.Markdown("""
    # 🎤 Analyse de Sentiment Audio
    Ce Space permet d'analyser le sentiment d'extraits audio en français en combinant :
    - **Transcription audio** avec Wav2Vec2
    - **Analyse de sentiment** avec BERT multilingue
    """)
    gr.HTML("""
    <div style="display: flex; flex-direction: column; gap: 10px; margin-bottom: 20px;">
    <div style="background-color: #f3e8ff; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #8e44ad;">
    <strong>Étape 1 :</strong> Enregistrez votre voix ou téléversez un fichier audio (format WAV recommandé).
    </div>
    <div style="background-color: #e0f7fa; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #0097a7;">
    <strong>Étape 2 :</strong> Cliquez sur le bouton <em><b>Analyser</b></em> pour lancer la transcription et l'analyse.
    </div>
    <div style="background-color: #fff3e0; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #fb8c00;">
    <strong>Étape 3 :</strong> Visualisez les résultats : transcription, sentiment, et analyse détaillée.
    </div>
    <div style="background-color: #e8f5e9; padding: 12px 20px; border-radius: 12px; border-left: 5px solid #43a047;">
    <strong>Étape 4 :</strong> Exportez l'historique des analyses au format CSV si besoin.
    </div>
    </div>
    """)

    # API section
    with gr.Accordion("🔌 API REST", open=False):
        # The backslash in the curl example is escaped: in a non-raw string a
        # bare backslash before the newline is a line continuation and would
        # vanish from the rendered snippet.
        gr.Markdown("""
        ### Endpoints disponibles :
        - **`/api/predict`** - Analyse audio (POST)
        - **`/api/predict_text`** - Analyse textuelle (POST)
        - **`/api/health`** - Vérification état (GET)
        - **`/api/docs`** - Documentation Swagger
        ### Exemple d'utilisation :
        ```bash
        curl -X POST "https://huggingface.co/spaces/<username>/sentiment-audio-analyzer/api/predict" \\
             -F "file=@audio.wav"
        ```
        """)

    with gr.Row():
        with gr.Column(scale=2):
            audio_in = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Audio Input"
            )
            btn = gr.Button("🔍 Analyser", variant="primary")
            export_btn = gr.Button("📊 Exporter CSV")
        with gr.Column(scale=3):
            chat = gr.Chatbot(label="Historique des échanges")
            transcription_out = gr.Textbox(label="Transcription", interactive=False)
            summary_out = gr.HTML(label="Sentiment")
            seg_out = gr.Dataframe(label="Détail par segment")
            hist_out = gr.Dataframe(label="Historique")

    state_chat = gr.State([])  # list of (user, bot) pairs
    state_hist = gr.State([])  # list of history-entry dicts

    def chat_callback(audio_path, chat_history, hist_state):
        """Analyse one audio clip and refresh every output of the page.

        Args:
            audio_path: File path supplied by the audio component, or None.
            chat_history: (user, bot) pairs accumulated so far.
            hist_state: History-entry dicts accumulated so far.

        Returns:
            A 7-tuple matching the ``outputs`` list of ``btn.click``: chatbot
            value, chat state, transcription, summary HTML, segment table,
            history state and history table.
        """
        transcription, summary, seg_df, hist_entry = analyze_audio(audio_path)
        user_msg = "[Audio reçu]"
        bot_msg = f"**Transcription :** {transcription}\n**Sentiment :** {summary}"
        # Build new lists rather than mutating the state values in place.
        chat_history = chat_history + [(user_msg, bot_msg)]
        if hist_entry:  # empty dict means the analysis produced no entry
            hist_state = hist_state + [hist_entry]
        return (
            chat_history,                # chatbot display
            chat_history,                # persisted so the next call appends to it
            transcription,
            summary,
            seg_df,
            hist_state,
            pd.DataFrame(hist_state),    # fills the "Historique" table
        )

    btn.click(
        fn=chat_callback,
        inputs=[audio_in, state_chat, state_hist],
        # state_chat and hist_out were missing from the outputs: the chat
        # never accumulated and the history table stayed empty.
        outputs=[
            chat,
            state_chat,
            transcription_out,
            summary_out,
            seg_out,
            state_hist,
            hist_out,
        ]
    )

    csv_out = gr.File(label="Télécharger CSV")
    export_btn.click(
        fn=export_history_csv,
        inputs=[state_hist],
        outputs=[csv_out]
    )
# Launch configuration for Hugging Face Spaces
if __name__ == "__main__":
    # Listen on every interface when flagged as a Space, loopback otherwise.
    bind_host = "0.0.0.0" if HF_SPACE else "127.0.0.1"
    demo.launch(server_name=bind_host, server_port=7860, share=False)