Marcos12886 committed · Commit 166aa6c
Parent(s): abdf62b

Decibels. Call models better. Improve buttons...

Files changed:
- app.py +69 -72
- interfaz.py +2 -2
- model.py +9 -9
app.py CHANGED
@@ -7,71 +7,63 @@ from interfaz import estilo, my_theme
 
 token = os.getenv("HF_TOKEN")
 client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=token)
-model_cache = {}
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_class, id2label_class = predict_params(model_path="distilhubert-finetuned-mixed-data", dataset_path="data/mixed_data", filter_white_noise=True)
+model_mon, id2label_mon = predict_params(model_path="distilhubert-finetuned-cry-detector", dataset_path="data/baby_cry_detection", filter_white_noise=False)
 
-def load_model_and_dataset(model_path, dataset_path, filter_white_noise):
-    if (model_path, dataset_path, filter_white_noise) not in model_cache:
-        model, _, _, id2label = predict_params(dataset_path, model_path, filter_white_noise)
-        model_cache[(model_path, dataset_path, filter_white_noise)] = (model, id2label)
-    return model_cache[(model_path, dataset_path, filter_white_noise)]
-
-def predict(audio_path, model_path, dataset_path, filter_white_noise):
-    model, id2label = load_model_and_dataset(model_path, dataset_path, filter_white_noise)
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+def call(audiopath, model, dataset_path, filter_white_noise):
     model.to(device)
     model.eval()
-    …
-    …
+    audio_dataset = AudioDataset(dataset_path, {}, filter_white_noise,)
+    processed_audio = audio_dataset.preprocess_audio(audiopath)
+    inputs = {"input_values": processed_audio.to(device).unsqueeze(0)}
     with torch.no_grad():
         outputs = model(**inputs)
         logits = outputs.logits
-    predicted_class_ids = torch.argmax(logits, dim=-1).item()
-    label = id2label[predicted_class_ids]
-    if dataset_path == "data/mixed_data":
-        label_mapping = {0: 'Hambre', 1: 'Problemas para respirar', 2: 'Dolor', 3: 'Cansancio/Incomodidad'}
-        label = label_mapping.get(predicted_class_ids, label)
-    return label
+    return logits
 
-def …
-    model_mon, _ = load_model_and_dataset(
-        model_path="distilhubert-finetuned-cry-detector",
-        dataset_path="data/baby_cry_detection",
-        filter_white_noise=False
-    )
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model_mon.to(device)
-    model_mon.eval()
-    audio_dataset = AudioDataset(dataset_path="data/baby_cry_detection", label2id={}, filter_white_noise=False)
-    processed_audio = audio_dataset.preprocess_audio(audio_path)
-    inputs = {"input_values": processed_audio.to(device).unsqueeze(0)}
+def predict(audio_path_pred):
     with torch.no_grad():
-        outputs = model_mon(**inputs)
-        logits = outputs.logits
+        logits = call(audio_path_pred, model=model_class, dataset_path="data/mixed_data", filter_white_noise=True)
+        predicted_class_ids_class = torch.argmax(logits, dim=-1).item()
+        label_class = id2label_class[predicted_class_ids_class]
+        label_mapping = {0: 'Hambre', 1: 'Problemas para respirar', 2: 'Dolor', 3: 'Cansancio/Incomodidad'}
+        label_class = label_mapping.get(predicted_class_ids_class, label_class)
+    return label_class
+
+def predict_stream(audio_path_stream):
+    with torch.no_grad():
+        logits = call(audio_path_stream, model=model_mon, dataset_path="data/baby_cry_detection", filter_white_noise=False)
         probabilities = torch.nn.functional.softmax(logits, dim=-1)
         crying_probabilities = probabilities[:, 1]
-    avg_crying_probability = crying_probabilities.mean()
-    …
+    avg_crying_probability = crying_probabilities.mean()*100
+    if avg_crying_probability < 15:
+        label_class = predict(audio_path_stream)
+        return "Está llorando por:", f"{label_class}. Probabilidad: {avg_crying_probability:.1f}%"
+    else:
+        return "No está llorando.", f"Probabilidad: {avg_crying_probability:.1f}%"
+
+def decibelios(audio_path_stream):
+    with torch.no_grad():
+        logits = call(audio_path_stream, model=model_mon, dataset_path="data/baby_cry_detection", filter_white_noise=False)
+        rms = torch.sqrt(torch.mean(torch.square(logits)))
+        db_level = 20 * torch.log10(rms + 1e-6).item()
+    return db_level
+
+def mostrar_decibelios(audio_path_stream, visual_threshold):
+    db_level = decibelios(audio_path_stream)
+    if db_level < visual_threshold:
+        return f"Prediciendo. Decibelios: {db_level:.2f}"
+    elif db_level > visual_threshold:
+        return "No detectamos ruido..."
+
+def predict_stream_decib(audio_path_stream, visual_threshold):
+    db_level = decibelios(audio_path_stream)
+    if db_level < visual_threshold:
+        llorando, probabilidad = predict_stream(audio_path_stream)
+        return f"{llorando} {probabilidad}"
     else:
-        return
+        return ""
 
 def chatbot_config(message, history: list[tuple[str, str]]):
     system_message = "You are a Chatbot specialized in baby health and care."
@@ -105,12 +97,12 @@ with gr.Blocks(theme=my_theme) as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown("<h2>Predictor</h2>")
-            …
-            gr.Markdown("<p>Descubre por qué llora tu …
+            boton_predictor = gr.Button("Prueba el predictor")
+            gr.Markdown("<p>Descubre por qué llora tu bebé</p>")
        with gr.Column():
             gr.Markdown("<h2>Monitor</h2>")
-            …
-            gr.Markdown("<p>…
+            boton_monitor = gr.Button("Prueba el monitor")
+            gr.Markdown("<p>Monitoriza si tu hijo está llorando y por qué, sin levantarte del sofá</p>")
    with gr.Column(visible=False) as pag_predictor:
        gr.Markdown("<h2>Predictor</h2>")
        audio_input = gr.Audio(
@@ -119,14 +111,8 @@ with gr.Blocks(theme=my_theme) as demo:
            label="Baby recorder",
            type="filepath",
        )
-        …
-        …
-            lambda audio: predict( # Mirar porque usar lambda
-                audio,
-                model_path="distilhubert-finetuned-mixed-data",
-                dataset_path="data/mixed_data",
-                filter_white_noise=True
-            ),
+        gr.Button("¿Por qué llora?").click(
+            predict,
            inputs=audio_input,
            outputs=gr.Textbox(label="Tu bebé llora por:")
        )
@@ -134,18 +120,29 @@ with gr.Blocks(theme=my_theme) as demo:
    with gr.Column(visible=False) as pag_monitor:
        gr.Markdown("<h2>Monitor</h2>")
        audio_stream = gr.Audio(
-            # min_length=1.0, # mirar por qué no va esto
            format="wav",
            label="Baby recorder",
            type="filepath",
            streaming=True
        )
+        threshold_db = gr.Slider(
+            minimum=0,
+            maximum=100,
+            step=1,
+            value=30,
+            label="Umbral de dB para activar la predicción"
+        )
+        audio_stream.stream(
+            mostrar_decibelios,
+            inputs=[audio_stream, threshold_db],
+            outputs=gr.Textbox(value="Esperando...", label="Estado")
+        )
        audio_stream.stream(
-            …
-            inputs=audio_stream,
-            outputs=gr.Textbox(label="Tu …
+            predict_stream_decib,
+            inputs=[audio_stream, threshold_db],
+            outputs=gr.Textbox(value="", label="Tu bebé:")
        )
        gr.Button("Volver a la pantalla inicial").click(cambiar_pestaña, outputs=[pag_monitor, chatbot])
-        …
-        …
+        boton_predictor.click(cambiar_pestaña, outputs=[chatbot, pag_predictor])
+        boton_monitor.click(cambiar_pestaña, outputs=[chatbot, pag_monitor])
 demo.launch(share=True)
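A note on the new gating flow in app.py: both `audio_stream.stream(...)` handlers receive `[audio_stream, threshold_db]`, and `predict_stream_decib` only invokes the cry classifier while the level reported by `decibelios` stays below the slider value (`decibelios` derives its RMS from the model logits rather than from the audio itself). Below is a minimal sketch of the same gate using a conventional RMS over the raw waveform instead; `waveform_db` and `gated_predict` are illustrative names, not part of this commit:

    import torch
    import torchaudio

    def waveform_db(audio_path):
        # RMS level of the raw waveform in dB relative to full scale;
        # the commit computes its level from the model logits instead.
        waveform, _ = torchaudio.load(audio_path)
        rms = torch.sqrt(torch.mean(torch.square(waveform)))
        return (20 * torch.log10(rms + 1e-6)).item()

    def gated_predict(audio_path, threshold_db, predict_fn):
        # Only run the expensive classifier on sufficiently loud chunks.
        # The comparison is inverted relative to predict_stream_decib,
        # because waveform RMS grows as the audio gets louder.
        if waveform_db(audio_path) >= threshold_db:
            return predict_fn(audio_path)
        return ""  # keep the Textbox empty for quiet chunks

A dBFS-style level is at most 0, so the 0-100 slider range in the diff would need rescaling; the sketch only illustrates the control flow.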
interfaz.py CHANGED
@@ -93,9 +93,9 @@ def inicio():
        with gr.Column():
            gr.Markdown("<h2>Predictor</h2>")
            boton_pagina_1 = gr.Button("Prueba el predictor")
-            gr.Markdown("<p>Descubre por qué llora tu …
+            gr.Markdown("<p>Descubre por qué llora tu bebé</p>")
        with gr.Column():
            gr.Markdown("<h2>Monitor</h2>")
            boton_pagina_2 = gr.Button("Prueba el monitor")
-            gr.Markdown("<p>…
+            gr.Markdown("<p>Detecta si tu hijo está llorando y por qué antes de que puedas levantarte del sofá</p>")
    return boton_pagina_1, boton_pagina_2
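`cambiar_pestaña` itself is defined elsewhere in interfaz.py. Judging from the call sites in app.py, which always pass a `[page_to_hide, page_to_show]` pair as `outputs`, it presumably returns two visibility updates; a hypothetical reconstruction, not the committed code:

    import gradio as gr

    def cambiar_pestaña():
        # Hide the first output component and reveal the second.
        return gr.update(visible=False), gr.update(visible=True)

This would match how `boton_predictor.click(cambiar_pestaña, outputs=[chatbot, pag_predictor])` swaps the landing page for the predictor page.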
model.py CHANGED
@@ -5,8 +5,8 @@ import torch
 import torchaudio
 from torch.utils.data import Dataset, DataLoader
 from huggingface_hub import upload_folder
-from transformers.integrations import TensorBoardCallback
 from sklearn.metrics import accuracy_score, precision_recall_fscore_support
+from transformers.integrations import TensorBoardCallback
 from transformers import (
     Wav2Vec2FeatureExtractor, HubertConfig, HubertForSequenceClassification,
     Trainer, TrainingArguments,
@@ -121,7 +121,7 @@ def create_dataloader(dataset_path, filter_white_noise, test_size=0.2, shuffle=T
     )
     return train_dataloader, test_dataloader, label2id, id2label
 
-def load_model(model_path, num_labels, label2id, id2label):
+def load_model(model_path, label2id, id2label, num_labels):
     config = HubertConfig.from_pretrained(
         pretrained_model_name_or_path=model_path,
         num_labels=num_labels,
@@ -140,13 +140,13 @@ def load_model(model_path, num_labels, label2id, id2label):
 
 def train_params(dataset_path, filter_white_noise):
     train_dataloader, test_dataloader, label2id, id2label = create_dataloader(dataset_path, filter_white_noise)
-    model = load_model(…
+    model = load_model(MODEL, label2id, id2label, num_labels=len(id2label))
     return model, train_dataloader, test_dataloader, id2label
 
 def predict_params(dataset_path, model_path, filter_white_noise):
     _, _, label2id, id2label = create_dataloader(dataset_path, filter_white_noise)
-    model = load_model(model_path, …
-    return model, …
+    model = load_model(model_path, label2id, id2label, num_labels=len(id2label))
+    return model, id2label
 
 def compute_metrics(eval_pred):
     predictions = torch.argmax(torch.tensor(eval_pred.predictions), dim=-1)
@@ -187,10 +187,10 @@ def load_config(model_name):
     return model_config
 
 if __name__ == "__main__":
-    config = load_config(clasificador) # PARA CAMBIAR MODELOS
-    filter_white_noise = True
-    …
-    …
+    # config = load_config(clasificador) # PARA CAMBIAR MODELOS
+    # filter_white_noise = True
+    config = load_config(monitor) # PARA CAMBIAR MODELOS
+    filter_white_noise = False
     training_args = config["training_args"]
     output_dir = config["output_dir"]
     dataset_path = config["dataset_path"]
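The body of `load_model` past the `num_labels=num_labels,` line is unchanged and therefore not shown in the diff. Under the standard transformers pattern it presumably continues as below; the `HubertForSequenceClassification.from_pretrained` call is an assumption, not taken from the commit:

    from transformers import HubertConfig, HubertForSequenceClassification

    def load_model(model_path, label2id, id2label, num_labels):
        # The config carries the label maps so the classification head
        # stays consistent with the labels built by create_dataloader.
        config = HubertConfig.from_pretrained(
            pretrained_model_name_or_path=model_path,
            num_labels=num_labels,
            label2id=label2id,
            id2label=id2label,
        )
        # Assumed remainder: load the fine-tuned weights with that config.
        model = HubertForSequenceClassification.from_pretrained(
            model_path,
            config=config,
        )
        return model

Callers now pass positional `label2id, id2label` before `num_labels`, matching the reordered signature used in both `train_params` and `predict_params`.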