Problems with speechbrain.pretrained

#2
by Joan1949 - opened

Hello, I can't install the library speechbrain.pretrained in Google Colab. Every time, the message says that this module was not found.

Is it possible that there have been some changes recently?

Thanks
-Joan

Hi @Joan1949

Try
pip install speechbrain==0.5.16
And restart the kernel, or I suggest you use a local IDE like Visual Studio.

Hello again, I managed to do it using a new local IDE, specifically PyCharm.

Now my problem is that I can't load the model I want to retrain.

Here's my code:

import os
import torch
from torch import optim
from speechbrain.pretrained import ASRCNNTransducer
from torch.utils.data import DataLoader
from torchaudio import load

# Configuración de hiperparámetros

# Optimizer step size, number of passes over the data, and samples per batch
learning_rate = 1e-4
num_epochs = 10
batch_size = 8
# Hugging Face identifier of the pretrained checkpoint to fine-tune
model_checkpoint = "speechbrain/asr-crdnn-commonvoice-14-es"
# Folder containing all the files. A raw string is required here: in a normal
# string literal "\U" starts a unicode escape, which makes this Windows path a
# SyntaxError.
dataset_folder = r"C:\Users\jsgin\Desktop\compartida\dataset"

# Cargar el modelo preentrenado

# Build the model from the checkpoint named by `model_checkpoint`, passing
# "pretrained_model" as the `savedir` argument.
# NOTE(review): `ASRCNNTransducer` does not appear to be a class exported by
# speechbrain.pretrained; the interface for this CRDNN checkpoint is presumably
# `EncoderDecoderASR` — confirm against the model card before running.
print("Cargando el modelo preentrenado...")
asr_model = ASRCNNTransducer.from_hparams(source=model_checkpoint, savedir="pretrained_model")
print("Modelo cargado:", asr_model)

# Verificar si asr_model se inicializó correctamente

if asr_model is not None:
print("El modelo se inicializó correctamente.")
optimizer = optim.Adam(asr_model.parameters(), lr=learning_rate)

# Función de pérdida
criterion = torch.nn.CTCLoss(blank=asr_model.tokenizer.tokenizer.pad_id, reduction='mean')

# Load the audio file paths and the transcripts found in a folder
def load_data(folder):
    """Collect the ``.wav`` paths and ``.txt`` transcripts stored in *folder*.

    Only the direct entries of *folder* are inspected. Entries with any other
    extension are ignored.

    Args:
        folder: Path of the directory to scan.

    Returns:
        A ``(audio_files, text_data)`` tuple. ``audio_files`` is a list of
        full paths to the ``.wav`` files, in sorted filename order.
        ``text_data`` maps each ``.txt`` file's base name (without the
        extension) to its stripped UTF-8 contents.
    """
    audio_files = []
    text_data = {}
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # resulting dataset order reproducible across runs and platforms.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if filename.endswith(".wav"):
            audio_files.append(path)
        elif filename.endswith(".txt"):
            with open(path, "r", encoding="utf-8") as file:
                basename = os.path.splitext(filename)[0]
                text_data[basename] = file.read().strip()
    return audio_files, text_data

# Load the audio paths and the transcripts from the dataset folder
audio_files, text_data = load_data(dataset_folder)

# Pair each audio path with the transcript that shares its base name
dataset = [
    (wav_path, text_data[os.path.splitext(os.path.basename(wav_path))[0]])
    for wav_path in audio_files
]

# Batch construction: collate function plus the DataLoader that uses it
def custom_collate(batch):
    """Turn a batch of ``(audio_path, transcript)`` pairs into loaded audio.

    Args:
        batch: Non-empty sequence of ``(audio_path, transcript)`` pairs.

    Returns:
        A ``(audio_tensors, transcripts)`` tuple: a list holding the first
        element returned by ``load`` for each path, and a tuple of the
        transcripts in the same order.
    """
    paths, transcripts = zip(*batch)
    # load() yields a pair; only its first element (the waveform) is kept.
    audio_tensors = [waveform for waveform, _ in map(load, paths)]
    return audio_tensors, transcripts


dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    collate_fn=custom_collate,
)

# Model training: run `num_epochs` passes over `dataloader`, taking one
# optimizer step per batch and printing the accumulated loss after each epoch.
for epoch in range(num_epochs):
    asr_model.train()
    # Sum (not mean) of the per-batch loss values for this epoch.
    total_loss = 0.0
    for audio_batch, transcription_batch in dataloader:
        optimizer.zero_grad()
        # audio_batch is the list of waveforms built by custom_collate.
        # NOTE(review): assumes the model can be called on that list and
        # returns a tensor of CTC logits — confirm against the model interface.
        logits = asr_model(audio_batch)
        logits = logits.transpose(1, 0)  # Transpose logits to match the shape expected by CTCLoss
        # Every sequence is given the full length of the first logits dimension.
        input_lengths = torch.full(size=(logits.size(1),), fill_value=logits.size(0), dtype=torch.int)
        # NOTE(review): lengths are the character counts of the transcript
        # strings, while `targets` comes from the tokenizer — these may not
        # agree; confirm.
        target_lengths = torch.tensor([len(transcription) for transcription in transcription_batch], dtype=torch.int)
        targets = asr_model.tokenizer.encode_batch(transcription_batch)
        loss = criterion(logits, targets, input_lengths, target_lengths)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss}")

# Save the trained model's state dict to "trained_model.pth"
torch.save(asr_model.state_dict(), "trained_model.pth")

else:
print("Error: El modelo no se inicializó correctamente.")

Thanks :)

Sign up or log in to comment