{ "cells": [ { "cell_type": "code", "execution_count": 7, "id": "9abf3270", "metadata": {}, "outputs": [], "source": [ "from transformers import AutoModelForCTC, Wav2Vec2Processor\n", "from datasets import load_dataset, load_metric, Audio\n", "import numpy as np\n", "import torch" ] }, { "cell_type": "code", "execution_count": 27, "id": "6e0830a2", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "# model = AutoModelForCTC.from_pretrained(\".\").to('cuda')\n", "# processor = Wav2Vec2Processor.from_pretrained(\".\")" ] }, { "cell_type": "code", "execution_count": 39, "id": "126e39e0", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a2fdec3c288946a19a5b36618af4c26c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/2.02k [00:00