diff --git "a/Fine_Tune_XLSR_Wav2Vec2_on_Greek_ASR_with_🤗_Transformers.ipynb" "b/Fine_Tune_XLSR_Wav2Vec2_on_Greek_ASR_with_🤗_Transformers.ipynb" --- "a/Fine_Tune_XLSR_Wav2Vec2_on_Greek_ASR_with_🤗_Transformers.ipynb" +++ "b/Fine_Tune_XLSR_Wav2Vec2_on_Greek_ASR_with_🤗_Transformers.ipynb" @@ -68,7 +68,7 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "start_time": "2021-03-13T13:41:40.397Z" + "start_time": "2021-03-14T09:54:55.056Z" }, "id": "c8eh87Hoee5d" }, @@ -215,11 +215,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.080344Z", - "start_time": "2021-03-13T19:16:06.048608Z" + "end_time": "2021-03-14T10:06:46.227453Z", + "start_time": "2021-03-14T10:06:40.271948Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -286,12 +286,23 @@ "id": "2MMXcWFFgCXU", "outputId": "82b573b6-fc21-465d-9029-f5490814169e" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using custom data configuration el-afd0a157f05ee080\n", + "Reusing dataset common_voice (/home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564)\n", + "Using custom data configuration el-afd0a157f05ee080\n", + "Reusing dataset common_voice (/home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564)\n" + ] + } + ], "source": [ "from datasets import load_dataset, load_metric\n", "\n", - "common_voice_train = load_dataset(\"common_voice\", \"el\", data_dir=\"./cv-corpus-6.1-2020-12-11\", split=\"train+validation\")\n", - "common_voice_test = load_dataset(\"common_voice\", \"el\", data_dir=\"./cv-corpus-6.1-2020-12-11\", split=\"test\")" + "common_voice_train = load_dataset(\"common_voice\", \"el\", data_dir=\"cv-corpus-6.1-2020-12-11\", split=\"train+validation\")\n", + "common_voice_test = load_dataset(\"common_voice\", \"el\", data_dir=\"cv-corpus-6.1-2020-12-11\", split=\"test\")" ] }, { @@ -306,11 +317,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.138515Z", - "start_time": "2021-03-13T19:16:12.132361Z" + "end_time": "2021-03-14T10:06:46.251880Z", + "start_time": "2021-03-14T10:06:46.247618Z" }, "id": "kbyq6lDgQc2a" }, @@ -331,11 +342,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.197724Z", - "start_time": "2021-03-13T19:16:12.194406Z" + "end_time": "2021-03-14T10:06:46.273881Z", + "start_time": "2021-03-14T10:06:46.271270Z" }, "id": "72737oog2F6U" }, @@ -361,11 +372,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.264786Z", - "start_time": "2021-03-13T19:16:12.249608Z" + "end_time": "2021-03-14T10:06:46.307239Z", + "start_time": "2021-03-14T10:06:46.295588Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -374,7 +385,109 @@ "id": "K_JUmf3G3b9S", "outputId": "a8fe6d21-b0ce-4d5b-e3a2-abe08ae551f7" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sentence
0Σηκώθηκε το Βασιλόπουλο και πήρε πάλι το δρόμο του.
1στο τέλος έβγαλε
2Τέσσερις λέξεις έχουν αρνητικά σηματοδοτήσει
3Σε μερικά χρόνια δηλαδή;
4αποκρίθηκε λαχανιασμένος.
5Πού να μας ακολουθήσει;
6πριν ξεβραστούν στο χαρτί τα καινούρια τους αδελφάκια
7Και είδα πρόσωπα γελαστά, και άκουσα τραγούδια παντού, και δεν είδα ζητιάνο κανένα.
8Μα πεσμένο χάμω
9\"Ξέρω 'γω! Έκλεψε, λένε, κάτι κότες.\"
10δεν τραγουδώ παρά γιατί μ’ αγάπησες.
11Ψήσε φαγί για περισσότερους
12Η Ειρηνούλα κοίταζε με θαυμασμό και απορία.
13Της Πικρόχολης έστειλε μια φούστα
14κι ένα γράμμα που σου έφερα.
15και σκούπισε το μέτωπο του με το μανίκι του ποκαμίσου του
16Αλλά η Γνώση λέγει πως έχει ένα γιατρικό και θα μας το δώσει.
17απομάκρυνε τις φελούκες του από την όχθη.
18Το Βασιλόπουλο έβγαλε τα ξυλαράκια από την τσέπη του και το μάθημα άρχισε.
19Στο βάθος ερχόταν ένας τύπος με πατίνι
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "show_random_elements(common_voice_train.remove_columns([\"path\"]), num_examples=20)" ] @@ -402,11 +515,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.316730Z", - "start_time": "2021-03-13T19:16:12.314918Z" + "end_time": "2021-03-14T10:06:46.328556Z", + "start_time": "2021-03-14T10:06:46.326639Z" }, "id": "svKzVJ_hQGK6" }, @@ -422,11 +535,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.368168Z", - "start_time": "2021-03-13T19:16:12.359740Z" + "end_time": "2021-03-14T10:06:46.355968Z", + "start_time": "2021-03-14T10:06:46.349638Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -453,7 +566,16 @@ "id": "XIHocAuTQbBR", "outputId": "cc1a70b2-7b4d-410b-f997-1f1c47c3c9e5" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-8c6db5e938951602.arrow\n", + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-0ce2ebca66096fff.arrow\n" + ] + } + ], "source": [ "common_voice_train = common_voice_train.map(remove_special_characters, remove_columns=[\"sentence\"])\n", "common_voice_test = common_voice_test.map(remove_special_characters, remove_columns=[\"sentence\"])" @@ -461,11 +583,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.415543Z", - "start_time": "2021-03-13T19:16:12.411407Z" + "end_time": "2021-03-14T10:06:46.383256Z", + "start_time": "2021-03-14T10:06:46.378748Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -474,7 +596,69 @@ "id": "RBDRAAYxRE6n", "outputId": "c3a72eaa-8ddc-4283-ccb8-52e50215b84d" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
text
0κι έφθασε την ώρα που το παιδί του κλέφτη ξέκοβε στο δάσος
1ελάτε πατριώτες πάμε να σκάψομε τα χωράφια
2και πως αντί να κουτσοζεί κατάφερε να καλοζεί
3μ' αυτός ήλθε ευθύς και μου είπε
4μόνο γιατί μ’ αγάπησες γεννήθηκα
5έλεγε αναστενάζοντας ο άνθρωπος
6άνοιξε η όρεξη μου μιλώντας για δουλειές
7να επιστρέψει τον κλεμμένο σάκο για να σωπάσει ο κακομοιρίδης
8με σένα πλάγι μου νιώθω πως θα εκτελέσω το σκοπό μου
9ο κακομοιρίδης χαμογέλασε πικρά
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "show_random_elements(common_voice_train.remove_columns([\"path\"]))" ] @@ -496,11 +680,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.467831Z", - "start_time": "2021-03-13T19:16:12.466017Z" + "end_time": "2021-03-14T10:06:46.405883Z", + "start_time": "2021-03-14T10:06:46.404039Z" }, "id": "LwCshNbbeRZR" }, @@ -514,11 +698,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:12.950015Z", - "start_time": "2021-03-13T19:16:12.512658Z" + "end_time": "2021-03-14T10:06:46.835943Z", + "start_time": "2021-03-14T10:06:46.431012Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -545,7 +729,50 @@ "id": "_m6uUjjcfbjH", "outputId": "75a1a23f-a9c7-4c8b-8777-dad120a9aa9a" }, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "50501fbad21d4061a0c5780a25aed098", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=1), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a8c10d5f8b754e7bb0559212b945c3ac", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, max=1), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], "source": [ "vocab_train = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_train.column_names)\n", "vocab_test = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_test.column_names)" @@ -562,11 +789,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:13.000771Z", - "start_time": "2021-03-13T19:16:12.998784Z" + "end_time": "2021-03-14T10:06:46.861391Z", + "start_time": "2021-03-14T10:06:46.858610Z" }, "id": "aQfneNsmlJI0" }, @@ -577,11 +804,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:13.051689Z", - "start_time": "2021-03-13T19:16:13.048729Z" + "end_time": "2021-03-14T10:06:46.921226Z", + "start_time": "2021-03-14T10:06:46.918222Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -589,7 +816,68 @@ "id": "_0kRndSvqaKk", "outputId": "29f5d23f-75b1-44d0-9975-87f9ec4c0aa5" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'ώ': 0,\n", + " 'γ': 1,\n", + " 'n': 2,\n", + " 'ϋ': 3,\n", + " 'κ': 4,\n", + " 'e': 5,\n", + " 'ξ': 6,\n", + " \"'\": 7,\n", + " 'θ': 8,\n", + " '’': 9,\n", + " 'σ': 10,\n", + " 'η': 11,\n", + " 'ι': 12,\n", + " 'α': 13,\n", + " 'ε': 14,\n", + " 'υ': 15,\n", + " 'v': 16,\n", + " 'μ': 17,\n", + " 'ο': 18,\n", + " '«': 19,\n", + " '»': 20,\n", + " 'έ': 21,\n", + " 'ν': 22,\n", + " ' ': 23,\n", + " 'ά': 24,\n", + " 'o': 25,\n", + " 'ζ': 26,\n", + " 'β': 27,\n", + " 'τ': 28,\n", + " 'π': 29,\n", + " 'ή': 30,\n", + " 'ψ': 31,\n", + " 'ΐ': 32,\n", + " 'ό': 33,\n", + " 'h': 34,\n", + " 'ύ': 35,\n", + " 'ω': 36,\n", + " '´': 37,\n", + " 'χ': 38,\n", + " 'ϊ': 39,\n", + " 'ρ': 40,\n", + " 'a': 41,\n", + " 'ς': 42,\n", + " 'r': 43,\n", + " 'g': 44,\n", + " 'm': 45,\n", + " 'λ': 46,\n", + " '́': 47,\n", + " 'ί': 48,\n", + " 'φ': 49,\n", + " 'δ': 50}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "vocab_dict = {v: k for k, v in enumerate(vocab_list)}\n", "vocab_dict" @@ -624,11 +912,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:13.107778Z", - "start_time": "2021-03-13T19:16:13.106285Z" + "end_time": "2021-03-14T10:06:46.944995Z", + "start_time": "2021-03-14T10:06:46.943544Z" }, "id": "npbIbBoLgaFX" }, @@ -640,11 +928,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:13.274310Z", - "start_time": "2021-03-13T19:16:13.271846Z" + "end_time": "2021-03-14T10:06:46.971142Z", + "start_time": "2021-03-14T10:06:46.969054Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -652,7 +940,18 @@ "id": "znF0bNunsjbl", "outputId": "6dd50862-f4c5-4a05-87a7-da03d157e30e" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "53" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "vocab_dict[\"[UNK]\"] = len(vocab_dict)\n", "vocab_dict[\"[PAD]\"] = len(vocab_dict)\n", @@ -679,11 +978,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:14.340079Z", - "start_time": "2021-03-13T19:16:14.334468Z" + "end_time": "2021-03-14T10:06:46.995090Z", + "start_time": "2021-03-14T10:06:46.993446Z" }, "id": "ehyUoh9vk191" }, @@ -705,11 +1004,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:14.803340Z", - "start_time": "2021-03-13T19:16:14.790121Z" + "end_time": "2021-03-14T10:06:47.031444Z", + "start_time": "2021-03-14T10:06:47.018317Z" }, "id": "xriFGEWQkO4M" }, @@ -771,11 +1070,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:16.478323Z", - "start_time": "2021-03-13T19:16:16.472694Z" + "end_time": "2021-03-14T10:06:47.057924Z", + "start_time": "2021-03-14T10:06:47.054784Z" }, "id": "kAR0-2KLkopp" }, @@ -799,11 +1098,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:17.035092Z", - "start_time": "2021-03-13T19:16:17.030121Z" + "end_time": "2021-03-14T10:06:47.083673Z", + "start_time": "2021-03-14T10:06:47.081127Z" }, "id": "KYZtoW-tlZgl" }, @@ -827,11 +1126,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:17.964373Z", - "start_time": "2021-03-13T19:16:17.960002Z" + "end_time": "2021-03-14T10:06:47.112744Z", + "start_time": "2021-03-14T10:06:47.111476Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -847,11 +1146,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:18.425263Z", - "start_time": "2021-03-13T19:16:18.421191Z" + "end_time": "2021-03-14T10:06:47.141369Z", + "start_time": "2021-03-14T10:06:47.138818Z" }, "id": "Par9rpypPsml" }, @@ -884,11 +1183,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:19.002139Z", - "start_time": "2021-03-13T19:16:18.999532Z" + "end_time": "2021-03-14T10:06:47.698154Z", + "start_time": "2021-03-14T10:06:47.695685Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -896,7 +1195,19 @@ "id": "TTCS7W6XJ9BG", "outputId": "9c3b8cc0-3bcd-43fe-87ca-2825239b365a" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'path': '/home/earendil/Desktop/ML_playground/wav2vec2-large-xlsr-53-greek/cv-corpus-6.1-2020-12-11/el/clips/common_voice_el_20430274.mp3',\n", + " 'text': 'το βασιλόπουλο έβγαλε τα ξυλαράκια από την τσέπη του και το μάθημα άρχισε '}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "common_voice_train[0]" ] @@ -914,15 +1225,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:19.745543Z", - "start_time": "2021-03-13T19:16:19.703155Z" + "end_time": "2021-03-14T10:06:48.542682Z", + "start_time": "2021-03-14T10:06:48.513736Z" }, "id": "al9Luo4LPpwJ" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/earendil/anaconda3/envs/cuda110/lib/python3.8/site-packages/torchaudio/backend/utils.py:53: UserWarning: \"sox\" backend is being deprecated. The default backend will be changed to \"sox_io\" backend in 0.8.0 and \"sox\" backend will be removed in 0.9.0. Please migrate to \"sox_io\" backend. Please refer to https://github.com/pytorch/audio/issues/903 for the detail.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "import torchaudio\n", "\n", @@ -936,11 +1256,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:20.387907Z", - "start_time": "2021-03-13T19:16:20.363466Z" + "end_time": "2021-03-14T10:06:48.781708Z", + "start_time": "2021-03-14T10:06:48.755556Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -967,7 +1287,16 @@ "id": "afeicUeWlrRL", "outputId": "d5e4d41a-61d6-4094-eba9-a5bbed02cedc" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-d93f28c9964eab5e.arrow\n", + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-38a09981767eff59.arrow\n" + ] + } + ], "source": [ "common_voice_train = common_voice_train.map(speech_file_to_array_fn, remove_columns=common_voice_train.column_names)\n", "common_voice_test = common_voice_test.map(speech_file_to_array_fn, remove_columns=common_voice_test.column_names)" @@ -986,11 +1315,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:21.474323Z", - "start_time": "2021-03-13T19:16:20.977765Z" + "end_time": "2021-03-14T10:06:49.580796Z", + "start_time": "2021-03-14T10:06:49.185101Z" }, "id": "6Y6AK3Z-kHwP" }, @@ -1007,11 +1336,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:23.156665Z", - "start_time": "2021-03-13T19:16:21.953332Z" + "end_time": "2021-03-14T10:06:50.667773Z", + "start_time": "2021-03-14T10:06:49.605683Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -1086,7 +1415,180 @@ "id": "Ws8DyIL_kjwT", "outputId": "6176aea2-5986-4da0-a2ad-6ef5e2c85493" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-3c241372ac077a03.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-adc5c7b231c12387.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-8b677f7acb15af63.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-b301abaf3851c052.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-6e39c70c57299786.arrow\n", + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-6d7e0aa094522c60.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-872d739fcf9fc10a.arrow\n", + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-20a401885e80295d.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-ba8c6dd59eb8ccf2.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-2e240883a5f827fd.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-485c00dc9048ed50.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-923d905502a8661d.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-062aeafc3b8816c1.arrow\n", + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-bb54bb00dae79669.arrow\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-44bf1791baae8e2e.arrow\n", + "Loading cached processed dataset at /home/earendil/.cache/huggingface/datasets/common_voice/el-afd0a157f05ee080/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564/cache-ecc0dfac5615a58e.arrow\n" + ] + } + ], "source": [ "common_voice_train = common_voice_train.map(resample, num_proc=8)\n", "common_voice_test = common_voice_test.map(resample, num_proc=8)" @@ -1105,11 +1607,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:23.228138Z", - "start_time": "2021-03-13T19:16:23.210833Z" + "end_time": "2021-03-14T10:06:50.721070Z", + "start_time": "2021-03-14T10:06:50.694488Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -1118,7 +1620,26 @@ "id": "dueM6U7Ev0OA", "outputId": "1a3e579d-213e-4c7a-b2ec-9a7725d95afc" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import IPython.display as ipd\n", "import numpy as np\n", @@ -1144,11 +1665,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:25.363356Z", - "start_time": "2021-03-13T19:16:25.290149Z" + "end_time": "2021-03-14T10:06:50.779682Z", + "start_time": "2021-03-14T10:06:50.745322Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -1156,7 +1677,17 @@ "id": "1Po2g7YPuRTx", "outputId": "96b0b82c-a5df-4ae6-d17b-9c7d4f710b42" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target text: ξεπούλησε τα κτήματα μας \n", + "Input array shape: (47232,)\n", + "Sampling rate: 16000\n" + ] + } + ], "source": [ "rand_int = random.randint(0, len(common_voice_train))\n", "\n", @@ -1192,11 +1723,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:27.583682Z", - "start_time": "2021-03-13T19:16:27.581228Z" + "end_time": "2021-03-14T10:06:51.188480Z", + "start_time": "2021-03-14T10:06:51.185722Z" }, "id": "eJY7I0XAwe9p" }, @@ -1217,11 +1748,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:59.218803Z", - "start_time": "2021-03-13T19:16:28.412442Z" + "end_time": "2021-03-14T10:07:12.818162Z", + "start_time": "2021-03-14T10:06:51.733417Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -1296,7 +1827,290 @@ "id": "-np9xYK-wl8q", "outputId": "6155b5f0-a5a2-4e20-d0e2-0b3a60c13f98" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/earendil/anaconda3/envs/cuda110/lib/python3.8/site-packages/numpy/core/_asarray.py:83: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", + " return array(a, dtype, copy=False, order=order)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "76843ffa65004ab894917eaf37673b94", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#1', max=59, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1e96ebd9483a491bbe2e78938b0c1444", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#4', max=59, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a22ca1bc906b44c68d8f3455cda46d21", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#2', max=59, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1c19299d349c465094ffb86a81d8445c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#3', max=59, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bb87efafd5c44ba49cfbb0b9dea203bf", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#5', max=58, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7859c3186f9a40ba8c96ef0b6dd03e78", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#6', max=58, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b6cd7f6248ed423ca43ef5fe5698aa05", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#7', max=58, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1087e951f9854d3f832e841838929931", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#0', max=59, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " " + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ce50341c8bf147a5aac784d409a84ea8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#1', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7a539299f9ad455ca53b7317b5d6fcc0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#2', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "07b03f8be0da4857901f54f8f6f7b96f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#4', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fbb85e7ec24a4f22a12525d2b8ef53a1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#3', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2c4d007a3cf14faaa37d656e2eb1da34", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#7', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8165748923df4b828a6595c7b416a2cb", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#6', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c34f0a513be54597b83ba64aa3819d61", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#5', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e00e506138d046d1ab41aa243ced2680", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(IntProgress(value=0, description='#0', max=24, style=ProgressStyle(description_width='initial')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], "source": [ "common_voice_train = common_voice_train.map(prepare_dataset, remove_columns=common_voice_train.column_names, batch_size=8, num_proc=8, batched=True)\n", "common_voice_test = common_voice_test.map(prepare_dataset, remove_columns=common_voice_test.column_names, batch_size=8, num_proc=8, batched=True)" @@ -1339,11 +2153,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:59.275801Z", - "start_time": "2021-03-13T19:16:59.270920Z" + "end_time": "2021-03-14T10:07:12.850867Z", + "start_time": "2021-03-14T10:07:12.844816Z" }, "id": "tborvC9hx88e" }, @@ -1419,11 +2233,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:16:59.329608Z", - "start_time": "2021-03-13T19:16:59.328133Z" + "end_time": "2021-03-14T10:07:12.883379Z", + "start_time": "2021-03-14T10:07:12.881910Z" }, "id": "lbQf5GuZyQ4_" }, @@ -1444,11 +2258,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:17:02.227469Z", - "start_time": "2021-03-13T19:16:59.387853Z" + "end_time": "2021-03-14T10:07:15.225831Z", + "start_time": "2021-03-14T10:07:12.909977Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -1486,11 +2300,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:17:02.292196Z", - "start_time": "2021-03-13T19:17:02.289811Z" + "end_time": "2021-03-14T10:07:15.267780Z", + "start_time": "2021-03-14T10:07:15.265135Z" }, "id": "1XZ-kjweyTy_" }, @@ -1526,11 +2340,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:17:10.387115Z", - "start_time": "2021-03-13T19:17:02.345154Z" + "end_time": "2021-03-14T10:07:23.145790Z", + "start_time": "2021-03-14T10:07:15.296367Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -1538,7 +2352,16 @@ "id": "e7cqAWIayn6w", "outputId": "0a5ab559-6c38-47c6-b4f5-64480ed1df65" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-xlsr-53 and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ], "source": [ "from transformers import Wav2Vec2ForCTC\n", "\n", @@ -1562,7 +2385,7 @@ "id": "1DwR3XLSzGDD" }, "source": [ - "NOTE: Since Greek is not one of the 53 languages that XLSR-Wav2Vec2 had been pretrained on, I did not follow the below suggestion of freezing the CNN layers. Thus, the following cell has been commented out\n", + "NOTE: Since Greek is not one of the 53 languages that XLSR-Wav2Vec2 had been pretrained on, we may or may not follow the below suggestion of freezing the CNN layers. \n", "\n", "Original text: The first component of XLSR-Wav2Vec2 consists of a stack of CNN layers that are used to extract acoustically meaningful - but contextually independent - features from the raw speech signal. This part of the model has already been sufficiently trained during pretraining and as stated in the [paper](https://arxiv.org/pdf/2006.13979.pdf) does not need to be fine-tuned anymore. \n", "Thus, we can set the `requires_grad` to `False` for all parameters of the *feature extraction* part." @@ -1570,17 +2393,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:17:10.485699Z", - "start_time": "2021-03-13T19:17:10.484259Z" + "end_time": "2021-03-14T10:07:23.193215Z", + "start_time": "2021-03-14T10:07:23.191472Z" }, "id": "oGI8zObtZ3V0" }, "outputs": [], "source": [ - "#model.freeze_feature_extractor()" + "model.freeze_feature_extractor()" ] }, { @@ -1601,11 +2424,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:17:10.616187Z", - "start_time": "2021-03-13T19:17:10.592735Z" + "end_time": "2021-03-14T10:07:23.256610Z", + "start_time": "2021-03-14T10:07:23.234660Z" }, "id": "KbeKSV7uzGPP" }, @@ -1620,7 +2443,7 @@ " per_device_train_batch_size=6,\n", " gradient_accumulation_steps=2,\n", " evaluation_strategy=\"steps\",\n", - " num_train_epochs=30,\n", + " num_train_epochs=60,\n", " fp16=True,\n", " save_steps=400,\n", " eval_steps=400,\n", @@ -1642,11 +2465,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:17:12.452612Z", - "start_time": "2021-03-13T19:17:10.702247Z" + "end_time": "2021-03-14T10:07:24.993889Z", + "start_time": "2021-03-14T10:07:23.305173Z" }, "id": "rY7vBmFCPFgC" }, @@ -1717,11 +2540,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T18:59:31.434948Z", - "start_time": "2021-03-13T18:12:38.586326Z" + "end_time": "2021-03-14T17:43:05.019454Z", + "start_time": "2021-03-14T10:07:25.037347Z" }, "colab": { "base_uri": "https://localhost:8080/", @@ -1731,7 +2554,434 @@ "outputId": "2e23b190-ca76-48ad-8117-376d1d7c058e", "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/earendil/anaconda3/envs/cuda110/lib/python3.8/site-packages/torch/optim/lr_scheduler.py:131: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", + " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " \n", + " [18600/18600 7:32:53, Epoch 60/60]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation LossWerRuntimeSamples Per Second
4007.1936003.2786841.000000123.64060012.310000
8003.1985002.8315000.996669121.95780012.480000
12001.1566000.7793330.772585113.90190013.362000
16000.5297000.6474420.675179122.78870012.395000
20000.3725000.5674490.607348114.37390013.307000
24000.2877000.5630490.562128114.40400013.304000
28000.2342000.5479920.540325123.02660012.371000
32000.1967000.6276340.557787114.86830013.250000
36000.1729000.5364310.533259114.60090013.281000
40000.1607000.5107040.495306115.38520013.191000
44000.1381000.5929790.522055123.41940012.332000
48000.1261000.6632550.530534124.10960012.263000
52000.1216000.6114310.508428116.88580013.021000
56000.1119000.6091170.504189123.42440012.331000
60000.1070000.5811590.495811131.86550011.542000
64000.0985000.6538780.508227132.65790011.473000
68000.0959000.6020020.490663132.22490011.511000
72000.0899000.6006930.488140115.00320013.234000
76000.0867000.5921050.487635115.06290013.228000
80000.0825000.6151730.493792115.05790013.228000
84000.0768000.6089130.477238123.81250012.293000
88000.0698000.6180130.473302123.65250012.309000
92000.0753000.6213840.486626115.31060013.199000
96000.0674000.6383550.481377123.63230012.311000
100000.0617000.6684080.476734122.69480012.405000
104000.0590000.6545930.467346115.38770013.190000
108000.0598000.6368860.466842115.14880013.218000
112000.0558000.6463530.465630115.13230013.220000
116000.0568000.6061360.469668115.48450013.179000
120000.0500000.6069870.463813115.39970013.189000
124000.0489000.6435590.454931115.28890013.202000
128000.0508000.6377200.454931115.71310013.153000
132000.0475000.6290170.458363115.54630013.172000
136000.0415000.6694880.452912115.62010013.164000
140000.0437000.5929790.445947115.60430013.166000
144000.0405000.5978020.448572115.56740013.170000
148000.0373000.6167140.448774129.26960011.774000
152000.0351000.6160410.441708132.65690011.473000
156000.0373000.5868550.438579116.87540013.022000
160000.0346000.6198850.435752124.18320012.256000
164000.0328000.6003890.439992124.37730012.237000
168000.0315000.6082200.437468124.19390012.255000
172000.0312000.6157350.434642124.17320012.257000
176000.0310000.6112750.430302125.76350012.102000
180000.0296000.6031030.428889125.68000012.110000
184000.0287000.6061920.428687124.51020012.224000

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TrainOutput(global_step=18600, training_loss=0.34002172352165305, metrics={'train_runtime': 27175.8808, 'train_samples_per_second': 0.684, 'total_flos': 2.8460932280012886e+19, 'epoch': 60.0, 'init_mem_cpu_alloc_delta': 8143405, 'init_mem_gpu_alloc_delta': 1261972480, 'init_mem_cpu_peaked_delta': 18258, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 1358069, 'train_mem_gpu_alloc_delta': 3779249152, 'train_mem_cpu_peaked_delta': 183666894, 'train_mem_gpu_peaked_delta': 1681072128})" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "trainer.train()" ] @@ -1750,8 +3000,7 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:20:42.505632Z", - "start_time": "2021-03-13T19:20:36.981206Z" + "start_time": "2021-03-14T10:06:00.880Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -1761,7 +3010,7 @@ }, "outputs": [], "source": [ - "model = Wav2Vec2ForCTC.from_pretrained(\"wav2vec2-large-xlsr-greek/checkpoint-9200/\").to(\"cuda\")\n", + "model = Wav2Vec2ForCTC.from_pretrained(\"wav2vec2-large-xlsr-greek/checkpoint-18400/\").to(\"cuda\")\n", "processor = Wav2Vec2Processor.from_pretrained(\"wav2vec2-large-xlsr-greek/\")" ] }, @@ -1771,16 +3020,16 @@ "id": "QsfGCQYSvY8C" }, "source": [ - "Now, we will just take the first example of the test set, run it through the model and take the `argmax(...)` of the logits to retrieve the predicted token ids." + "Now, we will just take a random example of the test set, run it through the model and take the `argmax(...)` of the logits to retrieve the predicted token ids." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 61, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:21:08.891118Z", - "start_time": "2021-03-13T19:20:48.016164Z" + "end_time": "2021-03-14T17:50:43.012433Z", + "start_time": "2021-03-14T17:50:20.994866Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -1790,7 +3039,7 @@ }, "outputs": [], "source": [ - "input_dict = processor(common_voice_test[\"input_values\"][42], return_tensors=\"pt\", sampling_rate=16_000, padding=True)\n", + "input_dict = processor(common_voice_test[\"input_values\"][345], return_tensors=\"pt\", sampling_rate=16_000, padding=True)\n", "\n", "logits = model(input_dict.input_values.to(\"cuda\")).logits\n", "\n", @@ -1808,11 +3057,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:21:18.172643Z", - "start_time": "2021-03-13T19:21:14.933617Z" + "end_time": "2021-03-14T17:47:32.341035Z", + "start_time": "2021-03-14T17:47:29.882908Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -1820,7 +3069,16 @@ "id": "8dPE2GRIgtx-", "outputId": "a211d1ee-d850-481d-8bac-dc46c3efa561" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using custom data configuration el-ac779bf2c9f7c09b\n", + "Reusing dataset common_voice (/home/earendil/.cache/huggingface/datasets/common_voice/el-ac779bf2c9f7c09b/6.1.0/32954a9015faa0d840f6c6894938545c5d12bc5d8936a80079af74bf50d71564)\n" + ] + } + ], "source": [ "common_voice_test_transcription = load_dataset(\"common_voice\", \"el\", data_dir=\"./cv-corpus-6.1-2020-12-11\", split=\"test\")" ] @@ -1836,11 +3094,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": { "ExecuteTime": { - "end_time": "2021-03-13T19:21:18.351131Z", - "start_time": "2021-03-13T19:21:18.347466Z" + "end_time": "2021-03-14T17:50:43.087361Z", + "start_time": "2021-03-14T17:50:43.083839Z" }, "colab": { "base_uri": "https://localhost:8080/" @@ -1848,13 +3106,25 @@ "id": "Phqxa1O1jMDk", "outputId": "60d48c9f-f745-45ac-9105-446dc71025ca" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prediction:\n", + "και τι να δούμε\n", + "\n", + "Reference:\n", + "και τι να δούμε;\n" + ] + } + ], "source": [ "print(\"Prediction:\")\n", "print(processor.decode(pred_ids[0]))\n", "\n", "print(\"\\nReference:\")\n", - "print(common_voice_test_transcription[\"sentence\"][42].lower())\n" + "print(common_voice_test_transcription[\"sentence\"][345].lower())\n" ] }, { @@ -1871,16 +3141,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2021-03-13T19:04:51.390742Z", - "start_time": "2021-03-13T19:04:51.384593Z" - } - }, + "metadata": {}, "outputs": [], - "source": [ - "print(common_voice_test_transcription[\"sentence\"][42].lower())\n" - ] + "source": [] } ], "metadata": {