{ "cells": [ { "cell_type": "code", "execution_count": 4, "id": "5813b894", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "# For developers who want to use the latest development version or the library locally\n", "# Use poetry to install dependencies\n", "import sys\n", "sys.path.append(\"../\") # Or change this to the path of the folder containing the library\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import bechdelaidemo" ] }, { "cell_type": "markdown", "id": "6521fddd", "metadata": {}, "source": [ "# Bechdel test on a dialogue scene\n", "The test is run on a clip that is already a single dialogue scene. This is easier, because we don't have to extract dialogue segments from a video." ] }, { "cell_type": "markdown", "id": "dfa5ac0e", "metadata": {}, "source": [ "## Test on 2 dialogue scenes" ] }, { "cell_type": "code", "execution_count": 1, "id": "b657fa08", "metadata": {}, "outputs": [], "source": [ "path1 = \"https://www.youtube.com/watch?v=b2f2Kqt_KcE&ab_channel=Movieclips\" # The Devil Wears Prada\n", "path2 = \"https://www.youtube.com/watch?v=FDFdroN7d0w\" # Marriage Story" ] }, { "cell_type": "markdown", "id": "fe733fa4", "metadata": {}, "source": [ "## Download video and audio from YouTube\n", "Inspired by https://huggingface.co/spaces/vumichien/whisper-speaker-diarization/blob/main/app.py" ] }, { "cell_type": "code", "execution_count": 5, "id": "ac260c66", "metadata": {}, "outputs": [], "source": [ "from bechdelaidemo.utils import download_youtube_video" ] }, { "cell_type": "code", "execution_count": 6, "id": "5aa1ce3d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Task Completed!\n" ] } ], "source": [ "download_youtube_video(path2,\"video.mp4\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "d78d3671", "metadata": {}, "outputs": [], "source": [ "video_path = \"video.mp4\"" ] }, 
{ "cell_type": "markdown", "id": "999e8c78", "metadata": {}, "source": [ "## Convert video to audio wav file" ] }, { "cell_type": "code", "execution_count": 8, "id": "766a386d", "metadata": {}, "outputs": [], "source": [ "from bechdelaidemo.utils import extract_audio_from_movie" ] }, { "cell_type": "code", "execution_count": 9, "id": "8f5028ec", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MoviePy - Writing audio in video.wav\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " " ] }, { "name": "stdout", "output_type": "stream", "text": [ "MoviePy - Done.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r" ] } ], "source": [ "extract_audio_from_movie(video_path,\".wav\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "9af64b12", "metadata": {}, "outputs": [], "source": [ "audio_file = \"video.wav\"" ] }, { "cell_type": "markdown", "id": "88a8f8f1", "metadata": {}, "source": [ "## Clean audio\n", "- https://huggingface.co/speechbrain/sepformer-wham16k-enhancement\n", "- Spleeter" ] }, { "cell_type": "markdown", "id": "4f74de64", "metadata": {}, "source": [ "## Extract info using Whisper" ] }, { "cell_type": "code", "execution_count": 11, "id": "eb33487c", "metadata": {}, "outputs": [], "source": [ "import whisper" ] }, { "cell_type": "code", "execution_count": 12, "id": "12588878", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|█████████████████████████████████████| 72.1M/72.1M [00:00<00:00, 90.2MiB/s]\n" ] } ], "source": [ "model = whisper.load_model(\"tiny.en\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "59386a8d", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'task': 'transcribe', 'language': 'en', 'beam_size': 5, 'best_of': 5}\n" ] } ], "source": [ "## Transcribe audio\n", "options = dict(language=\"en\", beam_size=5, best_of=5)\n", "transcribe_options = 
dict(task=\"transcribe\", **options)\n", "\n", "print(transcribe_options)\n", "\n", "result = model.transcribe(\"video.wav\", **transcribe_options)\n", "segments = result[\"segments\"]" ] }, { "cell_type": "code", "execution_count": 16, "id": "4a2357b8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\" You're being so much like your father. Do not compare me to my father. I didn't compare you, Dan. I said you were acting like him. You're exactly like your mother. Everything you're complaining about her you're doing. You're suffocating in me. First of all, I love my mother. She was a wonderful mother. Just repeating what you told me. Secondly, how dare you compare my mother into my mother? I'm maybe like my father, but I am not like my mother. You are! And you're like my father. You're also like my mother. You're all the bad things about all of these people. But mostly your mother. When we would lie in bed together, sometimes I would look at you and see her and just feel so gross. I felt appalled when you touched her. You're a slob. I mean all the beds, clothes on the camera, and it steps together with you like anything. You want to heal my skin off. You'll never be happy. And now lay her anywhere. You'll think you found some better opposite guy than me. And in a few years, you rebel against him because you need to have your voice. But you don't want a voice. You just want to fucking complain about not having a voice. I think about being married to you and that woman is a stranger to me. I mean we had a child and a bitch. You've gone back to your life before you met me. It's pathetic. People used to tell me that you were too selfish to be a great artist. And I used to defend you. They were absolutely right. All your best acting is behind you. You're back to being a hack. You got slighted me. You're a fucking villain. And you want to present yourself as a victim because it's a good legal strategy? Fine. But you and I both know you chose this life. 
You wanted it until you didn't. You used me so you could get out of LA. I didn't use you. You did. And then you blamed me for it. You always made me aware of what I was doing wrong. How I was falling short. Life with you was joyless. Then you had to go and fuck someone. You should be upset that I fucked her. You should be upset that I had a laugh with her. Do you love her? No. But she didn't hate me. You hated me. You hated me. You hated me. You fucked somebody we worked with. You stopped having sex with me in the last year. I never cheated on you. What was cheating on me? But there's so much I could have done. I was a director in my 20s who came from nothing and was suddenly on the cover of fucking Time Out New York. I was hot shit and I wanted to fuck everybody and I didn't. And I loved you and I didn't want to lose you. But I had made my 20s. And I didn't want to lose that too. And I kind of did. And you wanted so much so fast. I didn't even want to get married. Fuck it. There's so much I didn't do. Thanks for that. You're welcome. I can't believe I didn't know you forever. You're fucking insane. And you're fucking winning. Are you kidding me? I'm wanting to be married. I don't already lost. You didn't love me as much as I loved you. What does that have to do with LA? What? You're so merged with your own selfishness. You don't need to identify it and selfishness anymore. You're such a dick. Every day I wake up and I hope you're dead. Dead like it. If I can guarantee every movie, okay? I don't think I'm gonna kill this. As I can hit by a car and die. You don't work with me once you're left alone. I lost you and you 2, 3 4 I lost you. I lost you, 2 4 I lost you, 2 5 I'm sorry. Sheila. I'm sorry. Did you know? 
You\"" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result[\"text\"]" ] }, { "cell_type": "code", "execution_count": 17, "id": "26fcf349", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'id': 0,\n", " 'seek': 0,\n", " 'start': 0.0,\n", " 'end': 3.0,\n", " 'text': \" You're being so much like your father.\",\n", " 'tokens': [921, 821, 852, 523, 881, 588, 534, 2988, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 1,\n", " 'seek': 0,\n", " 'start': 3.0,\n", " 'end': 5.0,\n", " 'text': ' Do not compare me to my father.',\n", " 'tokens': [2141, 407, 8996, 502, 284, 616, 2988, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 2,\n", " 'seek': 0,\n", " 'start': 5.0,\n", " 'end': 6.0,\n", " 'text': \" I didn't compare you, Dan.\",\n", " 'tokens': [314, 1422, 470, 8996, 345, 11, 6035, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 3,\n", " 'seek': 0,\n", " 'start': 6.0,\n", " 'end': 7.0,\n", " 'text': ' I said you were acting like him.',\n", " 'tokens': [314, 531, 345, 547, 7205, 588, 683, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 4,\n", " 'seek': 0,\n", " 'start': 7.0,\n", " 'end': 8.0,\n", " 'text': \" You're exactly like your mother.\",\n", " 'tokens': [921, 821, 3446, 588, 534, 2802, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 5,\n", " 'seek': 0,\n", " 
'start': 8.0,\n", " 'end': 10.0,\n", " 'text': \" Everything you're complaining about her you're doing.\",\n", " 'tokens': [11391, 345, 821, 18705, 546, 607, 345, 821, 1804, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 6,\n", " 'seek': 0,\n", " 'start': 10.0,\n", " 'end': 12.0,\n", " 'text': \" You're suffocating in me.\",\n", " 'tokens': [921, 821, 3027, 27123, 287, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 7,\n", " 'seek': 0,\n", " 'start': 12.0,\n", " 'end': 13.0,\n", " 'text': ' First of all, I love my mother.',\n", " 'tokens': [3274, 286, 477, 11, 314, 1842, 616, 2802, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 8,\n", " 'seek': 0,\n", " 'start': 13.0,\n", " 'end': 15.0,\n", " 'text': ' She was a wonderful mother.',\n", " 'tokens': [1375, 373, 257, 7932, 2802, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 9,\n", " 'seek': 0,\n", " 'start': 15.0,\n", " 'end': 16.0,\n", " 'text': ' Just repeating what you told me.',\n", " 'tokens': [2329, 20394, 644, 345, 1297, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 10,\n", " 'seek': 0,\n", " 'start': 16.0,\n", " 'end': 19.0,\n", " 'text': ' Secondly, how dare you compare my mother into my mother?',\n", " 'tokens': [34276, 11, 703, 16498, 345, 8996, 616, 2802, 656, 616, 2802, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': 
-0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 11,\n", " 'seek': 0,\n", " 'start': 19.0,\n", " 'end': 22.0,\n", " 'text': \" I'm maybe like my father, but I am not like my mother.\",\n", " 'tokens': [314,\n", " 1101,\n", " 3863,\n", " 588,\n", " 616,\n", " 2988,\n", " 11,\n", " 475,\n", " 314,\n", " 716,\n", " 407,\n", " 588,\n", " 616,\n", " 2802,\n", " 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 12,\n", " 'seek': 0,\n", " 'start': 22.0,\n", " 'end': 23.0,\n", " 'text': ' You are!',\n", " 'tokens': [921, 389, 0],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 13,\n", " 'seek': 0,\n", " 'start': 23.0,\n", " 'end': 25.0,\n", " 'text': \" And you're like my father.\",\n", " 'tokens': [843, 345, 821, 588, 616, 2988, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 14,\n", " 'seek': 0,\n", " 'start': 25.0,\n", " 'end': 26.0,\n", " 'text': \" You're also like my mother.\",\n", " 'tokens': [921, 821, 635, 588, 616, 2802, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 15,\n", " 'seek': 0,\n", " 'start': 26.0,\n", " 'end': 29.0,\n", " 'text': \" You're all the bad things about all of these people.\",\n", " 'tokens': [921, 821, 477, 262, 2089, 1243, 546, 477, 286, 777, 661, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15723278113370817,\n", " 'compression_ratio': 1.9415807560137457,\n", " 'no_speech_prob': 0.16063228249549866},\n", " {'id': 16,\n", " 'seek': 
2900,\n", " 'start': 29.0,\n", " 'end': 30.0,\n", " 'text': ' But mostly your mother.',\n", " 'tokens': [887, 4632, 534, 2802, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 17,\n", " 'seek': 2900,\n", " 'start': 30.0,\n", " 'end': 32.0,\n", " 'text': ' When we would lie in bed together, sometimes I would look at you',\n", " 'tokens': [1649,\n", " 356,\n", " 561,\n", " 6486,\n", " 287,\n", " 3996,\n", " 1978,\n", " 11,\n", " 3360,\n", " 314,\n", " 561,\n", " 804,\n", " 379,\n", " 345],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 18,\n", " 'seek': 2900,\n", " 'start': 32.0,\n", " 'end': 34.0,\n", " 'text': ' and see her and just feel so gross.',\n", " 'tokens': [290, 766, 607, 290, 655, 1254, 523, 10319, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 19,\n", " 'seek': 2900,\n", " 'start': 34.0,\n", " 'end': 36.0,\n", " 'text': ' I felt appalled when you touched her.',\n", " 'tokens': [314, 2936, 41586, 618, 345, 12615, 607, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 20,\n", " 'seek': 2900,\n", " 'start': 36.0,\n", " 'end': 37.0,\n", " 'text': \" You're a slob.\",\n", " 'tokens': [921, 821, 257, 1017, 672, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 21,\n", " 'seek': 2900,\n", " 'start': 37.0,\n", " 'end': 39.0,\n", " 'text': ' I mean all the beds, clothes on the camera,',\n", " 'tokens': [314, 1612, 
477, 262, 20237, 11, 8242, 319, 262, 4676, 11],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 22,\n", " 'seek': 2900,\n", " 'start': 39.0,\n", " 'end': 41.0,\n", " 'text': ' and it steps together with you like anything.',\n", " 'tokens': [290, 340, 4831, 1978, 351, 345, 588, 1997, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 23,\n", " 'seek': 2900,\n", " 'start': 41.0,\n", " 'end': 42.0,\n", " 'text': ' You want to heal my skin off.',\n", " 'tokens': [921, 765, 284, 12035, 616, 4168, 572, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 24,\n", " 'seek': 2900,\n", " 'start': 42.0,\n", " 'end': 43.0,\n", " 'text': \" You'll never be happy.\",\n", " 'tokens': [921, 1183, 1239, 307, 3772, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 25,\n", " 'seek': 2900,\n", " 'start': 43.0,\n", " 'end': 44.0,\n", " 'text': ' And now lay her anywhere.',\n", " 'tokens': [843, 783, 3830, 607, 6609, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 26,\n", " 'seek': 2900,\n", " 'start': 44.0,\n", " 'end': 47.0,\n", " 'text': \" You'll think you found some better opposite guy than me.\",\n", " 'tokens': [921, 1183, 892, 345, 1043, 617, 1365, 6697, 3516, 621, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 
9.823902473726775e-07},\n", " {'id': 27,\n", " 'seek': 2900,\n", " 'start': 47.0,\n", " 'end': 51.0,\n", " 'text': ' And in a few years, you rebel against him because you need to have your voice.',\n", " 'tokens': [843,\n", " 287,\n", " 257,\n", " 1178,\n", " 812,\n", " 11,\n", " 345,\n", " 14034,\n", " 1028,\n", " 683,\n", " 780,\n", " 345,\n", " 761,\n", " 284,\n", " 423,\n", " 534,\n", " 3809,\n", " 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 28,\n", " 'seek': 2900,\n", " 'start': 51.0,\n", " 'end': 53.0,\n", " 'text': \" But you don't want a voice.\",\n", " 'tokens': [887, 345, 836, 470, 765, 257, 3809, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 29,\n", " 'seek': 2900,\n", " 'start': 53.0,\n", " 'end': 55.0,\n", " 'text': ' You just want to fucking complain about not having a voice.',\n", " 'tokens': [921, 655, 765, 284, 9372, 13121, 546, 407, 1719, 257, 3809, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 30,\n", " 'seek': 2900,\n", " 'start': 55.0,\n", " 'end': 58.0,\n", " 'text': ' I think about being married to you and that woman is a stranger to me.',\n", " 'tokens': [314,\n", " 892,\n", " 546,\n", " 852,\n", " 6405,\n", " 284,\n", " 345,\n", " 290,\n", " 326,\n", " 2415,\n", " 318,\n", " 257,\n", " 16195,\n", " 284,\n", " 502,\n", " 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.22024561564127604,\n", " 'compression_ratio': 1.8056338028169014,\n", " 'no_speech_prob': 9.823902473726775e-07},\n", " {'id': 31,\n", " 'seek': 5800,\n", " 'start': 58.0,\n", " 'end': 61.0,\n", " 'text': ' I mean we had a child and a bitch.',\n", " 'tokens': [314, 
1612, 356, 550, 257, 1200, 290, 257, 21551, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 32,\n", " 'seek': 5800,\n", " 'start': 61.0,\n", " 'end': 63.0,\n", " 'text': \" You've gone back to your life before you met me.\",\n", " 'tokens': [921, 1053, 3750, 736, 284, 534, 1204, 878, 345, 1138, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 33,\n", " 'seek': 5800,\n", " 'start': 63.0,\n", " 'end': 64.0,\n", " 'text': \" It's pathetic.\",\n", " 'tokens': [632, 338, 29215, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 34,\n", " 'seek': 5800,\n", " 'start': 64.0,\n", " 'end': 68.0,\n", " 'text': ' People used to tell me that you were too selfish to be a great artist.',\n", " 'tokens': [4380,\n", " 973,\n", " 284,\n", " 1560,\n", " 502,\n", " 326,\n", " 345,\n", " 547,\n", " 1165,\n", " 20363,\n", " 284,\n", " 307,\n", " 257,\n", " 1049,\n", " 6802,\n", " 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 35,\n", " 'seek': 5800,\n", " 'start': 68.0,\n", " 'end': 70.0,\n", " 'text': ' And I used to defend you.',\n", " 'tokens': [843, 314, 973, 284, 4404, 345, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 36,\n", " 'seek': 5800,\n", " 'start': 70.0,\n", " 'end': 71.0,\n", " 'text': ' They were absolutely right.',\n", " 'tokens': [1119, 547, 5543, 826, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': 
-0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 37,\n", " 'seek': 5800,\n", " 'start': 71.0,\n", " 'end': 73.0,\n", " 'text': ' All your best acting is behind you.',\n", " 'tokens': [1439, 534, 1266, 7205, 318, 2157, 345, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 38,\n", " 'seek': 5800,\n", " 'start': 73.0,\n", " 'end': 74.0,\n", " 'text': \" You're back to being a hack.\",\n", " 'tokens': [921, 821, 736, 284, 852, 257, 8156, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 39,\n", " 'seek': 5800,\n", " 'start': 74.0,\n", " 'end': 76.0,\n", " 'text': ' You got slighted me.',\n", " 'tokens': [921, 1392, 3731, 276, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 40,\n", " 'seek': 5800,\n", " 'start': 76.0,\n", " 'end': 77.0,\n", " 'text': \" You're a fucking villain.\",\n", " 'tokens': [921, 821, 257, 9372, 16687, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 41,\n", " 'seek': 5800,\n", " 'start': 77.0,\n", " 'end': 79.0,\n", " 'text': ' And you want to present yourself as a victim',\n", " 'tokens': [843, 345, 765, 284, 1944, 3511, 355, 257, 3117],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 42,\n", " 'seek': 5800,\n", " 'start': 79.0,\n", " 'end': 81.0,\n", " 'text': \" because it's a good legal strategy?\",\n", " 
'tokens': [780, 340, 338, 257, 922, 2742, 4811, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 43,\n", " 'seek': 5800,\n", " 'start': 81.0,\n", " 'end': 82.0,\n", " 'text': ' Fine.',\n", " 'tokens': [17867, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 44,\n", " 'seek': 5800,\n", " 'start': 82.0,\n", " 'end': 84.0,\n", " 'text': ' But you and I both know you chose this life.',\n", " 'tokens': [887, 345, 290, 314, 1111, 760, 345, 7690, 428, 1204, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 45,\n", " 'seek': 5800,\n", " 'start': 84.0,\n", " 'end': 87.0,\n", " 'text': \" You wanted it until you didn't.\",\n", " 'tokens': [921, 2227, 340, 1566, 345, 1422, 470, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.12269237166956852,\n", " 'compression_ratio': 1.6801346801346801,\n", " 'no_speech_prob': 8.356517469110258e-07},\n", " {'id': 46,\n", " 'seek': 8700,\n", " 'start': 87.0,\n", " 'end': 90.0,\n", " 'text': ' You used me so you could get out of LA.',\n", " 'tokens': [921, 973, 502, 523, 345, 714, 651, 503, 286, 9131, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 47,\n", " 'seek': 8700,\n", " 'start': 90.0,\n", " 'end': 91.0,\n", " 'text': \" I didn't use you.\",\n", " 'tokens': [314, 1422, 470, 779, 345, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 48,\n", " 'seek': 8700,\n", " 'start': 
91.0,\n", " 'end': 92.0,\n", " 'text': ' You did.',\n", " 'tokens': [921, 750, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 49,\n", " 'seek': 8700,\n", " 'start': 92.0,\n", " 'end': 94.0,\n", " 'text': ' And then you blamed me for it.',\n", " 'tokens': [843, 788, 345, 13772, 502, 329, 340, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 50,\n", " 'seek': 8700,\n", " 'start': 94.0,\n", " 'end': 96.0,\n", " 'text': ' You always made me aware of what I was doing wrong.',\n", " 'tokens': [921, 1464, 925, 502, 3910, 286, 644, 314, 373, 1804, 2642, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 51,\n", " 'seek': 8700,\n", " 'start': 96.0,\n", " 'end': 98.0,\n", " 'text': ' How I was falling short.',\n", " 'tokens': [1374, 314, 373, 7463, 1790, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 52,\n", " 'seek': 8700,\n", " 'start': 98.0,\n", " 'end': 99.0,\n", " 'text': ' Life with you was joyless.',\n", " 'tokens': [5155, 351, 345, 373, 8716, 1203, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 53,\n", " 'seek': 8700,\n", " 'start': 99.0,\n", " 'end': 101.0,\n", " 'text': ' Then you had to go and fuck someone.',\n", " 'tokens': [3244, 345, 550, 284, 467, 290, 5089, 2130, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 
'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 54,\n", " 'seek': 8700,\n", " 'start': 101.0,\n", " 'end': 103.0,\n", " 'text': ' You should be upset that I fucked her.',\n", " 'tokens': [921, 815, 307, 9247, 326, 314, 20654, 607, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 55,\n", " 'seek': 8700,\n", " 'start': 103.0,\n", " 'end': 106.0,\n", " 'text': ' You should be upset that I had a laugh with her.',\n", " 'tokens': [921, 815, 307, 9247, 326, 314, 550, 257, 6487, 351, 607, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 56,\n", " 'seek': 8700,\n", " 'start': 106.0,\n", " 'end': 107.0,\n", " 'text': ' Do you love her?',\n", " 'tokens': [2141, 345, 1842, 607, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 57,\n", " 'seek': 8700,\n", " 'start': 107.0,\n", " 'end': 108.0,\n", " 'text': ' No.',\n", " 'tokens': [1400, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 58,\n", " 'seek': 8700,\n", " 'start': 108.0,\n", " 'end': 109.0,\n", " 'text': \" But she didn't hate me.\",\n", " 'tokens': [887, 673, 1422, 470, 5465, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 59,\n", " 'seek': 8700,\n", " 'start': 109.0,\n", " 'end': 110.0,\n", " 'text': ' You hated me.',\n", " 'tokens': [921, 16563, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 
1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 60,\n", " 'seek': 8700,\n", " 'start': 110.0,\n", " 'end': 111.0,\n", " 'text': ' You hated me.',\n", " 'tokens': [921, 16563, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 61,\n", " 'seek': 8700,\n", " 'start': 111.0,\n", " 'end': 112.0,\n", " 'text': ' You hated me.',\n", " 'tokens': [921, 16563, 502, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 62,\n", " 'seek': 8700,\n", " 'start': 112.0,\n", " 'end': 113.0,\n", " 'text': ' You fucked somebody we worked with.',\n", " 'tokens': [921, 20654, 8276, 356, 3111, 351, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 63,\n", " 'seek': 8700,\n", " 'start': 113.0,\n", " 'end': 116.0,\n", " 'text': ' You stopped having sex with me in the last year.',\n", " 'tokens': [921, 5025, 1719, 1714, 351, 502, 287, 262, 938, 614, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.08677079749829841,\n", " 'compression_ratio': 1.8721804511278195,\n", " 'no_speech_prob': 9.081037433134043e-07},\n", " {'id': 64,\n", " 'seek': 11600,\n", " 'start': 116.0,\n", " 'end': 117.0,\n", " 'text': ' I never cheated on you.',\n", " 'tokens': [314, 1239, 37264, 319, 345, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 65,\n", " 'seek': 11600,\n", " 'start': 117.0,\n", " 'end': 119.0,\n", " 'text': ' What was cheating on me?',\n", " 'tokens': [1867, 373, 21608, 319, 502, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': 
-0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 66,\n", " 'seek': 11600,\n", " 'start': 119.0,\n", " 'end': 121.0,\n", " 'text': \" But there's so much I could have done.\",\n", " 'tokens': [887, 612, 338, 523, 881, 314, 714, 423, 1760, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 67,\n", " 'seek': 11600,\n", " 'start': 121.0,\n", " 'end': 123.0,\n", " 'text': ' I was a director in my 20s who came from nothing',\n", " 'tokens': [314, 373, 257, 3437, 287, 616, 1160, 82, 508, 1625, 422, 2147],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 68,\n", " 'seek': 11600,\n", " 'start': 123.0,\n", " 'end': 126.0,\n", " 'text': ' and was suddenly on the cover of fucking Time Out New York.',\n", " 'tokens': [290,\n", " 373,\n", " 6451,\n", " 319,\n", " 262,\n", " 3002,\n", " 286,\n", " 9372,\n", " 3862,\n", " 3806,\n", " 968,\n", " 1971,\n", " 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 69,\n", " 'seek': 11600,\n", " 'start': 126.0,\n", " 'end': 129.0,\n", " 'text': \" I was hot shit and I wanted to fuck everybody and I didn't.\",\n", " 'tokens': [314,\n", " 373,\n", " 3024,\n", " 7510,\n", " 290,\n", " 314,\n", " 2227,\n", " 284,\n", " 5089,\n", " 7288,\n", " 290,\n", " 314,\n", " 1422,\n", " 470,\n", " 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 70,\n", " 'seek': 11600,\n", " 'start': 129.0,\n", " 'end': 131.0,\n", " 'text': \" And I loved you and I didn't 
want to lose you.\",\n", " 'tokens': [843,\n", " 314,\n", " 6151,\n", " 345,\n", " 290,\n", " 314,\n", " 1422,\n", " 470,\n", " 765,\n", " 284,\n", " 4425,\n", " 345,\n", " 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 71,\n", " 'seek': 11600,\n", " 'start': 131.0,\n", " 'end': 133.0,\n", " 'text': ' But I had made my 20s.',\n", " 'tokens': [887, 314, 550, 925, 616, 1160, 82, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 72,\n", " 'seek': 11600,\n", " 'start': 133.0,\n", " 'end': 135.0,\n", " 'text': \" And I didn't want to lose that too.\",\n", " 'tokens': [843, 314, 1422, 470, 765, 284, 4425, 326, 1165, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 73,\n", " 'seek': 11600,\n", " 'start': 135.0,\n", " 'end': 136.0,\n", " 'text': ' And I kind of did.',\n", " 'tokens': [843, 314, 1611, 286, 750, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 74,\n", " 'seek': 11600,\n", " 'start': 136.0,\n", " 'end': 139.0,\n", " 'text': ' And you wanted so much so fast.',\n", " 'tokens': [843, 345, 2227, 523, 881, 523, 3049, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 75,\n", " 'seek': 11600,\n", " 'start': 139.0,\n", " 'end': 141.0,\n", " 'text': \" I didn't even want to get married.\",\n", " 'tokens': [314, 1422, 470, 772, 765, 284, 651, 6405, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': 
-0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 76,\n", " 'seek': 11600,\n", " 'start': 141.0,\n", " 'end': 142.0,\n", " 'text': ' Fuck it.',\n", " 'tokens': [25617, 340, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 77,\n", " 'seek': 11600,\n", " 'start': 142.0,\n", " 'end': 145.0,\n", " 'text': \" There's so much I didn't do.\",\n", " 'tokens': [1318, 338, 523, 881, 314, 1422, 470, 466, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.11258821849581561,\n", " 'compression_ratio': 1.8339622641509434,\n", " 'no_speech_prob': 2.3229101486776926e-07},\n", " {'id': 78,\n", " 'seek': 14500,\n", " 'start': 145.0,\n", " 'end': 147.0,\n", " 'text': ' Thanks for that.',\n", " 'tokens': [6930, 329, 326, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 79,\n", " 'seek': 14500,\n", " 'start': 147.0,\n", " 'end': 148.0,\n", " 'text': \" You're welcome.\",\n", " 'tokens': [921, 821, 7062, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 80,\n", " 'seek': 14500,\n", " 'start': 148.0,\n", " 'end': 151.0,\n", " 'text': \" I can't believe I didn't know you forever.\",\n", " 'tokens': [314, 460, 470, 1975, 314, 1422, 470, 760, 345, 8097, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 81,\n", " 'seek': 14500,\n", " 'start': 151.0,\n", " 'end': 154.0,\n", " 'text': \" You're fucking insane.\",\n", " 'tokens': [921, 821, 9372, 13251, 13],\n", " 'temperature': 0.0,\n", 
" 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 82,\n", " 'seek': 14500,\n", " 'start': 154.0,\n", " 'end': 157.0,\n", " 'text': \" And you're fucking winning.\",\n", " 'tokens': [843, 345, 821, 9372, 5442, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 83,\n", " 'seek': 14500,\n", " 'start': 157.0,\n", " 'end': 159.0,\n", " 'text': ' Are you kidding me?',\n", " 'tokens': [4231, 345, 26471, 502, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 84,\n", " 'seek': 14500,\n", " 'start': 159.0,\n", " 'end': 161.0,\n", " 'text': \" I'm wanting to be married.\",\n", " 'tokens': [314, 1101, 10291, 284, 307, 6405, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 85,\n", " 'seek': 14500,\n", " 'start': 161.0,\n", " 'end': 163.0,\n", " 'text': \" I don't already lost.\",\n", " 'tokens': [314, 836, 470, 1541, 2626, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 86,\n", " 'seek': 14500,\n", " 'start': 163.0,\n", " 'end': 167.0,\n", " 'text': \" You didn't love me as much as I loved you.\",\n", " 'tokens': [921, 1422, 470, 1842, 502, 355, 881, 355, 314, 6151, 345, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 87,\n", " 'seek': 14500,\n", " 'start': 167.0,\n", " 'end': 171.0,\n", " 'text': ' What does that have to do 
with LA?',\n", " 'tokens': [1867, 857, 326, 423, 284, 466, 351, 9131, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 88,\n", " 'seek': 14500,\n", " 'start': 171.0,\n", " 'end': 172.0,\n", " 'text': ' What?',\n", " 'tokens': [1867, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.15252724697715359,\n", " 'compression_ratio': 1.5245901639344261,\n", " 'no_speech_prob': 1.9235710624343483e-07},\n", " {'id': 89,\n", " 'seek': 17200,\n", " 'start': 172.0,\n", " 'end': 175.0,\n", " 'text': \" You're so merged with your own selfishness.\",\n", " 'tokens': [921, 821, 523, 23791, 351, 534, 898, 20363, 1108, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 90,\n", " 'seek': 17200,\n", " 'start': 175.0,\n", " 'end': 178.0,\n", " 'text': \" You don't need to identify it and selfishness anymore.\",\n", " 'tokens': [921, 836, 470, 761, 284, 5911, 340, 290, 20363, 1108, 7471, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 91,\n", " 'seek': 17200,\n", " 'start': 178.0,\n", " 'end': 181.0,\n", " 'text': \" You're such a dick.\",\n", " 'tokens': [921, 821, 884, 257, 19317, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 92,\n", " 'seek': 17200,\n", " 'start': 181.0,\n", " 'end': 184.0,\n", " 'text': \" Every day I wake up and I hope you're dead.\",\n", " 'tokens': [3887, 1110, 314, 7765, 510, 290, 314, 2911, 345, 821, 2636, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 
'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 93,\n", " 'seek': 17200,\n", " 'start': 184.0,\n", " 'end': 185.0,\n", " 'text': ' Dead like it.',\n", " 'tokens': [5542, 588, 340, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 94,\n", " 'seek': 17200,\n", " 'start': 185.0,\n", " 'end': 187.0,\n", " 'text': ' If I can guarantee every movie, okay?',\n", " 'tokens': [1002, 314, 460, 9149, 790, 3807, 11, 8788, 30],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 95,\n", " 'seek': 17200,\n", " 'start': 187.0,\n", " 'end': 189.0,\n", " 'text': \" I don't think I'm gonna kill this.\",\n", " 'tokens': [314, 836, 470, 892, 314, 1101, 8066, 1494, 428, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 96,\n", " 'seek': 17200,\n", " 'start': 189.0,\n", " 'end': 191.0,\n", " 'text': ' As I can hit by a car and die.',\n", " 'tokens': [1081, 314, 460, 2277, 416, 257, 1097, 290, 4656, 13],\n", " 'temperature': 0.0,\n", " 'avg_logprob': -0.2508346222259186,\n", " 'compression_ratio': 1.4583333333333333,\n", " 'no_speech_prob': 3.485897934751847e-07},\n", " {'id': 97,\n", " 'seek': 19100,\n", " 'start': 191.0,\n", " 'end': 196.0,\n", " 'text': \" You don't\",\n", " 'tokens': [921, 836, 470],\n", " 'temperature': 1.0,\n", " 'avg_logprob': -2.0730719472847734,\n", " 'compression_ratio': 1.3488372093023255,\n", " 'no_speech_prob': 1.8071212252834812e-05},\n", " {'id': 98,\n", " 'seek': 19100,\n", " 'start': 196.0,\n", " 'end': 201.0,\n", " 'text': \" work with me once you're left alone.\",\n", " 'tokens': [670, 351, 502, 1752, 345, 821, 1364, 3436, 13],\n", " 'temperature': 1.0,\n", " 
'avg_logprob': -2.0730719472847734,\n", " 'compression_ratio': 1.3488372093023255,\n", " 'no_speech_prob': 1.8071212252834812e-05},\n", " {'id': 99,\n", " 'seek': 19100,\n", " 'start': 201.0,\n", " 'end': 205.0,\n", " 'text': ' I lost you and you 2, 3 4',\n", " 'tokens': [314, 2626, 345, 290, 345, 362, 11, 513, 604],\n", " 'temperature': 1.0,\n", " 'avg_logprob': -2.0730719472847734,\n", " 'compression_ratio': 1.3488372093023255,\n", " 'no_speech_prob': 1.8071212252834812e-05},\n", " {'id': 100,\n", " 'seek': 19100,\n", " 'start': 205.0,\n", " 'end': 208.0,\n", " 'text': ' I lost you.',\n", " 'tokens': [314, 2626, 345, 13],\n", " 'temperature': 1.0,\n", " 'avg_logprob': -2.0730719472847734,\n", " 'compression_ratio': 1.3488372093023255,\n", " 'no_speech_prob': 1.8071212252834812e-05},\n", " {'id': 101,\n", " 'seek': 19100,\n", " 'start': 208.0,\n", " 'end': 211.0,\n", " 'text': ' I lost you, 2 4',\n", " 'tokens': [314, 2626, 345, 11, 362, 604],\n", " 'temperature': 1.0,\n", " 'avg_logprob': -2.0730719472847734,\n", " 'compression_ratio': 1.3488372093023255,\n", " 'no_speech_prob': 1.8071212252834812e-05},\n", " {'id': 102,\n", " 'seek': 19100,\n", " 'start': 211.0,\n", " 'end': 214.0,\n", " 'text': ' I lost you, 2 5',\n", " 'tokens': [314, 2626, 345, 11, 362, 642],\n", " 'temperature': 1.0,\n", " 'avg_logprob': -2.0730719472847734,\n", " 'compression_ratio': 1.3488372093023255,\n", " 'no_speech_prob': 1.8071212252834812e-05},\n", " {'id': 103,\n", " 'seek': 21400,\n", " 'start': 214.0,\n", " 'end': 218.88,\n", " 'text': \" I'm sorry.\",\n", " 'tokens': [314, 1101, 7926, 13],\n", " 'temperature': 1.0,\n", " 'avg_logprob': -2.222068927906178,\n", " 'compression_ratio': 1.0,\n", " 'no_speech_prob': 0.04370500147342682},\n", " {'id': 104,\n", " 'seek': 21400,\n", " 'start': 218.88,\n", " 'end': 220.54,\n", " 'text': ' Sheila.',\n", " 'tokens': [49627, 13],\n", " 'temperature': 1.0,\n", " 'avg_logprob': -2.222068927906178,\n", " 'compression_ratio': 1.0,\n", " 
# Get duration of the WAV file: number of audio frames divided by the
# sampling frequency reported by the file header.
# Objects returned by wave.open() are context managers themselves, so the
# file is closed on exit without a contextlib.closing() wrapper.
with wave.open(audio_file, 'r') as f:
    frames = f.getnframes()
    rate = f.getframerate()
# True division already yields a float in Python 3; no float() cast needed.
duration = frames / rate
print(f"conversion to wav ready, duration of audio file: {duration}")
"output_type": "stream", "text": [ "/home/codespace/.local/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import pyannote.audio\n", "from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding\n", "from pyannote.audio import Audio\n", "from pyannote.core import Segment\n", "from tqdm.auto import tqdm\n", "import numpy as np\n", "import torch" ] }, { "cell_type": "code", "execution_count": null, "id": "3f9a28ca", "metadata": {}, "outputs": [ { "ename": "OSError", "evalue": "[WinError 1314] Le client ne dispose pas d’un privilège nécessaire: 'C:\\\\Users\\\\theo.alvesdacosta\\\\.cache\\\\huggingface\\\\hub\\\\models--speechbrain--spkrec-ecapa-voxceleb\\\\snapshots\\\\5c0be3875fda05e81f3c004ed8c7c06be308de1e\\\\hyperparams.yaml' -> 'C:\\\\Users\\\\theo.alvesdacosta\\\\.cache\\\\torch\\\\pyannote\\\\speechbrain\\\\hyperparams.yaml'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mOSError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[30], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m embedding_model \u001b[38;5;241m=\u001b[39m \u001b[43mPretrainedSpeakerEmbedding\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\n\u001b[0;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mspeechbrain/spkrec-ecapa-voxceleb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcpu\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", "File 
\u001b[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\bechdelai-lU12Pf_x-py3.8\\lib\\site-packages\\pyannote\\audio\\pipelines\\speaker_verification.py:463\u001b[0m, in \u001b[0;36mPretrainedSpeakerEmbedding\u001b[1;34m(embedding, device, use_auth_token)\u001b[0m\n\u001b[0;32m 431\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Pretrained speaker embedding\u001b[39;00m\n\u001b[0;32m 432\u001b[0m \n\u001b[0;32m 433\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 459\u001b[0m \u001b[38;5;124;03m>>> embeddings = get_embedding(waveforms, masks=masks)\u001b[39;00m\n\u001b[0;32m 460\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 462\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(embedding, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspeechbrain\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m embedding:\n\u001b[1;32m--> 463\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mSpeechBrainPretrainedSpeakerEmbedding\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 464\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_auth_token\u001b[49m\n\u001b[0;32m 465\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 467\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(embedding, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnvidia\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m embedding:\n\u001b[0;32m 468\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m NeMoPretrainedSpeakerEmbedding(embedding, 
device\u001b[38;5;241m=\u001b[39mdevice)\n", "File \u001b[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\bechdelai-lU12Pf_x-py3.8\\lib\\site-packages\\pyannote\\audio\\pipelines\\speaker_verification.py:242\u001b[0m, in \u001b[0;36mSpeechBrainPretrainedSpeakerEmbedding.__init__\u001b[1;34m(self, embedding, device, use_auth_token)\u001b[0m\n\u001b[0;32m 239\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membedding \u001b[38;5;241m=\u001b[39m embedding\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice \u001b[38;5;241m=\u001b[39m device\n\u001b[1;32m--> 242\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclassifier_ \u001b[38;5;241m=\u001b[39m \u001b[43mSpeechBrain_EncoderClassifier\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_hparams\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 243\u001b[0m \u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 244\u001b[0m \u001b[43m \u001b[49m\u001b[43msavedir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mCACHE_DIR\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/speechbrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 245\u001b[0m \u001b[43m \u001b[49m\u001b[43mrun_opts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdevice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 246\u001b[0m \u001b[43m 
\u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_auth_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 247\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\bechdelai-lU12Pf_x-py3.8\\lib\\site-packages\\speechbrain\\pretrained\\interfaces.py:342\u001b[0m, in \u001b[0;36mPretrained.from_hparams\u001b[1;34m(cls, source, hparams_file, pymodule_file, overrides, savedir, use_auth_token, revision, **kwargs)\u001b[0m\n\u001b[0;32m 340\u001b[0m clsname \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[0;32m 341\u001b[0m savedir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./pretrained_models/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mclsname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhashlib\u001b[38;5;241m.\u001b[39mmd5(source\u001b[38;5;241m.\u001b[39mencode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUTF-8\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;250m \u001b[39merrors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreplace\u001b[39m\u001b[38;5;124m'\u001b[39m))\u001b[38;5;241m.\u001b[39mhexdigest()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 342\u001b[0m hparams_local_path \u001b[38;5;241m=\u001b[39m \u001b[43mfetch\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 343\u001b[0m \u001b[43m \u001b[49m\u001b[43mhparams_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msavedir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\n\u001b[0;32m 344\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 345\u001b[0m 
\u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 346\u001b[0m pymodule_local_path \u001b[38;5;241m=\u001b[39m fetch(\n\u001b[0;32m 347\u001b[0m pymodule_file, source, savedir, use_auth_token, revision\n\u001b[0;32m 348\u001b[0m )\n", "File \u001b[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\bechdelai-lU12Pf_x-py3.8\\lib\\site-packages\\speechbrain\\pretrained\\fetching.py:135\u001b[0m, in \u001b[0;36mfetch\u001b[1;34m(filename, source, savedir, overwrite, save_filename, use_auth_token, revision)\u001b[0m\n\u001b[0;32m 133\u001b[0m sourcepath \u001b[38;5;241m=\u001b[39m pathlib\u001b[38;5;241m.\u001b[39mPath(fetched_file)\u001b[38;5;241m.\u001b[39mabsolute()\n\u001b[0;32m 134\u001b[0m _missing_ok_unlink(destination)\n\u001b[1;32m--> 135\u001b[0m \u001b[43mdestination\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msymlink_to\u001b[49m\u001b[43m(\u001b[49m\u001b[43msourcepath\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 136\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m destination\n", "File \u001b[1;32m~\\Anaconda3\\lib\\pathlib.py:1391\u001b[0m, in \u001b[0;36mPath.symlink_to\u001b[1;34m(self, target, target_is_directory)\u001b[0m\n\u001b[0;32m 1389\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_closed:\n\u001b[0;32m 1390\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_closed()\n\u001b[1;32m-> 1391\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_accessor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msymlink\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_is_directory\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[1;31mOSError\u001b[0m: [WinError 1314] Le client ne dispose pas d’un privilège nécessaire: 
def segment_embedding(segment, audio_path=None, total_duration=None, model=None):
    """Compute the speaker embedding for one transcribed segment.

    Parameters
    ----------
    segment : mapping
        Must provide ``"start"`` and ``"end"`` timestamps, expressed in the
        same unit as ``total_duration``.
    audio_path : optional
        Audio file to crop. Defaults to the notebook-level ``audio_file``.
    total_duration : optional
        Upper bound applied to the segment end. Defaults to the
        notebook-level ``duration``.
    model : callable, optional
        Embedding model applied to the cropped waveform. Defaults to the
        notebook-level ``embedding_model``.

    Returns
    -------
    Whatever ``model`` returns for the cropped waveform with a leading
    axis added (``waveform[None]``).

    Notes
    -----
    The three optional arguments make the data flow explicit; when omitted,
    the function reads the notebook globals, so the cells that define
    ``audio_file``, ``duration`` and ``embedding_model`` must have run first.
    """
    # Resolve lazily so that globals are only required when actually used.
    audio_path = audio_file if audio_path is None else audio_path
    total_duration = duration if total_duration is None else total_duration
    model = embedding_model if model is None else model

    start = segment["start"]
    # Whisper overshoots the end timestamp in the last segment
    end = min(total_duration, segment["end"])

    # crop() also returns the sample rate, which is not needed here.
    waveform, _ = Audio().crop(audio_path, Segment(start, end))
    # NOTE(review): presumably crop() yields (channel, time) and the model
    # expects (batch, channel, time) -- confirm against pyannote.audio docs.
    return model(waveform[None])
# One row per transcribed segment. The row width is read from the loaded
# model instead of hard-coding 192, so switching embedding models cannot
# silently produce a shape mismatch.
embeddings = np.zeros(shape=(len(segments), embedding_model.dimension))
for i, segment in enumerate(tqdm(segments)):
    embeddings[i] = segment_embedding(segment)
# Replace NaN entries with 0 (and +/-inf with large finite values) so later
# numeric steps receive finite input.
# NOTE(review): NaNs presumably come from very short clips -- confirm.
embeddings = np.nan_to_num(embeddings)
print(f'Embedding shape: {embeddings.shape}')