ecker committed on
Commit
0e59770
1 Parent(s): f56837f

added dataset: SH2 James Sunderland (v2)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. finetunes/james-sunderland-v2/cond_latents_9a58bc9a.pth +3 -0
  2. finetunes/james-sunderland-v2/dataset/lxgbsm_00000.wav +3 -0
  3. finetunes/james-sunderland-v2/dataset/lxgbsm_00001.wav +3 -0
  4. finetunes/james-sunderland-v2/dataset/lxgbsm_00002.wav +3 -0
  5. finetunes/james-sunderland-v2/dataset/lxgbsm_00003.wav +3 -0
  6. finetunes/james-sunderland-v2/dataset/lxgbsm_00004.wav +3 -0
  7. finetunes/james-sunderland-v2/dataset/lxgbsm_00005.wav +3 -0
  8. finetunes/james-sunderland-v2/dataset/lxgbsm_00006.wav +3 -0
  9. finetunes/james-sunderland-v2/dataset/lxgbsm_00007.wav +3 -0
  10. finetunes/james-sunderland-v2/dataset/lxgbsm_00008.wav +3 -0
  11. finetunes/james-sunderland-v2/dataset/lxgbsm_00009.wav +3 -0
  12. finetunes/james-sunderland-v2/dataset/lxgbsm_00010.wav +3 -0
  13. finetunes/james-sunderland-v2/dataset/lxgbsm_00011.wav +3 -0
  14. finetunes/james-sunderland-v2/dataset/lxgbsm_00012.wav +3 -0
  15. finetunes/james-sunderland-v2/dataset/lxgbsm_00013.wav +3 -0
  16. finetunes/james-sunderland-v2/dataset/lxgbsm_00014.wav +3 -0
  17. finetunes/james-sunderland-v2/dataset/lxgbsm_00015.wav +3 -0
  18. finetunes/james-sunderland-v2/dataset/lxgbsm_00016.wav +3 -0
  19. finetunes/james-sunderland-v2/dataset/lxgbsm_00017.wav +3 -0
  20. finetunes/james-sunderland-v2/dataset/lxgbsm_00018.wav +3 -0
  21. finetunes/james-sunderland-v2/dataset/lxgbsm_00019.wav +3 -0
  22. finetunes/james-sunderland-v2/dataset/lxgbsm_00020.wav +3 -0
  23. finetunes/james-sunderland-v2/dataset/lxgbsm_00021.wav +3 -0
  24. finetunes/james-sunderland-v2/dataset/lxgbsm_00022.wav +3 -0
  25. finetunes/james-sunderland-v2/dataset/lxgbsm_00023.wav +3 -0
  26. finetunes/james-sunderland-v2/dataset/lxgbsm_00024.wav +3 -0
  27. finetunes/james-sunderland-v2/dataset/lxgbsm_00025.wav +3 -0
  28. finetunes/james-sunderland-v2/dataset/lxgbsm_00026.wav +3 -0
  29. finetunes/james-sunderland-v2/dataset/lxgbsm_00027.wav +3 -0
  30. finetunes/james-sunderland-v2/dataset/lxgbsm_00028.wav +3 -0
  31. finetunes/james-sunderland-v2/dataset/lxgbsm_00029.wav +3 -0
  32. finetunes/james-sunderland-v2/dataset/lxgbsm_00030.wav +3 -0
  33. finetunes/james-sunderland-v2/dataset/train.txt +68 -0
  34. finetunes/james-sunderland-v2/dataset/train.yaml +147 -0
  35. finetunes/james-sunderland-v2/dataset/whisper.json +1741 -0
  36. finetunes/james-sunderland-v2/dataset/ynoeld_00000.wav +3 -0
  37. finetunes/james-sunderland-v2/dataset/ynoeld_00001.wav +3 -0
  38. finetunes/james-sunderland-v2/dataset/ynoeld_00002.wav +3 -0
  39. finetunes/james-sunderland-v2/dataset/ynoeld_00003.wav +3 -0
  40. finetunes/james-sunderland-v2/dataset/ynoeld_00004.wav +3 -0
  41. finetunes/james-sunderland-v2/dataset/ynoeld_00005.wav +3 -0
  42. finetunes/james-sunderland-v2/dataset/ynoeld_00006.wav +3 -0
  43. finetunes/james-sunderland-v2/dataset/ynoeld_00007.wav +3 -0
  44. finetunes/james-sunderland-v2/dataset/ynoeld_00008.wav +3 -0
  45. finetunes/james-sunderland-v2/dataset/ynoeld_00009.wav +3 -0
  46. finetunes/james-sunderland-v2/dataset/ynoeld_00010.wav +3 -0
  47. finetunes/james-sunderland-v2/dataset/ynoeld_00011.wav +3 -0
  48. finetunes/james-sunderland-v2/dataset/ynoeld_00012.wav +3 -0
  49. finetunes/james-sunderland-v2/dataset/ynoeld_00013.wav +3 -0
  50. finetunes/james-sunderland-v2/dataset/ynoeld_00014.wav +3 -0
finetunes/james-sunderland-v2/cond_latents_9a58bc9a.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c8e263823d5948b69e58d812ed88fbe5645c03ee460959d6e68a8f6629bb810
3
+ size 13343
finetunes/james-sunderland-v2/dataset/lxgbsm_00000.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa3a9334e6dfbdf8276a1193fbb712f809a45276c37be5df29f148c2fc324584
3
+ size 1234880
finetunes/james-sunderland-v2/dataset/lxgbsm_00001.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d583a0e40379dc041dbd6fb9fffd83d2ba6610339e4a2a293373973bfd81796c
3
+ size 1587680
finetunes/james-sunderland-v2/dataset/lxgbsm_00002.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e7ba89e497878b27b6764e15d33dbd4c45896117733df734dd8c63e8f6d4b0
3
+ size 1587680
finetunes/james-sunderland-v2/dataset/lxgbsm_00003.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cbde97c7e6cce1caa5a9b470fa0f00df7d04407240435aa1b18c03d3bdf2510
3
+ size 1323080
finetunes/james-sunderland-v2/dataset/lxgbsm_00004.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d6f665396d46818bbb75c05db5e67e6342a2d586427f2437d39df1c078de1f4
3
+ size 1404224
finetunes/james-sunderland-v2/dataset/lxgbsm_00005.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7175857259a6932e552c011e803d3cf67bd84851e1b6090a032088d49f3a056e
3
+ size 1305440
finetunes/james-sunderland-v2/dataset/lxgbsm_00006.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96eadc75411e796c7a92d1ecc83fd5da86e58ce5bbaca8a9627e9c2646a5c66f
3
+ size 1573568
finetunes/james-sunderland-v2/dataset/lxgbsm_00007.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aecd8dd965f3ccc91b3c0a03548590a93ef94abc99cb568a09a7bc1bfd65ac68
3
+ size 811520
finetunes/james-sunderland-v2/dataset/lxgbsm_00008.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8581aa01bd79ee275374eb99e34e94d3d3ebd4d12bef4dcff3eb10ae61b6c99
3
+ size 1185484
finetunes/james-sunderland-v2/dataset/lxgbsm_00009.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc917ad96ed1776fc8952735eb41956419b82efd635c0c211a039f0d95e184b
3
+ size 1495956
finetunes/james-sunderland-v2/dataset/lxgbsm_00010.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5148663f1847680f3fdc3afe40b51ce8aa34b44182f440f098ef7e014d168a92
3
+ size 296432
finetunes/james-sunderland-v2/dataset/lxgbsm_00011.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af1449010c075cc581c349de386cfd98ecd421a2763e86acf9db1cc6ee05fdbd
3
+ size 606896
finetunes/james-sunderland-v2/dataset/lxgbsm_00012.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7895ad94887ad6cc759e04ddaac80aab7836104222cd58d7c953cddac5a4cc89
3
+ size 550444
finetunes/james-sunderland-v2/dataset/lxgbsm_00013.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7c4fad7a9efda7413fd74ff8dbb76db95c3027c8904d7207ee6a03e31468d7
3
+ size 462252
finetunes/james-sunderland-v2/dataset/lxgbsm_00014.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0385c4977ac17cdf665ce4a4ead641a26f4ded2cf265cb78f7c7a103a427bb6d
3
+ size 423440
finetunes/james-sunderland-v2/dataset/lxgbsm_00015.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c5c9def95cf337aec90a47ed0d95f7bde69da928ba1d76e2720b63036a105ed
3
+ size 342296
finetunes/james-sunderland-v2/dataset/lxgbsm_00016.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda34911ec3fbd4057d433c55bff93a31018062beca44896a373e6e366a43742
3
+ size 331712
finetunes/james-sunderland-v2/dataset/lxgbsm_00017.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d18fddf652f76f351e9308795c755884163ae8d84d15f0d6410362cf7b40df80
3
+ size 317600
finetunes/james-sunderland-v2/dataset/lxgbsm_00018.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f199524f7b217f5e8de979118fd62d04697a4262295039308ea827eaa599b9c5
3
+ size 437552
finetunes/james-sunderland-v2/dataset/lxgbsm_00019.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdfa30c56345d6c809b18fa3c0fadb8431e1bdb373f212be251a47985260c587
3
+ size 331712
finetunes/james-sunderland-v2/dataset/lxgbsm_00020.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e6a63424f17aa653370f053c350fb5c0ea9009c3f2ec2ad160dc024272b645d
3
+ size 398744
finetunes/james-sunderland-v2/dataset/lxgbsm_00021.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d8f5336ce59fcf739122d97e9e6e836d0a2013774465f0e2b5a440dd521390e
3
+ size 314072
finetunes/james-sunderland-v2/dataset/lxgbsm_00022.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9d5c5a2392f6fae8ba989236a445ab6e188121f3a9028778502d641ce68d5c
3
+ size 268208
finetunes/james-sunderland-v2/dataset/lxgbsm_00023.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d016113f32b555e86be30b1ce5831a8070cdd02490fe7edb3ff0afb61423d07
3
+ size 494000
finetunes/james-sunderland-v2/dataset/lxgbsm_00024.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4de3ee75a047008ef7d3c91546ca55535840571589e19a2e41f72c8e74566615
3
+ size 176480
finetunes/james-sunderland-v2/dataset/lxgbsm_00025.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7e6074038449455d9ecb40e541c155e69bf636bf0c259ce146b4dca928d0bb0
3
+ size 289376
finetunes/james-sunderland-v2/dataset/lxgbsm_00026.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a096af623b5671af14db11286d732f0cb856b5ef792a9694cb9e77f4787b50b1
3
+ size 338768
finetunes/james-sunderland-v2/dataset/lxgbsm_00027.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f03a5055ac9e6e5744afeef481083cdd6d66886c600f6d1ead43e7e050ba46b0
3
+ size 331712
finetunes/james-sunderland-v2/dataset/lxgbsm_00028.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eca4137c035820c531347f8b65e29e4339286df5a0ac891cc3e8c28ce1ee000a
3
+ size 338768
finetunes/james-sunderland-v2/dataset/lxgbsm_00029.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d107c0519eb95e65405a78eb677ba9d7773c73e876566088304f0cdb54e81671
3
+ size 677456
finetunes/james-sunderland-v2/dataset/lxgbsm_00030.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e84632d291e37ab1120ea51480b3be24df85f0c7df393bd83aad2e3b8bd7303a
3
+ size 211760
finetunes/james-sunderland-v2/dataset/train.txt ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ lxgbsm_00000.wav|I'm kind of lost. I'm looking for Silent Hill. Silent Hill? Is this the right way?
2
+ lxgbsm_00001.wav|Well, there's the hotel 2, I guess. There's the hotel 2, I guess. The one on the lake? I wonder if it's still there.
3
+ lxgbsm_00002.wav|You're coming with me? Is it dangerous? I'll be careful. I guess I really don't care if it's dangerous or not.
4
+ lxgbsm_00003.wav|I'm going to town either way. I'm looking for someone. I'm looking for someone.
5
+ lxgbsm_00004.wav|Someone very important to me. I'm looking for Mary. Have you seen her?
6
+ lxgbsm_00005.wav|Without Mary I just can't go on. I'd do anything if I could be with her again.
7
+ lxgbsm_00006.wav|I'm gonna go look for her. I wish I knew. I killed her three years ago. Last year.
8
+ lxgbsm_00007.wav|But I got a letter from her. I got a letter from her. How do you know about
9
+ lxgbsm_00008.wav|Mary? You really seem to care about her. Is that why you're here too? You're not
10
+ lxgbsm_00009.wav|Mary. You're really not Mary. You're alive! I thought that thing killed you. Are you
11
+ lxgbsm_00010.wav|You hurt bad?
12
+ lxgbsm_00011.wav|Your face, your voice, just your hair and...
13
+ lxgbsm_00012.wav|I was confused.
14
+ lxgbsm_00013.wav|There are all sorts of strange things around here.
15
+ lxgbsm_00014.wav|This is no place for a kid.
16
+ lxgbsm_00015.wav|Eddie told me.
17
+ lxgbsm_00016.wav|Who's Rachel?
18
+ lxgbsm_00017.wav|What's that letter?
19
+ lxgbsm_00018.wav|It's time to end this nightmare.
20
+ lxgbsm_00019.wav|What the hell is it?
21
+ lxgbsm_00020.wav|I didn't come here to play, you know.
22
+ lxgbsm_00021.wav|You liar!
23
+ lxgbsm_00022.wav|Don't be ridiculous.
24
+ lxgbsm_00023.wav|How can you sit there and eat pizza?
25
+ lxgbsm_00024.wav|You!
26
+ lxgbsm_00025.wav|It was you, wasn't it?
27
+ lxgbsm_00026.wav|You're the one who stepped on my hand.
28
+ lxgbsm_00027.wav|I'm done with you.
29
+ lxgbsm_00028.wav|Forget you!
30
+ lxgbsm_00029.wav|Someone very important to me.
31
+ lxgbsm_00030.wav|You're not Mary.
32
+ ynoeld_00000.wav|It's you. It's you. But I don't need you anymore.
33
+ ynoeld_00001.wav|How old are you? Are you still here? Are you okay? What happened to you?
34
+ ynoeld_00002.wav|We met in the apartment building. We met in the apartment building. Sure is quiet here, huh?
35
+ ynoeld_00003.wav|What are you doing, Laura? Aren't you Maria?
36
+ ynoeld_00004.wav|So, you're Maria? You're Maria? What's a little girl like you doing here, anyway?
37
+ ynoeld_00005.wav|I didn't mean to scare you.
38
+ ynoeld_00006.wav|Are you alone here, Eddie?
39
+ ynoeld_00007.wav|James Sunderland.
40
+ ynoeld_00008.wav|James. My, uh, my name's James.
41
+ ynoeld_00009.wav|Eddie? Laura? Laura!
42
+ ynoeld_00010.wav|Maria? Maria? Mary? Mary? Mary?
43
+ ynoeld_00011.wav|Is that her name? Who is that girl, anyhow?
44
+ ynoeld_00012.wav|Yeah. Yeah. Yes! Okay.
45
+ ynoeld_00013.wav|That's right. Oh, yeah.
46
+ ynoeld_00014.wav|I understand now.
47
+ ynoeld_00015.wav|I believe you.
48
+ ynoeld_00016.wav|No.
49
+ ynoeld_00017.wav|No.
50
+ ynoeld_00018.wav|No, I...
51
+ ynoeld_00019.wav|No, you're not.
52
+ ynoeld_00020.wav|Nothing.
53
+ ynoeld_00021.wav|Oh, nothing.
54
+ ynoeld_00022.wav|I guess I really don't care.
55
+ ynoeld_00023.wav|I... don't know.
56
+ ynoeld_00024.wav|Sorry.
57
+ ynoeld_00025.wav|What?
58
+ ynoeld_00026.wav|Do what?
59
+ ynoeld_00027.wav|Tell me what happened.
60
+ ynoeld_00028.wav|Why can't you just tell me?
61
+ ynoeld_00029.wav|All I want from you is an answer.
62
+ ynoeld_00030.wav|How do you know about that?
63
+ ynoeld_00031.wav|Is there any other way?
64
+ ynoeld_00032.wav|Don't you remember?
65
+ ynoeld_00033.wav|Later, okay?
66
+ ynoeld_00034.wav|Stay right there. I'll be there soon.
67
+ ynoeld_00035.wav|I'll be there soon.
68
+ ynoeld_00036.wav|Thanks.
finetunes/james-sunderland-v2/dataset/train.yaml ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: james-finetune
2
+ model: extensibletrainer
3
+ scale: 1
4
+ gpu_ids: [0] # Redundant: the way you launch the training script sets this
5
+ start_step: 0
6
+ checkpointing_enabled: true
7
+ fp16: false
8
+ wandb: false
9
+ use_tb_logger: true
10
+
11
+ datasets:
12
+ train:
13
+ name: james-train
14
+ n_workers: 2
15
+ batch_size: 68
16
+ mode: paired_voice_audio
17
+ path: ./training/james/train.txt
18
+ fetcher_mode: ['lj']
19
+ phase: train
20
+ max_wav_length: 255995
21
+ max_text_length: 200
22
+ sample_rate: 22050
23
+ load_conditioning: True
24
+ num_conditioning_candidates: 2
25
+ conditioning_length: 44000
26
+ use_bpe_tokenizer: True
27
+ tokenizer_vocab: ./models/tortoise/bpe_lowercase_asr_256.json
28
+ load_aligned_codes: False
29
+ val: # I really do not care about validation right now
30
+ name: james-val
31
+ n_workers: 1
32
+ batch_size: 1
33
+ mode: paired_voice_audio
34
+ path: ./training/james/train.txt
35
+ fetcher_mode: ['lj']
36
+ phase: val
37
+ max_wav_length: 255995
38
+ max_text_length: 200
39
+ sample_rate: 22050
40
+ load_conditioning: True
41
+ num_conditioning_candidates: 2
42
+ conditioning_length: 44000
43
+ use_bpe_tokenizer: True
44
+ tokenizer_vocab: ./models/tortoise/bpe_lowercase_asr_256.json
45
+ load_aligned_codes: False
46
+
47
+ steps:
48
+ gpt_train:
49
+ training: gpt
50
+ loss_log_buffer: 500
51
+
52
+ # Generally follows the recipe from the DALLE paper.
53
+ optimizer: adamw # this should be adamw_zero if you're using distributed training
54
+ optimizer_params:
55
+ lr: !!float 1e-05 # originally: 1e-4
56
+ weight_decay: !!float 1e-2
57
+ beta1: 0.9
58
+ beta2: 0.96
59
+ clip_grad_eps: 4
60
+
61
+ injectors:
62
+ paired_to_mel:
63
+ type: torch_mel_spectrogram
64
+ mel_norm_file: ./models/tortoise/clips_mel_norms.pth
65
+ in: wav
66
+ out: paired_mel
67
+ paired_cond_to_mel:
68
+ type: for_each
69
+ subtype: torch_mel_spectrogram
70
+ mel_norm_file: ./models/tortoise/clips_mel_norms.pth
71
+ in: conditioning
72
+ out: paired_conditioning_mel
73
+ to_codes:
74
+ type: discrete_token
75
+ in: paired_mel
76
+ out: paired_mel_codes
77
+ dvae_config: "./models/tortoise/train_diffusion_vocoder_22k_level.yml"
78
+ paired_fwd_text:
79
+ type: generator
80
+ generator: gpt
81
+ in: [paired_conditioning_mel, padded_text, text_lengths, paired_mel_codes, wav_lengths]
82
+ out: [loss_text_ce, loss_mel_ce, logits]
83
+ losses:
84
+ text_ce:
85
+ type: direct
86
+ weight: 0.01
87
+ key: loss_text_ce
88
+ mel_ce:
89
+ type: direct
90
+ weight: 1
91
+ key: loss_mel_ce
92
+
93
+ networks:
94
+ gpt:
95
+ type: generator
96
+ which_model_G: unified_voice2 # None of the unified_voice*.py files exactly matches the tortoise inference code: versions 3 and 4 have an "alignment_head" (purpose unclear), and version 2 lacks the types=1 parameter.
97
+ kwargs:
98
+ layers: 30 # originally: 8
99
+ model_dim: 1024 # originally: 512
100
+ heads: 16 # originally: 8
101
+ max_text_tokens: 402 # originally: 120
102
+ max_mel_tokens: 604 # originally: 250
103
+ max_conditioning_inputs: 2 # originally: 1
104
+ mel_length_compression: 1024
105
+ number_text_tokens: 256 # supposed to be 255 for newer unified_voice files
106
+ number_mel_codes: 8194
107
+ start_mel_token: 8192
108
+ stop_mel_token: 8193
109
+ start_text_token: 255
110
+ train_solo_embeddings: False # missing in uv3/4
111
+ use_mel_codes_as_input: True # also missing in uv3/4
112
+ checkpointing: True
113
+ #types: 1 # this is MISSING, but in my analysis 1 is equivalent to not having it.
114
+ #only_alignment_head: False # uv3/4
115
+
116
+ path:
117
+ # pretrain_model_gpt: './models/tortoise/autoregressive.pth'
118
+ strict_load: true
119
+ resume_state: './training/james-finetune//training_state//500.state'
120
+
121
+ train:
122
+ niter: 2000
123
+ warmup_iter: -1
124
+ mega_batch_factor: 34
125
+ val_freq: 2000
126
+
127
+ ema_enabled: false # I really don't think EMA matters
128
+
129
+ default_lr_scheme: MultiStepLR
130
+ gen_lr_steps: [9, 18, 25, 33] #[50000, 100000, 140000, 180000]
131
+ lr_gamma: 0.5
132
+
133
+ eval:
134
+ output_state: gen
135
+ injectors:
136
+ gen_inj_eval:
137
+ type: generator
138
+ generator: generator
139
+ in: hq
140
+ out: [gen, codebook_commitment_loss]
141
+
142
+ logger:
143
+ print_freq: 5
144
+ save_checkpoint_freq: 50
145
+ visuals: [gen, mel]
146
+ visual_debug_rate: 5
147
+ is_mel_spectrogram: true
finetunes/james-sunderland-v2/dataset/whisper.json ADDED
@@ -0,0 +1,1741 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lxgbsm.wav": {
3
+ "text": " I'm kind of lost. I'm looking for Silent Hill. Silent Hill? Is this the right way? Well, there's the hotel 2, I guess. There's the hotel 2, I guess. The one on the lake? I wonder if it's still there. You're coming with me? Is it dangerous? I'll be careful. I guess I really don't care if it's dangerous or not. I'm going to town either way. I'm looking for someone. I'm looking for someone. Someone very important to me. I'm looking for Mary. Have you seen her? Without Mary I just can't go on. I'd do anything if I could be with her again. I'm gonna go look for her. I wish I knew. I killed her three years ago. Last year. But I got a letter from her. I got a letter from her. How do you know about Mary? You really seem to care about her. Is that why you're here too? You're not Mary. You're really not Mary. You're alive! I thought that thing killed you. Are you You hurt bad? Your face, your voice, just your hair and... I was confused. There are all sorts of strange things around here. This is no place for a kid. Eddie told me. Who's Rachel? What's that letter? It's time to end this nightmare. What the hell is it? I didn't come here to play, you know. You liar! Don't be ridiculous. How can you sit there and eat pizza? You! It was you, wasn't it? You're the one who stepped on my hand. I'm done with you. Forget you! Someone very important to me. You're not Mary.",
4
+ "segments": [
5
+ {
6
+ "id": 0,
7
+ "seek": 0,
8
+ "start": 0.0,
9
+ "end": 7.0,
10
+ "text": " I'm kind of lost. I'm looking for Silent Hill. Silent Hill? Is this the right way?",
11
+ "tokens": [
12
+ 314,
13
+ 1101,
14
+ 1611,
15
+ 286,
16
+ 2626,
17
+ 13,
18
+ 314,
19
+ 1101,
20
+ 2045,
21
+ 329,
22
+ 25083,
23
+ 3327,
24
+ 13,
25
+ 25083,
26
+ 3327,
27
+ 30,
28
+ 1148,
29
+ 428,
30
+ 262,
31
+ 826,
32
+ 835,
33
+ 30
34
+ ],
35
+ "temperature": 0.0,
36
+ "avg_logprob": -0.13676407516643566,
37
+ "compression_ratio": 1.6756756756756757,
38
+ "no_speech_prob": 0.0679725632071495,
39
+ "seg-text": [
40
+ " I'm kind of lost. I'm looking for Silent Hill. Silent Hill? Is this the right way?"
41
+ ]
42
+ },
43
+ {
44
+ "id": 1,
45
+ "seek": 0,
46
+ "start": 7.0,
47
+ "end": 16.0,
48
+ "text": " Well, there's the hotel 2, I guess. There's the hotel 2, I guess. The one on the lake? I wonder if it's still there.",
49
+ "tokens": [
50
+ 3894,
51
+ 11,
52
+ 612,
53
+ 338,
54
+ 262,
55
+ 7541,
56
+ 362,
57
+ 11,
58
+ 314,
59
+ 4724,
60
+ 13,
61
+ 1318,
62
+ 338,
63
+ 262,
64
+ 7541,
65
+ 362,
66
+ 11,
67
+ 314,
68
+ 4724,
69
+ 13,
70
+ 383,
71
+ 530,
72
+ 319,
73
+ 262,
74
+ 13546,
75
+ 30,
76
+ 314,
77
+ 4240,
78
+ 611,
79
+ 340,
80
+ 338,
81
+ 991,
82
+ 612,
83
+ 13
84
+ ],
85
+ "temperature": 0.0,
86
+ "avg_logprob": -0.13676407516643566,
87
+ "compression_ratio": 1.6756756756756757,
88
+ "no_speech_prob": 0.0679725632071495,
89
+ "seg-text": [
90
+ " Well, there's the hotel 2, I guess. There's the hotel 2, I guess. The one on the lake? I wonder if it's still there."
91
+ ]
92
+ },
93
+ {
94
+ "id": 2,
95
+ "seek": 0,
96
+ "start": 16.0,
97
+ "end": 25.0,
98
+ "text": " You're coming with me? Is it dangerous? I'll be careful. I guess I really don't care if it's dangerous or not.",
99
+ "tokens": [
100
+ 921,
101
+ 821,
102
+ 2406,
103
+ 351,
104
+ 502,
105
+ 30,
106
+ 1148,
107
+ 340,
108
+ 4923,
109
+ 30,
110
+ 314,
111
+ 1183,
112
+ 307,
113
+ 8161,
114
+ 13,
115
+ 314,
116
+ 4724,
117
+ 314,
118
+ 1107,
119
+ 836,
120
+ 470,
121
+ 1337,
122
+ 611,
123
+ 340,
124
+ 338,
125
+ 4923,
126
+ 393,
127
+ 407,
128
+ 13
129
+ ],
130
+ "temperature": 0.0,
131
+ "avg_logprob": -0.13676407516643566,
132
+ "compression_ratio": 1.6756756756756757,
133
+ "no_speech_prob": 0.0679725632071495,
134
+ "seg-text": [
135
+ " You're coming with me? Is it dangerous? I'll be careful. I guess I really don't care if it's dangerous or not."
136
+ ]
137
+ },
138
+ {
139
+ "id": 3,
140
+ "seek": 2500,
141
+ "start": 25.0,
142
+ "end": 32.5,
143
+ "text": " I'm going to town either way. I'm looking for someone. I'm looking for someone.",
144
+ "tokens": [
145
+ 314,
146
+ 1101,
147
+ 1016,
148
+ 284,
149
+ 3240,
150
+ 2035,
151
+ 835,
152
+ 13,
153
+ 314,
154
+ 1101,
155
+ 2045,
156
+ 329,
157
+ 2130,
158
+ 13,
159
+ 314,
160
+ 1101,
161
+ 2045,
162
+ 329,
163
+ 2130,
164
+ 13
165
+ ],
166
+ "temperature": 0.0,
167
+ "avg_logprob": -0.19361598563916754,
168
+ "compression_ratio": 1.5684931506849316,
169
+ "no_speech_prob": 0.09226225316524506,
170
+ "seg-text": [
171
+ " I'm going to town either way. I'm looking for someone. I'm looking for someone."
172
+ ]
173
+ },
174
+ {
175
+ "id": 4,
176
+ "seek": 2500,
177
+ "start": 32.5,
178
+ "end": 40.46,
179
+ "text": " Someone very important to me. I'm looking for Mary. Have you seen her?",
180
+ "tokens": [
181
+ 17877,
182
+ 845,
183
+ 1593,
184
+ 284,
185
+ 502,
186
+ 13,
187
+ 314,
188
+ 1101,
189
+ 2045,
190
+ 329,
191
+ 5335,
192
+ 13,
193
+ 8192,
194
+ 345,
195
+ 1775,
196
+ 607,
197
+ 30
198
+ ],
199
+ "temperature": 0.0,
200
+ "avg_logprob": -0.19361598563916754,
201
+ "compression_ratio": 1.5684931506849316,
202
+ "no_speech_prob": 0.09226225316524506,
203
+ "seg-text": [
204
+ " Someone very important to me. I'm looking for Mary. Have you seen her?"
205
+ ]
206
+ },
207
+ {
208
+ "id": 5,
209
+ "seek": 2500,
210
+ "start": 40.46,
211
+ "end": 47.86,
212
+ "text": " Without Mary I just can't go on. I'd do anything if I could be with her again.",
213
+ "tokens": [
214
+ 9170,
215
+ 5335,
216
+ 314,
217
+ 655,
218
+ 460,
219
+ 470,
220
+ 467,
221
+ 319,
222
+ 13,
223
+ 314,
224
+ 1549,
225
+ 466,
226
+ 1997,
227
+ 611,
228
+ 314,
229
+ 714,
230
+ 307,
231
+ 351,
232
+ 607,
233
+ 757,
234
+ 13
235
+ ],
236
+ "temperature": 0.0,
237
+ "avg_logprob": -0.19361598563916754,
238
+ "compression_ratio": 1.5684931506849316,
239
+ "no_speech_prob": 0.09226225316524506,
240
+ "seg-text": [
241
+ " Without Mary I just can't go on. I'd do anything if I could be with her again."
242
+ ]
243
+ },
244
+ {
245
+ "id": 6,
246
+ "seek": 4786,
247
+ "start": 47.86,
248
+ "end": 56.78,
249
+ "text": " I'm gonna go look for her. I wish I knew. I killed her three years ago. Last year.",
250
+ "tokens": [
251
+ 314,
252
+ 1101,
253
+ 8066,
254
+ 467,
255
+ 804,
256
+ 329,
257
+ 607,
258
+ 13,
259
+ 314,
260
+ 4601,
261
+ 314,
262
+ 2993,
263
+ 13,
264
+ 314,
265
+ 2923,
266
+ 607,
267
+ 1115,
268
+ 812,
269
+ 2084,
270
+ 13,
271
+ 4586,
272
+ 614,
273
+ 13
274
+ ],
275
+ "temperature": 0.0,
276
+ "avg_logprob": -0.1430604533145302,
277
+ "compression_ratio": 1.7234042553191489,
278
+ "no_speech_prob": 0.1444021761417389,
279
+ "seg-text": [
280
+ " I'm gonna go look for her. I wish I knew. I killed her three years ago. Last year."
281
+ ]
282
+ },
283
+ {
284
+ "id": 7,
285
+ "seek": 4786,
286
+ "start": 56.78,
287
+ "end": 61.379999999999995,
288
+ "text": " But I got a letter from her. I got a letter from her. How do you know about",
289
+ "tokens": [
290
+ 887,
291
+ 314,
292
+ 1392,
293
+ 257,
294
+ 3850,
295
+ 422,
296
+ 607,
297
+ 13,
298
+ 314,
299
+ 1392,
300
+ 257,
301
+ 3850,
302
+ 422,
303
+ 607,
304
+ 13,
305
+ 1374,
306
+ 466,
307
+ 345,
308
+ 760,
309
+ 546
310
+ ],
311
+ "temperature": 0.0,
312
+ "avg_logprob": -0.1430604533145302,
313
+ "compression_ratio": 1.7234042553191489,
314
+ "no_speech_prob": 0.1444021761417389,
315
+ "seg-text": [
316
+ " But I got a letter from her. I got a letter from her. How do you know about"
317
+ ]
318
+ },
319
+ {
320
+ "id": 8,
321
+ "seek": 4786,
322
+ "start": 61.379999999999995,
323
+ "end": 68.1,
324
+ "text": " Mary? You really seem to care about her. Is that why you're here too? You're not",
325
+ "tokens": [
326
+ 5335,
327
+ 30,
328
+ 921,
329
+ 1107,
330
+ 1283,
331
+ 284,
332
+ 1337,
333
+ 546,
334
+ 607,
335
+ 13,
336
+ 1148,
337
+ 326,
338
+ 1521,
339
+ 345,
340
+ 821,
341
+ 994,
342
+ 1165,
343
+ 30,
344
+ 921,
345
+ 821,
346
+ 407
347
+ ],
348
+ "temperature": 0.0,
349
+ "avg_logprob": -0.1430604533145302,
350
+ "compression_ratio": 1.7234042553191489,
351
+ "no_speech_prob": 0.1444021761417389,
352
+ "seg-text": [
353
+ " Mary? You really seem to care about her. Is that why you're here too? You're not"
354
+ ]
355
+ },
356
+ {
357
+ "id": 9,
358
+ "seek": 4786,
359
+ "start": 68.1,
360
+ "end": 76.58,
361
+ "text": " Mary. You're really not Mary. You're alive! I thought that thing killed you. Are you",
362
+ "tokens": [
363
+ 5335,
364
+ 13,
365
+ 921,
366
+ 821,
367
+ 1107,
368
+ 407,
369
+ 5335,
370
+ 13,
371
+ 921,
372
+ 821,
373
+ 6776,
374
+ 0,
375
+ 314,
376
+ 1807,
377
+ 326,
378
+ 1517,
379
+ 2923,
380
+ 345,
381
+ 13,
382
+ 4231,
383
+ 345
384
+ ],
385
+ "temperature": 0.0,
386
+ "avg_logprob": -0.1430604533145302,
387
+ "compression_ratio": 1.7234042553191489,
388
+ "no_speech_prob": 0.1444021761417389,
389
+ "seg-text": [
390
+ " Mary. You're really not Mary. You're alive! I thought that thing killed you. Are you"
391
+ ]
392
+ },
393
+ {
394
+ "id": 10,
395
+ "seek": 7658,
396
+ "start": 76.58,
397
+ "end": 78.26,
398
+ "text": " You hurt bad?",
399
+ "tokens": [
400
+ 921,
401
+ 5938,
402
+ 2089,
403
+ 30
404
+ ],
405
+ "temperature": 0.0,
406
+ "avg_logprob": -0.16667714849248663,
407
+ "compression_ratio": 1.4840182648401827,
408
+ "no_speech_prob": 0.6022158265113831,
409
+ "seg-text": [
410
+ " You hurt bad?"
411
+ ]
412
+ },
413
+ {
414
+ "id": 11,
415
+ "seek": 7658,
416
+ "start": 78.26,
417
+ "end": 81.7,
418
+ "text": " Your face, your voice, just your hair and...",
419
+ "tokens": [
420
+ 3406,
421
+ 1986,
422
+ 11,
423
+ 534,
424
+ 3809,
425
+ 11,
426
+ 655,
427
+ 534,
428
+ 4190,
429
+ 290,
430
+ 986
431
+ ],
432
+ "temperature": 0.0,
433
+ "avg_logprob": -0.16667714849248663,
434
+ "compression_ratio": 1.4840182648401827,
435
+ "no_speech_prob": 0.6022158265113831,
436
+ "seg-text": [
437
+ " Your face, your voice, just your hair and..."
438
+ ]
439
+ },
440
+ {
441
+ "id": 12,
442
+ "seek": 7658,
443
+ "start": 81.7,
444
+ "end": 84.82,
445
+ "text": " I was confused.",
446
+ "tokens": [
447
+ 314,
448
+ 373,
449
+ 10416,
450
+ 13
451
+ ],
452
+ "temperature": 0.0,
453
+ "avg_logprob": -0.16667714849248663,
454
+ "compression_ratio": 1.4840182648401827,
455
+ "no_speech_prob": 0.6022158265113831,
456
+ "seg-text": [
457
+ " I was confused."
458
+ ]
459
+ },
460
+ {
461
+ "id": 13,
462
+ "seek": 7658,
463
+ "start": 84.82,
464
+ "end": 87.44,
465
+ "text": " There are all sorts of strange things around here.",
466
+ "tokens": [
467
+ 1318,
468
+ 389,
469
+ 477,
470
+ 10524,
471
+ 286,
472
+ 6283,
473
+ 1243,
474
+ 1088,
475
+ 994,
476
+ 13
477
+ ],
478
+ "temperature": 0.0,
479
+ "avg_logprob": -0.16667714849248663,
480
+ "compression_ratio": 1.4840182648401827,
481
+ "no_speech_prob": 0.6022158265113831,
482
+ "seg-text": [
483
+ " There are all sorts of strange things around here."
484
+ ]
485
+ },
486
+ {
487
+ "id": 14,
488
+ "seek": 7658,
489
+ "start": 87.44,
490
+ "end": 89.84,
491
+ "text": " This is no place for a kid.",
492
+ "tokens": [
493
+ 770,
494
+ 318,
495
+ 645,
496
+ 1295,
497
+ 329,
498
+ 257,
499
+ 5141,
500
+ 13
501
+ ],
502
+ "temperature": 0.0,
503
+ "avg_logprob": -0.16667714849248663,
504
+ "compression_ratio": 1.4840182648401827,
505
+ "no_speech_prob": 0.6022158265113831,
506
+ "seg-text": [
507
+ " This is no place for a kid."
508
+ ]
509
+ },
510
+ {
511
+ "id": 15,
512
+ "seek": 7658,
513
+ "start": 89.84,
514
+ "end": 91.78,
515
+ "text": " Eddie told me.",
516
+ "tokens": [
517
+ 19478,
518
+ 1297,
519
+ 502,
520
+ 13
521
+ ],
522
+ "temperature": 0.0,
523
+ "avg_logprob": -0.16667714849248663,
524
+ "compression_ratio": 1.4840182648401827,
525
+ "no_speech_prob": 0.6022158265113831,
526
+ "seg-text": [
527
+ " Eddie told me."
528
+ ]
529
+ },
530
+ {
531
+ "id": 16,
532
+ "seek": 7658,
533
+ "start": 91.78,
534
+ "end": 93.66,
535
+ "text": " Who's Rachel?",
536
+ "tokens": [
537
+ 5338,
538
+ 338,
539
+ 15984,
540
+ 30
541
+ ],
542
+ "temperature": 0.0,
543
+ "avg_logprob": -0.16667714849248663,
544
+ "compression_ratio": 1.4840182648401827,
545
+ "no_speech_prob": 0.6022158265113831,
546
+ "seg-text": [
547
+ " Who's Rachel?"
548
+ ]
549
+ },
550
+ {
551
+ "id": 17,
552
+ "seek": 7658,
553
+ "start": 93.66,
554
+ "end": 95.46,
555
+ "text": " What's that letter?",
556
+ "tokens": [
557
+ 1867,
558
+ 338,
559
+ 326,
560
+ 3850,
561
+ 30
562
+ ],
563
+ "temperature": 0.0,
564
+ "avg_logprob": -0.16667714849248663,
565
+ "compression_ratio": 1.4840182648401827,
566
+ "no_speech_prob": 0.6022158265113831,
567
+ "seg-text": [
568
+ " What's that letter?"
569
+ ]
570
+ },
571
+ {
572
+ "id": 18,
573
+ "seek": 7658,
574
+ "start": 95.46,
575
+ "end": 97.94,
576
+ "text": " It's time to end this nightmare.",
577
+ "tokens": [
578
+ 632,
579
+ 338,
580
+ 640,
581
+ 284,
582
+ 886,
583
+ 428,
584
+ 17123,
585
+ 13
586
+ ],
587
+ "temperature": 0.0,
588
+ "avg_logprob": -0.16667714849248663,
589
+ "compression_ratio": 1.4840182648401827,
590
+ "no_speech_prob": 0.6022158265113831,
591
+ "seg-text": [
592
+ " It's time to end this nightmare."
593
+ ]
594
+ },
595
+ {
596
+ "id": 19,
597
+ "seek": 7658,
598
+ "start": 97.94,
599
+ "end": 99.82,
600
+ "text": " What the hell is it?",
601
+ "tokens": [
602
+ 1867,
603
+ 262,
604
+ 5968,
605
+ 318,
606
+ 340,
607
+ 30
608
+ ],
609
+ "temperature": 0.0,
610
+ "avg_logprob": -0.16667714849248663,
611
+ "compression_ratio": 1.4840182648401827,
612
+ "no_speech_prob": 0.6022158265113831,
613
+ "seg-text": [
614
+ " What the hell is it?"
615
+ ]
616
+ },
617
+ {
618
+ "id": 20,
619
+ "seek": 7658,
620
+ "start": 99.82,
621
+ "end": 102.08,
622
+ "text": " I didn't come here to play, you know.",
623
+ "tokens": [
624
+ 314,
625
+ 1422,
626
+ 470,
627
+ 1282,
628
+ 994,
629
+ 284,
630
+ 711,
631
+ 11,
632
+ 345,
633
+ 760,
634
+ 13
635
+ ],
636
+ "temperature": 0.0,
637
+ "avg_logprob": -0.16667714849248663,
638
+ "compression_ratio": 1.4840182648401827,
639
+ "no_speech_prob": 0.6022158265113831,
640
+ "seg-text": [
641
+ " I didn't come here to play, you know."
642
+ ]
643
+ },
644
+ {
645
+ "id": 21,
646
+ "seek": 7658,
647
+ "start": 102.08,
648
+ "end": 103.86,
649
+ "text": " You liar!",
650
+ "tokens": [
651
+ 921,
652
+ 31866,
653
+ 0
654
+ ],
655
+ "temperature": 0.0,
656
+ "avg_logprob": -0.16667714849248663,
657
+ "compression_ratio": 1.4840182648401827,
658
+ "no_speech_prob": 0.6022158265113831,
659
+ "seg-text": [
660
+ " You liar!"
661
+ ]
662
+ },
663
+ {
664
+ "id": 22,
665
+ "seek": 7658,
666
+ "start": 103.86,
667
+ "end": 105.38,
668
+ "text": " Don't be ridiculous.",
669
+ "tokens": [
670
+ 2094,
671
+ 470,
672
+ 307,
673
+ 11441,
674
+ 13
675
+ ],
676
+ "temperature": 0.0,
677
+ "avg_logprob": -0.16667714849248663,
678
+ "compression_ratio": 1.4840182648401827,
679
+ "no_speech_prob": 0.6022158265113831,
680
+ "seg-text": [
681
+ " Don't be ridiculous."
682
+ ]
683
+ },
684
+ {
685
+ "id": 23,
686
+ "seek": 10538,
687
+ "start": 105.38,
688
+ "end": 108.17999999999999,
689
+ "text": " How can you sit there and eat pizza?",
690
+ "tokens": [
691
+ 1374,
692
+ 460,
693
+ 345,
694
+ 1650,
695
+ 612,
696
+ 290,
697
+ 4483,
698
+ 14256,
699
+ 30
700
+ ],
701
+ "temperature": 0.0,
702
+ "avg_logprob": -0.25711625995058,
703
+ "compression_ratio": 1.3308823529411764,
704
+ "no_speech_prob": 0.058869555592536926,
705
+ "seg-text": [
706
+ " How can you sit there and eat pizza?"
707
+ ]
708
+ },
709
+ {
710
+ "id": 24,
711
+ "seek": 10538,
712
+ "start": 108.17999999999999,
713
+ "end": 109.17999999999999,
714
+ "text": " You!",
715
+ "tokens": [
716
+ 921,
717
+ 0
718
+ ],
719
+ "temperature": 0.0,
720
+ "avg_logprob": -0.25711625995058,
721
+ "compression_ratio": 1.3308823529411764,
722
+ "no_speech_prob": 0.058869555592536926,
723
+ "seg-text": [
724
+ " You!"
725
+ ]
726
+ },
727
+ {
728
+ "id": 25,
729
+ "seek": 10538,
730
+ "start": 109.17999999999999,
731
+ "end": 110.82,
732
+ "text": " It was you, wasn't it?",
733
+ "tokens": [
734
+ 632,
735
+ 373,
736
+ 345,
737
+ 11,
738
+ 2492,
739
+ 470,
740
+ 340,
741
+ 30
742
+ ],
743
+ "temperature": 0.0,
744
+ "avg_logprob": -0.25711625995058,
745
+ "compression_ratio": 1.3308823529411764,
746
+ "no_speech_prob": 0.058869555592536926,
747
+ "seg-text": [
748
+ " It was you, wasn't it?"
749
+ ]
750
+ },
751
+ {
752
+ "id": 26,
753
+ "seek": 10538,
754
+ "start": 110.82,
755
+ "end": 112.74,
756
+ "text": " You're the one who stepped on my hand.",
757
+ "tokens": [
758
+ 921,
759
+ 821,
760
+ 262,
761
+ 530,
762
+ 508,
763
+ 10764,
764
+ 319,
765
+ 616,
766
+ 1021,
767
+ 13
768
+ ],
769
+ "temperature": 0.0,
770
+ "avg_logprob": -0.25711625995058,
771
+ "compression_ratio": 1.3308823529411764,
772
+ "no_speech_prob": 0.058869555592536926,
773
+ "seg-text": [
774
+ " You're the one who stepped on my hand."
775
+ ]
776
+ },
777
+ {
778
+ "id": 27,
779
+ "seek": 10538,
780
+ "start": 112.74,
781
+ "end": 114.61999999999999,
782
+ "text": " I'm done with you.",
783
+ "tokens": [
784
+ 314,
785
+ 1101,
786
+ 1760,
787
+ 351,
788
+ 345,
789
+ 13
790
+ ],
791
+ "temperature": 0.0,
792
+ "avg_logprob": -0.25711625995058,
793
+ "compression_ratio": 1.3308823529411764,
794
+ "no_speech_prob": 0.058869555592536926,
795
+ "seg-text": [
796
+ " I'm done with you."
797
+ ]
798
+ },
799
+ {
800
+ "id": 28,
801
+ "seek": 10538,
802
+ "start": 114.61999999999999,
803
+ "end": 116.53999999999999,
804
+ "text": " Forget you!",
805
+ "tokens": [
806
+ 29624,
807
+ 345,
808
+ 0
809
+ ],
810
+ "temperature": 0.0,
811
+ "avg_logprob": -0.25711625995058,
812
+ "compression_ratio": 1.3308823529411764,
813
+ "no_speech_prob": 0.058869555592536926,
814
+ "seg-text": [
815
+ " Forget you!"
816
+ ]
817
+ },
818
+ {
819
+ "id": 29,
820
+ "seek": 10538,
821
+ "start": 116.53999999999999,
822
+ "end": 120.38,
823
+ "text": " Someone very important to me.",
824
+ "tokens": [
825
+ 17877,
826
+ 845,
827
+ 1593,
828
+ 284,
829
+ 502,
830
+ 13
831
+ ],
832
+ "temperature": 0.0,
833
+ "avg_logprob": -0.25711625995058,
834
+ "compression_ratio": 1.3308823529411764,
835
+ "no_speech_prob": 0.058869555592536926,
836
+ "seg-text": [
837
+ " Someone very important to me."
838
+ ]
839
+ },
840
+ {
841
+ "id": 30,
842
+ "seek": 12038,
843
+ "start": 120.38,
844
+ "end": 121.58,
845
+ "text": " You're not Mary.",
846
+ "tokens": [
847
+ 50363,
848
+ 921,
849
+ 821,
850
+ 407,
851
+ 5335,
852
+ 13,
853
+ 50423
854
+ ],
855
+ "temperature": 0.0,
856
+ "avg_logprob": -0.49764543771743774,
857
+ "compression_ratio": 0.6666666666666666,
858
+ "no_speech_prob": 0.273059606552124,
859
+ "seg-text": [
860
+ " You're not Mary."
861
+ ]
862
+ }
863
+ ],
864
+ "language": "en"
865
+ },
866
+ "ynoeld.wav": {
867
+ "text": " It's you. It's you. But I don't need you anymore. How old are you? Are you still here? Are you okay? What happened to you? We met in the apartment building. We met in the apartment building. Sure is quiet here, huh? What are you doing, Laura? Aren't you Maria? So, you're Maria? You're Maria? What's a little girl like you doing here, anyway? I didn't mean to scare you. Are you alone here, Eddie? James Sunderland. James. My, uh, my name's James. Eddie? Laura? Laura! Maria? Maria? Mary? Mary? Mary? Is that her name? Who is that girl, anyhow? Yeah. Yeah. Yes! Okay. That's right. Oh, yeah. I understand now. I believe you. No. No. No, I... No, you're not. Nothing. Oh, nothing. I guess I really don't care. I... don't know. Sorry. What? Do what? Tell me what happened. Why can't you just tell me? All I want from you is an answer. How do you know about that? Is there any other way? Don't you remember? Later, okay? Stay right there. I'll be there soon. I'll be there soon. Thanks.",
868
+ "segments": [
869
+ {
870
+ "id": 0,
871
+ "seek": 0,
872
+ "start": 0.0,
873
+ "end": 5.0,
874
+ "text": " It's you. It's you. But I don't need you anymore.",
875
+ "tokens": [
876
+ 632,
877
+ 338,
878
+ 345,
879
+ 13,
880
+ 632,
881
+ 338,
882
+ 345,
883
+ 13,
884
+ 887,
885
+ 314,
886
+ 836,
887
+ 470,
888
+ 761,
889
+ 345,
890
+ 7471,
891
+ 13
892
+ ],
893
+ "temperature": 0.0,
894
+ "avg_logprob": -0.1540551746592802,
895
+ "compression_ratio": 1.7720207253886011,
896
+ "no_speech_prob": 0.13705560564994812,
897
+ "seg-text": [
898
+ " It's you. It's you. But I don't need you anymore."
899
+ ]
900
+ },
901
+ {
902
+ "id": 1,
903
+ "seek": 0,
904
+ "start": 5.0,
905
+ "end": 11.0,
906
+ "text": " How old are you? Are you still here? Are you okay? What happened to you?",
907
+ "tokens": [
908
+ 1374,
909
+ 1468,
910
+ 389,
911
+ 345,
912
+ 30,
913
+ 4231,
914
+ 345,
915
+ 991,
916
+ 994,
917
+ 30,
918
+ 4231,
919
+ 345,
920
+ 8788,
921
+ 30,
922
+ 1867,
923
+ 3022,
924
+ 284,
925
+ 345,
926
+ 30
927
+ ],
928
+ "temperature": 0.0,
929
+ "avg_logprob": -0.1540551746592802,
930
+ "compression_ratio": 1.7720207253886011,
931
+ "no_speech_prob": 0.13705560564994812,
932
+ "seg-text": [
933
+ " How old are you? Are you still here? Are you okay? What happened to you?"
934
+ ]
935
+ },
936
+ {
937
+ "id": 2,
938
+ "seek": 0,
939
+ "start": 11.0,
940
+ "end": 18.0,
941
+ "text": " We met in the apartment building. We met in the apartment building. Sure is quiet here, huh?",
942
+ "tokens": [
943
+ 775,
944
+ 1138,
945
+ 287,
946
+ 262,
947
+ 7962,
948
+ 2615,
949
+ 13,
950
+ 775,
951
+ 1138,
952
+ 287,
953
+ 262,
954
+ 7962,
955
+ 2615,
956
+ 13,
957
+ 10889,
958
+ 318,
959
+ 5897,
960
+ 994,
961
+ 11,
962
+ 24926,
963
+ 30
964
+ ],
965
+ "temperature": 0.0,
966
+ "avg_logprob": -0.1540551746592802,
967
+ "compression_ratio": 1.7720207253886011,
968
+ "no_speech_prob": 0.13705560564994812,
969
+ "seg-text": [
970
+ " We met in the apartment building. We met in the apartment building. Sure is quiet here, huh?"
971
+ ]
972
+ },
973
+ {
974
+ "id": 3,
975
+ "seek": 0,
976
+ "start": 18.0,
977
+ "end": 22.0,
978
+ "text": " What are you doing, Laura? Aren't you Maria?",
979
+ "tokens": [
980
+ 1867,
981
+ 389,
982
+ 345,
983
+ 1804,
984
+ 11,
985
+ 16753,
986
+ 30,
987
+ 9843,
988
+ 470,
989
+ 345,
990
+ 14200,
991
+ 30
992
+ ],
993
+ "temperature": 0.0,
994
+ "avg_logprob": -0.1540551746592802,
995
+ "compression_ratio": 1.7720207253886011,
996
+ "no_speech_prob": 0.13705560564994812,
997
+ "seg-text": [
998
+ " What are you doing, Laura? Aren't you Maria?"
999
+ ]
1000
+ },
1001
+ {
1002
+ "id": 4,
1003
+ "seek": 0,
1004
+ "start": 22.0,
1005
+ "end": 29.0,
1006
+ "text": " So, you're Maria? You're Maria? What's a little girl like you doing here, anyway?",
1007
+ "tokens": [
1008
+ 1406,
1009
+ 11,
1010
+ 345,
1011
+ 821,
1012
+ 14200,
1013
+ 30,
1014
+ 921,
1015
+ 821,
1016
+ 14200,
1017
+ 30,
1018
+ 1867,
1019
+ 338,
1020
+ 257,
1021
+ 1310,
1022
+ 2576,
1023
+ 588,
1024
+ 345,
1025
+ 1804,
1026
+ 994,
1027
+ 11,
1028
+ 6949,
1029
+ 30
1030
+ ],
1031
+ "temperature": 0.0,
1032
+ "avg_logprob": -0.1540551746592802,
1033
+ "compression_ratio": 1.7720207253886011,
1034
+ "no_speech_prob": 0.13705560564994812,
1035
+ "seg-text": [
1036
+ " So, you're Maria? You're Maria? What's a little girl like you doing here, anyway?"
1037
+ ]
1038
+ },
1039
+ {
1040
+ "id": 5,
1041
+ "seek": 2900,
1042
+ "start": 29.0,
1043
+ "end": 31.0,
1044
+ "text": " I didn't mean to scare you.",
1045
+ "tokens": [
1046
+ 314,
1047
+ 1422,
1048
+ 470,
1049
+ 1612,
1050
+ 284,
1051
+ 19437,
1052
+ 345,
1053
+ 13
1054
+ ],
1055
+ "temperature": 0.0,
1056
+ "avg_logprob": -0.27043379114029253,
1057
+ "compression_ratio": 1.521472392638037,
1058
+ "no_speech_prob": 0.15195460617542267,
1059
+ "seg-text": [
1060
+ " I didn't mean to scare you."
1061
+ ]
1062
+ },
1063
+ {
1064
+ "id": 6,
1065
+ "seek": 2900,
1066
+ "start": 31.0,
1067
+ "end": 33.0,
1068
+ "text": " Are you alone here, Eddie?",
1069
+ "tokens": [
1070
+ 4231,
1071
+ 345,
1072
+ 3436,
1073
+ 994,
1074
+ 11,
1075
+ 19478,
1076
+ 30
1077
+ ],
1078
+ "temperature": 0.0,
1079
+ "avg_logprob": -0.27043379114029253,
1080
+ "compression_ratio": 1.521472392638037,
1081
+ "no_speech_prob": 0.15195460617542267,
1082
+ "seg-text": [
1083
+ " Are you alone here, Eddie?"
1084
+ ]
1085
+ },
1086
+ {
1087
+ "id": 7,
1088
+ "seek": 2900,
1089
+ "start": 33.0,
1090
+ "end": 35.0,
1091
+ "text": " James Sunderland.",
1092
+ "tokens": [
1093
+ 3700,
1094
+ 35706,
1095
+ 13
1096
+ ],
1097
+ "temperature": 0.0,
1098
+ "avg_logprob": -0.27043379114029253,
1099
+ "compression_ratio": 1.521472392638037,
1100
+ "no_speech_prob": 0.15195460617542267,
1101
+ "seg-text": [
1102
+ " James Sunderland."
1103
+ ]
1104
+ },
1105
+ {
1106
+ "id": 8,
1107
+ "seek": 2900,
1108
+ "start": 35.0,
1109
+ "end": 38.5,
1110
+ "text": " James. My, uh, my name's James.",
1111
+ "tokens": [
1112
+ 3700,
1113
+ 13,
1114
+ 2011,
1115
+ 11,
1116
+ 21480,
1117
+ 11,
1118
+ 616,
1119
+ 1438,
1120
+ 338,
1121
+ 3700,
1122
+ 13
1123
+ ],
1124
+ "temperature": 0.0,
1125
+ "avg_logprob": -0.27043379114029253,
1126
+ "compression_ratio": 1.521472392638037,
1127
+ "no_speech_prob": 0.15195460617542267,
1128
+ "seg-text": [
1129
+ " James. My, uh, my name's James."
1130
+ ]
1131
+ },
1132
+ {
1133
+ "id": 9,
1134
+ "seek": 2900,
1135
+ "start": 38.5,
1136
+ "end": 42.0,
1137
+ "text": " Eddie? Laura? Laura!",
1138
+ "tokens": [
1139
+ 19478,
1140
+ 30,
1141
+ 16753,
1142
+ 30,
1143
+ 16753,
1144
+ 0
1145
+ ],
1146
+ "temperature": 0.0,
1147
+ "avg_logprob": -0.27043379114029253,
1148
+ "compression_ratio": 1.521472392638037,
1149
+ "no_speech_prob": 0.15195460617542267,
1150
+ "seg-text": [
1151
+ " Eddie? Laura? Laura!"
1152
+ ]
1153
+ },
1154
+ {
1155
+ "id": 10,
1156
+ "seek": 2900,
1157
+ "start": 42.0,
1158
+ "end": 48.5,
1159
+ "text": " Maria? Maria? Mary? Mary? Mary?",
1160
+ "tokens": [
1161
+ 14200,
1162
+ 30,
1163
+ 14200,
1164
+ 30,
1165
+ 5335,
1166
+ 30,
1167
+ 5335,
1168
+ 30,
1169
+ 5335,
1170
+ 30
1171
+ ],
1172
+ "temperature": 0.0,
1173
+ "avg_logprob": -0.27043379114029253,
1174
+ "compression_ratio": 1.521472392638037,
1175
+ "no_speech_prob": 0.15195460617542267,
1176
+ "seg-text": [
1177
+ " Maria? Maria? Mary? Mary? Mary?"
1178
+ ]
1179
+ },
1180
+ {
1181
+ "id": 11,
1182
+ "seek": 2900,
1183
+ "start": 48.5,
1184
+ "end": 52.0,
1185
+ "text": " Is that her name? Who is that girl, anyhow?",
1186
+ "tokens": [
1187
+ 1148,
1188
+ 326,
1189
+ 607,
1190
+ 1438,
1191
+ 30,
1192
+ 5338,
1193
+ 318,
1194
+ 326,
1195
+ 2576,
1196
+ 11,
1197
+ 597,
1198
+ 4919,
1199
+ 30
1200
+ ],
1201
+ "temperature": 0.0,
1202
+ "avg_logprob": -0.27043379114029253,
1203
+ "compression_ratio": 1.521472392638037,
1204
+ "no_speech_prob": 0.15195460617542267,
1205
+ "seg-text": [
1206
+ " Is that her name? Who is that girl, anyhow?"
1207
+ ]
1208
+ },
1209
+ {
1210
+ "id": 12,
1211
+ "seek": 2900,
1212
+ "start": 52.0,
1213
+ "end": 56.0,
1214
+ "text": " Yeah. Yeah. Yes! Okay.",
1215
+ "tokens": [
1216
+ 9425,
1217
+ 13,
1218
+ 9425,
1219
+ 13,
1220
+ 3363,
1221
+ 0,
1222
+ 16805,
1223
+ 13
1224
+ ],
1225
+ "temperature": 0.0,
1226
+ "avg_logprob": -0.27043379114029253,
1227
+ "compression_ratio": 1.521472392638037,
1228
+ "no_speech_prob": 0.15195460617542267,
1229
+ "seg-text": [
1230
+ " Yeah. Yeah. Yes! Okay."
1231
+ ]
1232
+ },
1233
+ {
1234
+ "id": 13,
1235
+ "seek": 2900,
1236
+ "start": 56.0,
1237
+ "end": 58.5,
1238
+ "text": " That's right. Oh, yeah.",
1239
+ "tokens": [
1240
+ 1320,
1241
+ 338,
1242
+ 826,
1243
+ 13,
1244
+ 3966,
1245
+ 11,
1246
+ 10194,
1247
+ 13
1248
+ ],
1249
+ "temperature": 0.0,
1250
+ "avg_logprob": -0.27043379114029253,
1251
+ "compression_ratio": 1.521472392638037,
1252
+ "no_speech_prob": 0.15195460617542267,
1253
+ "seg-text": [
1254
+ " That's right. Oh, yeah."
1255
+ ]
1256
+ },
1257
+ {
1258
+ "id": 14,
1259
+ "seek": 5850,
1260
+ "start": 58.5,
1261
+ "end": 60.22,
1262
+ "text": " I understand now.",
1263
+ "tokens": [
1264
+ 314,
1265
+ 1833,
1266
+ 783,
1267
+ 13
1268
+ ],
1269
+ "temperature": 0.0,
1270
+ "avg_logprob": -0.3413577498051158,
1271
+ "compression_ratio": 1.4972067039106145,
1272
+ "no_speech_prob": 0.08281701803207397,
1273
+ "seg-text": [
1274
+ " I understand now."
1275
+ ]
1276
+ },
1277
+ {
1278
+ "id": 15,
1279
+ "seek": 5850,
1280
+ "start": 60.22,
1281
+ "end": 61.58,
1282
+ "text": " I believe you.",
1283
+ "tokens": [
1284
+ 314,
1285
+ 1975,
1286
+ 345,
1287
+ 13
1288
+ ],
1289
+ "temperature": 0.0,
1290
+ "avg_logprob": -0.3413577498051158,
1291
+ "compression_ratio": 1.4972067039106145,
1292
+ "no_speech_prob": 0.08281701803207397,
1293
+ "seg-text": [
1294
+ " I believe you."
1295
+ ]
1296
+ },
1297
+ {
1298
+ "id": 16,
1299
+ "seek": 5850,
1300
+ "start": 61.58,
1301
+ "end": 62.58,
1302
+ "text": " No.",
1303
+ "tokens": [
1304
+ 1400,
1305
+ 13
1306
+ ],
1307
+ "temperature": 0.0,
1308
+ "avg_logprob": -0.3413577498051158,
1309
+ "compression_ratio": 1.4972067039106145,
1310
+ "no_speech_prob": 0.08281701803207397,
1311
+ "seg-text": [
1312
+ " No."
1313
+ ]
1314
+ },
1315
+ {
1316
+ "id": 17,
1317
+ "seek": 5850,
1318
+ "start": 62.58,
1319
+ "end": 63.58,
1320
+ "text": " No.",
1321
+ "tokens": [
1322
+ 1400,
1323
+ 13
1324
+ ],
1325
+ "temperature": 0.0,
1326
+ "avg_logprob": -0.3413577498051158,
1327
+ "compression_ratio": 1.4972067039106145,
1328
+ "no_speech_prob": 0.08281701803207397,
1329
+ "seg-text": [
1330
+ " No."
1331
+ ]
1332
+ },
1333
+ {
1334
+ "id": 18,
1335
+ "seek": 5850,
1336
+ "start": 63.58,
1337
+ "end": 65.58,
1338
+ "text": " No, I...",
1339
+ "tokens": [
1340
+ 1400,
1341
+ 11,
1342
+ 314,
1343
+ 986
1344
+ ],
1345
+ "temperature": 0.0,
1346
+ "avg_logprob": -0.3413577498051158,
1347
+ "compression_ratio": 1.4972067039106145,
1348
+ "no_speech_prob": 0.08281701803207397,
1349
+ "seg-text": [
1350
+ " No, I..."
1351
+ ]
1352
+ },
1353
+ {
1354
+ "id": 19,
1355
+ "seek": 5850,
1356
+ "start": 65.58,
1357
+ "end": 68.06,
1358
+ "text": " No, you're not.",
1359
+ "tokens": [
1360
+ 1400,
1361
+ 11,
1362
+ 345,
1363
+ 821,
1364
+ 407,
1365
+ 13
1366
+ ],
1367
+ "temperature": 0.0,
1368
+ "avg_logprob": -0.3413577498051158,
1369
+ "compression_ratio": 1.4972067039106145,
1370
+ "no_speech_prob": 0.08281701803207397,
1371
+ "seg-text": [
1372
+ " No, you're not."
1373
+ ]
1374
+ },
1375
+ {
1376
+ "id": 20,
1377
+ "seek": 5850,
1378
+ "start": 68.06,
1379
+ "end": 69.06,
1380
+ "text": " Nothing.",
1381
+ "tokens": [
1382
+ 10528,
1383
+ 13
1384
+ ],
1385
+ "temperature": 0.0,
1386
+ "avg_logprob": -0.3413577498051158,
1387
+ "compression_ratio": 1.4972067039106145,
1388
+ "no_speech_prob": 0.08281701803207397,
1389
+ "seg-text": [
1390
+ " Nothing."
1391
+ ]
1392
+ },
1393
+ {
1394
+ "id": 21,
1395
+ "seek": 5850,
1396
+ "start": 69.06,
1397
+ "end": 70.06,
1398
+ "text": " Oh, nothing.",
1399
+ "tokens": [
1400
+ 3966,
1401
+ 11,
1402
+ 2147,
1403
+ 13
1404
+ ],
1405
+ "temperature": 0.0,
1406
+ "avg_logprob": -0.3413577498051158,
1407
+ "compression_ratio": 1.4972067039106145,
1408
+ "no_speech_prob": 0.08281701803207397,
1409
+ "seg-text": [
1410
+ " Oh, nothing."
1411
+ ]
1412
+ },
1413
+ {
1414
+ "id": 22,
1415
+ "seek": 5850,
1416
+ "start": 70.06,
1417
+ "end": 72.86,
1418
+ "text": " I guess I really don't care.",
1419
+ "tokens": [
1420
+ 314,
1421
+ 4724,
1422
+ 314,
1423
+ 1107,
1424
+ 836,
1425
+ 470,
1426
+ 1337,
1427
+ 13
1428
+ ],
1429
+ "temperature": 0.0,
1430
+ "avg_logprob": -0.3413577498051158,
1431
+ "compression_ratio": 1.4972067039106145,
1432
+ "no_speech_prob": 0.08281701803207397,
1433
+ "seg-text": [
1434
+ " I guess I really don't care."
1435
+ ]
1436
+ },
1437
+ {
1438
+ "id": 23,
1439
+ "seek": 5850,
1440
+ "start": 72.86,
1441
+ "end": 75.66,
1442
+ "text": " I... don't know.",
1443
+ "tokens": [
1444
+ 314,
1445
+ 986,
1446
+ 836,
1447
+ 470,
1448
+ 760,
1449
+ 13
1450
+ ],
1451
+ "temperature": 0.0,
1452
+ "avg_logprob": -0.3413577498051158,
1453
+ "compression_ratio": 1.4972067039106145,
1454
+ "no_speech_prob": 0.08281701803207397,
1455
+ "seg-text": [
1456
+ " I... don't know."
1457
+ ]
1458
+ },
1459
+ {
1460
+ "id": 24,
1461
+ "seek": 5850,
1462
+ "start": 75.66,
1463
+ "end": 76.66,
1464
+ "text": " Sorry.",
1465
+ "tokens": [
1466
+ 19061,
1467
+ 13
1468
+ ],
1469
+ "temperature": 0.0,
1470
+ "avg_logprob": -0.3413577498051158,
1471
+ "compression_ratio": 1.4972067039106145,
1472
+ "no_speech_prob": 0.08281701803207397,
1473
+ "seg-text": [
1474
+ " Sorry."
1475
+ ]
1476
+ },
1477
+ {
1478
+ "id": 25,
1479
+ "seek": 5850,
1480
+ "start": 76.66,
1481
+ "end": 77.66,
1482
+ "text": " What?",
1483
+ "tokens": [
1484
+ 1867,
1485
+ 30
1486
+ ],
1487
+ "temperature": 0.0,
1488
+ "avg_logprob": -0.3413577498051158,
1489
+ "compression_ratio": 1.4972067039106145,
1490
+ "no_speech_prob": 0.08281701803207397,
1491
+ "seg-text": [
1492
+ " What?"
1493
+ ]
1494
+ },
1495
+ {
1496
+ "id": 26,
1497
+ "seek": 5850,
1498
+ "start": 77.66,
1499
+ "end": 78.66,
1500
+ "text": " Do what?",
1501
+ "tokens": [
1502
+ 2141,
1503
+ 644,
1504
+ 30
1505
+ ],
1506
+ "temperature": 0.0,
1507
+ "avg_logprob": -0.3413577498051158,
1508
+ "compression_ratio": 1.4972067039106145,
1509
+ "no_speech_prob": 0.08281701803207397,
1510
+ "seg-text": [
1511
+ " Do what?"
1512
+ ]
1513
+ },
1514
+ {
1515
+ "id": 27,
1516
+ "seek": 5850,
1517
+ "start": 78.66,
1518
+ "end": 81.66,
1519
+ "text": " Tell me what happened.",
1520
+ "tokens": [
1521
+ 14026,
1522
+ 502,
1523
+ 644,
1524
+ 3022,
1525
+ 13
1526
+ ],
1527
+ "temperature": 0.0,
1528
+ "avg_logprob": -0.3413577498051158,
1529
+ "compression_ratio": 1.4972067039106145,
1530
+ "no_speech_prob": 0.08281701803207397,
1531
+ "seg-text": [
1532
+ " Tell me what happened."
1533
+ ]
1534
+ },
1535
+ {
1536
+ "id": 28,
1537
+ "seek": 5850,
1538
+ "start": 81.66,
1539
+ "end": 83.66,
1540
+ "text": " Why can't you just tell me?",
1541
+ "tokens": [
1542
+ 4162,
1543
+ 460,
1544
+ 470,
1545
+ 345,
1546
+ 655,
1547
+ 1560,
1548
+ 502,
1549
+ 30
1550
+ ],
1551
+ "temperature": 0.0,
1552
+ "avg_logprob": -0.3413577498051158,
1553
+ "compression_ratio": 1.4972067039106145,
1554
+ "no_speech_prob": 0.08281701803207397,
1555
+ "seg-text": [
1556
+ " Why can't you just tell me?"
1557
+ ]
1558
+ },
1559
+ {
1560
+ "id": 29,
1561
+ "seek": 5850,
1562
+ "start": 83.66,
1563
+ "end": 85.66,
1564
+ "text": " All I want from you is an answer.",
1565
+ "tokens": [
1566
+ 1439,
1567
+ 314,
1568
+ 765,
1569
+ 422,
1570
+ 345,
1571
+ 318,
1572
+ 281,
1573
+ 3280,
1574
+ 13
1575
+ ],
1576
+ "temperature": 0.0,
1577
+ "avg_logprob": -0.3413577498051158,
1578
+ "compression_ratio": 1.4972067039106145,
1579
+ "no_speech_prob": 0.08281701803207397,
1580
+ "seg-text": [
1581
+ " All I want from you is an answer."
1582
+ ]
1583
+ },
1584
+ {
1585
+ "id": 30,
1586
+ "seek": 5850,
1587
+ "start": 85.66,
1588
+ "end": 87.66,
1589
+ "text": " How do you know about that?",
1590
+ "tokens": [
1591
+ 1374,
1592
+ 466,
1593
+ 345,
1594
+ 760,
1595
+ 546,
1596
+ 326,
1597
+ 30
1598
+ ],
1599
+ "temperature": 0.0,
1600
+ "avg_logprob": -0.3413577498051158,
1601
+ "compression_ratio": 1.4972067039106145,
1602
+ "no_speech_prob": 0.08281701803207397,
1603
+ "seg-text": [
1604
+ " How do you know about that?"
1605
+ ]
1606
+ },
1607
+ {
1608
+ "id": 31,
1609
+ "seek": 8766,
1610
+ "start": 87.66,
1611
+ "end": 89.66,
1612
+ "text": " Is there any other way?",
1613
+ "tokens": [
1614
+ 1148,
1615
+ 612,
1616
+ 597,
1617
+ 584,
1618
+ 835,
1619
+ 30
1620
+ ],
1621
+ "temperature": 0.0,
1622
+ "avg_logprob": -0.15933908586916717,
1623
+ "compression_ratio": 1.2842105263157895,
1624
+ "no_speech_prob": 0.11254733055830002,
1625
+ "seg-text": [
1626
+ " Is there any other way?"
1627
+ ]
1628
+ },
1629
+ {
1630
+ "id": 32,
1631
+ "seek": 8766,
1632
+ "start": 89.66,
1633
+ "end": 91.66,
1634
+ "text": " Don't you remember?",
1635
+ "tokens": [
1636
+ 2094,
1637
+ 470,
1638
+ 345,
1639
+ 3505,
1640
+ 30
1641
+ ],
1642
+ "temperature": 0.0,
1643
+ "avg_logprob": -0.15933908586916717,
1644
+ "compression_ratio": 1.2842105263157895,
1645
+ "no_speech_prob": 0.11254733055830002,
1646
+ "seg-text": [
1647
+ " Don't you remember?"
1648
+ ]
1649
+ },
1650
+ {
1651
+ "id": 33,
1652
+ "seek": 8766,
1653
+ "start": 91.66,
1654
+ "end": 92.66,
1655
+ "text": " Later, okay?",
1656
+ "tokens": [
1657
+ 11450,
1658
+ 11,
1659
+ 8788,
1660
+ 30
1661
+ ],
1662
+ "temperature": 0.0,
1663
+ "avg_logprob": -0.15933908586916717,
1664
+ "compression_ratio": 1.2842105263157895,
1665
+ "no_speech_prob": 0.11254733055830002,
1666
+ "seg-text": [
1667
+ " Later, okay?"
1668
+ ]
1669
+ },
1670
+ {
1671
+ "id": 34,
1672
+ "seek": 8766,
1673
+ "start": 92.66,
1674
+ "end": 95.66,
1675
+ "text": " Stay right there. I'll be there soon.",
1676
+ "tokens": [
1677
+ 16160,
1678
+ 826,
1679
+ 612,
1680
+ 13,
1681
+ 314,
1682
+ 1183,
1683
+ 307,
1684
+ 612,
1685
+ 2582,
1686
+ 13
1687
+ ],
1688
+ "temperature": 0.0,
1689
+ "avg_logprob": -0.15933908586916717,
1690
+ "compression_ratio": 1.2842105263157895,
1691
+ "no_speech_prob": 0.11254733055830002,
1692
+ "seg-text": [
1693
+ " Stay right there. I'll be there soon."
1694
+ ]
1695
+ },
1696
+ {
1697
+ "id": 35,
1698
+ "seek": 8766,
1699
+ "start": 95.66,
1700
+ "end": 97.66,
1701
+ "text": " I'll be there soon.",
1702
+ "tokens": [
1703
+ 314,
1704
+ 1183,
1705
+ 307,
1706
+ 612,
1707
+ 2582,
1708
+ 13
1709
+ ],
1710
+ "temperature": 0.0,
1711
+ "avg_logprob": -0.15933908586916717,
1712
+ "compression_ratio": 1.2842105263157895,
1713
+ "no_speech_prob": 0.11254733055830002,
1714
+ "seg-text": [
1715
+ " I'll be there soon."
1716
+ ]
1717
+ },
1718
+ {
1719
+ "id": 36,
1720
+ "seek": 9766,
1721
+ "start": 97.66,
1722
+ "end": 98.66,
1723
+ "text": " Thanks.",
1724
+ "tokens": [
1725
+ 50363,
1726
+ 6930,
1727
+ 13,
1728
+ 50413
1729
+ ],
1730
+ "temperature": 0.0,
1731
+ "avg_logprob": -0.7431881904602051,
1732
+ "compression_ratio": 0.4666666666666667,
1733
+ "no_speech_prob": 0.008630666881799698,
1734
+ "seg-text": [
1735
+ " Thanks."
1736
+ ]
1737
+ }
1738
+ ],
1739
+ "language": "en"
1740
+ }
1741
+ }
finetunes/james-sunderland-v2/dataset/ynoeld_00000.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e435018e3435a88ca51a8e744e3c842ee53dcab922360402fef6d736c88ca79
3
+ size 882080
finetunes/james-sunderland-v2/dataset/ynoeld_00001.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7bff135be4a8d9a33d7b987b37d3e8fe6b9daa9618b5b0f5cd14287d78c576f
3
+ size 1058480
finetunes/james-sunderland-v2/dataset/ynoeld_00002.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:362f390cd119e0d22c4b29a11fcf95b969c32138f9bd6e65fbd18d97e06b42c7
3
+ size 1234880
finetunes/james-sunderland-v2/dataset/ynoeld_00003.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c34556f613d94aba9f79c7c105ddbd9468425bd5eea55402721dfe54e5e043bc
3
+ size 705680
finetunes/james-sunderland-v2/dataset/ynoeld_00004.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:735ca52243a4626822a96ecfc64a8e304707282c961d06a3fba91ec5a6e4dd9d
3
+ size 1234880
finetunes/james-sunderland-v2/dataset/ynoeld_00005.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d853f8d20a2e497aa5578caed08851a105f80fd6527daedf25f17d55fd907089
3
+ size 352880
finetunes/james-sunderland-v2/dataset/ynoeld_00006.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8333ebc2ef88def5545ab1262fb3cd27dd639bc4d1d28c83d81c78983b365863
3
+ size 352880
finetunes/james-sunderland-v2/dataset/ynoeld_00007.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3968685fff1ea301026b84feb82404d0436f033f8a270926ae3ee702444c66d
3
+ size 352880
finetunes/james-sunderland-v2/dataset/ynoeld_00008.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04b72788ffe2515c185a7408440e15b5f14ae98f51615ea0d316797d09af409
3
+ size 617480
finetunes/james-sunderland-v2/dataset/ynoeld_00009.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c82d2159fa64e178c5192400b5c49be356744b0c54aff0fc8da6becd42ce17
3
+ size 617480
finetunes/james-sunderland-v2/dataset/ynoeld_00010.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10cde908ff5d1b9a3c27b428f9eb1f83d632d53f8e554f4d0a6b627c61db5564
3
+ size 1146680
finetunes/james-sunderland-v2/dataset/ynoeld_00011.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57d1d75b7f7dd9fe4994eabc242889d86b8fb68df7853fe69d4585830351b745
3
+ size 617480
finetunes/james-sunderland-v2/dataset/ynoeld_00012.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f70df39da3d3d0d1fc412e0ded5af3f32da788c85f86a21bca1f86cb23e6e8b
3
+ size 705680
finetunes/james-sunderland-v2/dataset/ynoeld_00013.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:051ee0a96a55cf545477550647ab19b37dae9592c255ef8ed938744e24699881
3
+ size 441080
finetunes/james-sunderland-v2/dataset/ynoeld_00014.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc13d0f9c0a70690358e98f66b75e5b0f7f4fb6c30e70c6c17e8b5f8734cd54a
3
+ size 303488