Spaces:
Sleeping
Sleeping
Alex Cabrera
commited on
Commit
•
c24ff9a
1
Parent(s):
c52f77a
audio
Browse files- .dockerignore +1 -0
- .gitattributes +0 -34
- .zeno_cache/OUTPUTsilero_sst.pickle +0 -0
- .zeno_cache/OUTPUTwhisper.pickle +0 -0
- .zeno_cache/POSTDISTILLwer_msilero_sst.pickle +0 -0
- .zeno_cache/POSTDISTILLwer_mwhisper.pickle +0 -0
- .zeno_cache/PREDISTILLamplitude.pickle +0 -0
- .zeno_cache/PREDISTILLcountry.pickle +0 -0
- .zeno_cache/PREDISTILLlength.pickle +0 -0
- .zeno_cache/folders.pickle +0 -0
- .zeno_cache/reports.pickle +0 -0
- .zeno_cache/slices.pickle +0 -0
- .zeno_cache/view.mjs +788 -0
- Dockerfile +22 -0
- config.toml +11 -0
- functions/audio_characteristics.py +25 -0
- functions/model.py +73 -0
- jupyter_accent.ipynb +223 -0
- latest_silero_models.yml +563 -0
- metadata.csv +0 -0
- process_metadata.ipynb +412 -0
- requirements.txt +3 -0
- speakers_all.csv +0 -0
.dockerignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.git
|
.gitattributes
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.zeno_cache/OUTPUTsilero_sst.pickle
ADDED
Binary file (768 kB). View file
|
|
.zeno_cache/OUTPUTwhisper.pickle
ADDED
Binary file (804 kB). View file
|
|
.zeno_cache/POSTDISTILLwer_msilero_sst.pickle
ADDED
Binary file (63.7 kB). View file
|
|
.zeno_cache/POSTDISTILLwer_mwhisper.pickle
ADDED
Binary file (63.7 kB). View file
|
|
.zeno_cache/PREDISTILLamplitude.pickle
ADDED
Binary file (63.7 kB). View file
|
|
.zeno_cache/PREDISTILLcountry.pickle
ADDED
Binary file (65 kB). View file
|
|
.zeno_cache/PREDISTILLlength.pickle
ADDED
Binary file (55.1 kB). View file
|
|
.zeno_cache/folders.pickle
ADDED
Binary file (28 Bytes). View file
|
|
.zeno_cache/reports.pickle
ADDED
Binary file (7.95 kB). View file
|
|
.zeno_cache/slices.pickle
ADDED
Binary file (2.87 kB). View file
|
|
.zeno_cache/view.mjs
ADDED
@@ -0,0 +1,788 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
function noop() { }
|
2 |
+
function run(fn) {
|
3 |
+
return fn();
|
4 |
+
}
|
5 |
+
function blank_object() {
|
6 |
+
return Object.create(null);
|
7 |
+
}
|
8 |
+
function run_all(fns) {
|
9 |
+
fns.forEach(run);
|
10 |
+
}
|
11 |
+
function is_function(thing) {
|
12 |
+
return typeof thing === 'function';
|
13 |
+
}
|
14 |
+
function safe_not_equal(a, b) {
|
15 |
+
return a != a ? b == b : a !== b || ((a && typeof a === 'object') || typeof a === 'function');
|
16 |
+
}
|
17 |
+
let src_url_equal_anchor;
|
18 |
+
function src_url_equal(element_src, url) {
|
19 |
+
if (!src_url_equal_anchor) {
|
20 |
+
src_url_equal_anchor = document.createElement('a');
|
21 |
+
}
|
22 |
+
src_url_equal_anchor.href = url;
|
23 |
+
return element_src === src_url_equal_anchor.href;
|
24 |
+
}
|
25 |
+
function is_empty(obj) {
|
26 |
+
return Object.keys(obj).length === 0;
|
27 |
+
}
|
28 |
+
|
29 |
+
// Track which nodes are claimed during hydration. Unclaimed nodes can then be removed from the DOM
|
30 |
+
// at the end of hydration without touching the remaining nodes.
|
31 |
+
let is_hydrating = false;
|
32 |
+
function start_hydrating() {
|
33 |
+
is_hydrating = true;
|
34 |
+
}
|
35 |
+
function end_hydrating() {
|
36 |
+
is_hydrating = false;
|
37 |
+
}
|
38 |
+
function upper_bound(low, high, key, value) {
|
39 |
+
// Return first index of value larger than input value in the range [low, high)
|
40 |
+
while (low < high) {
|
41 |
+
const mid = low + ((high - low) >> 1);
|
42 |
+
if (key(mid) <= value) {
|
43 |
+
low = mid + 1;
|
44 |
+
}
|
45 |
+
else {
|
46 |
+
high = mid;
|
47 |
+
}
|
48 |
+
}
|
49 |
+
return low;
|
50 |
+
}
|
51 |
+
function init_hydrate(target) {
|
52 |
+
if (target.hydrate_init)
|
53 |
+
return;
|
54 |
+
target.hydrate_init = true;
|
55 |
+
// We know that all children have claim_order values since the unclaimed have been detached if target is not <head>
|
56 |
+
let children = target.childNodes;
|
57 |
+
// If target is <head>, there may be children without claim_order
|
58 |
+
if (target.nodeName === 'HEAD') {
|
59 |
+
const myChildren = [];
|
60 |
+
for (let i = 0; i < children.length; i++) {
|
61 |
+
const node = children[i];
|
62 |
+
if (node.claim_order !== undefined) {
|
63 |
+
myChildren.push(node);
|
64 |
+
}
|
65 |
+
}
|
66 |
+
children = myChildren;
|
67 |
+
}
|
68 |
+
/*
|
69 |
+
* Reorder claimed children optimally.
|
70 |
+
* We can reorder claimed children optimally by finding the longest subsequence of
|
71 |
+
* nodes that are already claimed in order and only moving the rest. The longest
|
72 |
+
* subsequence subsequence of nodes that are claimed in order can be found by
|
73 |
+
* computing the longest increasing subsequence of .claim_order values.
|
74 |
+
*
|
75 |
+
* This algorithm is optimal in generating the least amount of reorder operations
|
76 |
+
* possible.
|
77 |
+
*
|
78 |
+
* Proof:
|
79 |
+
* We know that, given a set of reordering operations, the nodes that do not move
|
80 |
+
* always form an increasing subsequence, since they do not move among each other
|
81 |
+
* meaning that they must be already ordered among each other. Thus, the maximal
|
82 |
+
* set of nodes that do not move form a longest increasing subsequence.
|
83 |
+
*/
|
84 |
+
// Compute longest increasing subsequence
|
85 |
+
// m: subsequence length j => index k of smallest value that ends an increasing subsequence of length j
|
86 |
+
const m = new Int32Array(children.length + 1);
|
87 |
+
// Predecessor indices + 1
|
88 |
+
const p = new Int32Array(children.length);
|
89 |
+
m[0] = -1;
|
90 |
+
let longest = 0;
|
91 |
+
for (let i = 0; i < children.length; i++) {
|
92 |
+
const current = children[i].claim_order;
|
93 |
+
// Find the largest subsequence length such that it ends in a value less than our current value
|
94 |
+
// upper_bound returns first greater value, so we subtract one
|
95 |
+
// with fast path for when we are on the current longest subsequence
|
96 |
+
const seqLen = ((longest > 0 && children[m[longest]].claim_order <= current) ? longest + 1 : upper_bound(1, longest, idx => children[m[idx]].claim_order, current)) - 1;
|
97 |
+
p[i] = m[seqLen] + 1;
|
98 |
+
const newLen = seqLen + 1;
|
99 |
+
// We can guarantee that current is the smallest value. Otherwise, we would have generated a longer sequence.
|
100 |
+
m[newLen] = i;
|
101 |
+
longest = Math.max(newLen, longest);
|
102 |
+
}
|
103 |
+
// The longest increasing subsequence of nodes (initially reversed)
|
104 |
+
const lis = [];
|
105 |
+
// The rest of the nodes, nodes that will be moved
|
106 |
+
const toMove = [];
|
107 |
+
let last = children.length - 1;
|
108 |
+
for (let cur = m[longest] + 1; cur != 0; cur = p[cur - 1]) {
|
109 |
+
lis.push(children[cur - 1]);
|
110 |
+
for (; last >= cur; last--) {
|
111 |
+
toMove.push(children[last]);
|
112 |
+
}
|
113 |
+
last--;
|
114 |
+
}
|
115 |
+
for (; last >= 0; last--) {
|
116 |
+
toMove.push(children[last]);
|
117 |
+
}
|
118 |
+
lis.reverse();
|
119 |
+
// We sort the nodes being moved to guarantee that their insertion order matches the claim order
|
120 |
+
toMove.sort((a, b) => a.claim_order - b.claim_order);
|
121 |
+
// Finally, we move the nodes
|
122 |
+
for (let i = 0, j = 0; i < toMove.length; i++) {
|
123 |
+
while (j < lis.length && toMove[i].claim_order >= lis[j].claim_order) {
|
124 |
+
j++;
|
125 |
+
}
|
126 |
+
const anchor = j < lis.length ? lis[j] : null;
|
127 |
+
target.insertBefore(toMove[i], anchor);
|
128 |
+
}
|
129 |
+
}
|
130 |
+
function append(target, node) {
|
131 |
+
target.appendChild(node);
|
132 |
+
}
|
133 |
+
function append_styles(target, style_sheet_id, styles) {
|
134 |
+
const append_styles_to = get_root_for_style(target);
|
135 |
+
if (!append_styles_to.getElementById(style_sheet_id)) {
|
136 |
+
const style = element('style');
|
137 |
+
style.id = style_sheet_id;
|
138 |
+
style.textContent = styles;
|
139 |
+
append_stylesheet(append_styles_to, style);
|
140 |
+
}
|
141 |
+
}
|
142 |
+
function get_root_for_style(node) {
|
143 |
+
if (!node)
|
144 |
+
return document;
|
145 |
+
const root = node.getRootNode ? node.getRootNode() : node.ownerDocument;
|
146 |
+
if (root && root.host) {
|
147 |
+
return root;
|
148 |
+
}
|
149 |
+
return node.ownerDocument;
|
150 |
+
}
|
151 |
+
function append_stylesheet(node, style) {
|
152 |
+
append(node.head || node, style);
|
153 |
+
}
|
154 |
+
function append_hydration(target, node) {
|
155 |
+
if (is_hydrating) {
|
156 |
+
init_hydrate(target);
|
157 |
+
if ((target.actual_end_child === undefined) || ((target.actual_end_child !== null) && (target.actual_end_child.parentElement !== target))) {
|
158 |
+
target.actual_end_child = target.firstChild;
|
159 |
+
}
|
160 |
+
// Skip nodes of undefined ordering
|
161 |
+
while ((target.actual_end_child !== null) && (target.actual_end_child.claim_order === undefined)) {
|
162 |
+
target.actual_end_child = target.actual_end_child.nextSibling;
|
163 |
+
}
|
164 |
+
if (node !== target.actual_end_child) {
|
165 |
+
// We only insert if the ordering of this node should be modified or the parent node is not target
|
166 |
+
if (node.claim_order !== undefined || node.parentNode !== target) {
|
167 |
+
target.insertBefore(node, target.actual_end_child);
|
168 |
+
}
|
169 |
+
}
|
170 |
+
else {
|
171 |
+
target.actual_end_child = node.nextSibling;
|
172 |
+
}
|
173 |
+
}
|
174 |
+
else if (node.parentNode !== target || node.nextSibling !== null) {
|
175 |
+
target.appendChild(node);
|
176 |
+
}
|
177 |
+
}
|
178 |
+
function insert_hydration(target, node, anchor) {
|
179 |
+
if (is_hydrating && !anchor) {
|
180 |
+
append_hydration(target, node);
|
181 |
+
}
|
182 |
+
else if (node.parentNode !== target || node.nextSibling != anchor) {
|
183 |
+
target.insertBefore(node, anchor || null);
|
184 |
+
}
|
185 |
+
}
|
186 |
+
function detach(node) {
|
187 |
+
node.parentNode.removeChild(node);
|
188 |
+
}
|
189 |
+
function element(name) {
|
190 |
+
return document.createElement(name);
|
191 |
+
}
|
192 |
+
function text(data) {
|
193 |
+
return document.createTextNode(data);
|
194 |
+
}
|
195 |
+
function space() {
|
196 |
+
return text(' ');
|
197 |
+
}
|
198 |
+
function attr(node, attribute, value) {
|
199 |
+
if (value == null)
|
200 |
+
node.removeAttribute(attribute);
|
201 |
+
else if (node.getAttribute(attribute) !== value)
|
202 |
+
node.setAttribute(attribute, value);
|
203 |
+
}
|
204 |
+
function children(element) {
|
205 |
+
return Array.from(element.childNodes);
|
206 |
+
}
|
207 |
+
function init_claim_info(nodes) {
|
208 |
+
if (nodes.claim_info === undefined) {
|
209 |
+
nodes.claim_info = { last_index: 0, total_claimed: 0 };
|
210 |
+
}
|
211 |
+
}
|
212 |
+
function claim_node(nodes, predicate, processNode, createNode, dontUpdateLastIndex = false) {
|
213 |
+
// Try to find nodes in an order such that we lengthen the longest increasing subsequence
|
214 |
+
init_claim_info(nodes);
|
215 |
+
const resultNode = (() => {
|
216 |
+
// We first try to find an element after the previous one
|
217 |
+
for (let i = nodes.claim_info.last_index; i < nodes.length; i++) {
|
218 |
+
const node = nodes[i];
|
219 |
+
if (predicate(node)) {
|
220 |
+
const replacement = processNode(node);
|
221 |
+
if (replacement === undefined) {
|
222 |
+
nodes.splice(i, 1);
|
223 |
+
}
|
224 |
+
else {
|
225 |
+
nodes[i] = replacement;
|
226 |
+
}
|
227 |
+
if (!dontUpdateLastIndex) {
|
228 |
+
nodes.claim_info.last_index = i;
|
229 |
+
}
|
230 |
+
return node;
|
231 |
+
}
|
232 |
+
}
|
233 |
+
// Otherwise, we try to find one before
|
234 |
+
// We iterate in reverse so that we don't go too far back
|
235 |
+
for (let i = nodes.claim_info.last_index - 1; i >= 0; i--) {
|
236 |
+
const node = nodes[i];
|
237 |
+
if (predicate(node)) {
|
238 |
+
const replacement = processNode(node);
|
239 |
+
if (replacement === undefined) {
|
240 |
+
nodes.splice(i, 1);
|
241 |
+
}
|
242 |
+
else {
|
243 |
+
nodes[i] = replacement;
|
244 |
+
}
|
245 |
+
if (!dontUpdateLastIndex) {
|
246 |
+
nodes.claim_info.last_index = i;
|
247 |
+
}
|
248 |
+
else if (replacement === undefined) {
|
249 |
+
// Since we spliced before the last_index, we decrease it
|
250 |
+
nodes.claim_info.last_index--;
|
251 |
+
}
|
252 |
+
return node;
|
253 |
+
}
|
254 |
+
}
|
255 |
+
// If we can't find any matching node, we create a new one
|
256 |
+
return createNode();
|
257 |
+
})();
|
258 |
+
resultNode.claim_order = nodes.claim_info.total_claimed;
|
259 |
+
nodes.claim_info.total_claimed += 1;
|
260 |
+
return resultNode;
|
261 |
+
}
|
262 |
+
function claim_element_base(nodes, name, attributes, create_element) {
|
263 |
+
return claim_node(nodes, (node) => node.nodeName === name, (node) => {
|
264 |
+
const remove = [];
|
265 |
+
for (let j = 0; j < node.attributes.length; j++) {
|
266 |
+
const attribute = node.attributes[j];
|
267 |
+
if (!attributes[attribute.name]) {
|
268 |
+
remove.push(attribute.name);
|
269 |
+
}
|
270 |
+
}
|
271 |
+
remove.forEach(v => node.removeAttribute(v));
|
272 |
+
return undefined;
|
273 |
+
}, () => create_element(name));
|
274 |
+
}
|
275 |
+
function claim_element(nodes, name, attributes) {
|
276 |
+
return claim_element_base(nodes, name, attributes, element);
|
277 |
+
}
|
278 |
+
function claim_text(nodes, data) {
|
279 |
+
return claim_node(nodes, (node) => node.nodeType === 3, (node) => {
|
280 |
+
const dataStr = '' + data;
|
281 |
+
if (node.data.startsWith(dataStr)) {
|
282 |
+
if (node.data.length !== dataStr.length) {
|
283 |
+
return node.splitText(dataStr.length);
|
284 |
+
}
|
285 |
+
}
|
286 |
+
else {
|
287 |
+
node.data = dataStr;
|
288 |
+
}
|
289 |
+
}, () => text(data), true // Text nodes should not update last index since it is likely not worth it to eliminate an increasing subsequence of actual elements
|
290 |
+
);
|
291 |
+
}
|
292 |
+
function claim_space(nodes) {
|
293 |
+
return claim_text(nodes, ' ');
|
294 |
+
}
|
295 |
+
function set_data(text, data) {
|
296 |
+
data = '' + data;
|
297 |
+
if (text.wholeText !== data)
|
298 |
+
text.data = data;
|
299 |
+
}
|
300 |
+
function set_style(node, key, value, important) {
|
301 |
+
if (value === null) {
|
302 |
+
node.style.removeProperty(key);
|
303 |
+
}
|
304 |
+
else {
|
305 |
+
node.style.setProperty(key, value, important ? 'important' : '');
|
306 |
+
}
|
307 |
+
}
|
308 |
+
|
309 |
+
let current_component;
|
310 |
+
function set_current_component(component) {
|
311 |
+
current_component = component;
|
312 |
+
}
|
313 |
+
|
314 |
+
const dirty_components = [];
|
315 |
+
const binding_callbacks = [];
|
316 |
+
const render_callbacks = [];
|
317 |
+
const flush_callbacks = [];
|
318 |
+
const resolved_promise = Promise.resolve();
|
319 |
+
let update_scheduled = false;
|
320 |
+
function schedule_update() {
|
321 |
+
if (!update_scheduled) {
|
322 |
+
update_scheduled = true;
|
323 |
+
resolved_promise.then(flush);
|
324 |
+
}
|
325 |
+
}
|
326 |
+
function add_render_callback(fn) {
|
327 |
+
render_callbacks.push(fn);
|
328 |
+
}
|
329 |
+
// flush() calls callbacks in this order:
|
330 |
+
// 1. All beforeUpdate callbacks, in order: parents before children
|
331 |
+
// 2. All bind:this callbacks, in reverse order: children before parents.
|
332 |
+
// 3. All afterUpdate callbacks, in order: parents before children. EXCEPT
|
333 |
+
// for afterUpdates called during the initial onMount, which are called in
|
334 |
+
// reverse order: children before parents.
|
335 |
+
// Since callbacks might update component values, which could trigger another
|
336 |
+
// call to flush(), the following steps guard against this:
|
337 |
+
// 1. During beforeUpdate, any updated components will be added to the
|
338 |
+
// dirty_components array and will cause a reentrant call to flush(). Because
|
339 |
+
// the flush index is kept outside the function, the reentrant call will pick
|
340 |
+
// up where the earlier call left off and go through all dirty components. The
|
341 |
+
// current_component value is saved and restored so that the reentrant call will
|
342 |
+
// not interfere with the "parent" flush() call.
|
343 |
+
// 2. bind:this callbacks cannot trigger new flush() calls.
|
344 |
+
// 3. During afterUpdate, any updated components will NOT have their afterUpdate
|
345 |
+
// callback called a second time; the seen_callbacks set, outside the flush()
|
346 |
+
// function, guarantees this behavior.
|
347 |
+
const seen_callbacks = new Set();
|
348 |
+
let flushidx = 0; // Do *not* move this inside the flush() function
|
349 |
+
function flush() {
|
350 |
+
const saved_component = current_component;
|
351 |
+
do {
|
352 |
+
// first, call beforeUpdate functions
|
353 |
+
// and update components
|
354 |
+
while (flushidx < dirty_components.length) {
|
355 |
+
const component = dirty_components[flushidx];
|
356 |
+
flushidx++;
|
357 |
+
set_current_component(component);
|
358 |
+
update(component.$$);
|
359 |
+
}
|
360 |
+
set_current_component(null);
|
361 |
+
dirty_components.length = 0;
|
362 |
+
flushidx = 0;
|
363 |
+
while (binding_callbacks.length)
|
364 |
+
binding_callbacks.pop()();
|
365 |
+
// then, once components are updated, call
|
366 |
+
// afterUpdate functions. This may cause
|
367 |
+
// subsequent updates...
|
368 |
+
for (let i = 0; i < render_callbacks.length; i += 1) {
|
369 |
+
const callback = render_callbacks[i];
|
370 |
+
if (!seen_callbacks.has(callback)) {
|
371 |
+
// ...so guard against infinite loops
|
372 |
+
seen_callbacks.add(callback);
|
373 |
+
callback();
|
374 |
+
}
|
375 |
+
}
|
376 |
+
render_callbacks.length = 0;
|
377 |
+
} while (dirty_components.length);
|
378 |
+
while (flush_callbacks.length) {
|
379 |
+
flush_callbacks.pop()();
|
380 |
+
}
|
381 |
+
update_scheduled = false;
|
382 |
+
seen_callbacks.clear();
|
383 |
+
set_current_component(saved_component);
|
384 |
+
}
|
385 |
+
function update($$) {
|
386 |
+
if ($$.fragment !== null) {
|
387 |
+
$$.update();
|
388 |
+
run_all($$.before_update);
|
389 |
+
const dirty = $$.dirty;
|
390 |
+
$$.dirty = [-1];
|
391 |
+
$$.fragment && $$.fragment.p($$.ctx, dirty);
|
392 |
+
$$.after_update.forEach(add_render_callback);
|
393 |
+
}
|
394 |
+
}
|
395 |
+
const outroing = new Set();
|
396 |
+
function transition_in(block, local) {
|
397 |
+
if (block && block.i) {
|
398 |
+
outroing.delete(block);
|
399 |
+
block.i(local);
|
400 |
+
}
|
401 |
+
}
|
402 |
+
function mount_component(component, target, anchor, customElement) {
|
403 |
+
const { fragment, on_mount, on_destroy, after_update } = component.$$;
|
404 |
+
fragment && fragment.m(target, anchor);
|
405 |
+
if (!customElement) {
|
406 |
+
// onMount happens before the initial afterUpdate
|
407 |
+
add_render_callback(() => {
|
408 |
+
const new_on_destroy = on_mount.map(run).filter(is_function);
|
409 |
+
if (on_destroy) {
|
410 |
+
on_destroy.push(...new_on_destroy);
|
411 |
+
}
|
412 |
+
else {
|
413 |
+
// Edge case - component was destroyed immediately,
|
414 |
+
// most likely as a result of a binding initialising
|
415 |
+
run_all(new_on_destroy);
|
416 |
+
}
|
417 |
+
component.$$.on_mount = [];
|
418 |
+
});
|
419 |
+
}
|
420 |
+
after_update.forEach(add_render_callback);
|
421 |
+
}
|
422 |
+
function destroy_component(component, detaching) {
|
423 |
+
const $$ = component.$$;
|
424 |
+
if ($$.fragment !== null) {
|
425 |
+
run_all($$.on_destroy);
|
426 |
+
$$.fragment && $$.fragment.d(detaching);
|
427 |
+
// TODO null out other refs, including component.$$ (but need to
|
428 |
+
// preserve final state?)
|
429 |
+
$$.on_destroy = $$.fragment = null;
|
430 |
+
$$.ctx = [];
|
431 |
+
}
|
432 |
+
}
|
433 |
+
function make_dirty(component, i) {
|
434 |
+
if (component.$$.dirty[0] === -1) {
|
435 |
+
dirty_components.push(component);
|
436 |
+
schedule_update();
|
437 |
+
component.$$.dirty.fill(0);
|
438 |
+
}
|
439 |
+
component.$$.dirty[(i / 31) | 0] |= (1 << (i % 31));
|
440 |
+
}
|
441 |
+
function init(component, options, instance, create_fragment, not_equal, props, append_styles, dirty = [-1]) {
|
442 |
+
const parent_component = current_component;
|
443 |
+
set_current_component(component);
|
444 |
+
const $$ = component.$$ = {
|
445 |
+
fragment: null,
|
446 |
+
ctx: null,
|
447 |
+
// state
|
448 |
+
props,
|
449 |
+
update: noop,
|
450 |
+
not_equal,
|
451 |
+
bound: blank_object(),
|
452 |
+
// lifecycle
|
453 |
+
on_mount: [],
|
454 |
+
on_destroy: [],
|
455 |
+
on_disconnect: [],
|
456 |
+
before_update: [],
|
457 |
+
after_update: [],
|
458 |
+
context: new Map(options.context || (parent_component ? parent_component.$$.context : [])),
|
459 |
+
// everything else
|
460 |
+
callbacks: blank_object(),
|
461 |
+
dirty,
|
462 |
+
skip_bound: false,
|
463 |
+
root: options.target || parent_component.$$.root
|
464 |
+
};
|
465 |
+
append_styles && append_styles($$.root);
|
466 |
+
let ready = false;
|
467 |
+
$$.ctx = instance
|
468 |
+
? instance(component, options.props || {}, (i, ret, ...rest) => {
|
469 |
+
const value = rest.length ? rest[0] : ret;
|
470 |
+
if ($$.ctx && not_equal($$.ctx[i], $$.ctx[i] = value)) {
|
471 |
+
if (!$$.skip_bound && $$.bound[i])
|
472 |
+
$$.bound[i](value);
|
473 |
+
if (ready)
|
474 |
+
make_dirty(component, i);
|
475 |
+
}
|
476 |
+
return ret;
|
477 |
+
})
|
478 |
+
: [];
|
479 |
+
$$.update();
|
480 |
+
ready = true;
|
481 |
+
run_all($$.before_update);
|
482 |
+
// `false` as a special case of no DOM component
|
483 |
+
$$.fragment = create_fragment ? create_fragment($$.ctx) : false;
|
484 |
+
if (options.target) {
|
485 |
+
if (options.hydrate) {
|
486 |
+
start_hydrating();
|
487 |
+
const nodes = children(options.target);
|
488 |
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
489 |
+
$$.fragment && $$.fragment.l(nodes);
|
490 |
+
nodes.forEach(detach);
|
491 |
+
}
|
492 |
+
else {
|
493 |
+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
494 |
+
$$.fragment && $$.fragment.c();
|
495 |
+
}
|
496 |
+
if (options.intro)
|
497 |
+
transition_in(component.$$.fragment);
|
498 |
+
mount_component(component, options.target, options.anchor, options.customElement);
|
499 |
+
end_hydrating();
|
500 |
+
flush();
|
501 |
+
}
|
502 |
+
set_current_component(parent_component);
|
503 |
+
}
|
504 |
+
/**
|
505 |
+
* Base class for Svelte components. Used when dev=false.
|
506 |
+
*/
|
507 |
+
class SvelteComponent {
|
508 |
+
$destroy() {
|
509 |
+
destroy_component(this, 1);
|
510 |
+
this.$destroy = noop;
|
511 |
+
}
|
512 |
+
$on(type, callback) {
|
513 |
+
const callbacks = (this.$$.callbacks[type] || (this.$$.callbacks[type] = []));
|
514 |
+
callbacks.push(callback);
|
515 |
+
return () => {
|
516 |
+
const index = callbacks.indexOf(callback);
|
517 |
+
if (index !== -1)
|
518 |
+
callbacks.splice(index, 1);
|
519 |
+
};
|
520 |
+
}
|
521 |
+
$set($$props) {
|
522 |
+
if (this.$$set && !is_empty($$props)) {
|
523 |
+
this.$$.skip_bound = true;
|
524 |
+
this.$$set($$props);
|
525 |
+
this.$$.skip_bound = false;
|
526 |
+
}
|
527 |
+
}
|
528 |
+
}
|
529 |
+
|
530 |
+
/* src/InstanceView.svelte generated by Svelte v3.49.0 */
|
531 |
+
|
532 |
+
function add_css(target) {
|
533 |
+
append_styles(target, "svelte-1qkvlix", ".label.svelte-1qkvlix{font-size:12px;color:rgba(0, 0, 0, 0.5);font-variant:small-caps}.value.svelte-1qkvlix{font-size:12px}.box.svelte-1qkvlix{padding:10px;border:0.5px solid rgb(224, 224, 224);max-width:400px}#container.svelte-1qkvlix{display:flex;flex-direction:row;flex-wrap:wrap}spectrogram canvas{z-index:0 !important}wave canvas{z-index:0 !important}wave{z-index:0 !important}");
|
534 |
+
}
|
535 |
+
|
536 |
+
// (27:4) {#if modelColumn && entry[modelColumn] !== undefined}
|
537 |
+
function create_if_block(ctx) {
|
538 |
+
let br;
|
539 |
+
let t0;
|
540 |
+
let span0;
|
541 |
+
let t1;
|
542 |
+
let t2;
|
543 |
+
let span1;
|
544 |
+
let t3_value = /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] + "";
|
545 |
+
let t3;
|
546 |
+
|
547 |
+
return {
|
548 |
+
c() {
|
549 |
+
br = element("br");
|
550 |
+
t0 = space();
|
551 |
+
span0 = element("span");
|
552 |
+
t1 = text("output:");
|
553 |
+
t2 = space();
|
554 |
+
span1 = element("span");
|
555 |
+
t3 = text(t3_value);
|
556 |
+
this.h();
|
557 |
+
},
|
558 |
+
l(nodes) {
|
559 |
+
br = claim_element(nodes, "BR", {});
|
560 |
+
t0 = claim_space(nodes);
|
561 |
+
span0 = claim_element(nodes, "SPAN", { class: true });
|
562 |
+
var span0_nodes = children(span0);
|
563 |
+
t1 = claim_text(span0_nodes, "output:");
|
564 |
+
span0_nodes.forEach(detach);
|
565 |
+
t2 = claim_space(nodes);
|
566 |
+
span1 = claim_element(nodes, "SPAN", { class: true });
|
567 |
+
var span1_nodes = children(span1);
|
568 |
+
t3 = claim_text(span1_nodes, t3_value);
|
569 |
+
span1_nodes.forEach(detach);
|
570 |
+
this.h();
|
571 |
+
},
|
572 |
+
h() {
|
573 |
+
attr(span0, "class", "label svelte-1qkvlix");
|
574 |
+
attr(span1, "class", "value svelte-1qkvlix");
|
575 |
+
},
|
576 |
+
m(target, anchor) {
|
577 |
+
insert_hydration(target, br, anchor);
|
578 |
+
insert_hydration(target, t0, anchor);
|
579 |
+
insert_hydration(target, span0, anchor);
|
580 |
+
append_hydration(span0, t1);
|
581 |
+
insert_hydration(target, t2, anchor);
|
582 |
+
insert_hydration(target, span1, anchor);
|
583 |
+
append_hydration(span1, t3);
|
584 |
+
},
|
585 |
+
p(ctx, dirty) {
|
586 |
+
if (dirty & /*entry, modelColumn*/ 3 && t3_value !== (t3_value = /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] + "")) set_data(t3, t3_value);
|
587 |
+
},
|
588 |
+
d(detaching) {
|
589 |
+
if (detaching) detach(br);
|
590 |
+
if (detaching) detach(t0);
|
591 |
+
if (detaching) detach(span0);
|
592 |
+
if (detaching) detach(t2);
|
593 |
+
if (detaching) detach(span1);
|
594 |
+
}
|
595 |
+
};
|
596 |
+
}
|
597 |
+
|
598 |
+
// Svelte-compiled fragment for the audio-transcription instance view.
// Renders an <audio> player (src taken from the data column), a
// "label: <value>" pair, and — when modelColumn is set and the entry has a
// value for it — the model-output block produced by create_if_block.
// ctx slot layout (see instance()): 0=entry, 1=modelColumn, 2=labelColumn,
// 3=dataColumn, 4=idColumn.
function create_fragment(ctx) {
	let div2;
	let div1;
	let div0;
	let audio;
	let source;
	let source_src_value;
	let source_type_value;
	let audio_src_value;
	let t0;
	let span0;
	let t1;
	let span1;
	let t2_value = /*entry*/ ctx[0][/*labelColumn*/ ctx[2]] + "";
	let t2;
	let t3;
	let if_block = /*modelColumn*/ ctx[1] && /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] !== undefined && create_if_block(ctx);

	return {
		// c: create all DOM nodes from scratch (pure client-side render path).
		c() {
			div2 = element("div");
			div1 = element("div");
			div0 = element("div");
			audio = element("audio");
			source = element("source");
			t0 = space();
			span0 = element("span");
			t1 = text("label: ");
			span1 = element("span");
			t2 = text(t2_value);
			t3 = space();
			if (if_block) if_block.c();
			this.h();
		},
		// l: claim server-rendered nodes for hydration instead of creating them.
		l(nodes) {
			div2 = claim_element(nodes, "DIV", { id: true, class: true });
			var div2_nodes = children(div2);
			div1 = claim_element(div2_nodes, "DIV", { class: true });
			var div1_nodes = children(div1);
			div0 = claim_element(div1_nodes, "DIV", {});
			var div0_nodes = children(div0);
			audio = claim_element(div0_nodes, "AUDIO", { src: true });
			var audio_nodes = children(audio);
			source = claim_element(audio_nodes, "SOURCE", { src: true, type: true });
			audio_nodes.forEach(detach);
			div0_nodes.forEach(detach);
			t0 = claim_space(div1_nodes);
			span0 = claim_element(div1_nodes, "SPAN", { class: true });
			var span0_nodes = children(span0);
			t1 = claim_text(span0_nodes, "label: ");
			span0_nodes.forEach(detach);
			span1 = claim_element(div1_nodes, "SPAN", { class: true });
			var span1_nodes = children(span1);
			t2 = claim_text(span1_nodes, t2_value);
			span1_nodes.forEach(detach);
			t3 = claim_space(div1_nodes);
			if (if_block) if_block.l(div1_nodes);
			div1_nodes.forEach(detach);
			div2_nodes.forEach(detach);
			this.h();
		},
		// h: apply attributes/styles to the created or claimed nodes.
		h() {
			if (!src_url_equal(source.src, source_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) attr(source, "src", source_src_value);
			// MIME subtype is derived from the id column's file extension,
			// e.g. "foo.mp3" -> "audio/mp3".
			attr(source, "type", source_type_value = "audio/" + /*entry*/ ctx[0][/*idColumn*/ ctx[4]].split(".").at(-1));
			audio.controls = true;
			if (!src_url_equal(audio.src, audio_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) attr(audio, "src", audio_src_value);
			set_style(div0, "display", `flex`, false);
			attr(span0, "class", "label svelte-1qkvlix");
			attr(span1, "class", "value svelte-1qkvlix");
			attr(div1, "class", "box svelte-1qkvlix");
			attr(div2, "id", "container");
			attr(div2, "class", "svelte-1qkvlix");
		},
		// m: mount the node tree into the target.
		m(target, anchor) {
			insert_hydration(target, div2, anchor);
			append_hydration(div2, div1);
			append_hydration(div1, div0);
			append_hydration(div0, audio);
			append_hydration(audio, source);
			append_hydration(div1, t0);
			append_hydration(div1, span0);
			append_hydration(span0, t1);
			append_hydration(div1, span1);
			append_hydration(span1, t2);
			append_hydration(div1, t3);
			if (if_block) if_block.m(div1, null);
		},
		// p: patch the DOM when props change; `dirty` is a bitmask of ctx slots
		// (bit i set <=> ctx[i] changed), e.g. 9 = entry(1) | dataColumn(8).
		p(ctx, [dirty]) {
			if (dirty & /*entry, dataColumn*/ 9 && !src_url_equal(source.src, source_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) {
				attr(source, "src", source_src_value);
			}

			if (dirty & /*entry, idColumn*/ 17 && source_type_value !== (source_type_value = "audio/" + /*entry*/ ctx[0][/*idColumn*/ ctx[4]].split(".").at(-1))) {
				attr(source, "type", source_type_value);
			}

			if (dirty & /*entry, dataColumn*/ 9 && !src_url_equal(audio.src, audio_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) {
				attr(audio, "src", audio_src_value);
			}

			if (dirty & /*entry, labelColumn*/ 5 && t2_value !== (t2_value = /*entry*/ ctx[0][/*labelColumn*/ ctx[2]] + "")) set_data(t2, t2_value);

			// Create, update, or tear down the optional model-output block.
			if (/*modelColumn*/ ctx[1] && /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] !== undefined) {
				if (if_block) {
					if_block.p(ctx, dirty);
				} else {
					if_block = create_if_block(ctx);
					if_block.c();
					if_block.m(div1, null);
				}
			} else if (if_block) {
				if_block.d(1);
				if_block = null;
			}
		},
		i: noop,
		o: noop,
		// d: destroy; only detach the root when removing from the DOM.
		d(detaching) {
			if (detaching) detach(div2);
			if (if_block) if_block.d();
		}
	};
}
|
721 |
+
|
722 |
+
// Component instance: declares the view's props and wires prop updates into
// reactive ctx slots via $$invalidate.
// The order of the returned array defines the ctx indices used throughout
// create_fragment/create_if_block:
// [0]=entry, [1]=modelColumn, [2]=labelColumn, [3]=dataColumn,
// [4]=idColumn, [5]=options.
function instance($$self, $$props, $$invalidate) {
	let { entry } = $$props;
	let { options } = $$props;
	let { modelColumn } = $$props;
	let { labelColumn } = $$props;
	let { dataColumn } = $$props;
	let { idColumn } = $$props;

	$$self.$$set = $$props => {
		if ('entry' in $$props) $$invalidate(0, entry = $$props.entry);
		if ('options' in $$props) $$invalidate(5, options = $$props.options);
		if ('modelColumn' in $$props) $$invalidate(1, modelColumn = $$props.modelColumn);
		if ('labelColumn' in $$props) $$invalidate(2, labelColumn = $$props.labelColumn);
		if ('dataColumn' in $$props) $$invalidate(3, dataColumn = $$props.dataColumn);
		if ('idColumn' in $$props) $$invalidate(4, idColumn = $$props.idColumn);
	};

	return [entry, modelColumn, labelColumn, dataColumn, idColumn, options];
}
|
741 |
+
|
742 |
+
// Svelte component class for the audio-transcription instance view.
// The prop-name -> ctx-index map passed to init() must stay in sync with the
// array returned by instance() above.
class InstanceView extends SvelteComponent {
	constructor(options) {
		super();

		init(
			this,
			options,
			instance,
			create_fragment,
			safe_not_equal,
			{
				entry: 0,
				options: 5,
				modelColumn: 1,
				labelColumn: 2,
				dataColumn: 3,
				idColumn: 4
			},
			add_css
		);
	}
}
|
764 |
+
|
765 |
+
// Entry point called by the Zeno frontend: hydrate an InstanceView into the
// given container element with the supplied entry and column configuration.
function getInstance(
	div,
	options,
	entry,
	modelColumn,
	labelColumn,
	dataColumn,
	idColumn
) {
	const props = {
		entry,
		options,
		modelColumn,
		labelColumn,
		dataColumn,
		idColumn,
	};

	new InstanceView({ target: div, props, hydrate: true });
}
|
787 |
+
|
788 |
+
export { getInstance };
|
Dockerfile
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.8

RUN useradd -m -u 1000 user
USER user
# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Install dependencies BEFORE copying the application so the pip layer is
# cached across code-only changes. Install as the non-root user (pip puts
# packages under ~/.local, which is already on PATH above).
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r $HOME/app/requirements.txt

# Copy the current directory contents into the container at $HOME/app,
# setting the owner to the user. This already includes .zeno_cache
# (.dockerignore only excludes .git), so no separate ADD or recursive
# chown is needed — the previous `RUN chown` ran as the non-root user and
# could not change ownership anyway.
COPY --chown=user . $HOME/app

CMD ["zeno", "config.toml"]
|
config.toml
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Zeno evaluation configuration for the speech-accent-archive ASR demo.
# `view` selects the frontend instance renderer; `functions` points at the
# directory containing the @model/@distill/@metric definitions.
view = "audio-transcription"
functions = "./functions/"
# Model names passed to load_model(); dispatch is by substring match
# ("sst" -> Silero, "whisper" -> Whisper) in functions/model.py.
models = ["silero_sst", "whisper"]
metadata = "metadata.csv"
# NOTE(review): absolute local macOS path — this will not resolve inside the
# Docker image built for this Space; confirm the intended data location
# before deploying.
data_path = "/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/"
# The "id" column doubles as both the unique row id and the audio file name.
data_column = "id"
id_column = "id"
label_column = "label"
# 7860 / 0.0.0.0 are the defaults expected by Hugging Face Spaces.
port = 7860
host = "0.0.0.0"
editable = false
|
functions/audio_characteristics.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
from zeno import ZenoOptions, distill, DistillReturn
|
6 |
+
|
7 |
+
|
8 |
+
@distill
def amplitude(df, ops: ZenoOptions):
    """Mean absolute sample amplitude of each audio file in the frame.

    Loads each file named in the data column (relative to ops.data_path)
    with librosa and averages the absolute sample values.
    """
    mean_amplitudes = []
    for file_name in df[ops.data_column]:
        samples, _ = librosa.load(os.path.join(ops.data_path, file_name))
        mean_amplitudes.append(float(np.abs(samples).mean()))
    return DistillReturn(distill_output=mean_amplitudes)
|
16 |
+
|
17 |
+
|
18 |
+
@distill
def length(df, ops: ZenoOptions):
    """Length in samples of each audio file in the frame.

    Loads each file named in the data column (relative to ops.data_path)
    with librosa (default sample rate) and records the sample count.
    """
    sample_counts = []
    for file_name in df[ops.data_column]:
        samples, _ = librosa.load(os.path.join(ops.data_path, file_name))
        sample_counts.append(len(samples))
    return DistillReturn(distill_output=sample_counts)
|
functions/model.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import os
|
3 |
+
|
4 |
+
import pandas as pd
|
5 |
+
import torch
|
6 |
+
import whisper
|
7 |
+
from jiwer import wer
|
8 |
+
|
9 |
+
from zeno import (
|
10 |
+
ZenoOptions,
|
11 |
+
distill,
|
12 |
+
metric,
|
13 |
+
model,
|
14 |
+
DistillReturn,
|
15 |
+
ModelReturn,
|
16 |
+
MetricReturn,
|
17 |
+
)
|
18 |
+
|
19 |
+
|
20 |
+
@model
def load_model(model_path):
    # Model loader dispatched on the model name: any name containing "sst"
    # loads Silero STT via torch.hub; any name containing "whisper" loads
    # the OpenAI Whisper "tiny" checkpoint. Returns a prediction function
    # mapping a metadata DataFrame to a ModelReturn of transcripts.
    if "sst" in model_path:  # NOTE(review): matches "silero_sst" (sic) from config.toml
        device = torch.device("cpu")
        model, decoder, utils = torch.hub.load(
            repo_or_dir="snakers4/silero-models",
            model="silero_stt",
            language="en",
            device=device,
        )
        # Silero's hub entry returns a utils tuple; only the batch reader
        # and input-prep helpers are needed here.
        (read_batch, _, _, prepare_model_input) = utils

        def pred(df, ops: ZenoOptions):
            # Batch-decode every file referenced by the data column.
            files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]
            input = prepare_model_input(read_batch(files), device=device)
            return ModelReturn(model_output=[decoder(x.cpu()) for x in model(input)])

        return pred

    elif "whisper" in model_path:
        model = whisper.load_model("tiny")

        def pred(df, ops: ZenoOptions):
            # Whisper transcribes one file at a time.
            files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]
            outs = []
            for f in files:
                outs.append(model.transcribe(f)["text"])
            return ModelReturn(model_output=outs)

        return pred
|
50 |
+
|
51 |
+
|
52 |
+
@distill
def country(df, ops: ZenoOptions):
    """Extract the country from each row's birthplace.

    The speech-accent-archive "birthplace" field is a comma-separated
    location string ("city, region, country"); the country is its last
    token. Missing (NaN/non-string) birthplaces map to "".

    Fixes two defects in the previous implementation: the NaN check
    (`x == x`) inspected only the first row, and the result was a
    single-element list rather than one value per row as a distill
    function must return.
    """
    countries = [
        birthplace.split(", ")[-1] if isinstance(birthplace, str) else ""
        for birthplace in df["birthplace"]
    ]
    return DistillReturn(distill_output=countries)
|
57 |
+
|
58 |
+
|
59 |
+
@distill
def wer_m(df, ops: ZenoOptions):
    """Per-row word error rate between the label column and the model
    output column (jiwer.wer)."""

    def row_wer(row):
        return wer(row[ops.label_column], row[ops.output_column])

    return DistillReturn(distill_output=df.apply(row_wer, axis=1))
|
66 |
+
|
67 |
+
|
68 |
+
@metric
def avg_wer(df, ops: ZenoOptions):
    """Average word error rate over a slice; 0 for empty slices.

    Series.mean() yields NaN on an empty slice, so fall back to 0 in that
    case. pd.isnull covers both None and NaN, making the previous extra
    math.isnan check redundant.
    """
    avg = df[ops.distill_columns["wer_m"]].mean()
    if pd.isnull(avg):
        return MetricReturn(metric=0)
    return MetricReturn(metric=avg)
|
jupyter_accent.ipynb
ADDED
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"%load_ext autoreload\n",
|
10 |
+
"%autoreload 2\n",
|
11 |
+
"\n",
|
12 |
+
"from zeno import zeno\n",
|
13 |
+
"import math\n",
|
14 |
+
"import os\n",
|
15 |
+
"import pandas as pd"
|
16 |
+
]
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"cell_type": "code",
|
20 |
+
"execution_count": 9,
|
21 |
+
"metadata": {},
|
22 |
+
"outputs": [],
|
23 |
+
"source": [
|
24 |
+
"df = pd.read_csv(\"metadata.csv\")"
|
25 |
+
]
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"cell_type": "code",
|
29 |
+
"execution_count": 10,
|
30 |
+
"metadata": {},
|
31 |
+
"outputs": [],
|
32 |
+
"source": [
|
33 |
+
"df.set_index('id', inplace=True, drop=False)"
|
34 |
+
]
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"cell_type": "code",
|
38 |
+
"execution_count": 12,
|
39 |
+
"metadata": {},
|
40 |
+
"outputs": [
|
41 |
+
{
|
42 |
+
"ename": "ValueError",
|
43 |
+
"evalue": "'id' is both an index level and a column label, which is ambiguous.",
|
44 |
+
"output_type": "error",
|
45 |
+
"traceback": [
|
46 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
47 |
+
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
48 |
+
"Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df\u001b[39m.\u001b[39;49mgroupby(\u001b[39m'\u001b[39;49m\u001b[39mid\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
|
49 |
+
"File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/frame.py:8402\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 8399\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mYou have to supply one of \u001b[39m\u001b[39m'\u001b[39m\u001b[39mby\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39mlevel\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 8400\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_axis_number(axis)\n\u001b[0;32m-> 8402\u001b[0m \u001b[39mreturn\u001b[39;00m DataFrameGroupBy(\n\u001b[1;32m 8403\u001b[0m obj\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[1;32m 8404\u001b[0m keys\u001b[39m=\u001b[39;49mby,\n\u001b[1;32m 8405\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[1;32m 8406\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[1;32m 8407\u001b[0m as_index\u001b[39m=\u001b[39;49mas_index,\n\u001b[1;32m 8408\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[1;32m 8409\u001b[0m group_keys\u001b[39m=\u001b[39;49mgroup_keys,\n\u001b[1;32m 8410\u001b[0m squeeze\u001b[39m=\u001b[39;49msqueeze,\n\u001b[1;32m 8411\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[1;32m 8412\u001b[0m dropna\u001b[39m=\u001b[39;49mdropna,\n\u001b[1;32m 8413\u001b[0m )\n",
|
50 |
+
"File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:965\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 962\u001b[0m \u001b[39mif\u001b[39;00m grouper \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 963\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgroupby\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgrouper\u001b[39;00m \u001b[39mimport\u001b[39;00m get_grouper\n\u001b[0;32m--> 965\u001b[0m grouper, exclusions, obj \u001b[39m=\u001b[39m get_grouper(\n\u001b[1;32m 966\u001b[0m obj,\n\u001b[1;32m 967\u001b[0m keys,\n\u001b[1;32m 968\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[1;32m 969\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[1;32m 970\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[1;32m 971\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[1;32m 972\u001b[0m mutated\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmutated,\n\u001b[1;32m 973\u001b[0m dropna\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdropna,\n\u001b[1;32m 974\u001b[0m )\n\u001b[1;32m 976\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj \u001b[39m=\u001b[39m obj\n\u001b[1;32m 977\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39m_get_axis_number(axis)\n",
|
51 |
+
"File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/groupby/grouper.py:878\u001b[0m, in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 876\u001b[0m \u001b[39mif\u001b[39;00m gpr \u001b[39min\u001b[39;00m obj:\n\u001b[1;32m 877\u001b[0m \u001b[39mif\u001b[39;00m validate:\n\u001b[0;32m--> 878\u001b[0m obj\u001b[39m.\u001b[39;49m_check_label_or_level_ambiguity(gpr, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[1;32m 879\u001b[0m in_axis, name, gpr \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m, gpr, obj[gpr]\n\u001b[1;32m 880\u001b[0m \u001b[39mif\u001b[39;00m gpr\u001b[39m.\u001b[39mndim \u001b[39m!=\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 881\u001b[0m \u001b[39m# non-unique columns; raise here to get the name in the\u001b[39;00m\n\u001b[1;32m 882\u001b[0m \u001b[39m# exception message\u001b[39;00m\n",
|
52 |
+
"File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/generic.py:1797\u001b[0m, in \u001b[0;36mNDFrame._check_label_or_level_ambiguity\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1789\u001b[0m label_article, label_type \u001b[39m=\u001b[39m (\n\u001b[1;32m 1790\u001b[0m (\u001b[39m\"\u001b[39m\u001b[39ma\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mcolumn\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mif\u001b[39;00m axis \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m \u001b[39melse\u001b[39;00m (\u001b[39m\"\u001b[39m\u001b[39man\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 1791\u001b[0m )\n\u001b[1;32m 1793\u001b[0m msg \u001b[39m=\u001b[39m (\n\u001b[1;32m 1794\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m is both \u001b[39m\u001b[39m{\u001b[39;00mlevel_article\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m{\u001b[39;00mlevel_type\u001b[39m}\u001b[39;00m\u001b[39m level and \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1795\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mlabel_article\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m{\u001b[39;00mlabel_type\u001b[39m}\u001b[39;00m\u001b[39m label, which is ambiguous.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1796\u001b[0m )\n\u001b[0;32m-> 1797\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(msg)\n",
|
53 |
+
"\u001b[0;31mValueError\u001b[0m: 'id' is both an index level and a column label, which is ambiguous."
|
54 |
+
]
|
55 |
+
}
|
56 |
+
],
|
57 |
+
"source": [
|
58 |
+
"df.groupby('id')"
|
59 |
+
]
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"cell_type": "code",
|
63 |
+
"execution_count": null,
|
64 |
+
"metadata": {},
|
65 |
+
"outputs": [],
|
66 |
+
"source": [
|
67 |
+
"zeno({\n",
|
68 |
+
" \"metadata\": df[0:10],\n",
|
69 |
+
" \"view\": \"audio-transcription\",\n",
|
70 |
+
" \"data_path\": \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/\",\n",
|
71 |
+
" \"label_column\": \"label\",\n",
|
72 |
+
" \"data_column\": \"id\"\n",
|
73 |
+
"})"
|
74 |
+
]
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"execution_count": null,
|
79 |
+
"metadata": {},
|
80 |
+
"outputs": [],
|
81 |
+
"source": [
|
82 |
+
"import torch\n",
|
83 |
+
"import whisper\n",
|
84 |
+
"from jiwer import wer\n",
|
85 |
+
"from zeno import ZenoOptions, distill, metric, model\n",
|
86 |
+
"import numpy as np\n",
|
87 |
+
"from zeno import ZenoOptions, distill"
|
88 |
+
]
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"cell_type": "code",
|
92 |
+
"execution_count": null,
|
93 |
+
"metadata": {},
|
94 |
+
"outputs": [],
|
95 |
+
"source": [
|
96 |
+
"@model\n",
|
97 |
+
"def load_model(model_path):\n",
|
98 |
+
" if \"sst\" in model_path:\n",
|
99 |
+
" device = torch.device(\"cpu\")\n",
|
100 |
+
" model, decoder, utils = torch.hub.load(\n",
|
101 |
+
" repo_or_dir=\"snakers4/silero-models\",\n",
|
102 |
+
" model=\"silero_stt\",\n",
|
103 |
+
" language=\"en\",\n",
|
104 |
+
" device=device,\n",
|
105 |
+
" )\n",
|
106 |
+
" (read_batch, _, _, prepare_model_input) = utils\n",
|
107 |
+
"\n",
|
108 |
+
" def pred(df, ops: ZenoOptions):\n",
|
109 |
+
" files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
|
110 |
+
" input = prepare_model_input(read_batch(files), device=device)\n",
|
111 |
+
" return [decoder(x.cpu()) for x in model(input)]\n",
|
112 |
+
"\n",
|
113 |
+
" return pred\n",
|
114 |
+
"\n",
|
115 |
+
" elif \"whisper\" in model_path:\n",
|
116 |
+
" model = whisper.load_model(\"tiny\")\n",
|
117 |
+
"\n",
|
118 |
+
" def pred(df, ops: ZenoOptions):\n",
|
119 |
+
" files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
|
120 |
+
" outs = []\n",
|
121 |
+
" for f in files:\n",
|
122 |
+
" outs.append(model.transcribe(f)[\"text\"])\n",
|
123 |
+
" return outs\n",
|
124 |
+
"\n",
|
125 |
+
" return pred\n",
|
126 |
+
"\n",
|
127 |
+
"\n",
|
128 |
+
"@distill\n",
|
129 |
+
"def country(df, ops: ZenoOptions):\n",
|
130 |
+
" if df[\"0birthplace\"][0] == df[\"0birthplace\"][0]:\n",
|
131 |
+
" return df[\"0birthplace\"].str.split(\", \")[-1][-1]\n",
|
132 |
+
" return \"\"\n",
|
133 |
+
"\n",
|
134 |
+
"\n",
|
135 |
+
"@distill\n",
|
136 |
+
"def wer_m(df, ops: ZenoOptions):\n",
|
137 |
+
" return df.apply(lambda x: wer(x[ops.label_column], x[ops.output_column]), axis=1)\n",
|
138 |
+
"\n",
|
139 |
+
"\n",
|
140 |
+
"@metric\n",
|
141 |
+
"def avg_wer(df, ops: ZenoOptions):\n",
|
142 |
+
" avg = df[ops.distill_columns[\"wer_m\"]].mean()\n",
|
143 |
+
" if math.isnan(avg):\n",
|
144 |
+
" return 0\n",
|
145 |
+
" return avg\n",
|
146 |
+
"\n",
|
147 |
+
"# @distill\n",
|
148 |
+
"# def amplitude(df, ops: ZenoOptions):\n",
|
149 |
+
"# files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
|
150 |
+
"# amps = []\n",
|
151 |
+
"# for audio in files:\n",
|
152 |
+
"# y, _ = librosa.load(audio)\n",
|
153 |
+
"# amps.append(float(np.abs(y).mean()))\n",
|
154 |
+
"# return amps\n",
|
155 |
+
"\n",
|
156 |
+
"\n",
|
157 |
+
"# @distill\n",
|
158 |
+
"# def length(df, ops: ZenoOptions):\n",
|
159 |
+
"# files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
|
160 |
+
"# amps = []\n",
|
161 |
+
"# for audio in files:\n",
|
162 |
+
"# y, _ = librosa.load(audio)\n",
|
163 |
+
"# amps.append(len(y))\n",
|
164 |
+
"# return amps"
|
165 |
+
]
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"cell_type": "code",
|
169 |
+
"execution_count": null,
|
170 |
+
"metadata": {
|
171 |
+
"tags": []
|
172 |
+
},
|
173 |
+
"outputs": [],
|
174 |
+
"source": [
|
175 |
+
"zeno({\n",
|
176 |
+
" \"metadata\": df,\n",
|
177 |
+
" \"functions\": [load_model, country, wer_m, avg_wer],\n",
|
178 |
+
" \"view\": \"audio-transcription\",\n",
|
179 |
+
" \"models\": [\"silero_sst\", \"whisper\"],\n",
|
180 |
+
" \"data_path\": \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/\",\n",
|
181 |
+
" \"data_column\": \"id\",\n",
|
182 |
+
" \"label_column\": \"label\",\n",
|
183 |
+
" \"samples\": 10,\n",
|
184 |
+
"})\n",
|
185 |
+
"# metadata = \"metadata.csv\"\n",
|
186 |
+
"# # data_path = \"https://zenoml.s3.amazonaws.com/accents/\""
|
187 |
+
]
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"cell_type": "code",
|
191 |
+
"execution_count": null,
|
192 |
+
"metadata": {},
|
193 |
+
"outputs": [],
|
194 |
+
"source": []
|
195 |
+
}
|
196 |
+
],
|
197 |
+
"metadata": {
|
198 |
+
"kernelspec": {
|
199 |
+
"display_name": "Python 3 (ipykernel)",
|
200 |
+
"language": "python",
|
201 |
+
"name": "python3"
|
202 |
+
},
|
203 |
+
"language_info": {
|
204 |
+
"codemirror_mode": {
|
205 |
+
"name": "ipython",
|
206 |
+
"version": 3
|
207 |
+
},
|
208 |
+
"file_extension": ".py",
|
209 |
+
"mimetype": "text/x-python",
|
210 |
+
"name": "python",
|
211 |
+
"nbconvert_exporter": "python",
|
212 |
+
"pygments_lexer": "ipython3",
|
213 |
+
"version": "3.8.12"
|
214 |
+
},
|
215 |
+
"vscode": {
|
216 |
+
"interpreter": {
|
217 |
+
"hash": "59d606a796fde3c997548ee5ab3f3009081de8aa2fb58c2406e58b3c7613e786"
|
218 |
+
}
|
219 |
+
}
|
220 |
+
},
|
221 |
+
"nbformat": 4,
|
222 |
+
"nbformat_minor": 4
|
223 |
+
}
|
latest_silero_models.yml
ADDED
@@ -0,0 +1,563 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# pre-trained STT models
|
2 |
+
stt_models:
|
3 |
+
en:
|
4 |
+
latest:
|
5 |
+
meta:
|
6 |
+
name: "en_v6"
|
7 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
8 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
9 |
+
jit: "https://models.silero.ai/models/en/en_v6.jit"
|
10 |
+
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
|
11 |
+
jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
|
12 |
+
jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
|
13 |
+
onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
|
14 |
+
v6:
|
15 |
+
meta:
|
16 |
+
name: "en_v6"
|
17 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
18 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
19 |
+
jit: "https://models.silero.ai/models/en/en_v6.jit"
|
20 |
+
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
|
21 |
+
jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
|
22 |
+
jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
|
23 |
+
onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
|
24 |
+
v5:
|
25 |
+
meta:
|
26 |
+
name: "en_v5"
|
27 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
28 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
29 |
+
jit: "https://models.silero.ai/models/en/en_v5.jit"
|
30 |
+
onnx: "https://models.silero.ai/models/en/en_v5.onnx"
|
31 |
+
onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
|
32 |
+
jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
|
33 |
+
jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
|
34 |
+
onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
|
35 |
+
v4_0:
|
36 |
+
meta:
|
37 |
+
name: "en_v4_0"
|
38 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
39 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
40 |
+
jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
|
41 |
+
onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
|
42 |
+
v3:
|
43 |
+
meta:
|
44 |
+
name: "en_v3"
|
45 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
46 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
47 |
+
jit: "https://models.silero.ai/models/en/en_v3_jit.model"
|
48 |
+
onnx: "https://models.silero.ai/models/en/en_v3.onnx"
|
49 |
+
jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
|
50 |
+
jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
|
51 |
+
jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
|
52 |
+
onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
|
53 |
+
jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
|
54 |
+
jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
|
55 |
+
onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
|
56 |
+
v2:
|
57 |
+
meta:
|
58 |
+
name: "en_v2"
|
59 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
60 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
61 |
+
jit: "https://models.silero.ai/models/en/en_v2_jit.model"
|
62 |
+
onnx: "https://models.silero.ai/models/en/en_v2.onnx"
|
63 |
+
tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
|
64 |
+
v1:
|
65 |
+
meta:
|
66 |
+
name: "en_v1"
|
67 |
+
sample: "https://models.silero.ai/examples/en_sample.wav"
|
68 |
+
labels: "https://models.silero.ai/models/en/en_v1_labels.json"
|
69 |
+
jit: "https://models.silero.ai/models/en/en_v1_jit.model"
|
70 |
+
onnx: "https://models.silero.ai/models/en/en_v1.onnx"
|
71 |
+
tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
|
72 |
+
de:
|
73 |
+
latest:
|
74 |
+
meta:
|
75 |
+
name: "de_v1"
|
76 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
77 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
78 |
+
jit: "https://models.silero.ai/models/de/de_v1_jit.model"
|
79 |
+
onnx: "https://models.silero.ai/models/de/de_v1.onnx"
|
80 |
+
tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
|
81 |
+
v1:
|
82 |
+
meta:
|
83 |
+
name: "de_v1"
|
84 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
85 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
86 |
+
jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
|
87 |
+
onnx: "https://models.silero.ai/models/de/de_v1.onnx"
|
88 |
+
tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
|
89 |
+
v3:
|
90 |
+
meta:
|
91 |
+
name: "de_v3"
|
92 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
93 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
94 |
+
jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
|
95 |
+
v4:
|
96 |
+
meta:
|
97 |
+
name: "de_v4"
|
98 |
+
sample: "https://models.silero.ai/examples/de_sample.wav"
|
99 |
+
labels: "https://models.silero.ai/models/de/de_v1_labels.json"
|
100 |
+
jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
|
101 |
+
onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
|
102 |
+
es:
|
103 |
+
latest:
|
104 |
+
meta:
|
105 |
+
name: "es_v1"
|
106 |
+
sample: "https://models.silero.ai/examples/es_sample.wav"
|
107 |
+
labels: "https://models.silero.ai/models/es/es_v1_labels.json"
|
108 |
+
jit: "https://models.silero.ai/models/es/es_v1_jit.model"
|
109 |
+
onnx: "https://models.silero.ai/models/es/es_v1.onnx"
|
110 |
+
tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
|
111 |
+
ua:
|
112 |
+
latest:
|
113 |
+
meta:
|
114 |
+
name: "ua_v3"
|
115 |
+
sample: "https://models.silero.ai/examples/ua_sample.wav"
|
116 |
+
credits:
|
117 |
+
datasets:
|
118 |
+
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
|
119 |
+
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
|
120 |
+
jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
|
121 |
+
jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
|
122 |
+
onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
|
123 |
+
v3:
|
124 |
+
meta:
|
125 |
+
name: "ua_v3"
|
126 |
+
sample: "https://models.silero.ai/examples/ua_sample.wav"
|
127 |
+
credits:
|
128 |
+
datasets:
|
129 |
+
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
|
130 |
+
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
|
131 |
+
jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
|
132 |
+
jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
|
133 |
+
onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
|
134 |
+
v1:
|
135 |
+
meta:
|
136 |
+
name: "ua_v1"
|
137 |
+
sample: "https://models.silero.ai/examples/ua_sample.wav"
|
138 |
+
credits:
|
139 |
+
datasets:
|
140 |
+
speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
|
141 |
+
labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
|
142 |
+
jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
|
143 |
+
jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
|
144 |
+
tts_models:
|
145 |
+
ru:
|
146 |
+
v3_1_ru:
|
147 |
+
latest:
|
148 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
149 |
+
package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
|
150 |
+
sample_rate: [8000, 24000, 48000]
|
151 |
+
ru_v3:
|
152 |
+
latest:
|
153 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
154 |
+
package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
|
155 |
+
sample_rate: [8000, 24000, 48000]
|
156 |
+
aidar_v2:
|
157 |
+
latest:
|
158 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
159 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
|
160 |
+
sample_rate: [8000, 16000]
|
161 |
+
aidar_8khz:
|
162 |
+
latest:
|
163 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
164 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
165 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
|
166 |
+
sample_rate: 8000
|
167 |
+
v1:
|
168 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
169 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
170 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
|
171 |
+
sample_rate: 8000
|
172 |
+
aidar_16khz:
|
173 |
+
latest:
|
174 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
175 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
176 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
|
177 |
+
sample_rate: 16000
|
178 |
+
v1:
|
179 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
180 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
181 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
|
182 |
+
sample_rate: 16000
|
183 |
+
baya_v2:
|
184 |
+
latest:
|
185 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
186 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
|
187 |
+
sample_rate: [8000, 16000]
|
188 |
+
baya_8khz:
|
189 |
+
latest:
|
190 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
191 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
192 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
|
193 |
+
sample_rate: 8000
|
194 |
+
v1:
|
195 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
196 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
197 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
|
198 |
+
sample_rate: 8000
|
199 |
+
baya_16khz:
|
200 |
+
latest:
|
201 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
202 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
203 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
|
204 |
+
sample_rate: 16000
|
205 |
+
v1:
|
206 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
207 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
208 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
|
209 |
+
sample_rate: 16000
|
210 |
+
irina_v2:
|
211 |
+
latest:
|
212 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
213 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
|
214 |
+
sample_rate: [8000, 16000]
|
215 |
+
irina_8khz:
|
216 |
+
latest:
|
217 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
218 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
219 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
|
220 |
+
sample_rate: 8000
|
221 |
+
v1:
|
222 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
223 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
224 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
|
225 |
+
sample_rate: 8000
|
226 |
+
irina_16khz:
|
227 |
+
latest:
|
228 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
229 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
230 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
|
231 |
+
sample_rate: 16000
|
232 |
+
v1:
|
233 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
234 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
235 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
|
236 |
+
sample_rate: 16000
|
237 |
+
kseniya_v2:
|
238 |
+
latest:
|
239 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
240 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
|
241 |
+
sample_rate: [8000, 16000]
|
242 |
+
kseniya_8khz:
|
243 |
+
latest:
|
244 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
245 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
246 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
|
247 |
+
sample_rate: 8000
|
248 |
+
v1:
|
249 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
250 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
251 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
|
252 |
+
sample_rate: 8000
|
253 |
+
kseniya_16khz:
|
254 |
+
latest:
|
255 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
256 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
257 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
|
258 |
+
sample_rate: 16000
|
259 |
+
v1:
|
260 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
261 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
262 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
|
263 |
+
sample_rate: 16000
|
264 |
+
natasha_v2:
|
265 |
+
latest:
|
266 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
267 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
|
268 |
+
sample_rate: [8000, 16000]
|
269 |
+
natasha_8khz:
|
270 |
+
latest:
|
271 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
272 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
273 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
|
274 |
+
sample_rate: 8000
|
275 |
+
v1:
|
276 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
277 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
278 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
|
279 |
+
sample_rate: 8000
|
280 |
+
natasha_16khz:
|
281 |
+
latest:
|
282 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
283 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
284 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
|
285 |
+
sample_rate: 16000
|
286 |
+
v1:
|
287 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
288 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
289 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
|
290 |
+
sample_rate: 16000
|
291 |
+
ruslan_v2:
|
292 |
+
latest:
|
293 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
294 |
+
package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
|
295 |
+
sample_rate: [8000, 16000]
|
296 |
+
ruslan_8khz:
|
297 |
+
latest:
|
298 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
299 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
300 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
|
301 |
+
sample_rate: 8000
|
302 |
+
v1:
|
303 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
304 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
305 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
|
306 |
+
sample_rate: 8000
|
307 |
+
ruslan_16khz:
|
308 |
+
latest:
|
309 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
310 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
311 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
|
312 |
+
sample_rate: 16000
|
313 |
+
v1:
|
314 |
+
tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
|
315 |
+
example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
|
316 |
+
jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
|
317 |
+
sample_rate: 16000
|
318 |
+
en:
|
319 |
+
v3_en:
|
320 |
+
latest:
|
321 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
322 |
+
package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
|
323 |
+
sample_rate: [8000, 24000, 48000]
|
324 |
+
v3_en_indic:
|
325 |
+
latest:
|
326 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
327 |
+
package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
|
328 |
+
sample_rate: [8000, 24000, 48000]
|
329 |
+
lj_v2:
|
330 |
+
latest:
|
331 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
332 |
+
package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
|
333 |
+
sample_rate: [8000, 16000]
|
334 |
+
lj_8khz:
|
335 |
+
latest:
|
336 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
337 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
338 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
|
339 |
+
sample_rate: 8000
|
340 |
+
v1:
|
341 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
342 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
343 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
|
344 |
+
sample_rate: 8000
|
345 |
+
lj_16khz:
|
346 |
+
latest:
|
347 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
348 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
349 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
|
350 |
+
sample_rate: 16000
|
351 |
+
v1:
|
352 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
|
353 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
354 |
+
jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
|
355 |
+
sample_rate: 16000
|
356 |
+
de:
|
357 |
+
v3_de:
|
358 |
+
latest:
|
359 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
360 |
+
package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
|
361 |
+
sample_rate: [8000, 24000, 48000]
|
362 |
+
thorsten_v2:
|
363 |
+
latest:
|
364 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
365 |
+
package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
|
366 |
+
sample_rate: [8000, 16000]
|
367 |
+
thorsten_8khz:
|
368 |
+
latest:
|
369 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
370 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
371 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
|
372 |
+
sample_rate: 8000
|
373 |
+
v1:
|
374 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
375 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
376 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
|
377 |
+
sample_rate: 8000
|
378 |
+
thorsten_16khz:
|
379 |
+
latest:
|
380 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
381 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
382 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
|
383 |
+
sample_rate: 16000
|
384 |
+
v1:
|
385 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
|
386 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
387 |
+
jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
|
388 |
+
sample_rate: 16000
|
389 |
+
es:
|
390 |
+
v3_es:
|
391 |
+
latest:
|
392 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
393 |
+
package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
|
394 |
+
sample_rate: [8000, 24000, 48000]
|
395 |
+
tux_v2:
|
396 |
+
latest:
|
397 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
398 |
+
package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
|
399 |
+
sample_rate: [8000, 16000]
|
400 |
+
tux_8khz:
|
401 |
+
latest:
|
402 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
403 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
404 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
|
405 |
+
sample_rate: 8000
|
406 |
+
v1:
|
407 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
408 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
409 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
|
410 |
+
sample_rate: 8000
|
411 |
+
tux_16khz:
|
412 |
+
latest:
|
413 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
414 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
415 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
|
416 |
+
sample_rate: 16000
|
417 |
+
v1:
|
418 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
|
419 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
420 |
+
jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
|
421 |
+
sample_rate: 16000
|
422 |
+
fr:
|
423 |
+
v3_fr:
|
424 |
+
latest:
|
425 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
426 |
+
package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
|
427 |
+
sample_rate: [8000, 24000, 48000]
|
428 |
+
gilles_v2:
|
429 |
+
latest:
|
430 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
431 |
+
package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
|
432 |
+
sample_rate: [8000, 16000]
|
433 |
+
gilles_8khz:
|
434 |
+
latest:
|
435 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
436 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
437 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
|
438 |
+
sample_rate: 8000
|
439 |
+
v1:
|
440 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
441 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
442 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
|
443 |
+
sample_rate: 8000
|
444 |
+
gilles_16khz:
|
445 |
+
latest:
|
446 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
447 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
448 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
|
449 |
+
sample_rate: 16000
|
450 |
+
v1:
|
451 |
+
tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
|
452 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
453 |
+
jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
|
454 |
+
sample_rate: 16000
|
455 |
+
ba:
|
456 |
+
aigul_v2:
|
457 |
+
latest:
|
458 |
+
example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
|
459 |
+
package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
|
460 |
+
sample_rate: [8000, 16000]
|
461 |
+
language_name: 'bashkir'
|
462 |
+
xal:
|
463 |
+
v3_xal:
|
464 |
+
latest:
|
465 |
+
example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
|
466 |
+
package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
|
467 |
+
sample_rate: [8000, 24000, 48000]
|
468 |
+
erdni_v2:
|
469 |
+
latest:
|
470 |
+
example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
|
471 |
+
package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
|
472 |
+
sample_rate: [8000, 16000]
|
473 |
+
language_name: 'kalmyk'
|
474 |
+
tt:
|
475 |
+
v3_tt:
|
476 |
+
latest:
|
477 |
+
example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
|
478 |
+
package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
|
479 |
+
sample_rate: [8000, 24000, 48000]
|
480 |
+
dilyara_v2:
|
481 |
+
latest:
|
482 |
+
example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
|
483 |
+
package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
|
484 |
+
sample_rate: [8000, 16000]
|
485 |
+
language_name: 'tatar'
|
486 |
+
uz:
|
487 |
+
v3_uz:
|
488 |
+
latest:
|
489 |
+
example: 'Tanishganimdan xursandman.'
|
490 |
+
package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
|
491 |
+
sample_rate: [8000, 24000, 48000]
|
492 |
+
dilnavoz_v2:
|
493 |
+
latest:
|
494 |
+
example: 'Tanishganimdan xursandman.'
|
495 |
+
package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
|
496 |
+
sample_rate: [8000, 16000]
|
497 |
+
language_name: 'uzbek'
|
498 |
+
ua:
|
499 |
+
v3_ua:
|
500 |
+
latest:
|
501 |
+
example: 'К+отики - пухн+асті жив+отики.'
|
502 |
+
package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
|
503 |
+
sample_rate: [8000, 24000, 48000]
|
504 |
+
mykyta_v2:
|
505 |
+
latest:
|
506 |
+
example: 'К+отики - пухн+асті жив+отики.'
|
507 |
+
package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
|
508 |
+
sample_rate: [8000, 24000, 48000]
|
509 |
+
language_name: 'ukrainian'
|
510 |
+
indic:
|
511 |
+
v3_indic:
|
512 |
+
latest:
|
513 |
+
example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
|
514 |
+
package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
|
515 |
+
sample_rate: [8000, 24000, 48000]
|
516 |
+
multi:
|
517 |
+
multi_v2:
|
518 |
+
latest:
|
519 |
+
package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
|
520 |
+
sample_rate: [8000, 16000]
|
521 |
+
speakers:
|
522 |
+
aidar:
|
523 |
+
lang: 'ru'
|
524 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
525 |
+
baya:
|
526 |
+
lang: 'ru'
|
527 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
528 |
+
kseniya:
|
529 |
+
lang: 'ru'
|
530 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
531 |
+
irina:
|
532 |
+
lang: 'ru'
|
533 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
534 |
+
ruslan:
|
535 |
+
lang: 'ru'
|
536 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
537 |
+
natasha:
|
538 |
+
lang: 'ru'
|
539 |
+
example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
|
540 |
+
thorsten:
|
541 |
+
lang: 'de'
|
542 |
+
example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
|
543 |
+
tux:
|
544 |
+
lang: 'es'
|
545 |
+
example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
|
546 |
+
gilles:
|
547 |
+
lang: 'fr'
|
548 |
+
example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
|
549 |
+
lj:
|
550 |
+
lang: 'en'
|
551 |
+
example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
|
552 |
+
dilyara:
|
553 |
+
lang: 'tt'
|
554 |
+
example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
|
555 |
+
te_models:
|
556 |
+
latest:
|
557 |
+
package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
|
558 |
+
languages: ['en', 'de', 'ru', 'es']
|
559 |
+
punct: '.,-!?—'
|
560 |
+
v2:
|
561 |
+
package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
|
562 |
+
languages: ['en', 'de', 'ru', 'es']
|
563 |
+
punct: '.,-!?—'
|
metadata.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
process_metadata.ipynb
ADDED
@@ -0,0 +1,412 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 19,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import pandas as pd"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 20,
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [],
|
17 |
+
"source": [
|
18 |
+
"df = pd.read_csv(\"./speakers_all.csv\")"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": 21,
|
24 |
+
"metadata": {},
|
25 |
+
"outputs": [],
|
26 |
+
"source": [
|
27 |
+
"df['id'] = df['filename'].apply(lambda x: x + \".wav\")\n",
|
28 |
+
"df = df[df['file_missing?'] == False]"
|
29 |
+
]
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"cell_type": "code",
|
33 |
+
"execution_count": 22,
|
34 |
+
"metadata": {},
|
35 |
+
"outputs": [
|
36 |
+
{
|
37 |
+
"data": {
|
38 |
+
"text/html": [
|
39 |
+
"<div>\n",
|
40 |
+
"<style scoped>\n",
|
41 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
42 |
+
" vertical-align: middle;\n",
|
43 |
+
" }\n",
|
44 |
+
"\n",
|
45 |
+
" .dataframe tbody tr th {\n",
|
46 |
+
" vertical-align: top;\n",
|
47 |
+
" }\n",
|
48 |
+
"\n",
|
49 |
+
" .dataframe thead th {\n",
|
50 |
+
" text-align: right;\n",
|
51 |
+
" }\n",
|
52 |
+
"</style>\n",
|
53 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
54 |
+
" <thead>\n",
|
55 |
+
" <tr style=\"text-align: right;\">\n",
|
56 |
+
" <th></th>\n",
|
57 |
+
" <th>age</th>\n",
|
58 |
+
" <th>age_onset</th>\n",
|
59 |
+
" <th>birthplace</th>\n",
|
60 |
+
" <th>filename</th>\n",
|
61 |
+
" <th>native_language</th>\n",
|
62 |
+
" <th>sex</th>\n",
|
63 |
+
" <th>speakerid</th>\n",
|
64 |
+
" <th>country</th>\n",
|
65 |
+
" <th>file_missing?</th>\n",
|
66 |
+
" <th>Unnamed: 9</th>\n",
|
67 |
+
" <th>Unnamed: 10</th>\n",
|
68 |
+
" <th>Unnamed: 11</th>\n",
|
69 |
+
" <th>id</th>\n",
|
70 |
+
" </tr>\n",
|
71 |
+
" </thead>\n",
|
72 |
+
" <tbody>\n",
|
73 |
+
" <tr>\n",
|
74 |
+
" <th>32</th>\n",
|
75 |
+
" <td>27.0</td>\n",
|
76 |
+
" <td>9.0</td>\n",
|
77 |
+
" <td>virginia, south africa</td>\n",
|
78 |
+
" <td>afrikaans1</td>\n",
|
79 |
+
" <td>afrikaans</td>\n",
|
80 |
+
" <td>female</td>\n",
|
81 |
+
" <td>1</td>\n",
|
82 |
+
" <td>south africa</td>\n",
|
83 |
+
" <td>False</td>\n",
|
84 |
+
" <td>NaN</td>\n",
|
85 |
+
" <td>NaN</td>\n",
|
86 |
+
" <td>NaN</td>\n",
|
87 |
+
" <td>afrikaans1.wav</td>\n",
|
88 |
+
" </tr>\n",
|
89 |
+
" <tr>\n",
|
90 |
+
" <th>33</th>\n",
|
91 |
+
" <td>40.0</td>\n",
|
92 |
+
" <td>5.0</td>\n",
|
93 |
+
" <td>pretoria, south africa</td>\n",
|
94 |
+
" <td>afrikaans2</td>\n",
|
95 |
+
" <td>afrikaans</td>\n",
|
96 |
+
" <td>male</td>\n",
|
97 |
+
" <td>2</td>\n",
|
98 |
+
" <td>south africa</td>\n",
|
99 |
+
" <td>False</td>\n",
|
100 |
+
" <td>NaN</td>\n",
|
101 |
+
" <td>NaN</td>\n",
|
102 |
+
" <td>NaN</td>\n",
|
103 |
+
" <td>afrikaans2.wav</td>\n",
|
104 |
+
" </tr>\n",
|
105 |
+
" <tr>\n",
|
106 |
+
" <th>34</th>\n",
|
107 |
+
" <td>43.0</td>\n",
|
108 |
+
" <td>4.0</td>\n",
|
109 |
+
" <td>pretoria, transvaal, south africa</td>\n",
|
110 |
+
" <td>afrikaans3</td>\n",
|
111 |
+
" <td>afrikaans</td>\n",
|
112 |
+
" <td>male</td>\n",
|
113 |
+
" <td>418</td>\n",
|
114 |
+
" <td>south africa</td>\n",
|
115 |
+
" <td>False</td>\n",
|
116 |
+
" <td>NaN</td>\n",
|
117 |
+
" <td>NaN</td>\n",
|
118 |
+
" <td>NaN</td>\n",
|
119 |
+
" <td>afrikaans3.wav</td>\n",
|
120 |
+
" </tr>\n",
|
121 |
+
" <tr>\n",
|
122 |
+
" <th>35</th>\n",
|
123 |
+
" <td>26.0</td>\n",
|
124 |
+
" <td>8.0</td>\n",
|
125 |
+
" <td>pretoria, south africa</td>\n",
|
126 |
+
" <td>afrikaans4</td>\n",
|
127 |
+
" <td>afrikaans</td>\n",
|
128 |
+
" <td>male</td>\n",
|
129 |
+
" <td>1159</td>\n",
|
130 |
+
" <td>south africa</td>\n",
|
131 |
+
" <td>False</td>\n",
|
132 |
+
" <td>NaN</td>\n",
|
133 |
+
" <td>NaN</td>\n",
|
134 |
+
" <td>NaN</td>\n",
|
135 |
+
" <td>afrikaans4.wav</td>\n",
|
136 |
+
" </tr>\n",
|
137 |
+
" <tr>\n",
|
138 |
+
" <th>36</th>\n",
|
139 |
+
" <td>19.0</td>\n",
|
140 |
+
" <td>6.0</td>\n",
|
141 |
+
" <td>cape town, south africa</td>\n",
|
142 |
+
" <td>afrikaans5</td>\n",
|
143 |
+
" <td>afrikaans</td>\n",
|
144 |
+
" <td>male</td>\n",
|
145 |
+
" <td>1432</td>\n",
|
146 |
+
" <td>south africa</td>\n",
|
147 |
+
" <td>False</td>\n",
|
148 |
+
" <td>NaN</td>\n",
|
149 |
+
" <td>NaN</td>\n",
|
150 |
+
" <td>NaN</td>\n",
|
151 |
+
" <td>afrikaans5.wav</td>\n",
|
152 |
+
" </tr>\n",
|
153 |
+
" </tbody>\n",
|
154 |
+
"</table>\n",
|
155 |
+
"</div>"
|
156 |
+
],
|
157 |
+
"text/plain": [
|
158 |
+
" age age_onset birthplace filename \\\n",
|
159 |
+
"32 27.0 9.0 virginia, south africa afrikaans1 \n",
|
160 |
+
"33 40.0 5.0 pretoria, south africa afrikaans2 \n",
|
161 |
+
"34 43.0 4.0 pretoria, transvaal, south africa afrikaans3 \n",
|
162 |
+
"35 26.0 8.0 pretoria, south africa afrikaans4 \n",
|
163 |
+
"36 19.0 6.0 cape town, south africa afrikaans5 \n",
|
164 |
+
"\n",
|
165 |
+
" native_language sex speakerid country file_missing? \\\n",
|
166 |
+
"32 afrikaans female 1 south africa False \n",
|
167 |
+
"33 afrikaans male 2 south africa False \n",
|
168 |
+
"34 afrikaans male 418 south africa False \n",
|
169 |
+
"35 afrikaans male 1159 south africa False \n",
|
170 |
+
"36 afrikaans male 1432 south africa False \n",
|
171 |
+
"\n",
|
172 |
+
" Unnamed: 9 Unnamed: 10 Unnamed: 11 id \n",
|
173 |
+
"32 NaN NaN NaN afrikaans1.wav \n",
|
174 |
+
"33 NaN NaN NaN afrikaans2.wav \n",
|
175 |
+
"34 NaN NaN NaN afrikaans3.wav \n",
|
176 |
+
"35 NaN NaN NaN afrikaans4.wav \n",
|
177 |
+
"36 NaN NaN NaN afrikaans5.wav "
|
178 |
+
]
|
179 |
+
},
|
180 |
+
"execution_count": 22,
|
181 |
+
"metadata": {},
|
182 |
+
"output_type": "execute_result"
|
183 |
+
}
|
184 |
+
],
|
185 |
+
"source": [
|
186 |
+
"df.head()"
|
187 |
+
]
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"cell_type": "code",
|
191 |
+
"execution_count": 23,
|
192 |
+
"metadata": {},
|
193 |
+
"outputs": [],
|
194 |
+
"source": [
|
195 |
+
"df['label'] = \"Please call Stella. Ask her to bring these things with her from the store: Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob. We also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.\""
|
196 |
+
]
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"cell_type": "code",
|
200 |
+
"execution_count": 24,
|
201 |
+
"metadata": {},
|
202 |
+
"outputs": [
|
203 |
+
{
|
204 |
+
"data": {
|
205 |
+
"text/plain": [
|
206 |
+
"Index(['age', 'age_onset', 'birthplace', 'filename', 'native_language', 'sex',\n",
|
207 |
+
" 'speakerid', 'country', 'file_missing?', 'Unnamed: 9', 'Unnamed: 10',\n",
|
208 |
+
" 'Unnamed: 11', 'id', 'label'],\n",
|
209 |
+
" dtype='object')"
|
210 |
+
]
|
211 |
+
},
|
212 |
+
"execution_count": 24,
|
213 |
+
"metadata": {},
|
214 |
+
"output_type": "execute_result"
|
215 |
+
}
|
216 |
+
],
|
217 |
+
"source": [
|
218 |
+
"df.columns"
|
219 |
+
]
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"cell_type": "code",
|
223 |
+
"execution_count": 25,
|
224 |
+
"metadata": {},
|
225 |
+
"outputs": [],
|
226 |
+
"source": [
|
227 |
+
"df = df.drop(\"Unnamed: 9\", axis=1)\n",
|
228 |
+
"df = df.drop(\"Unnamed: 10\", axis=1)\n",
|
229 |
+
"df = df.drop(\"Unnamed: 11\", axis=1)\n",
|
230 |
+
"df = df.drop(\"file_missing?\", axis=1)\n",
|
231 |
+
"df = df.drop(\"filename\", axis=1)"
|
232 |
+
]
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"cell_type": "code",
|
236 |
+
"execution_count": 26,
|
237 |
+
"metadata": {},
|
238 |
+
"outputs": [],
|
239 |
+
"source": [
|
240 |
+
"df.loc[df['sex'] == 'famale', 'sex'] = 'female'"
|
241 |
+
]
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"cell_type": "code",
|
245 |
+
"execution_count": 27,
|
246 |
+
"metadata": {},
|
247 |
+
"outputs": [],
|
248 |
+
"source": [
|
249 |
+
"import pycountry_convert as pc\n",
|
250 |
+
"\n",
|
251 |
+
"def country_to_continent(country_name):\n",
|
252 |
+
" try:\n",
|
253 |
+
" country_alpha2 = pc.country_name_to_country_alpha2(country_name, cn_name_format=pc.COUNTRY_NAME_FORMAT_LOWER)\n",
|
254 |
+
" country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)\n",
|
255 |
+
" country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)\n",
|
256 |
+
" return country_continent_name\n",
|
257 |
+
" except:\n",
|
258 |
+
" return None\n",
|
259 |
+
"\n",
|
260 |
+
"df[\"continent\"] = df[\"country\"].map(lambda x: country_to_continent(x))"
|
261 |
+
]
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"cell_type": "code",
|
265 |
+
"execution_count": 28,
|
266 |
+
"metadata": {},
|
267 |
+
"outputs": [
|
268 |
+
{
|
269 |
+
"data": {
|
270 |
+
"text/plain": [
|
271 |
+
"False 1647\n",
|
272 |
+
"True 493\n",
|
273 |
+
"Name: continent, dtype: int64"
|
274 |
+
]
|
275 |
+
},
|
276 |
+
"execution_count": 28,
|
277 |
+
"metadata": {},
|
278 |
+
"output_type": "execute_result"
|
279 |
+
}
|
280 |
+
],
|
281 |
+
"source": [
|
282 |
+
"df[\"continent\"].isnull().value_counts()"
|
283 |
+
]
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"cell_type": "code",
|
287 |
+
"execution_count": 29,
|
288 |
+
"metadata": {},
|
289 |
+
"outputs": [],
|
290 |
+
"source": [
|
291 |
+
"df = df.drop([1544, 1771])"
|
292 |
+
]
|
293 |
+
},
|
294 |
+
{
|
295 |
+
"cell_type": "code",
|
296 |
+
"execution_count": 30,
|
297 |
+
"metadata": {},
|
298 |
+
"outputs": [],
|
299 |
+
"source": [
|
300 |
+
"df.to_csv(\"metadata.csv\", index=False)"
|
301 |
+
]
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"cell_type": "markdown",
|
305 |
+
"metadata": {},
|
306 |
+
"source": [
|
307 |
+
"## Whisper"
|
308 |
+
]
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"cell_type": "code",
|
312 |
+
"execution_count": 5,
|
313 |
+
"metadata": {},
|
314 |
+
"outputs": [
|
315 |
+
{
|
316 |
+
"name": "stderr",
|
317 |
+
"output_type": "stream",
|
318 |
+
"text": [
|
319 |
+
"100%|███████████████████████████████████████| 139M/139M [00:04<00:00, 30.3MiB/s]\n"
|
320 |
+
]
|
321 |
+
}
|
322 |
+
],
|
323 |
+
"source": [
|
324 |
+
"import whisper\n",
|
325 |
+
"model = whisper.load_model(\"base\")"
|
326 |
+
]
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"cell_type": "code",
|
330 |
+
"execution_count": 8,
|
331 |
+
"metadata": {},
|
332 |
+
"outputs": [
|
333 |
+
{
|
334 |
+
"name": "stderr",
|
335 |
+
"output_type": "stream",
|
336 |
+
"text": [
|
337 |
+
"/opt/anaconda3/lib/python3.8/site-packages/whisper/transcribe.py:78: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
|
338 |
+
" warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n"
|
339 |
+
]
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"ename": "TypeError",
|
343 |
+
"evalue": "expected np.ndarray (got list)",
|
344 |
+
"output_type": "error",
|
345 |
+
"traceback": [
|
346 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
347 |
+
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
348 |
+
"\u001b[0;32m/var/folders/tq/kqg2ct9d123gd0wmshf2bd3r0000gp/T/ipykernel_157/3894641212.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtranscribe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
349 |
+
"\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/whisper/transcribe.py\u001b[0m in \u001b[0;36mtranscribe\u001b[0;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, **decode_options)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0mdecode_options\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"fp16\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mmel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlog_mel_spectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdecode_options\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"language\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
350 |
+
"\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/whisper/audio.py\u001b[0m in \u001b[0;36mlog_mel_spectrogram\u001b[0;34m(audio, n_mels)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0maudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_audio\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 112\u001b[0;31m \u001b[0maudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0mwindow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhann_window\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mN_FFT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
351 |
+
"\u001b[0;31mTypeError\u001b[0m: expected np.ndarray (got list)"
|
352 |
+
]
|
353 |
+
}
|
354 |
+
],
|
355 |
+
"source": [
|
356 |
+
"result = model.transcribe([\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\", \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"])"
|
357 |
+
]
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"cell_type": "code",
|
361 |
+
"execution_count": 7,
|
362 |
+
"metadata": {},
|
363 |
+
"outputs": [
|
364 |
+
{
|
365 |
+
"data": {
|
366 |
+
"text/plain": [
|
367 |
+
"' Please call Stella, ask her to bring these things with her from the store. 6 spoons of fresh snow peas, 5 thick slabs of blue cheese and maybe a snack for her brother Bob. We also need a small plastic snake and a big twig frog for the kids. She can scoop these things into free-rate bags and we will go meet a wind state train station.'"
|
368 |
+
]
|
369 |
+
},
|
370 |
+
"execution_count": 7,
|
371 |
+
"metadata": {},
|
372 |
+
"output_type": "execute_result"
|
373 |
+
}
|
374 |
+
],
|
375 |
+
"source": [
|
376 |
+
"result[\"text\"]"
|
377 |
+
]
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"cell_type": "markdown",
|
381 |
+
"metadata": {},
|
382 |
+
"source": []
|
383 |
+
}
|
384 |
+
],
|
385 |
+
"metadata": {
|
386 |
+
"kernelspec": {
|
387 |
+
"display_name": "Python 3.8.12 ('base')",
|
388 |
+
"language": "python",
|
389 |
+
"name": "python3"
|
390 |
+
},
|
391 |
+
"language_info": {
|
392 |
+
"codemirror_mode": {
|
393 |
+
"name": "ipython",
|
394 |
+
"version": 3
|
395 |
+
},
|
396 |
+
"file_extension": ".py",
|
397 |
+
"mimetype": "text/x-python",
|
398 |
+
"name": "python",
|
399 |
+
"nbconvert_exporter": "python",
|
400 |
+
"pygments_lexer": "ipython3",
|
401 |
+
"version": "3.8.12"
|
402 |
+
},
|
403 |
+
"orig_nbformat": 4,
|
404 |
+
"vscode": {
|
405 |
+
"interpreter": {
|
406 |
+
"hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
|
407 |
+
}
|
408 |
+
}
|
409 |
+
},
|
410 |
+
"nbformat": 4,
|
411 |
+
"nbformat_minor": 2
|
412 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
zenoml>=0.4.6
|
2 |
+
inspiredco
|
3 |
+
sentence_transformers
|
speakers_all.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|