Alex Cabrera committed on
Commit
c24ff9a
1 Parent(s): c52f77a
.dockerignore ADDED
@@ -0,0 +1 @@
 
1
+ .git
.gitattributes DELETED
@@ -1,34 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tflite filter=lfs diff=lfs merge=lfs -text
29
- *.tgz filter=lfs diff=lfs merge=lfs -text
30
- *.wasm filter=lfs diff=lfs merge=lfs -text
31
- *.xz filter=lfs diff=lfs merge=lfs -text
32
- *.zip filter=lfs diff=lfs merge=lfs -text
33
- *.zst filter=lfs diff=lfs merge=lfs -text
34
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.zeno_cache/OUTPUTsilero_sst.pickle ADDED
Binary file (768 kB). View file
.zeno_cache/OUTPUTwhisper.pickle ADDED
Binary file (804 kB). View file
.zeno_cache/POSTDISTILLwer_msilero_sst.pickle ADDED
Binary file (63.7 kB). View file
.zeno_cache/POSTDISTILLwer_mwhisper.pickle ADDED
Binary file (63.7 kB). View file
.zeno_cache/PREDISTILLamplitude.pickle ADDED
Binary file (63.7 kB). View file
.zeno_cache/PREDISTILLcountry.pickle ADDED
Binary file (65 kB). View file
.zeno_cache/PREDISTILLlength.pickle ADDED
Binary file (55.1 kB). View file
.zeno_cache/folders.pickle ADDED
Binary file (28 Bytes). View file
.zeno_cache/reports.pickle ADDED
Binary file (7.95 kB). View file
.zeno_cache/slices.pickle ADDED
Binary file (2.87 kB). View file
.zeno_cache/view.mjs ADDED
@@ -0,0 +1,788 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ function noop() { }
2
+ function run(fn) {
3
+ return fn();
4
+ }
5
+ function blank_object() {
6
+ return Object.create(null);
7
+ }
8
+ function run_all(fns) {
9
+ fns.forEach(run);
10
+ }
11
+ function is_function(thing) {
12
+ return typeof thing === 'function';
13
+ }
14
+ function safe_not_equal(a, b) {
15
+ return a != a ? b == b : a !== b || ((a && typeof a === 'object') || typeof a === 'function');
16
+ }
17
+ let src_url_equal_anchor;
18
+ function src_url_equal(element_src, url) {
19
+ if (!src_url_equal_anchor) {
20
+ src_url_equal_anchor = document.createElement('a');
21
+ }
22
+ src_url_equal_anchor.href = url;
23
+ return element_src === src_url_equal_anchor.href;
24
+ }
25
+ function is_empty(obj) {
26
+ return Object.keys(obj).length === 0;
27
+ }
28
+
29
+ // Track which nodes are claimed during hydration. Unclaimed nodes can then be removed from the DOM
30
+ // at the end of hydration without touching the remaining nodes.
31
+ let is_hydrating = false;
32
+ function start_hydrating() {
33
+ is_hydrating = true;
34
+ }
35
+ function end_hydrating() {
36
+ is_hydrating = false;
37
+ }
38
+ function upper_bound(low, high, key, value) {
39
+ // Return first index of value larger than input value in the range [low, high)
40
+ while (low < high) {
41
+ const mid = low + ((high - low) >> 1);
42
+ if (key(mid) <= value) {
43
+ low = mid + 1;
44
+ }
45
+ else {
46
+ high = mid;
47
+ }
48
+ }
49
+ return low;
50
+ }
51
+ function init_hydrate(target) {
52
+ if (target.hydrate_init)
53
+ return;
54
+ target.hydrate_init = true;
55
+ // We know that all children have claim_order values since the unclaimed have been detached if target is not <head>
56
+ let children = target.childNodes;
57
+ // If target is <head>, there may be children without claim_order
58
+ if (target.nodeName === 'HEAD') {
59
+ const myChildren = [];
60
+ for (let i = 0; i < children.length; i++) {
61
+ const node = children[i];
62
+ if (node.claim_order !== undefined) {
63
+ myChildren.push(node);
64
+ }
65
+ }
66
+ children = myChildren;
67
+ }
68
+ /*
69
+ * Reorder claimed children optimally.
70
+ * We can reorder claimed children optimally by finding the longest subsequence of
71
+ * nodes that are already claimed in order and only moving the rest. The longest
72
- * subsequence of nodes that are claimed in order can be found by
73
+ * computing the longest increasing subsequence of .claim_order values.
74
+ *
75
+ * This algorithm is optimal in generating the least amount of reorder operations
76
+ * possible.
77
+ *
78
+ * Proof:
79
+ * We know that, given a set of reordering operations, the nodes that do not move
80
+ * always form an increasing subsequence, since they do not move among each other
81
+ * meaning that they must be already ordered among each other. Thus, the maximal
82
+ * set of nodes that do not move form a longest increasing subsequence.
83
+ */
84
+ // Compute longest increasing subsequence
85
+ // m: subsequence length j => index k of smallest value that ends an increasing subsequence of length j
86
+ const m = new Int32Array(children.length + 1);
87
+ // Predecessor indices + 1
88
+ const p = new Int32Array(children.length);
89
+ m[0] = -1;
90
+ let longest = 0;
91
+ for (let i = 0; i < children.length; i++) {
92
+ const current = children[i].claim_order;
93
+ // Find the largest subsequence length such that it ends in a value less than our current value
94
+ // upper_bound returns first greater value, so we subtract one
95
+ // with fast path for when we are on the current longest subsequence
96
+ const seqLen = ((longest > 0 && children[m[longest]].claim_order <= current) ? longest + 1 : upper_bound(1, longest, idx => children[m[idx]].claim_order, current)) - 1;
97
+ p[i] = m[seqLen] + 1;
98
+ const newLen = seqLen + 1;
99
+ // We can guarantee that current is the smallest value. Otherwise, we would have generated a longer sequence.
100
+ m[newLen] = i;
101
+ longest = Math.max(newLen, longest);
102
+ }
103
+ // The longest increasing subsequence of nodes (initially reversed)
104
+ const lis = [];
105
+ // The rest of the nodes, nodes that will be moved
106
+ const toMove = [];
107
+ let last = children.length - 1;
108
+ for (let cur = m[longest] + 1; cur != 0; cur = p[cur - 1]) {
109
+ lis.push(children[cur - 1]);
110
+ for (; last >= cur; last--) {
111
+ toMove.push(children[last]);
112
+ }
113
+ last--;
114
+ }
115
+ for (; last >= 0; last--) {
116
+ toMove.push(children[last]);
117
+ }
118
+ lis.reverse();
119
+ // We sort the nodes being moved to guarantee that their insertion order matches the claim order
120
+ toMove.sort((a, b) => a.claim_order - b.claim_order);
121
+ // Finally, we move the nodes
122
+ for (let i = 0, j = 0; i < toMove.length; i++) {
123
+ while (j < lis.length && toMove[i].claim_order >= lis[j].claim_order) {
124
+ j++;
125
+ }
126
+ const anchor = j < lis.length ? lis[j] : null;
127
+ target.insertBefore(toMove[i], anchor);
128
+ }
129
+ }
130
+ function append(target, node) {
131
+ target.appendChild(node);
132
+ }
133
+ function append_styles(target, style_sheet_id, styles) {
134
+ const append_styles_to = get_root_for_style(target);
135
+ if (!append_styles_to.getElementById(style_sheet_id)) {
136
+ const style = element('style');
137
+ style.id = style_sheet_id;
138
+ style.textContent = styles;
139
+ append_stylesheet(append_styles_to, style);
140
+ }
141
+ }
142
+ function get_root_for_style(node) {
143
+ if (!node)
144
+ return document;
145
+ const root = node.getRootNode ? node.getRootNode() : node.ownerDocument;
146
+ if (root && root.host) {
147
+ return root;
148
+ }
149
+ return node.ownerDocument;
150
+ }
151
+ function append_stylesheet(node, style) {
152
+ append(node.head || node, style);
153
+ }
154
+ function append_hydration(target, node) {
155
+ if (is_hydrating) {
156
+ init_hydrate(target);
157
+ if ((target.actual_end_child === undefined) || ((target.actual_end_child !== null) && (target.actual_end_child.parentElement !== target))) {
158
+ target.actual_end_child = target.firstChild;
159
+ }
160
+ // Skip nodes of undefined ordering
161
+ while ((target.actual_end_child !== null) && (target.actual_end_child.claim_order === undefined)) {
162
+ target.actual_end_child = target.actual_end_child.nextSibling;
163
+ }
164
+ if (node !== target.actual_end_child) {
165
+ // We only insert if the ordering of this node should be modified or the parent node is not target
166
+ if (node.claim_order !== undefined || node.parentNode !== target) {
167
+ target.insertBefore(node, target.actual_end_child);
168
+ }
169
+ }
170
+ else {
171
+ target.actual_end_child = node.nextSibling;
172
+ }
173
+ }
174
+ else if (node.parentNode !== target || node.nextSibling !== null) {
175
+ target.appendChild(node);
176
+ }
177
+ }
178
+ function insert_hydration(target, node, anchor) {
179
+ if (is_hydrating && !anchor) {
180
+ append_hydration(target, node);
181
+ }
182
+ else if (node.parentNode !== target || node.nextSibling != anchor) {
183
+ target.insertBefore(node, anchor || null);
184
+ }
185
+ }
186
+ function detach(node) {
187
+ node.parentNode.removeChild(node);
188
+ }
189
+ function element(name) {
190
+ return document.createElement(name);
191
+ }
192
+ function text(data) {
193
+ return document.createTextNode(data);
194
+ }
195
+ function space() {
196
+ return text(' ');
197
+ }
198
+ function attr(node, attribute, value) {
199
+ if (value == null)
200
+ node.removeAttribute(attribute);
201
+ else if (node.getAttribute(attribute) !== value)
202
+ node.setAttribute(attribute, value);
203
+ }
204
+ function children(element) {
205
+ return Array.from(element.childNodes);
206
+ }
207
+ function init_claim_info(nodes) {
208
+ if (nodes.claim_info === undefined) {
209
+ nodes.claim_info = { last_index: 0, total_claimed: 0 };
210
+ }
211
+ }
212
+ function claim_node(nodes, predicate, processNode, createNode, dontUpdateLastIndex = false) {
213
+ // Try to find nodes in an order such that we lengthen the longest increasing subsequence
214
+ init_claim_info(nodes);
215
+ const resultNode = (() => {
216
+ // We first try to find an element after the previous one
217
+ for (let i = nodes.claim_info.last_index; i < nodes.length; i++) {
218
+ const node = nodes[i];
219
+ if (predicate(node)) {
220
+ const replacement = processNode(node);
221
+ if (replacement === undefined) {
222
+ nodes.splice(i, 1);
223
+ }
224
+ else {
225
+ nodes[i] = replacement;
226
+ }
227
+ if (!dontUpdateLastIndex) {
228
+ nodes.claim_info.last_index = i;
229
+ }
230
+ return node;
231
+ }
232
+ }
233
+ // Otherwise, we try to find one before
234
+ // We iterate in reverse so that we don't go too far back
235
+ for (let i = nodes.claim_info.last_index - 1; i >= 0; i--) {
236
+ const node = nodes[i];
237
+ if (predicate(node)) {
238
+ const replacement = processNode(node);
239
+ if (replacement === undefined) {
240
+ nodes.splice(i, 1);
241
+ }
242
+ else {
243
+ nodes[i] = replacement;
244
+ }
245
+ if (!dontUpdateLastIndex) {
246
+ nodes.claim_info.last_index = i;
247
+ }
248
+ else if (replacement === undefined) {
249
+ // Since we spliced before the last_index, we decrease it
250
+ nodes.claim_info.last_index--;
251
+ }
252
+ return node;
253
+ }
254
+ }
255
+ // If we can't find any matching node, we create a new one
256
+ return createNode();
257
+ })();
258
+ resultNode.claim_order = nodes.claim_info.total_claimed;
259
+ nodes.claim_info.total_claimed += 1;
260
+ return resultNode;
261
+ }
262
+ function claim_element_base(nodes, name, attributes, create_element) {
263
+ return claim_node(nodes, (node) => node.nodeName === name, (node) => {
264
+ const remove = [];
265
+ for (let j = 0; j < node.attributes.length; j++) {
266
+ const attribute = node.attributes[j];
267
+ if (!attributes[attribute.name]) {
268
+ remove.push(attribute.name);
269
+ }
270
+ }
271
+ remove.forEach(v => node.removeAttribute(v));
272
+ return undefined;
273
+ }, () => create_element(name));
274
+ }
275
+ function claim_element(nodes, name, attributes) {
276
+ return claim_element_base(nodes, name, attributes, element);
277
+ }
278
+ function claim_text(nodes, data) {
279
+ return claim_node(nodes, (node) => node.nodeType === 3, (node) => {
280
+ const dataStr = '' + data;
281
+ if (node.data.startsWith(dataStr)) {
282
+ if (node.data.length !== dataStr.length) {
283
+ return node.splitText(dataStr.length);
284
+ }
285
+ }
286
+ else {
287
+ node.data = dataStr;
288
+ }
289
+ }, () => text(data), true // Text nodes should not update last index since it is likely not worth it to eliminate an increasing subsequence of actual elements
290
+ );
291
+ }
292
+ function claim_space(nodes) {
293
+ return claim_text(nodes, ' ');
294
+ }
295
+ function set_data(text, data) {
296
+ data = '' + data;
297
+ if (text.wholeText !== data)
298
+ text.data = data;
299
+ }
300
+ function set_style(node, key, value, important) {
301
+ if (value === null) {
302
+ node.style.removeProperty(key);
303
+ }
304
+ else {
305
+ node.style.setProperty(key, value, important ? 'important' : '');
306
+ }
307
+ }
308
+
309
+ let current_component;
310
+ function set_current_component(component) {
311
+ current_component = component;
312
+ }
313
+
314
+ const dirty_components = [];
315
+ const binding_callbacks = [];
316
+ const render_callbacks = [];
317
+ const flush_callbacks = [];
318
+ const resolved_promise = Promise.resolve();
319
+ let update_scheduled = false;
320
+ function schedule_update() {
321
+ if (!update_scheduled) {
322
+ update_scheduled = true;
323
+ resolved_promise.then(flush);
324
+ }
325
+ }
326
+ function add_render_callback(fn) {
327
+ render_callbacks.push(fn);
328
+ }
329
+ // flush() calls callbacks in this order:
330
+ // 1. All beforeUpdate callbacks, in order: parents before children
331
+ // 2. All bind:this callbacks, in reverse order: children before parents.
332
+ // 3. All afterUpdate callbacks, in order: parents before children. EXCEPT
333
+ // for afterUpdates called during the initial onMount, which are called in
334
+ // reverse order: children before parents.
335
+ // Since callbacks might update component values, which could trigger another
336
+ // call to flush(), the following steps guard against this:
337
+ // 1. During beforeUpdate, any updated components will be added to the
338
+ // dirty_components array and will cause a reentrant call to flush(). Because
339
+ // the flush index is kept outside the function, the reentrant call will pick
340
+ // up where the earlier call left off and go through all dirty components. The
341
+ // current_component value is saved and restored so that the reentrant call will
342
+ // not interfere with the "parent" flush() call.
343
+ // 2. bind:this callbacks cannot trigger new flush() calls.
344
+ // 3. During afterUpdate, any updated components will NOT have their afterUpdate
345
+ // callback called a second time; the seen_callbacks set, outside the flush()
346
+ // function, guarantees this behavior.
347
+ const seen_callbacks = new Set();
348
+ let flushidx = 0; // Do *not* move this inside the flush() function
349
+ function flush() {
350
+ const saved_component = current_component;
351
+ do {
352
+ // first, call beforeUpdate functions
353
+ // and update components
354
+ while (flushidx < dirty_components.length) {
355
+ const component = dirty_components[flushidx];
356
+ flushidx++;
357
+ set_current_component(component);
358
+ update(component.$$);
359
+ }
360
+ set_current_component(null);
361
+ dirty_components.length = 0;
362
+ flushidx = 0;
363
+ while (binding_callbacks.length)
364
+ binding_callbacks.pop()();
365
+ // then, once components are updated, call
366
+ // afterUpdate functions. This may cause
367
+ // subsequent updates...
368
+ for (let i = 0; i < render_callbacks.length; i += 1) {
369
+ const callback = render_callbacks[i];
370
+ if (!seen_callbacks.has(callback)) {
371
+ // ...so guard against infinite loops
372
+ seen_callbacks.add(callback);
373
+ callback();
374
+ }
375
+ }
376
+ render_callbacks.length = 0;
377
+ } while (dirty_components.length);
378
+ while (flush_callbacks.length) {
379
+ flush_callbacks.pop()();
380
+ }
381
+ update_scheduled = false;
382
+ seen_callbacks.clear();
383
+ set_current_component(saved_component);
384
+ }
385
+ function update($$) {
386
+ if ($$.fragment !== null) {
387
+ $$.update();
388
+ run_all($$.before_update);
389
+ const dirty = $$.dirty;
390
+ $$.dirty = [-1];
391
+ $$.fragment && $$.fragment.p($$.ctx, dirty);
392
+ $$.after_update.forEach(add_render_callback);
393
+ }
394
+ }
395
+ const outroing = new Set();
396
+ function transition_in(block, local) {
397
+ if (block && block.i) {
398
+ outroing.delete(block);
399
+ block.i(local);
400
+ }
401
+ }
402
+ function mount_component(component, target, anchor, customElement) {
403
+ const { fragment, on_mount, on_destroy, after_update } = component.$$;
404
+ fragment && fragment.m(target, anchor);
405
+ if (!customElement) {
406
+ // onMount happens before the initial afterUpdate
407
+ add_render_callback(() => {
408
+ const new_on_destroy = on_mount.map(run).filter(is_function);
409
+ if (on_destroy) {
410
+ on_destroy.push(...new_on_destroy);
411
+ }
412
+ else {
413
+ // Edge case - component was destroyed immediately,
414
+ // most likely as a result of a binding initialising
415
+ run_all(new_on_destroy);
416
+ }
417
+ component.$$.on_mount = [];
418
+ });
419
+ }
420
+ after_update.forEach(add_render_callback);
421
+ }
422
+ function destroy_component(component, detaching) {
423
+ const $$ = component.$$;
424
+ if ($$.fragment !== null) {
425
+ run_all($$.on_destroy);
426
+ $$.fragment && $$.fragment.d(detaching);
427
+ // TODO null out other refs, including component.$$ (but need to
428
+ // preserve final state?)
429
+ $$.on_destroy = $$.fragment = null;
430
+ $$.ctx = [];
431
+ }
432
+ }
433
+ function make_dirty(component, i) {
434
+ if (component.$$.dirty[0] === -1) {
435
+ dirty_components.push(component);
436
+ schedule_update();
437
+ component.$$.dirty.fill(0);
438
+ }
439
+ component.$$.dirty[(i / 31) | 0] |= (1 << (i % 31));
440
+ }
441
+ function init(component, options, instance, create_fragment, not_equal, props, append_styles, dirty = [-1]) {
442
+ const parent_component = current_component;
443
+ set_current_component(component);
444
+ const $$ = component.$$ = {
445
+ fragment: null,
446
+ ctx: null,
447
+ // state
448
+ props,
449
+ update: noop,
450
+ not_equal,
451
+ bound: blank_object(),
452
+ // lifecycle
453
+ on_mount: [],
454
+ on_destroy: [],
455
+ on_disconnect: [],
456
+ before_update: [],
457
+ after_update: [],
458
+ context: new Map(options.context || (parent_component ? parent_component.$$.context : [])),
459
+ // everything else
460
+ callbacks: blank_object(),
461
+ dirty,
462
+ skip_bound: false,
463
+ root: options.target || parent_component.$$.root
464
+ };
465
+ append_styles && append_styles($$.root);
466
+ let ready = false;
467
+ $$.ctx = instance
468
+ ? instance(component, options.props || {}, (i, ret, ...rest) => {
469
+ const value = rest.length ? rest[0] : ret;
470
+ if ($$.ctx && not_equal($$.ctx[i], $$.ctx[i] = value)) {
471
+ if (!$$.skip_bound && $$.bound[i])
472
+ $$.bound[i](value);
473
+ if (ready)
474
+ make_dirty(component, i);
475
+ }
476
+ return ret;
477
+ })
478
+ : [];
479
+ $$.update();
480
+ ready = true;
481
+ run_all($$.before_update);
482
+ // `false` as a special case of no DOM component
483
+ $$.fragment = create_fragment ? create_fragment($$.ctx) : false;
484
+ if (options.target) {
485
+ if (options.hydrate) {
486
+ start_hydrating();
487
+ const nodes = children(options.target);
488
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
489
+ $$.fragment && $$.fragment.l(nodes);
490
+ nodes.forEach(detach);
491
+ }
492
+ else {
493
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
494
+ $$.fragment && $$.fragment.c();
495
+ }
496
+ if (options.intro)
497
+ transition_in(component.$$.fragment);
498
+ mount_component(component, options.target, options.anchor, options.customElement);
499
+ end_hydrating();
500
+ flush();
501
+ }
502
+ set_current_component(parent_component);
503
+ }
504
+ /**
505
+ * Base class for Svelte components. Used when dev=false.
506
+ */
507
+ class SvelteComponent {
508
+ $destroy() {
509
+ destroy_component(this, 1);
510
+ this.$destroy = noop;
511
+ }
512
+ $on(type, callback) {
513
+ const callbacks = (this.$$.callbacks[type] || (this.$$.callbacks[type] = []));
514
+ callbacks.push(callback);
515
+ return () => {
516
+ const index = callbacks.indexOf(callback);
517
+ if (index !== -1)
518
+ callbacks.splice(index, 1);
519
+ };
520
+ }
521
+ $set($$props) {
522
+ if (this.$$set && !is_empty($$props)) {
523
+ this.$$.skip_bound = true;
524
+ this.$$set($$props);
525
+ this.$$.skip_bound = false;
526
+ }
527
+ }
528
+ }
529
+
530
+ /* src/InstanceView.svelte generated by Svelte v3.49.0 */
531
+
532
+ function add_css(target) {
533
+ append_styles(target, "svelte-1qkvlix", ".label.svelte-1qkvlix{font-size:12px;color:rgba(0, 0, 0, 0.5);font-variant:small-caps}.value.svelte-1qkvlix{font-size:12px}.box.svelte-1qkvlix{padding:10px;border:0.5px solid rgb(224, 224, 224);max-width:400px}#container.svelte-1qkvlix{display:flex;flex-direction:row;flex-wrap:wrap}spectrogram canvas{z-index:0 !important}wave canvas{z-index:0 !important}wave{z-index:0 !important}");
534
+ }
535
+
536
+ // (27:4) {#if modelColumn && entry[modelColumn] !== undefined}
537
+ function create_if_block(ctx) {
538
+ let br;
539
+ let t0;
540
+ let span0;
541
+ let t1;
542
+ let t2;
543
+ let span1;
544
+ let t3_value = /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] + "";
545
+ let t3;
546
+
547
+ return {
548
+ c() {
549
+ br = element("br");
550
+ t0 = space();
551
+ span0 = element("span");
552
+ t1 = text("output:");
553
+ t2 = space();
554
+ span1 = element("span");
555
+ t3 = text(t3_value);
556
+ this.h();
557
+ },
558
+ l(nodes) {
559
+ br = claim_element(nodes, "BR", {});
560
+ t0 = claim_space(nodes);
561
+ span0 = claim_element(nodes, "SPAN", { class: true });
562
+ var span0_nodes = children(span0);
563
+ t1 = claim_text(span0_nodes, "output:");
564
+ span0_nodes.forEach(detach);
565
+ t2 = claim_space(nodes);
566
+ span1 = claim_element(nodes, "SPAN", { class: true });
567
+ var span1_nodes = children(span1);
568
+ t3 = claim_text(span1_nodes, t3_value);
569
+ span1_nodes.forEach(detach);
570
+ this.h();
571
+ },
572
+ h() {
573
+ attr(span0, "class", "label svelte-1qkvlix");
574
+ attr(span1, "class", "value svelte-1qkvlix");
575
+ },
576
+ m(target, anchor) {
577
+ insert_hydration(target, br, anchor);
578
+ insert_hydration(target, t0, anchor);
579
+ insert_hydration(target, span0, anchor);
580
+ append_hydration(span0, t1);
581
+ insert_hydration(target, t2, anchor);
582
+ insert_hydration(target, span1, anchor);
583
+ append_hydration(span1, t3);
584
+ },
585
+ p(ctx, dirty) {
586
+ if (dirty & /*entry, modelColumn*/ 3 && t3_value !== (t3_value = /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] + "")) set_data(t3, t3_value);
587
+ },
588
+ d(detaching) {
589
+ if (detaching) detach(br);
590
+ if (detaching) detach(t0);
591
+ if (detaching) detach(span0);
592
+ if (detaching) detach(t2);
593
+ if (detaching) detach(span1);
594
+ }
595
+ };
596
+ }
597
+
598
+ function create_fragment(ctx) {
599
+ let div2;
600
+ let div1;
601
+ let div0;
602
+ let audio;
603
+ let source;
604
+ let source_src_value;
605
+ let source_type_value;
606
+ let audio_src_value;
607
+ let t0;
608
+ let span0;
609
+ let t1;
610
+ let span1;
611
+ let t2_value = /*entry*/ ctx[0][/*labelColumn*/ ctx[2]] + "";
612
+ let t2;
613
+ let t3;
614
+ let if_block = /*modelColumn*/ ctx[1] && /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] !== undefined && create_if_block(ctx);
615
+
616
+ return {
617
+ c() {
618
+ div2 = element("div");
619
+ div1 = element("div");
620
+ div0 = element("div");
621
+ audio = element("audio");
622
+ source = element("source");
623
+ t0 = space();
624
+ span0 = element("span");
625
+ t1 = text("label: ");
626
+ span1 = element("span");
627
+ t2 = text(t2_value);
628
+ t3 = space();
629
+ if (if_block) if_block.c();
630
+ this.h();
631
+ },
632
+ l(nodes) {
633
+ div2 = claim_element(nodes, "DIV", { id: true, class: true });
634
+ var div2_nodes = children(div2);
635
+ div1 = claim_element(div2_nodes, "DIV", { class: true });
636
+ var div1_nodes = children(div1);
637
+ div0 = claim_element(div1_nodes, "DIV", {});
638
+ var div0_nodes = children(div0);
639
+ audio = claim_element(div0_nodes, "AUDIO", { src: true });
640
+ var audio_nodes = children(audio);
641
+ source = claim_element(audio_nodes, "SOURCE", { src: true, type: true });
642
+ audio_nodes.forEach(detach);
643
+ div0_nodes.forEach(detach);
644
+ t0 = claim_space(div1_nodes);
645
+ span0 = claim_element(div1_nodes, "SPAN", { class: true });
646
+ var span0_nodes = children(span0);
647
+ t1 = claim_text(span0_nodes, "label: ");
648
+ span0_nodes.forEach(detach);
649
+ span1 = claim_element(div1_nodes, "SPAN", { class: true });
650
+ var span1_nodes = children(span1);
651
+ t2 = claim_text(span1_nodes, t2_value);
652
+ span1_nodes.forEach(detach);
653
+ t3 = claim_space(div1_nodes);
654
+ if (if_block) if_block.l(div1_nodes);
655
+ div1_nodes.forEach(detach);
656
+ div2_nodes.forEach(detach);
657
+ this.h();
658
+ },
659
+ h() {
660
+ if (!src_url_equal(source.src, source_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) attr(source, "src", source_src_value);
661
+ attr(source, "type", source_type_value = "audio/" + /*entry*/ ctx[0][/*idColumn*/ ctx[4]].split(".").at(-1));
662
+ audio.controls = true;
663
+ if (!src_url_equal(audio.src, audio_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) attr(audio, "src", audio_src_value);
664
+ set_style(div0, "display", `flex`, false);
665
+ attr(span0, "class", "label svelte-1qkvlix");
666
+ attr(span1, "class", "value svelte-1qkvlix");
667
+ attr(div1, "class", "box svelte-1qkvlix");
668
+ attr(div2, "id", "container");
669
+ attr(div2, "class", "svelte-1qkvlix");
670
+ },
671
+ m(target, anchor) {
672
+ insert_hydration(target, div2, anchor);
673
+ append_hydration(div2, div1);
674
+ append_hydration(div1, div0);
675
+ append_hydration(div0, audio);
676
+ append_hydration(audio, source);
677
+ append_hydration(div1, t0);
678
+ append_hydration(div1, span0);
679
+ append_hydration(span0, t1);
680
+ append_hydration(div1, span1);
681
+ append_hydration(span1, t2);
682
+ append_hydration(div1, t3);
683
+ if (if_block) if_block.m(div1, null);
684
+ },
685
+ p(ctx, [dirty]) {
686
+ if (dirty & /*entry, dataColumn*/ 9 && !src_url_equal(source.src, source_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) {
687
+ attr(source, "src", source_src_value);
688
+ }
689
+
690
+ if (dirty & /*entry, idColumn*/ 17 && source_type_value !== (source_type_value = "audio/" + /*entry*/ ctx[0][/*idColumn*/ ctx[4]].split(".").at(-1))) {
691
+ attr(source, "type", source_type_value);
692
+ }
693
+
694
+ if (dirty & /*entry, dataColumn*/ 9 && !src_url_equal(audio.src, audio_src_value = `${/*entry*/ ctx[0][/*dataColumn*/ ctx[3]]}`)) {
695
+ attr(audio, "src", audio_src_value);
696
+ }
697
+
698
+ if (dirty & /*entry, labelColumn*/ 5 && t2_value !== (t2_value = /*entry*/ ctx[0][/*labelColumn*/ ctx[2]] + "")) set_data(t2, t2_value);
699
+
700
+ if (/*modelColumn*/ ctx[1] && /*entry*/ ctx[0][/*modelColumn*/ ctx[1]] !== undefined) {
701
+ if (if_block) {
702
+ if_block.p(ctx, dirty);
703
+ } else {
704
+ if_block = create_if_block(ctx);
705
+ if_block.c();
706
+ if_block.m(div1, null);
707
+ }
708
+ } else if (if_block) {
709
+ if_block.d(1);
710
+ if_block = null;
711
+ }
712
+ },
713
+ i: noop,
714
+ o: noop,
715
+ d(detaching) {
716
+ if (detaching) detach(div2);
717
+ if (if_block) if_block.d();
718
+ }
719
+ };
720
+ }
721
+
722
+ function instance($$self, $$props, $$invalidate) {
723
+ let { entry } = $$props;
724
+ let { options } = $$props;
725
+ let { modelColumn } = $$props;
726
+ let { labelColumn } = $$props;
727
+ let { dataColumn } = $$props;
728
+ let { idColumn } = $$props;
729
+
730
+ $$self.$$set = $$props => {
731
+ if ('entry' in $$props) $$invalidate(0, entry = $$props.entry);
732
+ if ('options' in $$props) $$invalidate(5, options = $$props.options);
733
+ if ('modelColumn' in $$props) $$invalidate(1, modelColumn = $$props.modelColumn);
734
+ if ('labelColumn' in $$props) $$invalidate(2, labelColumn = $$props.labelColumn);
735
+ if ('dataColumn' in $$props) $$invalidate(3, dataColumn = $$props.dataColumn);
736
+ if ('idColumn' in $$props) $$invalidate(4, idColumn = $$props.idColumn);
737
+ };
738
+
739
+ return [entry, modelColumn, labelColumn, dataColumn, idColumn, options];
740
+ }
741
+
742
+ class InstanceView extends SvelteComponent {
743
+ constructor(options) {
744
+ super();
745
+
746
+ init(
747
+ this,
748
+ options,
749
+ instance,
750
+ create_fragment,
751
+ safe_not_equal,
752
+ {
753
+ entry: 0,
754
+ options: 5,
755
+ modelColumn: 1,
756
+ labelColumn: 2,
757
+ dataColumn: 3,
758
+ idColumn: 4
759
+ },
760
+ add_css
761
+ );
762
+ }
763
+ }
764
+
765
+ function getInstance(
766
+ div,
767
+ options,
768
+ entry,
769
+ modelColumn,
770
+ labelColumn,
771
+ dataColumn,
772
+ idColumn
773
+ ) {
774
+ new InstanceView({
775
+ target: div,
776
+ props: {
777
+ entry: entry,
778
+ options: options,
779
+ modelColumn: modelColumn,
780
+ labelColumn: labelColumn,
781
+ dataColumn: dataColumn,
782
+ idColumn: idColumn,
783
+ },
784
+ hydrate: true,
785
+ });
786
+ }
787
+
788
+ export { getInstance };
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.8

RUN useradd -m -u 1000 user
USER user
# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# Install dependencies before copying the application so this layer is
# cached and pip does not re-run on every source change.
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r $HOME/app/requirements.txt

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
# (this already includes ./.zeno_cache, so no separate ADD is needed, and
# --chown makes the recursive chown step unnecessary).
COPY --chown=user . $HOME/app

CMD ["zeno", "config.toml"]
config.toml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ view = "audio-transcription"
2
+ functions = "./functions/"
3
+ models = ["silero_sst", "whisper"]
4
+ metadata = "metadata.csv"
5
+ data_path = "/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/"
6
+ data_column = "id"
7
+ id_column = "id"
8
+ label_column = "label"
9
+ port = 7860
10
+ host = "0.0.0.0"
11
+ editable = false
functions/audio_characteristics.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import librosa
4
+ import numpy as np
5
+ from zeno import ZenoOptions, distill, DistillReturn
6
+
7
+
8
@distill
def amplitude(df, ops: ZenoOptions):
    """Mean absolute amplitude of each audio file referenced by the data column."""
    results = []
    for name in df[ops.data_column]:
        waveform, _ = librosa.load(os.path.join(ops.data_path, name))
        results.append(float(np.abs(waveform).mean()))
    return DistillReturn(distill_output=results)
16
+
17
+
18
@distill
def length(df, ops: ZenoOptions):
    """Number of audio samples in each file referenced by the data column."""
    lengths = []
    for name in df[ops.data_column]:
        waveform, _ = librosa.load(os.path.join(ops.data_path, name))
        lengths.append(len(waveform))
    return DistillReturn(distill_output=lengths)
functions/model.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import os
3
+
4
+ import pandas as pd
5
+ import torch
6
+ import whisper
7
+ from jiwer import wer
8
+
9
+ from zeno import (
10
+ ZenoOptions,
11
+ distill,
12
+ metric,
13
+ model,
14
+ DistillReturn,
15
+ ModelReturn,
16
+ MetricReturn,
17
+ )
18
+
19
+
20
@model
def load_model(model_path):
    """Return a prediction function for the requested speech-to-text model.

    Args:
        model_path: Model identifier; a path containing "sst" selects the
            Silero STT model (via torch.hub), one containing "whisper"
            selects OpenAI Whisper ("tiny").

    Returns:
        A function (df, ops: ZenoOptions) -> ModelReturn producing one
        transcription per row of df.

    Raises:
        ValueError: If model_path matches no known model (previously this
            silently returned None).
    """
    if "sst" in model_path:
        device = torch.device("cpu")
        model, decoder, utils = torch.hub.load(
            repo_or_dir="snakers4/silero-models",
            model="silero_stt",
            language="en",
            device=device,
        )
        (read_batch, _, _, prepare_model_input) = utils

        def pred(df, ops: ZenoOptions):
            files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]
            # Batch all files into a single model input tensor.
            batch = prepare_model_input(read_batch(files), device=device)
            return ModelReturn(model_output=[decoder(x.cpu()) for x in model(batch)])

        return pred

    if "whisper" in model_path:
        model = whisper.load_model("tiny")

        def pred(df, ops: ZenoOptions):
            files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]
            # Whisper transcribes one file at a time.
            return ModelReturn(model_output=[model.transcribe(f)["text"] for f in files])

        return pred

    raise ValueError(f"Unknown model: {model_path}")
50
+
51
+
52
@distill
def country(df, ops: ZenoOptions):
    """Extract the country (the last comma-separated token) of each row's birthplace.

    The original implementation checked only row 0 for NaN and indexed the
    split Series positionally, returning a single-element list even for
    multi-row frames. This version produces exactly one entry per row,
    with an empty string for missing (non-string) birthplaces.
    """
    countries = [
        birthplace.split(", ")[-1] if isinstance(birthplace, str) else ""
        for birthplace in df["birthplace"]
    ]
    return DistillReturn(distill_output=countries)
57
+
58
+
59
@distill
def wer_m(df, ops: ZenoOptions):
    """Per-row word error rate between the label column and the model output column."""
    scores = df.apply(
        lambda row: wer(row[ops.label_column], row[ops.output_column]),
        axis=1,
    )
    return DistillReturn(distill_output=scores)
66
+
67
+
68
@metric
def avg_wer(df, ops: ZenoOptions):
    """Mean word error rate over the wer_m distill column.

    Returns 0 when the mean is undefined (e.g. an empty slice yields NaN).
    """
    value = df[ops.distill_columns["wer_m"]].mean()
    if not (pd.isnull(value) or math.isnan(value)):
        return MetricReturn(metric=value)
    return MetricReturn(metric=0)
jupyter_accent.ipynb ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "%load_ext autoreload\n",
10
+ "%autoreload 2\n",
11
+ "\n",
12
+ "from zeno import zeno\n",
13
+ "import math\n",
14
+ "import os\n",
15
+ "import pandas as pd"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "code",
20
+ "execution_count": 9,
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "df = pd.read_csv(\"metadata.csv\")"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 10,
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "df.set_index('id', inplace=True, drop=False)"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 12,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "ename": "ValueError",
43
+ "evalue": "'id' is both an index level and a column label, which is ambiguous.",
44
+ "output_type": "error",
45
+ "traceback": [
46
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
47
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
48
+ "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df\u001b[39m.\u001b[39;49mgroupby(\u001b[39m'\u001b[39;49m\u001b[39mid\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
49
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/frame.py:8402\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 8399\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTypeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mYou have to supply one of \u001b[39m\u001b[39m'\u001b[39m\u001b[39mby\u001b[39m\u001b[39m'\u001b[39m\u001b[39m and \u001b[39m\u001b[39m'\u001b[39m\u001b[39mlevel\u001b[39m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 8400\u001b[0m axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_axis_number(axis)\n\u001b[0;32m-> 8402\u001b[0m \u001b[39mreturn\u001b[39;00m DataFrameGroupBy(\n\u001b[1;32m 8403\u001b[0m obj\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m,\n\u001b[1;32m 8404\u001b[0m keys\u001b[39m=\u001b[39;49mby,\n\u001b[1;32m 8405\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[1;32m 8406\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[1;32m 8407\u001b[0m as_index\u001b[39m=\u001b[39;49mas_index,\n\u001b[1;32m 8408\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[1;32m 8409\u001b[0m group_keys\u001b[39m=\u001b[39;49mgroup_keys,\n\u001b[1;32m 8410\u001b[0m squeeze\u001b[39m=\u001b[39;49msqueeze,\n\u001b[1;32m 8411\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[1;32m 8412\u001b[0m dropna\u001b[39m=\u001b[39;49mdropna,\n\u001b[1;32m 8413\u001b[0m )\n",
50
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:965\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 962\u001b[0m \u001b[39mif\u001b[39;00m grouper \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 963\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgroupby\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mgrouper\u001b[39;00m \u001b[39mimport\u001b[39;00m get_grouper\n\u001b[0;32m--> 965\u001b[0m grouper, exclusions, obj \u001b[39m=\u001b[39m get_grouper(\n\u001b[1;32m 966\u001b[0m obj,\n\u001b[1;32m 967\u001b[0m keys,\n\u001b[1;32m 968\u001b[0m axis\u001b[39m=\u001b[39;49maxis,\n\u001b[1;32m 969\u001b[0m level\u001b[39m=\u001b[39;49mlevel,\n\u001b[1;32m 970\u001b[0m sort\u001b[39m=\u001b[39;49msort,\n\u001b[1;32m 971\u001b[0m observed\u001b[39m=\u001b[39;49mobserved,\n\u001b[1;32m 972\u001b[0m mutated\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmutated,\n\u001b[1;32m 973\u001b[0m dropna\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdropna,\n\u001b[1;32m 974\u001b[0m )\n\u001b[1;32m 976\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj \u001b[39m=\u001b[39m obj\n\u001b[1;32m 977\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39m=\u001b[39m obj\u001b[39m.\u001b[39m_get_axis_number(axis)\n",
51
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/groupby/grouper.py:878\u001b[0m, in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 876\u001b[0m \u001b[39mif\u001b[39;00m gpr \u001b[39min\u001b[39;00m obj:\n\u001b[1;32m 877\u001b[0m \u001b[39mif\u001b[39;00m validate:\n\u001b[0;32m--> 878\u001b[0m obj\u001b[39m.\u001b[39;49m_check_label_or_level_ambiguity(gpr, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[1;32m 879\u001b[0m in_axis, name, gpr \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m, gpr, obj[gpr]\n\u001b[1;32m 880\u001b[0m \u001b[39mif\u001b[39;00m gpr\u001b[39m.\u001b[39mndim \u001b[39m!=\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m 881\u001b[0m \u001b[39m# non-unique columns; raise here to get the name in the\u001b[39;00m\n\u001b[1;32m 882\u001b[0m \u001b[39m# exception message\u001b[39;00m\n",
52
+ "File \u001b[0;32m~/dev-research/22-zeno/zeno/.venv/lib/python3.8/site-packages/pandas/core/generic.py:1797\u001b[0m, in \u001b[0;36mNDFrame._check_label_or_level_ambiguity\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1789\u001b[0m label_article, label_type \u001b[39m=\u001b[39m (\n\u001b[1;32m 1790\u001b[0m (\u001b[39m\"\u001b[39m\u001b[39ma\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mcolumn\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mif\u001b[39;00m axis \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m \u001b[39melse\u001b[39;00m (\u001b[39m\"\u001b[39m\u001b[39man\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 1791\u001b[0m )\n\u001b[1;32m 1793\u001b[0m msg \u001b[39m=\u001b[39m (\n\u001b[1;32m 1794\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m is both \u001b[39m\u001b[39m{\u001b[39;00mlevel_article\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m{\u001b[39;00mlevel_type\u001b[39m}\u001b[39;00m\u001b[39m level and \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1795\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mlabel_article\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m{\u001b[39;00mlabel_type\u001b[39m}\u001b[39;00m\u001b[39m label, which is ambiguous.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 1796\u001b[0m )\n\u001b[0;32m-> 1797\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(msg)\n",
53
+ "\u001b[0;31mValueError\u001b[0m: 'id' is both an index level and a column label, which is ambiguous."
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "df.groupby('id')"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "metadata": {},
65
+ "outputs": [],
66
+ "source": [
67
+ "zeno({\n",
68
+ " \"metadata\": df[0:10],\n",
69
+ " \"view\": \"audio-transcription\",\n",
70
+ " \"data_path\": \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/\",\n",
71
+ " \"label_column\": \"label\",\n",
72
+ " \"data_column\": \"id\"\n",
73
+ "})"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "import torch\n",
83
+ "import whisper\n",
84
+ "from jiwer import wer\n",
85
+ "from zeno import ZenoOptions, distill, metric, model\n",
86
+ "import numpy as np\n",
87
+ "from zeno import ZenoOptions, distill"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": null,
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "@model\n",
97
+ "def load_model(model_path):\n",
98
+ " if \"sst\" in model_path:\n",
99
+ " device = torch.device(\"cpu\")\n",
100
+ " model, decoder, utils = torch.hub.load(\n",
101
+ " repo_or_dir=\"snakers4/silero-models\",\n",
102
+ " model=\"silero_stt\",\n",
103
+ " language=\"en\",\n",
104
+ " device=device,\n",
105
+ " )\n",
106
+ " (read_batch, _, _, prepare_model_input) = utils\n",
107
+ "\n",
108
+ " def pred(df, ops: ZenoOptions):\n",
109
+ " files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
110
+ " input = prepare_model_input(read_batch(files), device=device)\n",
111
+ " return [decoder(x.cpu()) for x in model(input)]\n",
112
+ "\n",
113
+ " return pred\n",
114
+ "\n",
115
+ " elif \"whisper\" in model_path:\n",
116
+ " model = whisper.load_model(\"tiny\")\n",
117
+ "\n",
118
+ " def pred(df, ops: ZenoOptions):\n",
119
+ " files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
120
+ " outs = []\n",
121
+ " for f in files:\n",
122
+ " outs.append(model.transcribe(f)[\"text\"])\n",
123
+ " return outs\n",
124
+ "\n",
125
+ " return pred\n",
126
+ "\n",
127
+ "\n",
128
+ "@distill\n",
129
+ "def country(df, ops: ZenoOptions):\n",
130
+ " if df[\"0birthplace\"][0] == df[\"0birthplace\"][0]:\n",
131
+ " return df[\"0birthplace\"].str.split(\", \")[-1][-1]\n",
132
+ " return \"\"\n",
133
+ "\n",
134
+ "\n",
135
+ "@distill\n",
136
+ "def wer_m(df, ops: ZenoOptions):\n",
137
+ " return df.apply(lambda x: wer(x[ops.label_column], x[ops.output_column]), axis=1)\n",
138
+ "\n",
139
+ "\n",
140
+ "@metric\n",
141
+ "def avg_wer(df, ops: ZenoOptions):\n",
142
+ " avg = df[ops.distill_columns[\"wer_m\"]].mean()\n",
143
+ " if math.isnan(avg):\n",
144
+ " return 0\n",
145
+ " return avg\n",
146
+ "\n",
147
+ "# @distill\n",
148
+ "# def amplitude(df, ops: ZenoOptions):\n",
149
+ "# files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
150
+ "# amps = []\n",
151
+ "# for audio in files:\n",
152
+ "# y, _ = librosa.load(audio)\n",
153
+ "# amps.append(float(np.abs(y).mean()))\n",
154
+ "# return amps\n",
155
+ "\n",
156
+ "\n",
157
+ "# @distill\n",
158
+ "# def length(df, ops: ZenoOptions):\n",
159
+ "# files = [os.path.join(ops.data_path, f) for f in df[ops.data_column]]\n",
160
+ "# amps = []\n",
161
+ "# for audio in files:\n",
162
+ "# y, _ = librosa.load(audio)\n",
163
+ "# amps.append(len(y))\n",
164
+ "# return amps"
165
+ ]
166
+ },
167
+ {
168
+ "cell_type": "code",
169
+ "execution_count": null,
170
+ "metadata": {
171
+ "tags": []
172
+ },
173
+ "outputs": [],
174
+ "source": [
175
+ "zeno({\n",
176
+ " \"metadata\": df,\n",
177
+ " \"functions\": [load_model, country, wer_m, avg_wer],\n",
178
+ " \"view\": \"audio-transcription\",\n",
179
+ " \"models\": [\"silero_sst\", \"whisper\"],\n",
180
+ " \"data_path\": \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/\",\n",
181
+ " \"data_column\": \"id\",\n",
182
+ " \"label_column\": \"label\",\n",
183
+ " \"samples\": 10,\n",
184
+ "})\n",
185
+ "# metadata = \"metadata.csv\"\n",
186
+ "# # data_path = \"https://zenoml.s3.amazonaws.com/accents/\""
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": []
195
+ }
196
+ ],
197
+ "metadata": {
198
+ "kernelspec": {
199
+ "display_name": "Python 3 (ipykernel)",
200
+ "language": "python",
201
+ "name": "python3"
202
+ },
203
+ "language_info": {
204
+ "codemirror_mode": {
205
+ "name": "ipython",
206
+ "version": 3
207
+ },
208
+ "file_extension": ".py",
209
+ "mimetype": "text/x-python",
210
+ "name": "python",
211
+ "nbconvert_exporter": "python",
212
+ "pygments_lexer": "ipython3",
213
+ "version": "3.8.12"
214
+ },
215
+ "vscode": {
216
+ "interpreter": {
217
+ "hash": "59d606a796fde3c997548ee5ab3f3009081de8aa2fb58c2406e58b3c7613e786"
218
+ }
219
+ }
220
+ },
221
+ "nbformat": 4,
222
+ "nbformat_minor": 4
223
+ }
latest_silero_models.yml ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pre-trained STT models
2
+ stt_models:
3
+ en:
4
+ latest:
5
+ meta:
6
+ name: "en_v6"
7
+ sample: "https://models.silero.ai/examples/en_sample.wav"
8
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
9
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
10
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
11
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
12
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
13
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
14
+ v6:
15
+ meta:
16
+ name: "en_v6"
17
+ sample: "https://models.silero.ai/examples/en_sample.wav"
18
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
19
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
20
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
21
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
22
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
23
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
24
+ v5:
25
+ meta:
26
+ name: "en_v5"
27
+ sample: "https://models.silero.ai/examples/en_sample.wav"
28
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
29
+ jit: "https://models.silero.ai/models/en/en_v5.jit"
30
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
31
+ onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
32
+ jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
33
+ jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
34
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
35
+ v4_0:
36
+ meta:
37
+ name: "en_v4_0"
38
+ sample: "https://models.silero.ai/examples/en_sample.wav"
39
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
40
+ jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
41
+ onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
42
+ v3:
43
+ meta:
44
+ name: "en_v3"
45
+ sample: "https://models.silero.ai/examples/en_sample.wav"
46
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
47
+ jit: "https://models.silero.ai/models/en/en_v3_jit.model"
48
+ onnx: "https://models.silero.ai/models/en/en_v3.onnx"
49
+ jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
50
+ jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
51
+ jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
52
+ onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
53
+ jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
54
+ jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
55
+ onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
56
+ v2:
57
+ meta:
58
+ name: "en_v2"
59
+ sample: "https://models.silero.ai/examples/en_sample.wav"
60
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
61
+ jit: "https://models.silero.ai/models/en/en_v2_jit.model"
62
+ onnx: "https://models.silero.ai/models/en/en_v2.onnx"
63
+ tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
64
+ v1:
65
+ meta:
66
+ name: "en_v1"
67
+ sample: "https://models.silero.ai/examples/en_sample.wav"
68
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
69
+ jit: "https://models.silero.ai/models/en/en_v1_jit.model"
70
+ onnx: "https://models.silero.ai/models/en/en_v1.onnx"
71
+ tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
72
+ de:
73
+ latest:
74
+ meta:
75
+ name: "de_v1"
76
+ sample: "https://models.silero.ai/examples/de_sample.wav"
77
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
78
+ jit: "https://models.silero.ai/models/de/de_v1_jit.model"
79
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
80
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
81
+ v1:
82
+ meta:
83
+ name: "de_v1"
84
+ sample: "https://models.silero.ai/examples/de_sample.wav"
85
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
86
+ jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
87
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
88
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
89
+ v3:
90
+ meta:
91
+ name: "de_v3"
92
+ sample: "https://models.silero.ai/examples/de_sample.wav"
93
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
94
+ jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
95
+ v4:
96
+ meta:
97
+ name: "de_v4"
98
+ sample: "https://models.silero.ai/examples/de_sample.wav"
99
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
100
+ jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
101
+ onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
102
+ es:
103
+ latest:
104
+ meta:
105
+ name: "es_v1"
106
+ sample: "https://models.silero.ai/examples/es_sample.wav"
107
+ labels: "https://models.silero.ai/models/es/es_v1_labels.json"
108
+ jit: "https://models.silero.ai/models/es/es_v1_jit.model"
109
+ onnx: "https://models.silero.ai/models/es/es_v1.onnx"
110
+ tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
111
+ ua:
112
+ latest:
113
+ meta:
114
+ name: "ua_v3"
115
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
116
+ credits:
117
+ datasets:
118
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
119
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
120
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
121
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
122
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
123
+ v3:
124
+ meta:
125
+ name: "ua_v3"
126
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
127
+ credits:
128
+ datasets:
129
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
130
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
131
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
132
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
133
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
134
+ v1:
135
+ meta:
136
+ name: "ua_v1"
137
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
138
+ credits:
139
+ datasets:
140
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
141
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
142
+ jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
143
+ jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
144
+ tts_models:
145
+ ru:
146
+ v3_1_ru:
147
+ latest:
148
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
149
+ package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
150
+ sample_rate: [8000, 24000, 48000]
151
+ ru_v3:
152
+ latest:
153
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
154
+ package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
155
+ sample_rate: [8000, 24000, 48000]
156
+ aidar_v2:
157
+ latest:
158
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
159
+ package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
160
+ sample_rate: [8000, 16000]
161
+ aidar_8khz:
162
+ latest:
163
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
164
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
165
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
166
+ sample_rate: 8000
167
+ v1:
168
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
169
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
170
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
171
+ sample_rate: 8000
172
+ aidar_16khz:
173
+ latest:
174
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
175
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
176
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
177
+ sample_rate: 16000
178
+ v1:
179
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
180
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
181
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
182
+ sample_rate: 16000
183
+ baya_v2:
184
+ latest:
185
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
186
+ package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
187
+ sample_rate: [8000, 16000]
188
+ baya_8khz:
189
+ latest:
190
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
191
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
192
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
193
+ sample_rate: 8000
194
+ v1:
195
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
196
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
197
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
198
+ sample_rate: 8000
199
+ baya_16khz:
200
+ latest:
201
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
202
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
203
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
204
+ sample_rate: 16000
205
+ v1:
206
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
207
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
208
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
209
+ sample_rate: 16000
210
+ irina_v2:
211
+ latest:
212
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
213
+ package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
214
+ sample_rate: [8000, 16000]
215
+ irina_8khz:
216
+ latest:
217
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
218
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
219
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
220
+ sample_rate: 8000
221
+ v1:
222
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
223
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
224
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
225
+ sample_rate: 8000
226
+ irina_16khz:
227
+ latest:
228
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
229
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
230
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
231
+ sample_rate: 16000
232
+ v1:
233
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
234
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
235
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
236
+ sample_rate: 16000
237
+ kseniya_v2:
238
+ latest:
239
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
240
+ package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
241
+ sample_rate: [8000, 16000]
242
+ kseniya_8khz:
243
+ latest:
244
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
245
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
246
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
247
+ sample_rate: 8000
248
+ v1:
249
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
250
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
251
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
252
+ sample_rate: 8000
253
+ kseniya_16khz:
254
+ latest:
255
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
256
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
257
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
258
+ sample_rate: 16000
259
+ v1:
260
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
261
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
262
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
263
+ sample_rate: 16000
264
+ natasha_v2:
265
+ latest:
266
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
267
+ package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
268
+ sample_rate: [8000, 16000]
269
+ natasha_8khz:
270
+ latest:
271
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
272
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
273
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
274
+ sample_rate: 8000
275
+ v1:
276
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
277
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
278
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
279
+ sample_rate: 8000
280
+ natasha_16khz:
281
+ latest:
282
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
283
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
284
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
285
+ sample_rate: 16000
286
+ v1:
287
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
288
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
289
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
290
+ sample_rate: 16000
291
+ ruslan_v2:
292
+ latest:
293
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
294
+ package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
295
+ sample_rate: [8000, 16000]
296
+ ruslan_8khz:
297
+ latest:
298
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
299
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
300
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
301
+ sample_rate: 8000
302
+ v1:
303
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
304
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
305
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
306
+ sample_rate: 8000
307
+ ruslan_16khz:
308
+ latest:
309
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
310
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
311
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
312
+ sample_rate: 16000
313
+ v1:
314
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
315
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
316
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
317
+ sample_rate: 16000
318
+ en:
319
+ v3_en:
320
+ latest:
321
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
322
+ package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
323
+ sample_rate: [8000, 24000, 48000]
324
+ v3_en_indic:
325
+ latest:
326
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
327
+ package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
328
+ sample_rate: [8000, 24000, 48000]
329
+ lj_v2:
330
+ latest:
331
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
332
+ package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
333
+ sample_rate: [8000, 16000]
334
+ lj_8khz:
335
+ latest:
336
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
337
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
338
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
339
+ sample_rate: 8000
340
+ v1:
341
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
342
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
343
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
344
+ sample_rate: 8000
345
+ lj_16khz:
346
+ latest:
347
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
348
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
349
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
350
+ sample_rate: 16000
351
+ v1:
352
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
353
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
354
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
355
+ sample_rate: 16000
356
+ de:
357
+ v3_de:
358
+ latest:
359
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
360
+ package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
361
+ sample_rate: [8000, 24000, 48000]
362
+ thorsten_v2:
363
+ latest:
364
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
365
+ package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
366
+ sample_rate: [8000, 16000]
367
+ thorsten_8khz:
368
+ latest:
369
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
370
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
371
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
372
+ sample_rate: 8000
373
+ v1:
374
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
375
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
376
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
377
+ sample_rate: 8000
378
+ thorsten_16khz:
379
+ latest:
380
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
381
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
382
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
383
+ sample_rate: 16000
384
+ v1:
385
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
386
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
387
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
388
+ sample_rate: 16000
389
+ es:
390
+ v3_es:
391
+ latest:
392
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
393
+ package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
394
+ sample_rate: [8000, 24000, 48000]
395
+ tux_v2:
396
+ latest:
397
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
398
+ package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
399
+ sample_rate: [8000, 16000]
400
+ tux_8khz:
401
+ latest:
402
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
403
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
404
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
405
+ sample_rate: 8000
406
+ v1:
407
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
408
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
409
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
410
+ sample_rate: 8000
411
+ tux_16khz:
412
+ latest:
413
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
414
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
415
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
416
+ sample_rate: 16000
417
+ v1:
418
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
419
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
420
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
421
+ sample_rate: 16000
422
+ fr:
423
+ v3_fr:
424
+ latest:
425
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
426
+ package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
427
+ sample_rate: [8000, 24000, 48000]
428
+ gilles_v2:
429
+ latest:
430
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
431
+ package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
432
+ sample_rate: [8000, 16000]
433
+ gilles_8khz:
434
+ latest:
435
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
436
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
437
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
438
+ sample_rate: 8000
439
+ v1:
440
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
441
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
442
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
443
+ sample_rate: 8000
444
+ gilles_16khz:
445
+ latest:
446
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
447
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
448
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
449
+ sample_rate: 16000
450
+ v1:
451
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
452
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
453
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
454
+ sample_rate: 16000
455
+ ba:
456
+ aigul_v2:
457
+ latest:
458
+ example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
459
+ package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
460
+ sample_rate: [8000, 16000]
461
+ language_name: 'bashkir'
462
+ xal:
463
+ v3_xal:
464
+ latest:
465
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
466
+ package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
467
+ sample_rate: [8000, 24000, 48000]
468
+ erdni_v2:
469
+ latest:
470
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
471
+ package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
472
+ sample_rate: [8000, 16000]
473
+ language_name: 'kalmyk'
474
+ tt:
475
+ v3_tt:
476
+ latest:
477
+ example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
478
+ package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
479
+ sample_rate: [8000, 24000, 48000]
480
+ dilyara_v2:
481
+ latest:
482
+ example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
483
+ package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
484
+ sample_rate: [8000, 16000]
485
+ language_name: 'tatar'
486
+ uz:
487
+ v3_uz:
488
+ latest:
489
+ example: 'Tanishganimdan xursandman.'
490
+ package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
491
+ sample_rate: [8000, 24000, 48000]
492
+ dilnavoz_v2:
493
+ latest:
494
+ example: 'Tanishganimdan xursandman.'
495
+ package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
496
+ sample_rate: [8000, 16000]
497
+ language_name: 'uzbek'
498
+ ua:
499
+ v3_ua:
500
+ latest:
501
+ example: 'К+отики - пухн+асті жив+отики.'
502
+ package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
503
+ sample_rate: [8000, 24000, 48000]
504
+ mykyta_v2:
505
+ latest:
506
+ example: 'К+отики - пухн+асті жив+отики.'
507
+ package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
508
+ sample_rate: [8000, 24000, 48000]
509
+ language_name: 'ukrainian'
510
+ indic:
511
+ v3_indic:
512
+ latest:
513
+ example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
514
+ package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
515
+ sample_rate: [8000, 24000, 48000]
516
+ multi:
517
+ multi_v2:
518
+ latest:
519
+ package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
520
+ sample_rate: [8000, 16000]
521
+ speakers:
522
+ aidar:
523
+ lang: 'ru'
524
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
525
+ baya:
526
+ lang: 'ru'
527
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
528
+ kseniya:
529
+ lang: 'ru'
530
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
531
+ irina:
532
+ lang: 'ru'
533
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
534
+ ruslan:
535
+ lang: 'ru'
536
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
537
+ natasha:
538
+ lang: 'ru'
539
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
540
+ thorsten:
541
+ lang: 'de'
542
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
543
+ tux:
544
+ lang: 'es'
545
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
546
+ gilles:
547
+ lang: 'fr'
548
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
549
+ lj:
550
+ lang: 'en'
551
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
552
+ dilyara:
553
+ lang: 'tt'
554
+ example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
555
+ te_models:
556
+ latest:
557
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
558
+ languages: ['en', 'de', 'ru', 'es']
559
+ punct: '.,-!?—'
560
+ v2:
561
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
562
+ languages: ['en', 'de', 'ru', 'es']
563
+ punct: '.,-!?—'
metadata.csv ADDED
The diff for this file is too large to render. See raw diff
process_metadata.ipynb ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 19,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 20,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "df = pd.read_csv(\"./speakers_all.csv\")"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 21,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "df['id'] = df['filename'].apply(lambda x: x + \".wav\")\n",
28
+ "df = df[df['file_missing?'] == False]"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 22,
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "data": {
38
+ "text/html": [
39
+ "<div>\n",
40
+ "<style scoped>\n",
41
+ " .dataframe tbody tr th:only-of-type {\n",
42
+ " vertical-align: middle;\n",
43
+ " }\n",
44
+ "\n",
45
+ " .dataframe tbody tr th {\n",
46
+ " vertical-align: top;\n",
47
+ " }\n",
48
+ "\n",
49
+ " .dataframe thead th {\n",
50
+ " text-align: right;\n",
51
+ " }\n",
52
+ "</style>\n",
53
+ "<table border=\"1\" class=\"dataframe\">\n",
54
+ " <thead>\n",
55
+ " <tr style=\"text-align: right;\">\n",
56
+ " <th></th>\n",
57
+ " <th>age</th>\n",
58
+ " <th>age_onset</th>\n",
59
+ " <th>birthplace</th>\n",
60
+ " <th>filename</th>\n",
61
+ " <th>native_language</th>\n",
62
+ " <th>sex</th>\n",
63
+ " <th>speakerid</th>\n",
64
+ " <th>country</th>\n",
65
+ " <th>file_missing?</th>\n",
66
+ " <th>Unnamed: 9</th>\n",
67
+ " <th>Unnamed: 10</th>\n",
68
+ " <th>Unnamed: 11</th>\n",
69
+ " <th>id</th>\n",
70
+ " </tr>\n",
71
+ " </thead>\n",
72
+ " <tbody>\n",
73
+ " <tr>\n",
74
+ " <th>32</th>\n",
75
+ " <td>27.0</td>\n",
76
+ " <td>9.0</td>\n",
77
+ " <td>virginia, south africa</td>\n",
78
+ " <td>afrikaans1</td>\n",
79
+ " <td>afrikaans</td>\n",
80
+ " <td>female</td>\n",
81
+ " <td>1</td>\n",
82
+ " <td>south africa</td>\n",
83
+ " <td>False</td>\n",
84
+ " <td>NaN</td>\n",
85
+ " <td>NaN</td>\n",
86
+ " <td>NaN</td>\n",
87
+ " <td>afrikaans1.wav</td>\n",
88
+ " </tr>\n",
89
+ " <tr>\n",
90
+ " <th>33</th>\n",
91
+ " <td>40.0</td>\n",
92
+ " <td>5.0</td>\n",
93
+ " <td>pretoria, south africa</td>\n",
94
+ " <td>afrikaans2</td>\n",
95
+ " <td>afrikaans</td>\n",
96
+ " <td>male</td>\n",
97
+ " <td>2</td>\n",
98
+ " <td>south africa</td>\n",
99
+ " <td>False</td>\n",
100
+ " <td>NaN</td>\n",
101
+ " <td>NaN</td>\n",
102
+ " <td>NaN</td>\n",
103
+ " <td>afrikaans2.wav</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>34</th>\n",
107
+ " <td>43.0</td>\n",
108
+ " <td>4.0</td>\n",
109
+ " <td>pretoria, transvaal, south africa</td>\n",
110
+ " <td>afrikaans3</td>\n",
111
+ " <td>afrikaans</td>\n",
112
+ " <td>male</td>\n",
113
+ " <td>418</td>\n",
114
+ " <td>south africa</td>\n",
115
+ " <td>False</td>\n",
116
+ " <td>NaN</td>\n",
117
+ " <td>NaN</td>\n",
118
+ " <td>NaN</td>\n",
119
+ " <td>afrikaans3.wav</td>\n",
120
+ " </tr>\n",
121
+ " <tr>\n",
122
+ " <th>35</th>\n",
123
+ " <td>26.0</td>\n",
124
+ " <td>8.0</td>\n",
125
+ " <td>pretoria, south africa</td>\n",
126
+ " <td>afrikaans4</td>\n",
127
+ " <td>afrikaans</td>\n",
128
+ " <td>male</td>\n",
129
+ " <td>1159</td>\n",
130
+ " <td>south africa</td>\n",
131
+ " <td>False</td>\n",
132
+ " <td>NaN</td>\n",
133
+ " <td>NaN</td>\n",
134
+ " <td>NaN</td>\n",
135
+ " <td>afrikaans4.wav</td>\n",
136
+ " </tr>\n",
137
+ " <tr>\n",
138
+ " <th>36</th>\n",
139
+ " <td>19.0</td>\n",
140
+ " <td>6.0</td>\n",
141
+ " <td>cape town, south africa</td>\n",
142
+ " <td>afrikaans5</td>\n",
143
+ " <td>afrikaans</td>\n",
144
+ " <td>male</td>\n",
145
+ " <td>1432</td>\n",
146
+ " <td>south africa</td>\n",
147
+ " <td>False</td>\n",
148
+ " <td>NaN</td>\n",
149
+ " <td>NaN</td>\n",
150
+ " <td>NaN</td>\n",
151
+ " <td>afrikaans5.wav</td>\n",
152
+ " </tr>\n",
153
+ " </tbody>\n",
154
+ "</table>\n",
155
+ "</div>"
156
+ ],
157
+ "text/plain": [
158
+ " age age_onset birthplace filename \\\n",
159
+ "32 27.0 9.0 virginia, south africa afrikaans1 \n",
160
+ "33 40.0 5.0 pretoria, south africa afrikaans2 \n",
161
+ "34 43.0 4.0 pretoria, transvaal, south africa afrikaans3 \n",
162
+ "35 26.0 8.0 pretoria, south africa afrikaans4 \n",
163
+ "36 19.0 6.0 cape town, south africa afrikaans5 \n",
164
+ "\n",
165
+ " native_language sex speakerid country file_missing? \\\n",
166
+ "32 afrikaans female 1 south africa False \n",
167
+ "33 afrikaans male 2 south africa False \n",
168
+ "34 afrikaans male 418 south africa False \n",
169
+ "35 afrikaans male 1159 south africa False \n",
170
+ "36 afrikaans male 1432 south africa False \n",
171
+ "\n",
172
+ " Unnamed: 9 Unnamed: 10 Unnamed: 11 id \n",
173
+ "32 NaN NaN NaN afrikaans1.wav \n",
174
+ "33 NaN NaN NaN afrikaans2.wav \n",
175
+ "34 NaN NaN NaN afrikaans3.wav \n",
176
+ "35 NaN NaN NaN afrikaans4.wav \n",
177
+ "36 NaN NaN NaN afrikaans5.wav "
178
+ ]
179
+ },
180
+ "execution_count": 22,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "df.head()"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": 23,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": [
195
+ "df['label'] = \"Please call Stella. Ask her to bring these things with her from the store: Six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob. We also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.\""
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": 24,
201
+ "metadata": {},
202
+ "outputs": [
203
+ {
204
+ "data": {
205
+ "text/plain": [
206
+ "Index(['age', 'age_onset', 'birthplace', 'filename', 'native_language', 'sex',\n",
207
+ " 'speakerid', 'country', 'file_missing?', 'Unnamed: 9', 'Unnamed: 10',\n",
208
+ " 'Unnamed: 11', 'id', 'label'],\n",
209
+ " dtype='object')"
210
+ ]
211
+ },
212
+ "execution_count": 24,
213
+ "metadata": {},
214
+ "output_type": "execute_result"
215
+ }
216
+ ],
217
+ "source": [
218
+ "df.columns"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": 25,
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "df = df.drop(\"Unnamed: 9\", axis=1)\n",
228
+ "df = df.drop(\"Unnamed: 10\", axis=1)\n",
229
+ "df = df.drop(\"Unnamed: 11\", axis=1)\n",
230
+ "df = df.drop(\"file_missing?\", axis=1)\n",
231
+ "df = df.drop(\"filename\", axis=1)"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": 26,
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": [
240
+ "df.loc[df['sex'] == 'famale', 'sex'] = 'female'"
241
+ ]
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "execution_count": 27,
246
+ "metadata": {},
247
+ "outputs": [],
248
+ "source": [
249
+ "import pycountry_convert as pc\n",
250
+ "\n",
251
+ "def country_to_continent(country_name):\n",
252
+ " try:\n",
253
+ " country_alpha2 = pc.country_name_to_country_alpha2(country_name, cn_name_format=pc.COUNTRY_NAME_FORMAT_LOWER)\n",
254
+ " country_continent_code = pc.country_alpha2_to_continent_code(country_alpha2)\n",
255
+ " country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)\n",
256
+ " return country_continent_name\n",
257
+ " except:\n",
258
+ " return None\n",
259
+ "\n",
260
+ "df[\"continent\"] = df[\"country\"].map(lambda x: country_to_continent(x))"
261
+ ]
262
+ },
263
+ {
264
+ "cell_type": "code",
265
+ "execution_count": 28,
266
+ "metadata": {},
267
+ "outputs": [
268
+ {
269
+ "data": {
270
+ "text/plain": [
271
+ "False 1647\n",
272
+ "True 493\n",
273
+ "Name: continent, dtype: int64"
274
+ ]
275
+ },
276
+ "execution_count": 28,
277
+ "metadata": {},
278
+ "output_type": "execute_result"
279
+ }
280
+ ],
281
+ "source": [
282
+ "df[\"continent\"].isnull().value_counts()"
283
+ ]
284
+ },
285
+ {
286
+ "cell_type": "code",
287
+ "execution_count": 29,
288
+ "metadata": {},
289
+ "outputs": [],
290
+ "source": [
291
+ "df = df.drop([1544, 1771])"
292
+ ]
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "execution_count": 30,
297
+ "metadata": {},
298
+ "outputs": [],
299
+ "source": [
300
+ "df.to_csv(\"metadata.csv\", index=False)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "markdown",
305
+ "metadata": {},
306
+ "source": [
307
+ "## Whisper"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": 5,
313
+ "metadata": {},
314
+ "outputs": [
315
+ {
316
+ "name": "stderr",
317
+ "output_type": "stream",
318
+ "text": [
319
+ "100%|███████████████████████████████████████| 139M/139M [00:04<00:00, 30.3MiB/s]\n"
320
+ ]
321
+ }
322
+ ],
323
+ "source": [
324
+ "import whisper\n",
325
+ "model = whisper.load_model(\"base\")"
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": 8,
331
+ "metadata": {},
332
+ "outputs": [
333
+ {
334
+ "name": "stderr",
335
+ "output_type": "stream",
336
+ "text": [
337
+ "/opt/anaconda3/lib/python3.8/site-packages/whisper/transcribe.py:78: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
338
+ " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n"
339
+ ]
340
+ },
341
+ {
342
+ "ename": "TypeError",
343
+ "evalue": "expected np.ndarray (got list)",
344
+ "output_type": "error",
345
+ "traceback": [
346
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
347
+ "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
348
+ "\u001b[0;32m/var/folders/tq/kqg2ct9d123gd0wmshf2bd3r0000gp/T/ipykernel_157/3894641212.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtranscribe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
349
+ "\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/whisper/transcribe.py\u001b[0m in \u001b[0;36mtranscribe\u001b[0;34m(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, **decode_options)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0mdecode_options\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"fp16\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mmel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlog_mel_spectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdecode_options\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"language\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
350
+ "\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/whisper/audio.py\u001b[0m in \u001b[0;36mlog_mel_spectrogram\u001b[0;34m(audio, n_mels)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0maudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_audio\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 112\u001b[0;31m \u001b[0maudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 113\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0mwindow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhann_window\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mN_FFT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maudio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
351
+ "\u001b[0;31mTypeError\u001b[0m: expected np.ndarray (got list)"
352
+ ]
353
+ }
354
+ ],
355
+ "source": [
356
+ "result = model.transcribe([\"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\", \"/Users/acabrera/dev/data/speech-accent-archive/recordings/recordings/afrikaans1.wav\"])"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": 7,
362
+ "metadata": {},
363
+ "outputs": [
364
+ {
365
+ "data": {
366
+ "text/plain": [
367
+ "' Please call Stella, ask her to bring these things with her from the store. 6 spoons of fresh snow peas, 5 thick slabs of blue cheese and maybe a snack for her brother Bob. We also need a small plastic snake and a big twig frog for the kids. She can scoop these things into free-rate bags and we will go meet a wind state train station.'"
368
+ ]
369
+ },
370
+ "execution_count": 7,
371
+ "metadata": {},
372
+ "output_type": "execute_result"
373
+ }
374
+ ],
375
+ "source": [
376
+ "result[\"text\"]"
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "markdown",
381
+ "metadata": {},
382
+ "source": []
383
+ }
384
+ ],
385
+ "metadata": {
386
+ "kernelspec": {
387
+ "display_name": "Python 3.8.12 ('base')",
388
+ "language": "python",
389
+ "name": "python3"
390
+ },
391
+ "language_info": {
392
+ "codemirror_mode": {
393
+ "name": "ipython",
394
+ "version": 3
395
+ },
396
+ "file_extension": ".py",
397
+ "mimetype": "text/x-python",
398
+ "name": "python",
399
+ "nbconvert_exporter": "python",
400
+ "pygments_lexer": "ipython3",
401
+ "version": "3.8.12"
402
+ },
403
+ "orig_nbformat": 4,
404
+ "vscode": {
405
+ "interpreter": {
406
+ "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
407
+ }
408
+ }
409
+ },
410
+ "nbformat": 4,
411
+ "nbformat_minor": 2
412
+ }
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ zenoml>=0.4.6
2
+ inspiredco
3
+ sentence_transformers
speakers_all.csv ADDED
The diff for this file is too large to render. See raw diff