Spaces:
Runtime error
Runtime error
File size: 3,285 Bytes
b8b135b c2110e8 55a586c bba23d3 14c0ec2 e559d03 b8b135b e559d03 8549c9b e559d03 55a586c e559d03 77fc3c3 e559d03 77fc3c3 accb4e2 55a586c 77fc3c3 55a586c 77fc3c3 bba23d3 77fc3c3 55a586c c2110e8 accb4e2 14c0ec2 55a586c bba23d3 55a586c 77fc3c3 89c0d34 accb4e2 89c0d34 55a586c 89c0d34 14c0ec2 accb4e2 14c0ec2 be35c90 14c0ec2 accb4e2 4c692e0 55a586c e891b09 14c0ec2 55a586c 14c0ec2 bba23d3 c1335fa 89c0d34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import gradio as gr
import torch
from charts import spider_chart
from dictionaries import calculate_average, transform_dict
from icon import generate_icon
from transformers import pipeline
from timestamp import format_timestamp
# Speech-recognition checkpoint and the batch size passed to the pipeline
# on every transcription call.
MODEL_NAME = "openai/whisper-medium"
BATCH_SIZE = 8

# Run on CUDA device 0 when a GPU is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Automatic-speech-recognition pipeline; chunk_length_s=30 is handed to the
# pipeline so long recordings are processed in chunks.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

# Text classifier used for the emotion analysis of the transcript.
# top_k=None asks the pipeline for the score of every label instead of only
# the best one (see the transformers text-classification pipeline docs).
classifier = pipeline(
    task="text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
)
def transcribe(file, task, return_timestamps):
    """Transcribe an audio file and chart the emotions found in its text.

    Args:
        file: Audio input handed straight to the speech-recognition
            pipeline and returned unchanged as the first output.
        task: Value forwarded to the model as ``generate_kwargs["task"]``.
        return_timestamps: When truthy, every transcript line is prefixed
            with its ``[start -> end]`` time range.

    Returns:
        A ``(file, html, fig)`` tuple: the input audio, the transcript
        rendered as an HTML snippet, and the figure produced by
        ``spider_chart`` from the averaged per-chunk classifier output.
    """
    import html  # function-scope import keeps this block self-contained

    # Chunks are always requested with timestamps; the checkbox only
    # controls whether those timestamps are shown in the transcript.
    result = pipe(
        file,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    chunks = result["chunks"]

    # Raw chunk texts feed the classifier; escaped copies go into the HTML
    # so characters such as "<" or "&" in a transcript cannot break markup.
    segments = [str(chunk["text"]) for chunk in chunks]
    escaped = [html.escape(segment, quote=False) for segment in segments]

    if return_timestamps:
        # NOTE(review): assumes format_timestamp accepts every start/end
        # value the pipeline reports (including a possibly missing end time
        # on the last chunk) - TODO confirm against timestamp.py.
        lines = [
            f"[{format_timestamp(chunk['timestamp'][0])} -> "
            f"{format_timestamp(chunk['timestamp'][1])}] {safe_text}"
            for chunk, safe_text in zip(chunks, escaped)
        ]
    else:
        lines = escaped

    transcript = "<br>".join(lines)
    text = (
        "<h4>Transcription</h4>"
        f"<div style='overflow-y: scroll; height: 250px;'>{transcript}</div>"
    )

    # Classify each chunk separately. An empty transcript still yields one
    # empty segment so the averaging step always receives an entry, as it
    # did when the texts were joined and re-split on newlines.
    emotion_scores = [
        transform_dict(classifier.predict(segment)[0])
        for segment in (segments or [""])
    ]
    fig = spider_chart(calculate_average(emotion_scores))

    return file, text, fig
# ---------------------------------------------------------------------------
# Gradio interface: input/output components, page texts, examples, launch.
# ---------------------------------------------------------------------------

# NOTE(review): `source=` here and `concurrency_count=` below are the
# Gradio 3.x spellings of these arguments - confirm the pinned Gradio
# version before upgrading.
inputs = [
    gr.Audio(source="upload", label="Audio file", type="filepath"),
    gr.Radio(["transcribe"], label="Task", value="transcribe"),
    gr.Checkbox(value=True, label="Return timestamps"),
]

# gr.HTML replaces the legacy gr.outputs.HTML wrapper so every component
# uses the same top-level component API; the label is unchanged.
outputs = [
    gr.Audio(label="Processed Audio", type="filepath"),
    gr.HTML(label="text"),
    gr.Plot(label="fig"),
]

# Checkpoint advertised in the page description.
# NOTE(review): this differs from the checkpoint loaded into the
# speech-recognition pipeline (MODEL_NAME) - confirm which one is intended.
MODEL_NAME1 = "jpdiazpardo/whisper-tiny-metal"

description = (
    "Transcribe long-form audio inputs with the click of a button! Demo uses the"
    f" checkpoint [{MODEL_NAME1}](https://huggingface.co/{MODEL_NAME1}) and 🤗 Transformers to transcribe audio files"
    " of arbitrary length. Check some of the 'cool' examples below"
)

# Each example row supplies one value per input component:
# audio file, task, return-timestamps flag.
examples = [
    ["When a Demon Defiles a Witch.wav", "transcribe", True],
    ["Immaculate Misconception.wav", "transcribe", True],
]

# Icon markup placed next to the author links in the footer.
linkedin = generate_icon("linkedin")
github = generate_icon("github")

# Footer HTML; the wrapping <div> is now closed explicitly.
article = (
    "<div style='text-align: center; max-width:800px; margin:10px auto;'>"
    f"<p>{linkedin} <a href='https://www.linkedin.com/in/juanpablodiazp/' target='_blank'>Juan Pablo Díaz Pardo</a><br>"
    f"{github} <a href='https://github.com/jpdiazpardo' target='_blank'>jpdiazpardo</a></p>"
    "</div>"
)

# Page title (the earlier placeholder assignment was dead code: it was
# overwritten before anything read it).
title = "Scream: Fine-Tuned Whisper model for automatic gutural speech recognition 🤟🤟🤟"

demo = gr.Interface(
    fn=transcribe,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
    cache_examples=True,
    allow_flagging="never",
)

demo.queue(concurrency_count=3)
demo.launch(debug=True)