Spaces:
Sleeping
Sleeping
yellowcandle
committed on
Added audio transcription and proofreading functionality using Gradio and Hugging Face Transformers
Browse files
- Implemented `transcribe_audio` function to transcribe audio files using a specified model
- Implemented `proofread` function to proofread transcribed text using a specified model
- Created a Gradio interface to upload audio files, select models, and display transcribed and proofread text
- Integrated GPU support for faster processing
app.py
CHANGED
@@ -2,7 +2,7 @@ import spaces
|
|
2 |
import gradio as gr
|
3 |
# Use a pipeline as a high-level helper
|
4 |
import torch
|
5 |
-
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM
|
6 |
|
7 |
@spaces.GPU(duration=120)
|
8 |
def transcribe_audio(audio, model_id):
|
@@ -51,7 +51,8 @@ def proofread(prompt, text):
|
|
51 |
model.to(device)
|
52 |
|
53 |
# Perform proofreading using the model
|
54 |
-
|
|
|
55 |
output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
|
56 |
proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
57 |
|
@@ -59,8 +60,12 @@ def proofread(prompt, text):
|
|
59 |
|
60 |
|
61 |
with gr.Blocks() as demo:
|
62 |
-
gr.Markdown("
|
63 |
-
|
|
|
|
|
|
|
|
|
64 |
|
65 |
with gr.Row():
|
66 |
audio = gr.Audio(sources="upload", type="filepath")
|
@@ -73,7 +78,7 @@ with gr.Blocks() as demo:
|
|
73 |
proofread_output = gr.Textbox(label="Proofread Text")
|
74 |
|
75 |
transcribe_button.click(transcribe_audio, inputs=[audio, model_dropdown], outputs=transcribed_text)
|
76 |
-
proofread_button.click(proofread, inputs=transcribed_text, outputs=proofread_output)
|
77 |
-
transcribed_text.change(proofread, inputs=transcribed_text, outputs=proofread_output)
|
78 |
|
79 |
demo.launch()
|
|
|
2 |
import gradio as gr
|
3 |
# Use a pipeline as a high-level helper
|
4 |
import torch
|
5 |
+
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
|
6 |
|
7 |
@spaces.GPU(duration=120)
|
8 |
def transcribe_audio(audio, model_id):
|
|
|
51 |
model.to(device)
|
52 |
|
53 |
# Perform proofreading using the model
|
54 |
+
input_text = prompt + text
|
55 |
+
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
56 |
output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
|
57 |
proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
58 |
|
|
|
60 |
|
61 |
|
62 |
with gr.Blocks() as demo:
|
63 |
+
gr.Markdown("""
|
64 |
+
# Audio Transcription and Proofreading
|
65 |
+
1. Upload an audio file (Wait for the file to be fully loaded first)
|
66 |
+
2. Select a model for transcription
|
67 |
+
3. Proofread the transcribed text
|
68 |
+
""")
|
69 |
|
70 |
with gr.Row():
|
71 |
audio = gr.Audio(sources="upload", type="filepath")
|
|
|
78 |
proofread_output = gr.Textbox(label="Proofread Text")
|
79 |
|
80 |
transcribe_button.click(transcribe_audio, inputs=[audio, model_dropdown], outputs=transcribed_text)
|
81 |
+
proofread_button.click(proofread, inputs=[transcribed_text], outputs=proofread_output)
|
82 |
+
transcribed_text.change(proofread, inputs=["", transcribed_text], outputs=proofread_output)
|
83 |
|
84 |
demo.launch()
|