yellowcandle committed on
Commit
6e40332
·
unverified ·
1 Parent(s): e648c2d

feat: Add audio transcription and proofreading functionality

Browse files

- Implement audio transcription using Whisper models
- Add proofreading feature using LLaMA-3-Chinese-8B-Instruct-v3 model
- Create Gradio interface for uploading audio, selecting models, and displaying results

Files changed (1) hide show
  1. app.py +17 -15
app.py CHANGED
@@ -2,8 +2,7 @@ import spaces
2
  import gradio as gr
3
  # Use a pipeline as a high-level helper
4
  import torch
5
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
6
- # from datasets import load_dataset
7
 
8
  @spaces.GPU(duration=120)
9
  def transcribe_audio(audio, model_id):
@@ -54,18 +53,21 @@ def proofread(text):
54
  return proofread_text
55
 
56
 
57
- demo = gr.Interface(
58
- [transcribe_audio, proofread],
59
- [
60
- gr.Audio(sources="upload", type="filepath"),
61
- gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"]),
62
- "text"
63
- ],
64
- "text",
65
- allow_flagging="never",
66
- title="Audio Transcription and Proofreading",
67
- description="Upload an audio file, select a model for transcription, and then proofread the transcribed text.",
68
- )
69
- demo.launch()
70
 
 
 
 
 
 
71
 
 
 
 
 
 
2
  import gradio as gr
3
  # Use a pipeline as a high-level helper
4
  import torch
5
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM
 
6
 
7
  @spaces.GPU(duration=120)
8
  def transcribe_audio(audio, model_id):
 
53
  return proofread_text
54
 
55
 
56
+ with gr.Blocks() as demo:
57
+ gr.Markdown("# Audio Transcription and Proofreading")
58
+ gr.Markdown("Upload an audio file, select a model for transcription, and then proofread the transcribed text.")
59
+
60
+ with gr.Row():
61
+ audio = gr.Audio(source="upload", type="filepath")
62
+ model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"])
 
 
 
 
 
 
63
 
64
+ transcribe_button = gr.Button("Transcribe")
65
+ transcribed_text = gr.Textbox(label="Transcribed Text")
66
+
67
+ proofread_button = gr.Button("Proofread")
68
+ proofread_output = gr.Textbox(label="Proofread Text")
69
 
70
+ transcribe_button.click(transcribe_audio, inputs=[audio, model_dropdown], outputs=transcribed_text)
71
+ proofread_button.click(proofread, inputs=transcribed_text, outputs=proofread_output)
72
+
73
+ demo.launch()