File size: 1,356 Bytes
66ade06
 
c804b71
94aa357
66ade06
94aa357
0b6f67c
66ade06
5a22b5d
94aa357
 
 
 
 
 
 
 
c804b71
5a22b5d
145198e
66ade06
 
 
 
145198e
a07082e
66ade06
 
 
 
 
 
 
 
 
 
 
 
5a22b5d
66ade06
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from transformers import pipeline
import gradio as gr
from pytube import YouTube
import os

# Model identifier handed to transformers.pipeline (the fine-tuned Whisper
# checkpoint this demo transcribes with).
_MODEL_ID = "Akseluhr/whisper-small-sv-SE-auhr-v2"

# Built once at import time so every transcription request reuses it.
pipe = pipeline(model=_MODEL_ID)

def get_audio(url):
    """Fetch the audio track of a YouTube video and return its local path.

    The first audio-only stream reported by pytube is written to the current
    working directory, and the resulting file is renamed so that it carries a
    ``.mp3`` extension. The rename does not transcode anything: the bytes on
    disk stay in whatever container the stream was served in.

    Args:
        url: Address of the YouTube video.

    Returns:
        Path of the downloaded file with its extension replaced by ``.mp3``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        # first() yields None on an empty query; fail with a clear message
        # instead of an AttributeError on the download call below.
        raise ValueError("No audio-only stream found for {!r}".format(url))
    print(stream)
    out_file = stream.download(output_path=".")  # Write the stream to disk
    base, _ = os.path.splitext(out_file)
    new_file = base + ".mp3"
    # os.replace overwrites a leftover file from an earlier request for the
    # same video on every platform; os.rename raises on Windows in that case.
    os.replace(out_file, new_file)
    return new_file

def transcribe(rec=None, file=None, url=""):
    """Transcribe speech from the first audio source that was supplied.

    Sources are tried in priority order: microphone recording, uploaded
    file, then YouTube URL.

    Args:
        rec: Microphone recording to transcribe, or None when absent.
        file: Uploaded audio file to transcribe, or None when absent.
        url: YouTube video URL. An empty string, a whitespace-only string
            or None all count as "no URL given".

    Returns:
        The text produced by the speech-recognition pipeline, or a short
        prompt message when no source was provided.
    """
    # Normalise first so the URL is checked by value. The previous
    # `url is not ""` compared object identity, which is True for None and
    # for any non-interned empty string.
    url = url.strip() if isinstance(url, str) else ""

    if rec is not None:
        audio = rec
    elif file is not None:
        audio = file
    elif url:
        audio = get_audio(url)
    else:
        return "Provide a recording or a file."

    return pipe(audio)["text"]


# Input widgets, listed in the positional order of transcribe(rec, file, url).
mic_input = gr.Audio(source="microphone", type="filepath", optional=True)
upload_input = gr.Audio(source="upload", type="filepath", optional=True)
url_input = gr.Textbox(
    placeholder='Enter the Youtube video URL',
    label='URL',
    optional=True,
)

# Wire the three inputs to transcribe() and show its return value as text.
iface = gr.Interface(
    fn=transcribe,
    inputs=[mic_input, upload_input, url_input],
    outputs="text",
    title="Whisper Small Swedish",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper model.",
)

# Start the web app as soon as the script is executed.
iface.launch()