File size: 2,100 Bytes
b7d825d
 
1383129
b7d825d
3a4a7b8
12ef9d2
b7d825d
88f848a
 
 
 
 
ee53ffb
1383129
12ef9d2
88f848a
1383129
 
6db55eb
1383129
 
 
 
 
 
88f848a
 
1383129
 
 
 
 
ee53ffb
1383129
cdb5b64
62d1fdc
 
1383129
 
 
 
 
cdb5b64
62d1fdc
 
b7d825d
 
ee53ffb
 
 
cdb5b64
62d1fdc
 
ee53ffb
 
1383129
ee53ffb
e14e7c5
1383129
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from transformers import pipeline
import gradio as gr
import pytube as pt

# Speech-to-text pipeline: fine-tuned Whisper small for Swedish ASR.
pipe = pipeline(model="Hoft/whisper-small-swedish-asr")  # change to "your-username/the-name-you-picked"
# Sentiment classifier applied to the transcribed Swedish text.
sa = pipeline(task="sentiment-analysis", model="marma/bert-base-swedish-cased-sentiment")

def get_emoji(feeling):
    """Map a sentiment label to an emoji for the "Feeling" output box.

    Args:
        feeling: Sentiment label string from the classifier (e.g. 'POSITIVE').

    Returns:
        '😊' for 'POSITIVE'; '😔' for any other label.
    """
    if feeling == 'POSITIVE':
        return '😊'
    # Bug fix: the negative emoji was mojibake ('πŸ˜”' — the UTF-8 bytes of
    # U+1F614 mis-decoded as Latin-1); restore the intended pensive face.
    return '😔'
def microphone_or_file_transcribe(audio):
    """Transcribe an audio clip and classify the sentiment of the text.

    Args:
        audio: Filepath to the recorded or uploaded audio clip.

    Returns:
        Tuple of (transcription, sentiment emoji) for the two output boxes.
    """
    transcription = pipe(audio)["text"]
    sentiment_label = sa(transcription)[0]["label"]
    return transcription, get_emoji(sentiment_label)
    
def youtube_transcribe(url):
    """Download a YouTube video's audio track, transcribe it, and classify sentiment.

    Args:
        url: URL of the YouTube video to transcribe.

    Returns:
        Tuple of (transcription, sentiment emoji) for the two output boxes.
    """
    # Grab the first audio-only stream and save it locally for the ASR pipeline.
    audio_stream = pt.YouTube(url).streams.filter(only_audio=True)[0]
    audio_stream.download(filename="audio.mp3")

    transcription = pipe("audio.mp3")["text"]
    sentiment_label = sa(transcription)[0]["label"]
    return transcription, get_emoji(sentiment_label)


# Shared tab description (was duplicated verbatim across all three tabs).
DESCRIPTION = "Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model and Sentiment Analysis."

app = gr.Blocks()

# NOTE(review): the original mixed the modern `gr.Audio`/`gr.Textbox` API with
# the deprecated `gr.inputs.*` namespace; unified on the direct components here.
microphone_tab = gr.Interface(
    fn=microphone_or_file_transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[gr.Textbox(label="Text"), gr.Textbox(label="Feeling")],
    title="Whisper Small Swedish: Microphone ",
    description=DESCRIPTION,
)

youtube_tab = gr.Interface(
    fn=youtube_transcribe,
    inputs=[gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video", label="URL")],
    outputs=[gr.Textbox(label="Text"), gr.Textbox(label="Feeling")],
    title="Whisper Small Swedish: Youtube",
    description=DESCRIPTION,
)

file_tab = gr.Interface(
    fn=microphone_or_file_transcribe,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=[gr.Textbox(label="Text"), gr.Textbox(label="Feeling")],
    title="Whisper Small Swedish: File",
    description=DESCRIPTION,
)

with app:
    gr.TabbedInterface([microphone_tab, youtube_tab, file_tab], ["Microphone", "YouTube", "File"])

app.launch(enable_queue=True)