File size: 1,946 Bytes
5a9e3df
42b3bbc
5a9e3df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42b3bbc
5a9e3df
d5ecb95
c54611e
d5ecb95
04f2503
6d3ca62
42b3bbc
b14b05d
 
 
 
 
 
 
04f2503
 
b14b05d
 
 
079ae85
 
b14b05d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import whisper
import gradio as gr
from transformers import pipeline


# Speech-to-text model: the "base" Whisper checkpoint, loaded once at import time.
model = whisper.load_model(name="base")

# Text sentiment classifier applied to the Whisper output.
sentiment_analysis = pipeline(
    task="sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

def process_audio_file(file, sampling_rate=16000):
    """Read an audio file from disk and decode it with ffmpeg.

    Args:
        file: Path of the audio file to read.
        sampling_rate: Target sampling rate in Hz handed to the decoder.
            Defaults to 16000 (the rate Whisper models work with).

    Returns:
        The decoded audio exactly as returned by ``ffmpeg_read``
        (documented by transformers as a NumPy array of samples).
    """
    # Imported here because the module never imports ffmpeg_read at the top
    # level; without this the function fails with NameError on first call.
    from transformers.pipelines.audio_utils import ffmpeg_read

    with open(file, "rb") as f:
        payload = f.read()

    return ffmpeg_read(payload, sampling_rate)


def transcribe(Microphone, File_Upload):
    """Transcribe one audio input and classify the sentiment of the text.

    Args:
        Microphone: File path of the microphone recording, or None.
        File_Upload: File path of the uploaded audio file, or None.

    Returns:
        A ``(label, score)`` pair. When neither input is given, the pair is
        ``(error message, "")`` so that both output fields are always filled.
        When both inputs are given, the microphone recording is used and a
        warning is prepended to the label.
    """
    # Guard clause: always return two values, one per declared output.
    if Microphone is None and File_Upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file", ""

    warn_output = ""
    if Microphone is not None and File_Upload is not None:
        warn_output = "WARNING: You've uploaded an audio file and used the microphone. " \
                      "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"

    # The microphone recording takes priority over the uploaded file.
    file = Microphone if Microphone is not None else File_Upload

    # task="translate" asks Whisper for English text, which is what the
    # English-only sentiment model consumes.
    result = model.transcribe(file, task="translate")
    prediction = sentiment_analysis(result['text'])[0]

    # Surface the warning to the user instead of silently dropping it.
    label = warn_output + prediction['label']
    score = prediction['score']
    return label, score

# Input widgets: both are optional so the user can pick either source.
microphone_input = gr.inputs.Audio(source="microphone", type='filepath', optional=True)
upload_input = gr.inputs.Audio(source="upload", type='filepath', optional=True)

# Output widgets, in the same order as the values returned by transcribe().
sentiment_output = gr.outputs.Textbox(label="Sentiment")
score_output = gr.outputs.Textbox(label="Score")

# Assemble the web UI around transcribe().
iface = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input, upload_input],
    outputs=[sentiment_output, score_output],
    layout="horizontal",
    theme="huggingface",
    title="AzVoiceSent: Sentiment Classification from Voice Transcriptions in Azerbaijani",
    description="AzVoiceSent is research project focused on sentiment classification from voice transcriptions in Azerbaijani. The project has the potential to provide valuable insights into the sentiment expressed by speakers in various domains and applications. ",
    allow_flagging='never',
)

# Start the app with request queuing enabled.
iface.launch(enable_queue=True)