File size: 1,381 Bytes
8d5928a
d6dd1d2
0a1d195
d6dd1d2
6afc25f
4ce6436
fce051b
d6dd1d2
a195ded
d6dd1d2
 
 
a195ded
fce051b
d6dd1d2
 
 
fce051b
d6dd1d2
 
 
 
fce051b
d6dd1d2
 
 
 
 
0cecb50
d6dd1d2
fce051b
d6dd1d2
fce051b
d6dd1d2
fce051b
17f3c05
e5d627d
 
 
 
 
 
 
d6dd1d2
fce051b
642d01b
fce051b
0b3bc06
191715b
fce051b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import torch

# Intent lookup table: intents.json maps stringified class indices to
# response strings (see predict()).  The original unused `data_dict = {}`
# and the redundant `data` intermediate have been removed.
with open('./intents.json', 'r') as file:
    intents_dict = json.load(file)

# Tokenizer comes from the base roberta checkpoint; the fine-tuned
# sequence-classification weights are loaded from the current directory.
# NOTE(review): both loads happen at import time and require local files.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModelForSequenceClassification.from_pretrained("./")

def preprocess(text):
    """Tokenize raw text into padded/truncated PyTorch tensors for the model."""
    return tokenizer(text, padding=True, truncation=True, return_tensors="pt")

def postprocess(outputs):
    logits = outputs.logits
    predicted_labels = logits.argmax(dim=1).tolist()
    return predicted_labels

def predict(text):
    """Classify *text* and return the matching intent response string."""
    encoded = preprocess(text)
    # Inference only — skip gradient bookkeeping.
    with torch.no_grad():
        model_out = model(**encoded)
    labels = postprocess(model_out)
    # intents.json is keyed by the stringified predicted class index.
    return intents_dict[str(labels[0])]

# Speech-to-text pipeline used by transcribe().  The duplicate
# `from transformers import pipeline` has been removed — it is already
# imported at the top of the file.
# NOTE(review): whisper-medium is large; this downloads/loads at import time.
p = pipeline(model="openai/whisper-medium")

def transcribe(text, audio):
    """Route the user's input to the intent classifier.

    Audio takes priority: it is transcribed with Whisper first, then the
    transcript is classified.  Falls back to the typed text, and finally
    to a prompt asking for input when neither was provided.
    """
    if audio:
        transcript = p(audio)['text']
        return predict(transcript)
    if text:
        return predict(text)
    return "please give input"


# Build and launch the Gradio UI: typed text and/or microphone audio in,
# predicted intent response out.  Fix: `get_intent` previously held the
# return value of .launch(), not the Interface — bind the Interface first,
# then launch it, so the name matches the object.
get_intent = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Textbox(label="Enter Text Input", type="text"),
        gr.Audio(source="microphone", type="filepath"),
    ],
    outputs="text",
)
get_intent.launch()