# Web file-viewer residue captured together with the source (not program code):
#   alnalda's picture
#   Update app.py
#   9d31e5f
#   raw
#   history blame contribute delete
#   No virus
#   4.22 kB
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
import torchaudio.functional as Fu
import sys
import matplotlib.pyplot as plt
import IPython.display as ipd
from tqdm import tqdm
import gradio as gr
import ffmpeg
import resampy
# Compute device: CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Module-level sampling-rate settings.  ``new_sample_rate`` is the target rate
# given to the resampler in ``predict``.
# NOTE(review): ``predict`` unpacks its own local ``sample_rate`` from
# ``torchaudio.load``, so this module-level value does not appear to be read
# by the visible code -- confirm before relying on it.
sample_rate, new_sample_rate = 48000, 8000
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
import torchaudio.functional as Fu
import sys
import matplotlib.pyplot as plt
import IPython.display as ipd
from tqdm import tqdm
import gradio as gr
import ffmpeg
import resampy
# Compute device: CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Module-level sampling-rate settings.  ``new_sample_rate`` is the target rate
# given to the resampler in ``predict``.
# NOTE(review): ``predict`` unpacks its own local ``sample_rate`` from
# ``torchaudio.load``, so this module-level value does not appear to be read
# by the visible code -- confirm before relying on it.
sample_rate, new_sample_rate = 48000, 8000
class M5(nn.Module):
    """1-D convolutional classifier operating on a batch of waveforms.

    The network is four ``Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(4)``
    stages, an average pool over whatever remains of the last axis, and a
    linear layer whose output is passed through ``log_softmax``.

    Args:
        n_input: Number of input channels of the first convolution.
        n_output: Size of the final linear layer's output.
        stride: Stride of the first convolution.
        n_channel: Channel count of the first two stages; the last two
            stages use ``2 * n_channel``.
    """

    def __init__(self, n_input=1, n_output=35, stride=16, n_channel=32):
        super().__init__()
        wide = 2 * n_channel

        # Attribute names and creation order are deliberately unchanged: they
        # define the ``state_dict`` keys and the order in which the random
        # initialiser is consumed.
        self.conv1 = nn.Conv1d(n_input, n_channel, kernel_size=80, stride=stride)
        self.bn1 = nn.BatchNorm1d(n_channel)
        self.pool1 = nn.MaxPool1d(4)

        self.conv2 = nn.Conv1d(n_channel, n_channel, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(n_channel)
        self.pool2 = nn.MaxPool1d(4)

        self.conv3 = nn.Conv1d(n_channel, wide, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(wide)
        self.pool3 = nn.MaxPool1d(4)

        self.conv4 = nn.Conv1d(wide, wide, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(wide)
        self.pool4 = nn.MaxPool1d(4)

        self.fc1 = nn.Linear(wide, n_output)

    def forward(self, x):
        """Return log-probabilities with the class scores on the last axis.

        Args:
            x: Input accepted by ``conv1`` (a batch of ``n_input``-channel
                1-D signals).

        Returns:
            ``log_softmax`` over ``dim=2`` of the linear layer's output.
        """
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
            (self.conv4, self.bn4, self.pool4),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))

        # Average over the entire remaining last axis, leaving length 1.
        x = F.avg_pool1d(x, x.shape[-1])
        # Move the channel axis last so the linear layer acts on it.
        scores = self.fc1(x.permute(0, 2, 1))
        return F.log_softmax(scores, dim=2)
def get_likely_index(tensor):
    """Return, for each element in the batch, the index of the largest value
    along the last dimension of ``tensor``."""
    return torch.argmax(tensor, dim=-1)
def index_to_label(index):
    """Return the entry of the module-level ``labels`` at position ``index``.

    The original comment describes this as the inverse of ``label_to_index``.

    NOTE(review): neither ``labels`` nor ``label_to_index`` is defined in the
    visible source; ``labels`` must exist at module level before this is
    called -- confirm where it is meant to come from.
    """
    word = labels[index]
    return word
def predict(filepath):
    """Run the module-level ``model`` on one audio file and return its output.

    Args:
        filepath: Path of the audio file to score (the Gradio ``Audio`` input
            in this file is configured with ``type='filepath'``).

    Returns:
        The tensor produced by ``model`` for a batch holding this single,
        mono clip.  No arg-max or label lookup is applied here.
    """
    # torchaudio.load yields a float waveform laid out as (channels, frames).
    waveform, _ = torchaudio.load(filepath)

    # Mix all channels down to one.  The earlier
    # ``MuLawEncoding(quantization_channels=1)`` call did not do this:
    # ``quantization_channels`` is the number of mu-law quantisation levels,
    # and with a single level the encoding degenerates to 0/0, turning the
    # whole waveform into NaN cast to int64.
    waveform = waveform.mean(dim=0, keepdim=True)

    # Feed the model floating-point data on the device its weights live on
    # (an integer tensor cannot be passed through float conv layers, and the
    # model is loaded on the CPU regardless of ``device``).
    reference = next(model.parameters(), None)
    if reference is not None:
        waveform = waveform.to(device=reference.device, dtype=reference.dtype)
    else:
        waveform = waveform.to(device)

    # NOTE(review): resampling to ``new_sample_rate`` was disabled in this
    # function and is left disabled; if the model was trained on
    # ``new_sample_rate`` audio it should be re-enabled -- confirm.

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        return model(waveform.unsqueeze(0))
# Load the pickled model onto the CPU.
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code -- only ever point this at a trusted 'export.pkl'.
model = torch.load('export.pkl', map_location=torch.device('cpu'))

# The app only performs inference: switch to eval mode so BatchNorm layers use
# their stored running statistics instead of per-request batch statistics
# (and do not update them on every call).
model.eval()

# Microphone recording in (delivered to ``predict`` as a file path), text out.
gr.Interface(
    fn=predict,
    inputs=gr.inputs.Audio(source='microphone', type='filepath'),
    outputs='text',
).launch(share=False)
def get_likely_index(tensor):
    """Return, for each element in the batch, the index of the largest value
    along the last dimension of ``tensor``."""
    return torch.argmax(tensor, dim=-1)
def index_to_label(index):
    """Return the entry of the module-level ``labels`` at position ``index``.

    The original comment describes this as the inverse of ``label_to_index``.

    NOTE(review): neither ``labels`` nor ``label_to_index`` is defined in the
    visible source; ``labels`` must exist at module level before this is
    called -- confirm where it is meant to come from.
    """
    word = labels[index]
    return word
def predict(filepath):
    """Classify one audio file and return the predicted label.

    The clip is loaded, mixed down to one channel, resampled to
    ``new_sample_rate``, and passed through the module-level ``model`` as a
    batch of one.  The arg-max of the output is translated with
    ``index_to_label``.

    Args:
        filepath: Path of the audio file to classify (the Gradio ``Audio``
            input in this file is configured with ``type='filepath'``).

    Returns:
        The entry of ``labels`` selected by the model's highest-scoring
        output.

    NOTE(review): ``index_to_label`` reads a module-level ``labels`` that is
    not defined in the visible source -- it must be provided for this to run.
    """
    # torchaudio.load yields a float waveform laid out as (channels, frames).
    # The rate gets its own local name so the module-level ``sample_rate``
    # is not shadowed.
    waveform, source_rate = torchaudio.load(filepath)

    # Mix all channels down to one.  The earlier
    # ``MuLawEncoding(quantization_channels=1)`` call did not do this:
    # ``quantization_channels`` is the number of mu-law quantisation levels,
    # and with a single level the encoding degenerates to 0/0, turning the
    # whole waveform into NaN cast to int64.
    waveform = waveform.mean(dim=0, keepdim=True)

    # Resample while the data is still floating point; previously this ran
    # after an integer cast.
    resample = torchaudio.transforms.Resample(
        orig_freq=source_rate, new_freq=new_sample_rate
    )
    waveform = resample(waveform)

    # Feed the model floating-point data on the device its weights live on
    # (the model is loaded on the CPU regardless of ``device``).
    reference = next(model.parameters(), None)
    if reference is not None:
        waveform = waveform.to(device=reference.device, dtype=reference.dtype)
    else:
        waveform = waveform.to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(waveform.unsqueeze(0))

    best = get_likely_index(output)
    return index_to_label(best.squeeze())
# Load the pickled model onto the CPU.
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code -- only ever point this at a trusted 'export.pkl'.
model = torch.load('export.pkl', map_location=torch.device('cpu'))

# The app only performs inference: switch to eval mode so BatchNorm layers use
# their stored running statistics instead of per-request batch statistics
# (and do not update them on every call).
model.eval()

# Microphone recording in (delivered to ``predict`` as a file path), text out.
gr.Interface(
    fn=predict,
    inputs=gr.inputs.Audio(source='microphone', type='filepath'),
    outputs='text',
).launch(share=False)