harveysamson committed on
Commit
cd87e9f
1 Parent(s): 5d47fc0

added comments

Files changed (3)
  1. app.py +4 -2
  2. src/modeling_outputs.py +2 -0
  3. src/models.py +2 -0
app.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 import torch.nn.functional as F
 from transformers import AutoConfig, Wav2Vec2FeatureExtractor
-from src.models import Wav2Vec2ForSpeechClassification
+from src.models import Wav2Vec2ForSpeechClassification #imported from https://github.com/m3hrdadfi/soxan
 import gradio as gr
 import librosa
 
@@ -12,6 +12,7 @@ feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
 sampling_rate = feature_extractor.sampling_rate
 model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path)
 
+#load input file and resample to 16kHz
 def load_data(path):
     speech, sampling_rate = librosa.load(path)
     if len(speech.shape) > 1:
@@ -20,6 +21,7 @@ def load_data(path):
     speech = librosa.resample(speech, sampling_rate, 16000)
     return speech
 
+#modified version of predict function from https://github.com/m3hrdadfi/soxan
 def inference(path):
     speech = load_data(path)
     inputs = feature_extractor(speech, return_tensors="pt").input_values
@@ -32,7 +34,7 @@ def inference(path):
 inputs = gr.inputs.Audio(label="Input Audio", type="filepath", source="upload")
 outputs = gr.outputs.Label(type="confidences", label="Output Scores")
 title = "Wav2Vec2 Speech Emotion Recognition"
-description = "This is a demo of the Wav2Vec2 Speech Emotion Recognition model. Upload a .wav file (preferably small) and the top emotions predicted will be displayed."
+description = "This is a demo of the Wav2Vec2 Speech Emotion Recognition model. Upload an audio file and the top emotions predicted will be displayed."
 examples = ['data/heart.wav', 'data/happy26.wav', 'data/jm24.wav', 'data/newton.wav', 'data/speeding.wav']
 article = "<a href = 'https://github.com/m3hrdadfi/soxan'> Wav2Vec2 Speech Classification Github Repository"
 
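For context, a minimal end-to-end sketch of how the pieces touched by this commit plausibly fit together. The hunks above elide the checkpoint name and the body of inference, so the model identifier, the stereo-to-mono fold, the softmax step, and the id2label mapping below are assumptions based on the linked soxan repository, not lines from this commit:

import torch
import torch.nn.functional as F
import librosa
from transformers import AutoConfig, Wav2Vec2FeatureExtractor
from src.models import Wav2Vec2ForSpeechClassification

# hypothetical checkpoint name; the actual value of model_name_or_path is not shown in the diff
model_name_or_path = "some-user/wav2vec2-speech-emotion-recognition"

config = AutoConfig.from_pretrained(model_name_or_path)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path)

#load input file and resample to 16kHz (comment as added in this commit)
def load_data(path):
    speech, sampling_rate = librosa.load(path)
    if len(speech.shape) > 1:
        speech = speech[:, 0] + speech[:, 1]  # assumed stereo-to-mono fold; this line is elided in the diff
    if sampling_rate != 16000:
        speech = librosa.resample(speech, sampling_rate, 16000)
    return speech

def inference(path):
    speech = load_data(path)
    inputs = feature_extractor(speech, sampling_rate=16000, return_tensors="pt").input_values
    with torch.no_grad():
        logits = model(inputs).logits
    scores = F.softmax(logits, dim=1).squeeze(0)
    # gr.outputs.Label(type="confidences") expects a {label: probability} dict
    return {config.id2label[i]: float(scores[i]) for i in range(config.num_labels)}
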
src/modeling_outputs.py CHANGED
@@ -1,3 +1,5 @@
+#imported from https://github.com/m3hrdadfi/soxan to implement Wav2Vec2 for speech classification
+
 from dataclasses import dataclass
 from typing import Optional, Tuple
 import torch
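The diff only shows the import block of src/modeling_outputs.py. Based on the linked soxan repository, the file presumably defines a transformers-style output container along these lines (a sketch, not the verbatim file):

from dataclasses import dataclass
from typing import Optional, Tuple

import torch
from transformers.file_utils import ModelOutput

@dataclass
class SpeechClassifierOutput(ModelOutput):
    # standard ModelOutput container returned by the classification model
    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
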
src/models.py CHANGED
@@ -1,3 +1,5 @@
+#imported from https://github.com/m3hrdadfi/soxan to implement Wav2Vec2 for speech classification
+
 import torch
 import torch.nn as nn
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
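Likewise, only the imports of src/models.py appear in the diff. A condensed sketch of the Wav2Vec2ForSpeechClassification class it presumably contains, following the linked soxan repository; the mean-pooling strategy, head layout, and forward signature here are assumptions:

import torch
import torch.nn as nn
from transformers.models.wav2vec2.modeling_wav2vec2 import (
    Wav2Vec2Model,
    Wav2Vec2PreTrainedModel,
)

from src.modeling_outputs import SpeechClassifierOutput

class Wav2Vec2ClassificationHead(nn.Module):
    # simple pooled-output classification head: dense -> tanh -> dropout -> projection
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.final_dropout)
        self.out_proj = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, features):
        x = self.dropout(features)
        x = torch.tanh(self.dense(x))
        x = self.dropout(x)
        return self.out_proj(x)

class Wav2Vec2ForSpeechClassification(Wav2Vec2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.wav2vec2 = Wav2Vec2Model(config)
        self.classifier = Wav2Vec2ClassificationHead(config)
        self.init_weights()

    def forward(self, input_values, attention_mask=None, labels=None):
        outputs = self.wav2vec2(input_values, attention_mask=attention_mask)
        # mean-pool frame-level hidden states into a single utterance vector
        pooled = outputs[0].mean(dim=1)
        logits = self.classifier(pooled)
        loss = None
        if labels is not None:
            loss = CrossEntropyLoss()(logits.view(-1, self.config.num_labels), labels.view(-1))
        return SpeechClassifierOutput(loss=loss, logits=logits)

Mean pooling over time collapses the variable-length frame sequence into a fixed-size embedding, which is what lets a single linear head score whole audio clips regardless of duration.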