JohnJumon committed on
Commit
b784e4c
1 Parent(s): 425647d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -9
app.py CHANGED
@@ -1,18 +1,36 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import numpy as np
 
4
 
5
  accuracy_classifier = pipeline(task="audio-classification", model="JohnJumon/pronunciation_accuracy")
6
  fluency_classifier = pipeline(task="audio-classification", model="JohnJumon/fluency_accuracy")
7
  prosodic_classifier = pipeline(task="audio-classification", model="JohnJumon/prosodic_accuracy")
8
 
9
  def pronunciation_scoring(audio):
10
- y = audio
11
- y = y.astype(np.float32)
12
- y /= np.max(np.abs(y))
13
- accuracy = accuracy_classifier(y)
14
- fluency = fluency_classifier(y)
15
- prosodic = prosodic_classifier(y)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  result = {
17
  'accuracy': accuracy,
18
  'fluency': fluency,
@@ -21,13 +39,24 @@ def pronunciation_scoring(audio):
21
  for category, scores in result.items():
22
  max_score_label = max(scores, key=lambda x: x['score'])['label']
23
  result[category] = max_score_label
24
- return result
25
 
26
  gradio_app = gr.Interface(
27
  pronunciation_scoring,
28
- inputs=gr.Audio(sources=["microphone"]),
29
- outputs=gr.Label(label="Result"),
 
 
 
 
 
 
 
30
  title="Pronunciation Scoring",
 
 
 
 
31
  )
32
 
33
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import numpy as np
4
+ import os
5
 
6
  accuracy_classifier = pipeline(task="audio-classification", model="JohnJumon/pronunciation_accuracy")
7
  fluency_classifier = pipeline(task="audio-classification", model="JohnJumon/fluency_accuracy")
8
  prosodic_classifier = pipeline(task="audio-classification", model="JohnJumon/prosodic_accuracy")
9
 
10
  def pronunciation_scoring(audio):
11
+ accuracy_description = {
12
+ 'Extremely Poor': 'Extremely poor pronunciation and only one or two words are recognizable',
13
+ 'Poor': 'Poor, clumsy and rigid pronunciation of the sentence as a whole, with serious pronunciation mistakes',
14
+ 'Average': 'The overall pronunciation of the sentence is understandable, with many pronunciation mistakes and accent, but it does not affect the understanding of basic meanings',
15
+ 'Good': 'The overall pronunciation of the sentence is good, with a few pronunciation mistakes',
16
+ 'Excellent': 'The overall pronunciation of the sentence is excellent, with accurate phonology and no obvious pronunciation mistakes'
17
+ }
18
+ fluency_description = {
19
+ 'Very Influent': 'Intermittent, very influent speech, with lots of pauses, repetition, and stammering',
20
+ 'Influent': 'The speech is a little influent, with many pauses, repetition, and stammering',
21
+ 'Average': 'Fluent in general, with a few pauses, repetition, and stammering',
22
+ 'Fluent': 'Fluent without noticeable pauses or stammering'
23
+ }
24
+ prosodic_description = {
25
+ 'Poor': 'Poor intonation and lots of stammering and pauses, unable to read a complete sentence',
26
+ 'Unstable': 'Unstable speech speed, speak too fast or too slow, without the sense of rhythm',
27
+ 'Stable': 'Unstable speech speed, many stammering and pauses with a poor sense of rhythm',
28
+ 'Almost': 'Nearly correct intonation at a stable speaking speed, nearly smooth and coherent, but with little stammering and few pauses',
29
+ 'Perfect': 'Correct intonation at a stable speaking speed, speak with cadence, and can speak like a native'
30
+ }
31
+ accuracy = accuracy_classifier(audio)
32
+ fluency = fluency_classifier(audio)
33
+ prosodic = prosodic_classifier(audio)
34
  result = {
35
  'accuracy': accuracy,
36
  'fluency': fluency,
 
39
  for category, scores in result.items():
40
  max_score_label = max(scores, key=lambda x: x['score'])['label']
41
  result[category] = max_score_label
42
+ return result['accuracy'], accuracy_description[result['accuracy']], result['fluency'], fluency_description[result['fluency']], result['prosodic'], prosodic_description[result['prosodic']]
43
 
44
  gradio_app = gr.Interface(
45
  pronunciation_scoring,
46
+ inputs=gr.Audio(sources="microphone", type="filepath"),
47
+ outputs=[
48
+ gr.Label(label="Accuracy Result"),
49
+ gr.Textbox(interactive=False, show_label=False),
50
+ gr.Label(label="Fluency Result"),
51
+ gr.Textbox(interactive=False, show_label=False),
52
+ gr.Label(label="Prosodic Result"),
53
+ gr.Textbox(interactive=False, show_label=False)
54
+ ],
55
  title="Pronunciation Scoring",
56
+ description="This app will score your pronunciation accuracy, fluency, and prosodic (intonation)",
57
+ examples=[
58
+ [os.path.join(os.path.dirname(__file__),"audio.wav")],
59
+ ]
60
  )
61
 
62
  if __name__ == "__main__":