Spaces:

JohnJumon
/

pronunciation-scoring

Running

File size: 3,349 Bytes

e34aefe
 
c4c15bc
b784e4c
e34aefe
2e9243a
 
 
e34aefe
 
b784e4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e34aefe
 
 
 
 
 
 
 
b784e4c
e34aefe
 
 
b784e4c
 
 
 
 
 
 
 
 
e34aefe
b784e4c
 
 
 
e34aefe

import gradio as gr
from transformers import pipeline
import numpy as np
import os

# Load the three audio-classification models once at import time so every
# request reuses them. The models are created in the order listed:
# accuracy, then fluency, then prosodic.
accuracy_classifier, fluency_classifier, prosodic_classifier = (
    pipeline(task="audio-classification", model=model_id)
    for model_id in (
        "JohnJumon/pronunciation_accuracy",
        "JohnJumon/fluency_accuracy",
        "JohnJumon/prosodic_accuracy",
    )
)

# Explanation shown next to each predicted pronunciation-accuracy label.
_ACCURACY_DESCRIPTION = {
    'Extremely Poor': 'Extremely poor pronunciation and only one or two words are recognizable',
    'Poor': 'Poor, clumsy and rigid pronunciation of the sentence as a whole, with serious pronunciation mistakes',
    'Average': 'The overall pronunciation of the sentence is understandable, with many pronunciation mistakes and accent, but it does not affect the understanding of basic meanings',
    'Good': 'The overall pronunciation of the sentence is good, with a few pronunciation mistakes',
    'Excellent': 'The overall pronunciation of the sentence is excellent, with accurate phonology and no obvious pronunciation mistakes',
}

# Explanation shown next to each predicted fluency label.
_FLUENCY_DESCRIPTION = {
    'Very Influent': 'Intermittent, very influent speech, with lots of pauses, repetition, and stammering',
    'Influent': 'The speech is a little influent, with many pauses, repetition, and stammering',
    'Average': 'Fluent in general, with a few pauses, repetition, and stammering',
    'Fluent': 'Fluent without noticeable pauses or stammering',
}

# Explanation shown next to each predicted prosodic label.
_PROSODIC_DESCRIPTION = {
    'Poor': 'Poor intonation and lots of stammering and pauses, unable to read a complete sentence',
    'Unstable': 'Unstable speech speed, speak too fast or too slow, without the sense of rhythm',
    'Stable': 'Unstable speech speed, many stammering and pauses with a poor sense of rhythm',
    'Almost': 'Nearly correct intonation at a stable speaking speed, nearly smooth and coherent, but with little stammering and few pauses',
    'Perfect': 'Correct intonation at a stable speaking speed, speak with cadence, and can speak like a native',
}

# Shown when a classifier returns a label that has no entry in the tables above.
_NO_DESCRIPTION = 'No description is available for this label'


def _top_label(scores):
    """Return the 'label' of the entry in *scores* with the highest 'score'."""
    return max(scores, key=lambda entry: entry['score'])['label']


def pronunciation_scoring(audio):
    """Score a recording for pronunciation accuracy, fluency and prosody.

    The recording is passed to the three module-level classifiers
    (``accuracy_classifier``, ``fluency_classifier``, ``prosodic_classifier``).
    Each is expected to return an iterable of dicts carrying ``'label'`` and
    ``'score'`` keys; the label with the highest score is kept per category.

    Args:
        audio: The recording handed over by the Gradio audio input (a file
            path with the interface configured in this file), or ``None``
            when nothing was recorded.

    Returns:
        A 6-tuple of strings:
        ``(accuracy_label, accuracy_description, fluency_label,
        fluency_description, prosodic_label, prosodic_description)``.

    Raises:
        gr.Error: If ``audio`` is ``None``, so the UI shows a readable message
            instead of a failure raised from inside the classifiers.
    """
    # Gradio passes None when the user submits without recording anything.
    if audio is None:
        raise gr.Error("No audio was provided. Please record a sentence and try again.")

    # Run every model before post-processing, in the same order as before.
    accuracy_scores = accuracy_classifier(audio)
    fluency_scores = fluency_classifier(audio)
    prosodic_scores = prosodic_classifier(audio)

    accuracy_label = _top_label(accuracy_scores)
    fluency_label = _top_label(fluency_scores)
    prosodic_label = _top_label(prosodic_scores)

    # .get() keeps the request alive if a model emits a label that is not in
    # the description tables; the label itself is still returned unchanged.
    return (
        accuracy_label,
        _ACCURACY_DESCRIPTION.get(accuracy_label, _NO_DESCRIPTION),
        fluency_label,
        _FLUENCY_DESCRIPTION.get(fluency_label, _NO_DESCRIPTION),
        prosodic_label,
        _PROSODIC_DESCRIPTION.get(prosodic_label, _NO_DESCRIPTION),
    )

# Web UI: one microphone input, and for each of the three categories a label
# widget followed by a read-only text box holding its description. The output
# order must match the 6-tuple returned by pronunciation_scoring.
gradio_app = gr.Interface(
    fn=pronunciation_scoring,
    inputs=gr.Audio(type="filepath", sources="microphone"),
    outputs=[
        widget
        for heading in ("Accuracy Result", "Fluency Result", "Prosodic Result")
        for widget in (
            gr.Label(label=heading),
            gr.Textbox(interactive=False, show_label=False),
        )
    ],
    title="Pronunciation Scoring",
    description="This app will score your pronunciation accuracy, fluency, and prosodic (intonation)",
    # The sample clip is looked up next to this file.
    examples=[[os.path.join(os.path.dirname(__file__), "audio.wav")]],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()