Spaces:

seba3y
/

CAPT-ReadAloud

Running

File size: 4,905 Bytes

be1b9b7

import html
import os
import tempfile

import gradio as gr
from scipy.io import wavfile

# from logic import Speaker_speech_analysis
from wav2vec_aligen import speaker_pronunciation_assesment



def create_html_from_scores(word_levels):
    """Render words as colour-coded HTML spans according to their level.

    Args:
        word_levels: Iterable of ``(word, level)`` pairs. ``level`` selects
            the colour:

            * ``'/'`` -> blue; the ``'/'`` marker itself is shown instead of
              the word.
            * ``'Wrong'`` -> red.
            * ``'Understandable'`` -> amber.
            * anything else -> green.

    Returns:
        A string of ``<span>`` elements, each followed by a single space.
        An empty iterable yields ``''``.
    """
    spans = []
    for word, level in word_levels:
        if level == '/':
            color, shown = '#0000ff', level
        elif level == 'Wrong':
            color, shown = '#dc3545', word
        elif level == 'Understandable':
            color, shown = '#ffc107', word
        else:
            color, shown = '#28a745', word
        # Escape &, < and > so text never turns into markup; quotes are left
        # alone because the value only ever lands in element content.
        safe_text = html.escape(str(shown), quote=False)
        spans.append(f'<span style="color: {color};">{safe_text}</span> ')
    # Join once instead of growing a string with += on every iteration.
    return ''.join(spans)
  
def generate_progress_bar(score, label):
    """Render one labelled score as an HTML progress bar.

    Args:
        score: Numeric score, nominally on a 0-100 scale. It is rounded to
            two decimals before being displayed.
        label: Caption shown above the bar; inserted into the HTML as-is.

    Returns:
        An HTML fragment made of the label, the bar container with the
        filled bar and score text, and a "Max: 100" footer. The fragment
        uses the ``progress-*`` CSS classes, which the embedding page must
        define.
    """
    score = round(score, 2)
    score_text = f"{score:.2f}" if score < 100 else "100"
    if score < 30:
        bar_color = "#dc3545"
    elif score < 60:
        bar_color = "#dc6545"
    elif score < 80:
        bar_color = "#ffc107"
    else:
        bar_color = "#28a745"
    # The score already is a percentage. Format it directly rather than via
    # (score / 100) * 100, which leaked float noise into the CSS (57 became
    # 56.99999999999999%), and clamp it so out-of-range scores cannot
    # produce a negative or >100% width.
    bar_length = f"{min(max(score, 0), 100):.2f}%"
    return f"""
    <div class="progress-label">{label}:</div>
    <div class="progress-container">
        <div class="progress-bar" style="width: {bar_length}; background-color: {bar_color};">
            <div class="progress-score">{score_text}</div>
        </div>
    </div>
    <div class="progress-max">Max: 100</div>
    """
# NOTE: the CSS for the report is embedded inline in the HTML returned by analyze_audio.




def analyze_audio(text, audio):
    """Score a recording and return the result as a self-contained HTML report.

    Args:
        text: Reference text entered by the user. It is only validated here;
            it is not forwarded to ``speaker_pronunciation_assesment``.
            NOTE(review): confirm whether the scorer should receive it.
        audio: Recording as delivered by the ``gr.Audio`` input. ``audio[0]``
            is written as the WAV sample rate and ``audio[1]`` as the sample
            data.

    Returns:
        The string ``'the audio or the text is missing'`` when either input
        is absent or the text is blank; otherwise an HTML string containing
        the inline CSS, a colour legend and one progress bar each for the
        total, pronunciation, fluency and content scores.
    """
    # A blank textbox arrives as '' (not None), so test for emptiness too.
    if text is None or not text.strip() or audio is None:
        return 'the audio or the text is missing'

    # Write the recording to a unique temporary WAV file. A fixed file name
    # in the working directory would be overwritten by concurrent requests
    # and would never be cleaned up.
    fd, temp_filename = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # wavfile.write opens the path itself
    try:
        wavfile.write(temp_filename, audio[0], audio[1])
        result = speaker_pronunciation_assesment(temp_filename)
    finally:
        os.remove(temp_filename)

    accuracy_score = result['pronunciation_accuracy']
    fluency_score = result['fluency_score']
    total_score = result['total_score']
    content_scores = result['content_scores']

    pronunciation_progress_bar = generate_progress_bar(accuracy_score, "Pronunciation Accuracy")
    fluency_progress_bar = generate_progress_bar(fluency_score, "Fluency Score")
    content_progress_bar = generate_progress_bar(content_scores, "Content Score")
    total_progress_bar = generate_progress_bar(total_score, "Total Score")

    # Braces are doubled inside the CSS because this is an f-string.
    html_with_css = f"""
    <style>
    .legend {{
      font-size: 22px;
      display: flex;
      align-items: center;
      gap: 12px;
    }}
    
    .legend-dot {{
        height: 15px;
        width: 15px;
        border-radius: 50%;
        display: inline-block;
      }}
      
    .good {{ color: #28a745; 
    }}
    .average {{ color: #ffc107; 
    }}
    .bad {{ color: #dc3545;
    }}
    
    .wrong {{ color: #dc3545;
    }}
        
    .text {{
        font-size: 20px;
        margin-bottom: 20px;
      }}

    .progress-container {{
        width: 100%;
        background-color: #ddd;
        border-radius: 13px;
        overflow: hidden;
      }}

    .progress-bar {{
        height: 30px;
        line-height: 30px;
        text-align: center;
        font-size: 16px;
        border-radius: 15px;
        transition: width 1s ease;
      }}

    .progress-label {{
        font-weight: bold;
        font-size: 22px;
        margin-bottom: 20px;
        margin-top: 5px;
        text-align: center;
      }}

    .progress-score {{
        display: inline-block;
        color: black;
      }}

    .progress-max {{
        text-align: right;
        margin: 10px;
        font-size: 16px;
      }}
        
    </style>
    
    
    <div class="legend">
      <span class="legend-dot" style="background-color: #28a745;"></span><span>Good</span>
      <span class="legend-dot" style="background-color: #ffc107;"></span><span>Understandable</span>
      <span class="legend-dot" style="background-color: #dc3545;"></span><span>Bad</span>
      <span class="legend-dot" style="background-color: #0000ff;"></span><span>No Speech</span>
    </div>
    
    {total_progress_bar}
    {pronunciation_progress_bar}
    {fluency_progress_bar}
    {content_progress_bar}
    """

    return html_with_css

# Define the Gradio interface: the reference text and a recording go in,
# and the HTML report produced by analyze_audio comes out.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        gr.Textbox(
            label='Training Text',
            placeholder='Write the text for pronunciation task',
            interactive=True,
            visible=True,
            show_copy_button=True,
        ),
        gr.Audio(label="Recorded Audio", sources=['microphone', 'upload']),
    ],
    outputs=[gr.HTML(label="Analysis of pronunciation")],
    description="Write any text and record audio to predict pronunciation errors",
)

# Run the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)