Spaces:

seba3y
/

CAPT-ReadAloud

Running

File size: 2,416 Bytes

5ac4106
86a5e7d
 
5ac4106
 
 
86a5e7d
 
5ac4106
86a5e7d
5ac4106
86a5e7d
 
 
 
 
 
 
 
 
5ac4106
 
 
86a5e7d
 
 
 
5ac4106
86a5e7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ac4106
86a5e7d
 
 
5ac4106
86a5e7d
 
5ac4106
86a5e7d
 
 
 
 
 
 
 
 
 
 
5ac4106
 
86a5e7d
 
91d2a90
86a5e7d
 
 
 
 
91d2a90
86a5e7d
5ac4106
 
 
86a5e7d

import html
import os
import tempfile

import gradio as gr
from scipy.io import wavfile

from logic import compare_audio_with_text



def create_html_from_scores(word_scores):
    """Render scored words as colour-coded HTML ``<span>`` elements.

    Args:
        word_scores: Iterable of ``(word, score)`` pairs. A score equal to
            ``1`` is rendered red, a score equal to ``2`` orange, and any
            other score green.

    Returns:
        The concatenated spans, each followed by a single space, or an
        empty string when ``word_scores`` is empty.
    """
    spans = []
    for word, score in word_scores:
        if score == 1:
            color = 'red'
        elif score == 2:
            color = 'orange'
        else:
            color = 'green'
        # The words originate from user-typed text and end up inside an HTML
        # component, so markup characters must be escaped. Quotes are left
        # alone because the word is element content, not an attribute value.
        safe_word = html.escape(str(word), quote=False)
        spans.append(f'<span style="color: {color};">{safe_word}</span> ')
    return ''.join(spans)




def analyze_audio(text, audio):
    """Score a recording against the reference text and return an HTML report.

    Args:
        text: The reference text, forwarded to ``compare_audio_with_text``.
        audio: A two-item sequence whose first item is passed to
            ``scipy.io.wavfile.write`` as the sample rate and whose second
            item is passed as the sample data, or ``None`` when no audio
            was supplied.

    Returns:
        An HTML string containing a style sheet, a colour legend and the
        colour-coded words produced by ``create_html_from_scores``.

    Raises:
        gr.Error: If ``audio`` is ``None``.
    """
    if audio is None:
        # Without this guard the indexing below fails with an opaque
        # TypeError; gr.Error surfaces a readable message in the UI instead.
        raise gr.Error("Please record or upload audio before submitting.")

    # Use a unique temporary file per call so that concurrent requests do not
    # overwrite each other's audio, and remove it once scoring is finished.
    fd, temp_filename = tempfile.mkstemp(suffix='.wav')
    os.close(fd)  # wavfile.write reopens the path itself
    try:
        wavfile.write(temp_filename, audio[0], audio[1])
        # NOTE(review): assumes compare_audio_with_text finishes reading the
        # file before it returns, and that it returns (word, score) pairs.
        result = compare_audio_with_text(temp_filename, text)
    finally:
        os.remove(temp_filename)

    html_content = create_html_from_scores(result)
    html_with_css = f"""
    <style>
    .legend {{
      font-size: 22px;
      display: flex;
      align-items: center;
      gap: 12px;
    }}
    
    .legend-dot {{
      height: 15px;
      width: 15px;
      border-radius: 50%;
      display: inline-block;
    }}
    
    .good {{ color: #28a745; }}
    .average {{ color: #ffc107; }}
    .bad {{ color: #dc3545; }}
    
    .text {{ font-size: 20px; }}
    </style>
    
    <div class="legend">
      <span class="legend-dot" style="background-color: #28a745;"></span><span>Good</span>
      <span class="legend-dot" style="background-color: #ffc107;"></span><span>Understandable</span>
      <span class="legend-dot" style="background-color: #dc3545;"></span><span>Bad</span>
    </div>
    
    <p class="text">
      {html_content}
    </p>
    """
    return html_with_css

# Define the Gradio interface: a text box for the reference text and an audio
# input, with the analysis rendered as HTML.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        gr.Textbox(
            label='Training Text',
            placeholder='Write the text for pronunciation task',
            interactive=True,
            visible=True,
            show_copy_button=True,
        ),
        gr.Audio(label="Recorded Audio"),
    ],
    outputs=[
        gr.HTML(label="Analysis of pronunciation"),
    ],
    # css=additional_css,
    # title="Audio Analysis Tool",
    description="Write any text and record audio to predict pronunciation errors",
)

# Run the Gradio app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()