File size: 5,648 Bytes
9683cd0
 
 
 
 
 
51558b1
9683cd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51558b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9683cd0
 
 
 
51558b1
9683cd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
import os
import requests
from transformers import pipeline

# FastAPI backend endpoint that receives the feedback submissions.
# The default points at an ngrok tunnel address (presumably temporary), so the
# endpoint can be overridden through the FEEDBACK_BACKEND_URL environment
# variable without editing this file. An unset or empty variable falls back to
# the default.
BACKEND_URL = (
    os.environ.get("FEEDBACK_BACKEND_URL")
    or "https://35d2-41-84-202-90.ngrok-free.app/submit-feedback"
)

# Maps each selectable language to the Hugging Face model ID used for it.
model_map = {
    "english": "jonatasgrosman/wav2vec2-large-xlsr-53-english"
}

# Create storage directory
os.makedirs("responses", exist_ok=True)

# Loaded ASR pipelines keyed by language, so each model is loaded once and
# reused instead of being rebuilt on every transcription request.
_asr_pipelines = {}


def _get_asr_pipeline(language):
    """Return the ASR pipeline for *language*, creating it on first use."""
    if language not in _asr_pipelines:
        _asr_pipelines[language] = pipeline(
            "automatic-speech-recognition", model=model_map[language], device=0
        )
    return _asr_pipelines[language]


# Transcription function
def transcribe(audio, language):
    """Transcribe an audio file with the model registered for *language*.

    Args:
        audio: Audio input handed to the ASR pipeline (the UI supplies a file
            path). Falsy when the user has not provided any audio.
        language: Key into ``model_map`` selecting the model to use.

    Returns:
        A ``(text, audio)`` tuple: the transcription and the unchanged audio
        input, so the UI can keep showing the same audio.

    Raises:
        gr.Error: If no audio was provided or the language is not one of the
            keys of ``model_map``. Gradio shows the message to the user.
    """
    if not audio:
        raise gr.Error("Please upload or record audio before transcribing.")
    if language not in model_map:
        raise gr.Error("Please select a language before transcribing.")

    asr = _get_asr_pipeline(language)
    text = asr(audio)["text"]
    return text, audio

# Save feedback by sending it to FastAPI backend
def save_feedback(audio_file, transcription, age_group, gender, evaluated_language, speak_level, write_level,
                  native, native_language, env, device, domain, accuracy, orthography, meaning, errors,
                  performance, improvement, usability, technical_issues, final_comments, email):
    """Send the audio file and the feedback form answers to the backend.

    The audio is uploaded as a multipart file named ``audio_file`` and every
    other argument is sent as a form field. ``env`` is sent under the key
    ``environment`` and ``errors`` (a list of selected options) is joined
    into one comma-separated string.

    Args:
        audio_file: Path to the audio file to upload; may be falsy when the
            user has not provided audio.
        transcription: Transcribed text being evaluated.
        errors: Iterable of selected error categories, or a falsy value.
        (all remaining arguments): Form answers forwarded unchanged.

    Returns:
        A human-readable status message: success when the backend answers
        with HTTP 201, otherwise a message describing what went wrong.
    """
    # Without this guard, open(None) would fail and be misreported as a
    # backend connection problem.
    if not audio_file:
        return "⚠️ Please upload or record audio before submitting feedback."

    # Read binary content of audio file; a local read failure is reported
    # as such rather than as a network error.
    try:
        with open(audio_file, "rb") as f:
            audio_content = f.read()
    except OSError as e:
        return f"❌ Could not read the audio file: {e}"

    # Prepare metadata as form fields
    metadata = {
        "transcription": transcription,
        "age_group": age_group,
        "gender": gender,
        "evaluated_language": evaluated_language,
        "speak_level": speak_level,
        "write_level": write_level,
        "native": native,
        "native_language": native_language,
        "environment": env,
        "device": device,
        "domain": domain,
        "accuracy": accuracy,
        "orthography": orthography,
        "meaning": meaning,
        "errors": ",".join(errors) if errors else "",
        "performance": performance,
        "improvement": improvement,
        "usability": usability,
        "technical_issues": technical_issues,
        "final_comments": final_comments,
        "email": email
    }

    files = {
        "audio_file": ("audio.wav", audio_content, "audio/wav")
    }

    # Only genuine request failures (connection, timeout, ...) are reported
    # as connectivity problems.
    try:
        response = requests.post(BACKEND_URL, data=metadata, files=files, timeout=20)
    except requests.RequestException as e:
        return f"❌ Could not connect to the backend: {str(e)}"

    if response.status_code == 201:
        return "✅ Feedback submitted successfully. Thank you!"
    # Separate the status code from the response body so the message is readable.
    return f"⚠️ Submission failed: {response.status_code} {response.text}"


# Gradio UI: a transcription section followed by a feedback form whose answers
# are submitted together with the audio and the transcription.
with gr.Blocks() as demo:
    gr.Markdown("## African ASR + Feedback")

    with gr.Row():
        audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or record audio")
        lang = gr.Dropdown(list(model_map.keys()), label="Select Language")

    transcribed_text = gr.Textbox(label="Transcribed Text")
    submit_btn = gr.Button("Transcribe")
    # transcribe returns (text, audio); the audio is written back to the same input.
    submit_btn.click(fn=transcribe, inputs=[audio_input, lang], outputs=[transcribed_text, audio_input])

    gr.Markdown("---\n## Feedback Form")

    # Rating sliders set step=1 explicitly so answers are whole numbers; when
    # step is omitted Gradio picks a fractional increment based on the range.
    age_group = gr.Dropdown(["18 to 30", "31 to 50", "50+", "Prefer not to say"], label="Age Group")
    gender = gr.Dropdown(["Male", "Female", "Prefer not to say", "Other"], label="Gender")
    evaluated_language = gr.Dropdown(list(model_map.keys()), label="Which language did you evaluate for?")
    speak_level = gr.Slider(1, 10, step=1, label="How well do you speak this language?")
    write_level = gr.Slider(1, 10, step=1, label="How well do you write the language?")
    native = gr.Radio(["Yes", "No"], label="Are you a native speaker of this language?")
    native_language = gr.Textbox(label="If not, what is your native language?")
    env = gr.Dropdown(["Studio/Professional Recording", "Quiet Room", "Noisy Background", "Multiple Environments", "Unsure", "Other"], label="Recording environment")
    device = gr.Dropdown(["Mobile Phone/Tablet", "Tablet", "Laptop/Computer Microphone", "Dedicated Microphone", "Unsure", "Other"], label="Recording device")
    domain = gr.Textbox(label="Was the speech related to a specific domain or topic? (Optional)")
    accuracy = gr.Slider(1, 10, step=1, label="How accurate was the model’s transcription?")
    orthography = gr.Dropdown(["Yes, mostly correct", "No, major issues", "Partially", "Not Applicable"], label="Did the transcription use standard orthography?")
    meaning = gr.Slider(1, 10, step=1, label="Did the transcription preserve the original meaning?")
    errors = gr.CheckboxGroup([
        "Substitutions", "Omissions", "Insertions", "Pronunciation-related", "Diacritic Errors",
        "Code-switching Errors", "Named Entity Errors", "Punctuation Errors", "No significant errors"
    ], label="Which errors were prominent?")
    performance = gr.Textbox(label="What did the model do well? What did it struggle with?")
    improvement = gr.Textbox(label="How could this ASR model be improved?")
    usability = gr.Slider(1, 5, step=1, label="How easy was it to use the tool?")
    technical_issues = gr.Textbox(label="Did you encounter any technical issues?")
    final_comments = gr.Textbox(label="Any other comments or suggestions?")
    email = gr.Textbox(label="Email (optional)")

    save_btn = gr.Button("Submit Feedback")
    output_msg = gr.Textbox(interactive=False)
    # The inputs list must stay in the same order as save_feedback's parameters.
    save_btn.click(fn=save_feedback,
                   inputs=[audio_input, transcribed_text, age_group, gender, evaluated_language, speak_level, write_level,
                           native, native_language, env, device, domain, accuracy, orthography, meaning, errors,
                           performance, improvement, usability, technical_issues, final_comments, email],
                   outputs=[output_msg])

# Launch the interface
demo.launch()