File size: 2,615 Bytes
850343e
f626de0
 
 
 
 
 
 
 
850343e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f626de0
850343e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# import gradio as gr
from ultralytics import YOLO

# Load the YOLO pose weights once at module import; poseImage() calls this
# shared instance for every incoming frame.
model = YOLO('yolo11n-pose.pt')

def poseImage(image):
    """Run pose estimation on an image and return the annotated frame.

    Args:
        image: Image array as delivered by the ``gr.Image(type='numpy')``
            input (assumed to be RGB with the channels on the last axis,
            which is Gradio's default), or ``None`` when the input has
            been cleared.

    Returns:
        The annotated image as an RGB array, or ``None`` when there is no
        input image so the output component is simply emptied.
    """
    # Gradio fires the change event with None when the image is cleared.
    # Handing None to the model would make Ultralytics fall back to its
    # bundled sample images instead of doing nothing.
    if image is None:
        return None

    # Ultralytics interprets ndarray inputs as BGR (OpenCV convention) while
    # Gradio supplies RGB, so flip the channel axis before inference.
    # .copy() yields a contiguous array for the downstream OpenCV calls.
    bgr_frame = image[..., ::-1].copy()
    results = model(bgr_frame)

    # plot() returns the annotated frame in BGR; flip back to RGB for display.
    annotated_bgr = results[0].plot()
    return annotated_bgr[..., ::-1].copy()

# interface = gr.Interface(
#     fn=poseImage,
#     inputs=gr.Image(streaming=True),
#     outputs=gr.Image(),
#     live=True
# )

# interface.launch()

import gradio as gr
import speech_recognition as sr
from PIL import Image

# Image handler: currently a pure pass-through.
def process_image(image):
    """Return the given image unchanged."""
    passthrough = image
    return passthrough

# Audio handler: transcribe a recorded clip to text.
def process_audio(audio):
    """Transcribe an audio file to text using ``recognize_google``.

    Args:
        audio: Path to an audio file readable by ``sr.AudioFile``, or
            ``None``/empty when no recording is available (for example
            after the audio input has been cleared).

    Returns:
        The recognized text on success, an empty string when there is no
        audio to process, or a user-facing message describing why
        recognition failed.
    """
    # Nothing recorded (or the recording was cleared): return empty text
    # instead of letting sr.AudioFile raise on a missing path.
    if not audio:
        return ""

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)

    # The file is only needed while reading samples, so it is closed before
    # the recognition request is made.
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Audio tidak dapat dikenali."
    except sr.RequestError as e:
        return f"Error dengan layanan pengenalan suara: {e}"

# Gradio interface: a radio selector switches between an image pipeline
# (image in -> annotated image out) and an audio pipeline (audio in -> text).
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("## Aplikasi Input Gambar/Audio")

    with gr.Row():
        input_type = gr.Radio(["Gambar", "Audio"], label="Pilih Jenis Input", value="Gambar")

    with gr.Row():
        with gr.Column():
            # The radio defaults to "Gambar" and its change event does not
            # fire on page load, so the image widgets must start visible or
            # the UI is blank until the user toggles the selector.
            # `live` is a gr.Interface option, not a gr.Image parameter, so
            # it is not passed here.
            image_input = gr.Image(type='numpy', label="Masukkan Gambar", visible=True)
            audio_input = gr.Audio(sources="microphone", type="filepath", label="Masukkan Audio", visible=False)

        with gr.Column():
            image_output = gr.Image(label="Hasil Gambar", visible=True)
            text_output = gr.Textbox(label="Hasil Audio", visible=False)

    def update_visibility(input_type):
        """Return visibility updates for the selected input type.

        Args:
            input_type: Current value of the radio selector.

        Returns:
            A 4-tuple of ``gr.update`` objects, in the order
            (image_input, audio_input, image_output, text_output).
            The image pair is shown for "Gambar", the audio pair for
            "Audio"; any other value hides both pairs.
        """
        show_image = input_type == "Gambar"
        show_audio = input_type == "Audio"
        return (
            gr.update(visible=show_image),
            gr.update(visible=show_audio),
            gr.update(visible=show_image),
            gr.update(visible=show_audio),
        )

    input_type.change(
        update_visibility,
        inputs=[input_type],
        outputs=[image_input, audio_input, image_output, text_output],
    )

    # Wire each input to its handler and output component.
    image_input.change(poseImage, inputs=[image_input], outputs=[image_output])
    audio_input.change(process_audio, inputs=[audio_input], outputs=[text_output])

# Launch the application.
demo.launch()