# File size: 1,728 Bytes
# ccee59f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import json
import logging as log
import os
from typing import Iterator

import gradio as gr
from elevenlabs.client import ElevenLabs
from groq import Groq

# Initialize the Groq and ElevenLabs API clients.
# Credentials are taken from the environment (GROQ_API_KEY and
# ELEVENLABS_API_KEY) so real keys never have to be committed to the source.
# When a variable is unset the key falls back to the empty string, which is
# the value this file hard-coded before.
client = Groq(api_key=os.environ.get("GROQ_API_KEY", ""))
elevenlabs_client = ElevenLabs(
    api_key=os.environ.get("ELEVENLABS_API_KEY", "")
)



def transcribe_audio(audio_file_path, language, additional_text) -> Iterator[tuple]:
    """Yield a placeholder status message together with a canned audio clip.

    The three parameters are accepted so the function matches the inputs of
    the Gradio interface, but none of them is used yet: the function always
    reads the local ``greetings.mp3`` file and yields its raw bytes.

    Args:
        audio_file_path: Value of the audio input component (currently unused).
        language: Value of the language dropdown (currently unused).
        additional_text: Value of the free-text input (currently unused).

    Yields:
        ``("Checking ...", mp3_bytes)`` when the clip could be read, otherwise
        ``("An error occurred: <message>", None)``.
    """
    # The broad ``except Exception`` is deliberate: any failure is reported
    # to the caller as a yielded message instead of propagating.
    try:
        # The context manager closes the file handle deterministically instead
        # of leaving it open until the garbage collector reclaims it.
        with open('greetings.mp3', 'rb') as audio_fh:
            bytes_data = audio_fh.read()
        yield "Checking ...", bytes_data

    except Exception as e:
        print(f"error: {e}")
        # ``log.exception`` records at ERROR level with the traceback; the
        # previous INFO-level call was dropped by the default logging config.
        log.exception("error: %s", e)
        yield f"An error occurred: {str(e)}", None


def speach_to_text():
    """Build the Gradio speech-to-text demo interface and launch it.

    The interface feeds an audio upload, a language choice and a free-text
    field into ``transcribe_audio`` and shows the two values it yields in a
    text box and an auto-playing audio component.
    """
    # Example language codes offered in the dropdown; adjust them to whatever
    # Groq actually supports.
    language_codes = ["en", "ba", "ms", "is", "no", "id"]

    # Input components, in the positional order ``transcribe_audio`` expects.
    # (A "Transcription Quality" radio selector was sketched here earlier but
    # is currently disabled.)
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    language_input = gr.Dropdown(
        choices=language_codes,
        label="Select Language",
        value="en",
    )
    context_input = gr.Textbox(
        label="Additional Text",
        placeholder="Enter any additional context or instructions here...",
    )

    # Output components, matching the (message, audio) pairs that are yielded.
    response_output = gr.Textbox(label="Response")
    audio_output = gr.Audio(label="Audio Stream", autoplay=True, format="mp3")

    demo = gr.Interface(
        fn=transcribe_audio,
        inputs=[audio_input, language_input, context_input],
        outputs=[response_output, audio_output],
        title="Groq Speech-to-Text Transcription",
        description=(
            "Upload an audio file, set parameters, and provide additional text for context in the "
            "transcription process."
        ),
    )

    # Start serving the interface.
    demo.launch()


# Script entry point: start the Gradio app only when this file is run directly.
if __name__ == "__main__":
    speach_to_text()