# api_caller_v2 / caller_v2.py
# freddyaboulton's picture
# Update caller_v2.py
# 4de2c01 verified
import json
from typing import Iterator
import gradio as gr
from groq import Groq
from elevenlabs.client import ElevenLabs
import logging as log
import os
def transcribe_audio(audio_file_path, language, additional_text) -> "Iterator[tuple[str, bytes | None]]":
    """Stream a placeholder (status text, audio bytes) pair to the interface.

    The three parameters match the three inputs the interface passes in, but
    none of them is read here: the function always serves the contents of
    ``greetings.mp3``, resolved against the current working directory.

    Args:
        audio_file_path: Path of the uploaded audio file (currently unused).
        language: Selected language code (currently unused).
        additional_text: Free-form context text (currently unused).

    Yields:
        ``("Checking ...", <bytes of greetings.mp3>)`` on success, or
        ``("An error occurred: <message>", None)`` if reading the file (or
        anything else inside the ``try``) raises.
    """
    try:
        # A context manager closes the handle promptly, even if read() fails;
        # the bare open(...).read() it replaces left that to the garbage collector.
        with open('greetings.mp3', 'rb') as audio_file:
            bytes_data = audio_file.read()
        yield "Checking ...", bytes_data
    except Exception as e:
        print(f"error: {e}")
        # Record at ERROR level with the traceback; an INFO record is dropped
        # by the default logging configuration.
        log.exception("error: %s", e)
        yield f"An error occurred: {str(e)}", None
def speach_to_text():
    """Assemble the Gradio interface around ``transcribe_audio`` and launch it.

    The interface takes an audio file, a language choice and a free-text box,
    and shows a text response plus an autoplaying MP3 audio output.
    """
    # Example language codes for the dropdown; adjust to what Groq actually supports.
    language_codes = ["en", "ba", "ms", "is", "no", "id"]

    # Input components, created in the order the interface passes them to fn.
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    language_input = gr.Dropdown(choices=language_codes, label="Select Language", value="en")
    context_input = gr.Textbox(
        label="Additional Text",
        placeholder="Enter any additional context or instructions here...",
    )

    # Output components, matching the (text, audio) pairs that fn yields.
    response_output = gr.Textbox(label="Response")
    audio_output = gr.Audio(label="Audio Stream", autoplay=True, format="mp3")

    demo = gr.Interface(
        fn=transcribe_audio,
        inputs=[audio_input, language_input, context_input],
        outputs=[response_output, audio_output],
        title="Groq Speech-to-Text Transcription",
        description=(
            "Upload an audio file, set parameters, and provide additional text for context in the "
            "transcription process."
        ),
    )

    # Start serving the interface.
    demo.launch()
# Launch the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    speach_to_text()