Spaces:
Sleeping
Sleeping
File size: 4,705 Bytes
4177df5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import requests
import base64
import io
import json
import gradio as gr
from gradio import Text
import base64
import numpy as np
from pydub import AudioSegment
# Remote text-to-speech endpoint that receives the POST requests.
url = "https://ruslanmv-hf-llm-api-collection.hf.space/tts"

# HTTP headers sent with every request: the body is JSON and a JSON reply is
# requested.
# NOTE(review): the "accept" value may need adjusting if the endpoint does not
# answer with JSON.
headers = {
    "Content-Type": "application/json",
    "accept": "application/json",
}
def convert_text_to_base64(text, language="en", timeout=60):
    """Convert text to a base64-encoded audio string using the TTS API.

    The text is POSTed as JSON to the module-level ``url`` with the
    module-level ``headers``. The reply is accepted in two shapes: a JSON
    object carrying the audio in its "audio" field, or a non-JSON body that
    is taken to be the raw audio bytes.

    Args:
        text (str): The text to convert to speech.
        language (str, optional): The language code for the speech
            (default: "en").
        timeout (float, optional): Seconds to wait for the API before giving
            up (default: 60).

    Returns:
        str: The base64-encoded audio string on success, None on error.
        Errors are reported with ``print`` rather than raised.
    """
    payload = {"input_text": text, "from_language": language}

    # Without a timeout a stalled server would block the caller forever.
    # TypeError/ValueError cover a payload that cannot be serialised, so the
    # "None on error" contract still holds for bad input.
    try:
        response = requests.post(
            url, headers=headers, json=payload, timeout=timeout
        )
    except (requests.RequestException, TypeError, ValueError) as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        body = response.json()
    except ValueError:
        # Every decode error requests can raise (stdlib json or simplejson
        # flavoured) derives from ValueError. A non-JSON body is treated as
        # the raw audio bytes and encoded directly.
        return base64.b64encode(response.content).decode("utf-8")

    if not isinstance(body, dict):
        print("Error: Missing audio data in response.")
        return None

    # A "detail" key is how the API reports an error inside a 200 reply.
    if "detail" in body:
        print(f"Error: {body['detail']}")
        return None

    audio = body.get("audio")
    if not audio:
        print("Error: Missing audio data in response.")
        return None
    if not isinstance(audio, str):
        print("Error: Unsupported audio data in response.")
        return None

    # JSON cannot carry raw bytes, so a string payload is presumed to be
    # base64 text already (NOTE(review): confirm against the API). It is
    # validated instead of being encoded a second time; whitespace such as
    # line wrapping is dropped first.
    encoded = "".join(audio.split())
    try:
        base64.b64decode(encoded, validate=True)
    except ValueError:
        print("Error: Audio data in response is not valid base64.")
        return None
    return encoded
def get_audio_properties(audio_data):
    """Decode encoded audio bytes and report their basic properties.

    The bytes are handed to pydub's ``AudioSegment.from_file``, first as WAV
    and, if that fails, as MP3.

    Args:
        audio_data (bytes): The encoded audio to inspect.

    Returns:
        dict: A dictionary with the keys
            "format": "wav" or "mp3", whichever decoder succeeded.
            "duration": ``len(audio_segment) / 1000.0`` (seconds).
            "bitrate": the segment's ``frame_rate``. The key name is kept
                for compatibility even though the value is the frame rate.
            "channels": the segment's channel count.
            "sample_width": the segment's ``sample_width``.
            "audio_segment": the decoded ``AudioSegment`` itself.

    Raises:
        ValueError: If the data can be read neither as WAV nor as MP3. The
            MP3 decoding error is attached as the cause.
    """
    last_error = None
    for candidate in ("wav", "mp3"):
        try:
            audio_segment = AudioSegment.from_file(
                io.BytesIO(audio_data), format=candidate
            )
        except Exception as exc:
            # Any decoding failure just means "try the next format".
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            last_error = exc
        else:
            audio_format = candidate
            break
    else:
        raise ValueError(f"Unknown audio format: {last_error}") from last_error

    return {
        "format": audio_format,
        "duration": len(audio_segment) / 1000.0,  # duration in seconds
        "bitrate": audio_segment.frame_rate,
        "channels": audio_segment.channels,
        "sample_width": audio_segment.sample_width,
        "audio_segment": audio_segment,
    }
def play_audio(text):
    """Convert text to speech through the API and return it for playback.

    Args:
        text (str): The text to synthesise.

    Returns:
        tuple: ``(sample_rate, samples)`` where ``sample_rate`` is the decoded
        audio's frame rate and ``samples`` is a NumPy array of its samples,
        reshaped to ``(frames, channels)`` when the audio has more than one
        channel. This is the pair the ``gr.Audio(type="numpy")`` output
        consumes.

    Raises:
        gr.Error: If the API conversion fails or the returned audio cannot be
            decoded, so the message is shown in the UI instead of a string
            being passed to the audio output.
    """
    base64_encoded_audio = convert_text_to_base64(text)
    if not base64_encoded_audio:
        raise gr.Error("Error occurred during conversion.")

    audio_data = base64.b64decode(base64_encoded_audio)

    try:
        properties = get_audio_properties(audio_data)
    except ValueError as exc:
        raise gr.Error(str(exc)) from exc
    print("Audio Properties:", properties)

    audio_segment = properties["audio_segment"]
    samples = np.array(audio_segment.get_array_of_samples())
    channels = audio_segment.channels
    if channels > 1:
        # Multi-channel samples come back interleaved in one flat array;
        # give each channel its own column.
        samples = samples.reshape((-1, channels))

    # Report the real frame rate: a hard-coded value plays audio recorded at
    # any other rate at the wrong speed.
    return audio_segment.frame_rate, samples
# Build the web UI: a single text box feeds play_audio and the result is shown
# in an audio player.
interface = gr.Interface(
    title="Text to Speech API",
    description="Developed by Ruslan Magana, visit <a href='https://ruslanmv.com/' target='_blank'>ruslanmv.com</a> for more information.",
    fn=play_audio,
    inputs=Text(label="Enter text to convert to speech"),
    outputs=gr.Audio(type="numpy", label="Generated audio"),
    # live=True (live updates) is left disabled.
)

# Start serving the interface.
interface.launch()
|