Spaces:
Sleeping
Sleeping
import requests | |
import os | |
import boto3 | |
from botocore import client | |
from datetime import datetime | |
from unidecode import unidecode | |
from langdetect import detect | |
from consts.locale import locales | |
from utils.tts import text_to_speech | |
from dotenv import load_dotenv | |
load_dotenv() | |
# Shared boto3 resource handle used by the functions in this module.
# Each keyword argument is read from the environment variable paired with it
# below; indexing os.environ means a missing variable raises KeyError here.
s3 = boto3.resource(
    **{
        option: os.environ[variable]
        for option, variable in (
            ("service_name", "S3_SERVICE_NAME"),
            ("region_name", "S3_REGION_NAME"),
            ("aws_access_key_id", "AWS_ACCESS_KEY_ID"),
            ("aws_secret_access_key", "AWS_SECRET_ACCESS_KEY"),
        )
    }
)
# Text-to-speech REST endpoint that texttospeech() posts SSML to.
API_URL = 'https://westus.tts.speech.microsoft.com/cognitiveservices/v1'

# Base HTTP headers sent with every synthesis request. The subscription key is
# read from the SPEECH_ACCESS_TOKEN environment variable when the module loads.
headers = dict((
    # Requested audio encoding of the response body.
    ('X-Microsoft-OutputFormat', 'riff-24khz-16bit-mono-pcm'),
    # The request body is an SSML document.
    ('Content-Type', 'application/ssml+xml'),
    ('Host', 'westus.tts.speech.microsoft.com'),
    ('Ocp-Apim-Subscription-Key', os.environ["SPEECH_ACCESS_TOKEN"]),
    # NOTE(review): this still looks like a template placeholder — confirm.
    ('User-Agent', '<Your application name>'),
))
def texttospeech(text, to_lang=None):
    """Synthesize *text* through the speech REST endpoint and publish it to S3.

    The language is ``to_lang`` when given, otherwise it is detected from the
    text. The text is transliterated with ``unidecode``, XML-escaped, wrapped
    in an SSML document built from the ``locales`` entry for that language and
    POSTed to ``API_URL``. The audio in the response is written to a temporary
    file under ``tmp/``, uploaded to the bucket named by the
    ``S3_BUCKET_NAME`` environment variable under ``speech/``, and the
    temporary file is deleted again before returning.

    Args:
        text: The text to speak.
        to_lang: Optional key into ``locales``; when falsy the language is
            detected from ``text``.

    Returns:
        A presigned ``get_object`` URL for the uploaded audio, or ``False``
        when the language is not in ``locales``, the service answers with a
        status other than 200, or any step raises an exception.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from xml.sax.saxutils import escape

    # Upper bound (seconds) so a stalled connection cannot block the caller.
    request_timeout = 30

    path = None
    try:
        lang = to_lang or detect(text)
        if lang not in locales:
            return False
        voice = locales[lang]

        converted_text = unidecode(text)
        print(converted_text)

        # Escape &, < and > so arbitrary input cannot break (or inject markup
        # into) the SSML document.
        ssml_message = f'''
<speak version='1.0' xml:lang='{voice["locale"]}'><voice xml:lang='{voice["locale"]}' xml:gender='{voice["gender"]}'
name='{voice["short_name"]}'>
{escape(converted_text)}
</voice></speak>'''

        # Send the exact bytes whose length is advertised, and work on a copy
        # of the shared header dict instead of mutating module state.
        payload = ssml_message.encode('utf-8')
        request_headers = dict(headers)
        request_headers['Content-Length'] = str(len(payload))

        response = requests.post(
            API_URL,
            headers=request_headers,
            data=payload,
            timeout=request_timeout,
        )
        print(response, 'response')
        if response.status_code != 200:
            return False

        # ':' is replaced because it is not valid in file names everywhere.
        now = str(datetime.now()).replace(':', '-')
        os.makedirs('tmp', exist_ok=True)
        path = f'tmp/{now}.wav'
        s3_path = f'speech/{now}.wav'

        with open(path, 'wb') as audio_file:
            print(path, 'path')
            audio_file.write(response.content)

        bucket_name = os.environ["S3_BUCKET_NAME"]
        s3.Bucket(bucket_name).upload_file(path, s3_path)
        url = s3.meta.client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                'Bucket': bucket_name,
                'Key': s3_path,
            },
        )
        return url
    except Exception as e:
        print('error occurred!', str(e))
        return False
    finally:
        # Best-effort cleanup on every exit path so temp files do not pile up.
        if path is not None and os.path.exists(path):
            try:
                os.remove(path)
            except OSError as cleanup_error:
                print('error occurred!', str(cleanup_error))
def texttospeech2(text, to_lang=None):
    """Synthesize *text* with the ``text_to_speech`` helper and publish it to S3.

    The language is ``to_lang`` when given, otherwise it is detected from the
    text. ``text_to_speech`` is asked to write the audio to a temporary file
    under ``tmp/``; that file is uploaded to the bucket named by the
    ``S3_BUCKET_NAME`` environment variable under ``speech/`` and is deleted
    again on every exit path.

    Args:
        text: The text to speak.
        to_lang: Optional key into ``locales``; when falsy the language is
            detected from ``text``.

    Returns:
        A presigned ``get_object`` URL for the uploaded audio, or ``False``
        when the language is not in ``locales``, ``text_to_speech`` returns a
        falsy result, or any step raises an exception.
    """
    path = None
    try:
        lang = to_lang or detect(text)
        if lang not in locales:
            return False

        # ':' is replaced because it is not valid in file names everywhere;
        # this also matches the naming used by texttospeech().
        now = str(datetime.now()).replace(':', '-')
        os.makedirs('tmp', exist_ok=True)
        path = f'tmp/{now}.wav'
        s3_path = f'speech/{now}.wav'

        result = text_to_speech(text, lang, path)
        if not result:
            return False

        bucket_name = os.environ["S3_BUCKET_NAME"]
        s3.Bucket(bucket_name).upload_file(path, s3_path)
        url = s3.meta.client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                'Bucket': bucket_name,
                'Key': s3_path,
            },
        )
        return url
    except Exception as e:
        print(e)
        return False
    finally:
        # Best-effort cleanup, including the failure paths where the original
        # left the temporary file behind.
        if path is not None and os.path.exists(path):
            try:
                os.remove(path)
            except OSError as cleanup_error:
                print(cleanup_error)