Spaces:
Sleeping
Sleeping
File size: 4,002 Bytes
9fe0418 0122136 ea55bce 0122136 ea55bce 5ff2cba 0122136 9fe0418 0122136 300a138 0122136 ea55bce 0122136 ea55bce 300a138 ea55bce 300a138 ea55bce 8507b9e ea55bce 9fe0418 ea55bce 8507b9e ea55bce 0122136 ea55bce 3d73eb2 ea55bce 0122136 ea55bce 3d73eb2 ea55bce 9fe0418 0122136 ea55bce 0122136 ea55bce 0122136 5ff2cba 3d73eb2 5ff2cba ea55bce |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import requests
import os
import boto3
from botocore import client
from datetime import datetime
from unidecode import unidecode
from langdetect import detect
from consts.locale import locales
from utils.tts import text_to_speech
from dotenv import load_dotenv
# Run python-dotenv's loader first so the os.environ lookups below can be
# satisfied by values it provides.
load_dotenv()

# Connection settings for the shared S3 handle. Each value is read with
# os.environ[...] so a missing variable fails at import time with a KeyError
# rather than later, in the middle of a request.
_s3_settings = {
    "service_name": os.environ["S3_SERVICE_NAME"],
    "region_name": os.environ["S3_REGION_NAME"],
    "aws_access_key_id": os.environ["AWS_ACCESS_KEY_ID"],
    "aws_secret_access_key": os.environ["AWS_SECRET_ACCESS_KEY"],
}

# Module-level boto3 resource used by the functions in this file for uploads
# and for generating presigned URLs.
s3 = boto3.resource(**_s3_settings)
# Host of the speech service; used both in the endpoint URL and in the
# explicit Host header so the two cannot drift apart.
_SPEECH_HOST = 'westus.tts.speech.microsoft.com'

# Endpoint that receives the SSML documents.
API_URL = 'https://' + _SPEECH_HOST + '/cognitiveservices/v1'

# Base headers sent with every request to API_URL. The subscription key is
# read from the SPEECH_ACCESS_TOKEN environment variable at import time.
# NOTE(review): 'User-Agent' still holds a placeholder value -- confirm what
# the application name should be.
headers = {
    'X-Microsoft-OutputFormat': 'riff-24khz-16bit-mono-pcm',
    'Content-Type': 'application/ssml+xml',
    'Host': _SPEECH_HOST,
    'Ocp-Apim-Subscription-Key': os.environ["SPEECH_ACCESS_TOKEN"],
    'User-Agent': '<Your application name>',
}
def texttospeech(text, to_lang=None):
    """Synthesize ``text`` through the speech REST endpoint and publish it to S3.

    The language is ``to_lang`` when given, otherwise it is detected from
    ``text``. The text is transliterated with ``unidecode``, XML-escaped,
    wrapped in an SSML document built from ``locales[lang]`` and POSTed to
    ``API_URL``. The returned audio is written to ``tmp/<timestamp>.wav``,
    uploaded to the ``S3_BUCKET_NAME`` bucket as ``speech/<timestamp>.wav``,
    and the local file is removed afterwards.

    Args:
        text: The text to speak.
        to_lang: Optional language code used as a key into ``locales``;
            when falsy the language is detected from ``text``.

    Returns:
        A presigned ``get_object`` URL for the uploaded audio, or ``False``
        when the language is not in ``locales``, the service does not answer
        with HTTP 200, or any step raises.
    """
    # Imported locally so this function does not depend on changes to the
    # file's import list.
    from xml.sax.saxutils import escape

    path = None
    try:
        lang = to_lang or detect(text)
        if lang not in locales:
            return False
        voice = locales[lang]

        converted_text = unidecode(text)
        print(converted_text)

        # The SSML message. The text is escaped so that '&', '<' and '>' in
        # the input cannot produce malformed XML or inject SSML markup.
        ssml_message = f'''
<speak version='1.0' xml:lang='{voice["locale"]}'><voice xml:lang='{voice["locale"]}' xml:gender='{voice["gender"]}'
name='{voice["short_name"]}'>
{escape(converted_text)}
</voice></speak>'''

        # Send exactly the bytes whose length is advertised in Content-Length.
        payload = ssml_message.encode('utf-8')

        # Work on a per-request copy: the module-level ``headers`` dict is
        # shared, and mutating it would leak state between calls.
        request_headers = dict(headers)
        request_headers['Content-Length'] = str(len(payload))

        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.post(
            API_URL,
            headers=request_headers,
            data=payload,
            timeout=30,
        )
        print(response, 'response')
        if response.status_code != 200:
            return False

        # ':' is replaced so the timestamp is usable as a file name everywhere.
        now = str(datetime.now()).replace(':', '-')
        s3_path = f'speech/{now}.wav'

        os.makedirs('tmp', exist_ok=True)
        path = f'tmp/{now}.wav'
        with open(path, 'wb') as audio_file:
            print(path, 'path')
            audio_file.write(response.content)

        # Upload only after the ``with`` block has closed (and flushed) the file.
        bucket_name = os.environ["S3_BUCKET_NAME"]
        s3.Bucket(bucket_name).upload_file(path, s3_path)
        return s3.meta.client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                'Bucket': bucket_name,
                'Key': s3_path,
            },
        )
    except Exception as e:
        # Boundary handler: callers expect False rather than an exception.
        print('error occurred!', str(e))
        return False
    finally:
        # Best-effort cleanup of the local copy; a failure here must not
        # replace the function's result with an exception.
        if path is not None:
            try:
                os.remove(path)
            except OSError:
                pass
def texttospeech2(text, to_lang=None):
    """Synthesize ``text`` with the ``text_to_speech`` helper and publish it to S3.

    The language is ``to_lang`` when given, otherwise it is detected from
    ``text``. ``text_to_speech(text, lang, path)`` is called with a path of
    the form ``tmp/<timestamp>.wav``; when it returns a truthy value that
    file is uploaded to the ``S3_BUCKET_NAME`` bucket as
    ``speech/<timestamp>.wav``. The local file is removed on every exit path.

    Args:
        text: The text to speak.
        to_lang: Optional language code used as a key into ``locales``;
            when falsy the language is detected from ``text``.

    Returns:
        A presigned ``get_object`` URL for the uploaded audio, or ``False``
        when the language is not in ``locales``, ``text_to_speech`` returns a
        falsy value, or any step raises.
    """
    path = None
    try:
        lang = to_lang or detect(text)
        if lang not in locales:
            return False

        # ':' is replaced (as in texttospeech) so the timestamp is usable as
        # a file name everywhere.
        now = str(datetime.now()).replace(':', '-')
        s3_path = f'speech/{now}.wav'

        os.makedirs('tmp', exist_ok=True)
        path = f'tmp/{now}.wav'

        # Presumably writes the synthesized audio to ``path`` and reports
        # success via its return value -- verify against utils.tts.
        if not text_to_speech(text, lang, path):
            return False

        # Upload the content to aws s3
        bucket_name = os.environ["S3_BUCKET_NAME"]
        s3.Bucket(bucket_name).upload_file(path, s3_path)
        return s3.meta.client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                'Bucket': bucket_name,
                'Key': s3_path,
            },
        )
    except Exception as e:
        # Boundary handler: callers expect False rather than an exception.
        print(e)
        return False
    finally:
        # Best-effort cleanup that also runs when synthesis or the upload
        # fails, so temporary files do not pile up under tmp/.
        if path is not None:
            try:
                os.remove(path)
            except OSError:
                pass