# nextchat / ai / tts.py
# shoaibatservionsoft
# hf update
# 3d73eb2
import requests
import os
import boto3
from botocore import client
from datetime import datetime
from unidecode import unidecode
from langdetect import detect
from consts.locale import locales
from utils.tts import text_to_speech
from dotenv import load_dotenv
# Load variables from a .env file (if any) into the process environment
# before the os.environ lookups below run.
load_dotenv()

# Shared boto3 resource used by the functions in this module for uploads and
# for presigning download URLs. Every setting is read with os.environ[...],
# so a missing variable raises KeyError while the module is being imported.
s3 = boto3.resource(
    service_name=os.environ["S3_SERVICE_NAME"],
    region_name=os.environ["S3_REGION_NAME"],
    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
)

# Text-to-speech REST endpoint that texttospeech() posts SSML to.
API_URL = "https://westus.tts.speech.microsoft.com/cognitiveservices/v1"

# Base request headers for API_URL.
headers = {
    # Audio format requested from the service.
    "X-Microsoft-OutputFormat": "riff-24khz-16bit-mono-pcm",
    # The request body is an SSML document.
    "Content-Type": "application/ssml+xml",
    # Same host as the one in API_URL.
    "Host": "westus.tts.speech.microsoft.com",
    # Subscription key taken from the environment (KeyError if unset).
    "Ocp-Apim-Subscription-Key": os.environ["SPEECH_ACCESS_TOKEN"],
    # Placeholder value, sent as-is.
    "User-Agent": "<Your application name>",
}
def _build_ssml(text, voice):
    """Return the SSML document asking *voice* to speak *text*.

    *voice* is one entry of ``locales``; its ``locale``, ``gender`` and
    ``short_name`` values are copied into the markup. *text* is XML-escaped
    so that ``&``, ``<`` and ``>`` in user input cannot produce a malformed
    document.
    """
    from xml.sax.saxutils import escape

    return f'''
<speak version='1.0' xml:lang='{voice["locale"]}'><voice xml:lang='{voice["locale"]}' xml:gender='{voice["gender"]}'
name='{voice["short_name"]}'>
{escape(text)}
</voice></speak>'''


def texttospeech(text, to_lang=None):
    """Synthesize *text* through the speech REST API and publish it on S3.

    Args:
        text: Text to speak. It is transliterated to ASCII with
            ``unidecode`` before being sent.
        to_lang: Language code used as a key into ``locales``. When falsy,
            the language is guessed with ``langdetect.detect``.

    Returns:
        A presigned S3 GET URL for the uploaded WAV file, or ``False`` when
        the language is not in ``locales``, the speech API does not answer
        with HTTP 200, or any exception is raised along the way (the
        exception is printed, never propagated).
    """
    try:
        lang = to_lang or detect(text)
        if lang not in locales:
            return False

        converted_text = unidecode(text)
        print(converted_text)

        # Encode once so the bytes sent are exactly the bytes whose length
        # requests reports in Content-Length.
        payload = _build_ssml(converted_text, locales[lang]).encode('utf-8')

        # Send a per-call copy: the module-level ``headers`` dict is shared
        # between calls and must not be mutated. requests fills in
        # Content-Length from the bytes body by itself.
        response = requests.post(
            API_URL,
            headers=dict(headers),
            data=payload,
            # (connect, read) seconds; without a timeout requests can block
            # forever on a stalled connection.
            timeout=(10, 120),
        )
        print(response, 'response')
        if response.status_code != 200:
            return False

        # ':' is replaced because it is not valid in file names everywhere.
        now = str(datetime.now()).replace(':', '-')
        path = f'tmp/{now}.wav'
        s3_path = f'speech/{now}.wav'
        bucket_name = os.environ["S3_BUCKET_NAME"]

        os.makedirs('tmp', exist_ok=True)
        try:
            with open(path, 'wb') as audio_file:
                print(path, 'path')
                audio_file.write(response.content)
            # Upload only after the file is closed so every byte is on disk.
            s3.Bucket(bucket_name).upload_file(path, s3_path)
        finally:
            # The local copy is only a staging file; never leave it behind,
            # whether or not the upload succeeded.
            if os.path.exists(path):
                os.remove(path)

        return s3.meta.client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                'Bucket': bucket_name,
                'Key': s3_path,
            },
        )
    except Exception as e:
        # Deliberate catch-all: callers rely on a False return, not on
        # exceptions, to detect failure.
        print('error occurred!', str(e))
        return False
def texttospeech2(text, to_lang=None):
    """Synthesize *text* with ``utils.tts.text_to_speech`` and publish it on S3.

    Args:
        text: Text to speak; passed unchanged to ``text_to_speech``.
        to_lang: Language code used as a key into ``locales``. When falsy,
            the language is guessed with ``langdetect.detect``.

    Returns:
        A presigned S3 GET URL for the uploaded WAV file, or ``False`` when
        the language is not in ``locales``, ``text_to_speech`` returns a
        falsy result, or any exception is raised along the way (the
        exception is printed, never propagated).
    """
    try:
        lang = to_lang or detect(text)
        if lang not in locales:
            return False

        # Same naming scheme as texttospeech(): ':' is replaced because it
        # is not valid in file names everywhere.
        now = str(datetime.now()).replace(':', '-')
        path = f'tmp/{now}.wav'
        s3_path = f'speech/{now}.wav'
        bucket_name = os.environ["S3_BUCKET_NAME"]

        os.makedirs('tmp', exist_ok=True)
        try:
            # text_to_speech is expected to write the audio to ``path``;
            # the file is uploaded from there on success.
            if not text_to_speech(text, lang, path):
                return False
            s3.Bucket(bucket_name).upload_file(path, s3_path)
        finally:
            # Remove the staging file on every exit path, not only after a
            # successful upload.
            if os.path.exists(path):
                os.remove(path)

        return s3.meta.client.generate_presigned_url(
            ClientMethod='get_object',
            Params={
                'Bucket': bucket_name,
                'Key': s3_path,
            },
        )
    except Exception as e:
        # Deliberate catch-all: callers rely on a False return, not on
        # exceptions, to detect failure.
        print(e)
        return False