Spaces:

aryamanstha
/

TTSNepali

Sleeping

App Files Files Community

TTSNepali / app.py

aryamanstha

Update app.py

2a70d4e verified 4 months ago

raw

history blame contribute delete

6.4 kB

	import gradio as gr
	from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech,pipeline
	from datasets import Dataset, Audio,load_dataset
	import os
	import torch
	import pandas as pd
	import numpy as np
	import librosa
	from speechbrain.inference.speaker import EncoderClassifier
	from transformers import SpeechT5HifiGan
	import soundfile as sf
	import matplotlib.pyplot as plt
	import lakh_crore_numbers as lcn
	import re
	from nepali.number import nepalinumber

	# Convert Nepali digits to words
	def convert_to_text(text):
	    """Spell out every number in ``text`` as Nepali words.

	    Numbers wrapped in parentheses, e.g. ``(१२३)`` or ``(123)``, are read as
	    a whole value and rendered with ``lcn.format_to_nepali_words``; the
	    parentheses themselves are kept in the output. Every digit that remains
	    afterwards (Devanagari or ASCII) is spelled individually, each word
	    followed by a single space.

	    Args:
	        text: Input string, possibly containing digits.

	    Returns:
	        The input with all digits replaced by Nepali words.
	    """
	    digit_class = '[०१२३४५६७८९0123456789]'
	    digit_words = {
	        '०': 'शून्य', '१': 'एक', '२': 'दुई', '३': 'तीन',
	        '४': 'चार', '५': 'पांच', '६': 'छ', '७': 'सात',
	        '८': 'आठ', '९': 'नौ', '1': 'एक', '2': 'दुई',
	        '3': 'तीन', '4': 'चार', '5': 'पांच', '6': 'छ',
	        '7': 'सात', '8': 'आठ', '9': 'नौ', '0': 'शून्य',
	    }

	    def _whole_number(match):
	        # Substituting at the match position (rather than str.replace on the
	        # digit string) guarantees that only the bracketed occurrence is
	        # rewritten, even if the same digits also appear earlier in the text.
	        value = int(nepalinumber(match.group(1)))
	        return "(" + lcn.format_to_nepali_words(value) + ")"

	    # Pass 1: numbers inside parentheses are read as whole values.
	    text = re.sub(r'\((' + digit_class + r'+)\)', _whole_number, text)

	    # Pass 2: every remaining digit is read out one by one.
	    return re.sub(digit_class, lambda m: digit_words[m.group()] + " ", text)

	import datetime
	from nepali.datetime import nepalihumanize, nepalidatetime

	# Convert Nepali Date to words(the format for Nepali Date is yyyy/mm/dd or yyyy-mm-dd or yyyy.mm.dd)
	def nepali_date_to_words(input_text):
	    """Replace Nepali dates written as digits with their spoken form.

	    A date is four digits, a separator (``-``, ``/`` or ``.``), two digits,
	    a separator and two digits, e.g. ``2080/01/15`` or ``२०८०-०१-१५``.
	    Each date is rewritten as ``<year> साल <month name> <day> गते``.

	    Args:
	        input_text: Text that may contain Nepali dates.

	    Returns:
	        The text with every recognised date spelled out.

	    Raises:
	        ValueError: If the month is outside 1-12 or the day outside 1-31.
	    """
	    # In Python's Unicode regex mode ``\d`` matches Devanagari digits as well
	    # as ASCII ones, so one pattern covers both scripts. The previous pattern
	    # contained an escaped ``\|`` (a literal pipe) and so never matched a date.
	    date_pattern = re.compile(r'\b\d{4}[-/.]\d{2}[-/.]\d{2}\b')

	    def _spell_date(match):
	        date = match.group()
	        year, month, day = (
	            int(nepalinumber(part)) for part in re.split(r'[-/.]', date)
	        )
	        if not 1 <= month <= 12 or not 1 <= day <= 31:
	            raise ValueError(f"Invalid date: {date}")
	        np_datetime = nepalidatetime(year, month, day)
	        output = nepalihumanize(np_datetime, threshold=0, format="%Y %B %d")
	        # With the "%Y %B %d" format the fields are: year, month name, day.
	        fields = output.split()
	        year_words = lcn.format_to_nepali_words(int(fields[0]))
	        day_words = lcn.format_to_nepali_words(int(fields[2]))
	        return year_words + " " + "साल" + " " + fields[1] + " " + day_words + " " + "गते"

	    return date_pattern.sub(_spell_date, input_text)

	# Convert English Date to words(the format for English Date is [yyyy/mm/dd] or [yyyy-mm-dd] or [yyyy.mm.dd])
	def english_date_to_words(input_text):
	    """Replace bracketed English (Gregorian) dates with Nepali words.

	    A date must be wrapped in square brackets: ``[yyyy/mm/dd]``,
	    ``[yyyy-mm-dd]`` or ``[yyyy.mm.dd]``. The digits between the brackets are
	    replaced by ``<year> <month name> <day>``; the brackets are kept in the
	    output. Years before 2000 are read as ``<century> सय <remainder>``;
	    later years are read as one number.

	    Args:
	        input_text: Text that may contain bracketed dates.

	    Returns:
	        The text with every bracketed date spelled out.

	    Raises:
	        ValueError: If the month is outside 1-12 or the day outside 1-31.
	    """
	    english_pattern = r'\[(\d{4}[-/.]\d{2}[-/.]\d{2})\]'
	    months = {
	        1: 'जनवरी', 2: 'फेब्रुअरी', 3: 'मार्च',
	        4: 'अप्रिल', 5: 'मे', 6: 'जुन',
	        7: 'जुलाई', 8: 'अगस्त', 9: 'सेप्टेम्बर',
	        10: 'अक्टोबर', 11: 'नोवेम्बर', 12: 'डिसेम्बर',
	    }

	    def _spell_date(match):
	        date = match.group(1)
	        year, month, day = (
	            int(nepalinumber(part)) for part in re.split(r'[-/.]', date)
	        )
	        # The lower bound matters: month 0 previously slipped through and was
	        # rendered as the text "None".
	        if not 1 <= month <= 12 or not 1 <= day <= 31:
	            raise ValueError(f"Invalid date: {date}")
	        century, year_in_century = divmod(year, 100)
	        if century >= 20:
	            year_words = lcn.format_to_nepali_words(year)
	        else:
	            year_words = (
	                lcn.format_to_nepali_words(century)
	                + " " + "सय" + " "
	                + lcn.format_to_nepali_words(year_in_century)
	            )
	        day_words = lcn.format_to_nepali_words(day)
	        return f"[{year_words} {months[month]} {day_words}]"

	    # Substituting at the match position leaves an identical but unbracketed
	    # date elsewhere in the text untouched, so it can still be handled as a
	    # Nepali date later.
	    return re.sub(english_pattern, _spell_date, input_text)

	def filter_len_text(input_length, csv_path='transcription.csv'):
	    """Load transcriptions and keep those with at least ``input_length`` words.

	    Args:
	        input_length: Minimum number of whitespace-separated words a
	            transcription must contain to be kept.
	        csv_path: CSV file to read; it must have a ``transcription`` column.
	            Defaults to ``transcription.csv`` in the working directory.

	    Returns:
	        A ``pandas.DataFrame`` containing the matching rows, re-indexed from 0.
	    """
	    dataset = pd.read_csv(csv_path)
	    dataset['transcription'] = dataset['transcription'].astype(str)

	    word_counts = dataset['transcription'].map(lambda text: len(text.split()))
	    # Build a fresh, re-indexed frame instead of mutating a filtered slice.
	    dataset = dataset[word_counts >= input_length].reset_index(drop=True)

	    print('Filtered Dataset:', len(dataset))
	    return dataset

	from cosine_similarity import calculate_cosine_similarity

	def get_embedding(input_text):
	    """Return the result of the cosine-similarity lookup for ``input_text``.

	    The transcription dataset is first narrowed to entries with at least as
	    many words as ``input_text``; that subset and the text are then handed
	    to ``calculate_cosine_similarity``, whose result is returned unchanged.
	    """
	    word_count = len(input_text.split())
	    print(word_count)
	    candidates = filter_len_text(input_length=word_count)
	    return calculate_cosine_similarity(candidates, input_text=input_text)

	def process_text(input_text):
	    """Normalise ``input_text`` for synthesis by spelling out dates and numbers.

	    The steps run in a fixed order: bracketed English dates, then Nepali
	    dates, then all remaining numbers.
	    """
	    normalisers = (english_date_to_words, nepali_date_to_words, convert_to_text)
	    result = input_text
	    for normalise in normalisers:
	        result = normalise(result)
	    return result

	# Per-process caches: building a pipeline or loading the x-vector dataset is
	# slow, so each is done once on first use rather than on every request.
	_SYNTHESISERS = {}
	_SPEAKER_EMBEDDINGS = {}


	def _get_synthesiser(model_id):
	    """Return the text-to-speech pipeline for ``model_id``, creating it once."""
	    if model_id not in _SYNTHESISERS:
	        _SYNTHESISERS[model_id] = pipeline("text-to-speech", model_id)
	    return _SYNTHESISERS[model_id]


	def _get_speaker_embedding(nepali):
	    """Return the speaker-embedding tensor (with a leading batch axis) for the voice."""
	    if nepali not in _SPEAKER_EMBEDDINGS:
	        if nepali:
	            vector = np.load(file='embedding_audio_2.npy')
	        else:
	            embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
	            vector = embeddings_dataset[7306]["xvector"]
	        _SPEAKER_EMBEDDINGS[nepali] = torch.tensor(vector).unsqueeze(0)
	    return _SPEAKER_EMBEDDINGS[nepali]


	def text_to_speech(input_text, language):
	    """Synthesise speech for ``input_text``.

	    Args:
	        input_text: Text to speak. For Nepali, dates and numbers are spelled
	            out with ``process_text`` before synthesis.
	        language: ``"Nepali"`` selects the Nepali model; any other value
	            selects the English ``microsoft/speecht5_tts`` model.

	    Returns:
	        A ``(sample_rate, samples)`` tuple where ``samples`` is an int16
	        NumPy array.
	    """
	    nepali = language == "Nepali"
	    if nepali:
	        model_id = "aryamanstha/speecht5_nepali_oslr43_oslr143"
	        text = process_text(input_text)
	    else:
	        model_id = "microsoft/speecht5_tts"
	        text = input_text

	    synthesiser = _get_synthesiser(model_id)
	    speaker_embedding = _get_speaker_embedding(nepali)
	    tts_output = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})

	    # Clip before scaling so out-of-range samples saturate instead of
	    # wrapping around when cast to int16.
	    audio = np.clip(np.array(tts_output["audio"]), -1.0, 1.0)
	    speech = (audio * 32767).astype(np.int16)

	    # Use the rate reported by the pipeline; fall back to the previously
	    # hard-coded 16000 if the key is absent.
	    return (tts_output.get("sampling_rate", 16000), speech)

	# Gradio UI: a text box and a language selector in, synthesised audio out.
	demo = gr.Interface(
	    fn=text_to_speech,
	    inputs=[
	        gr.Textbox(lines=5, label="Input Text"),
	        gr.Radio(["English", "Nepali"], label="Choose Language"),
	    ],
	    outputs=[gr.Audio(label="Output Audio", type="numpy")],
	    title="SpeechT5: Text to Speech For Nepali Language",
	    # The description previously called SpeechT5 a "speech-to-text" model,
	    # which contradicts what this app does with it.
	    description="SpeechT5 is a text-to-speech model that converts text into speech. Type in the text you want to convert into speech.",
	)
	demo.launch(share=True)