Spaces:

vishred18
/

Comparative-Analysis-of-Speech-Synthesis-Models

Build error

App Files Files Community

Comparative-Analysis-of-Speech-Synthesis-Models / TensorFlowTTS /examples /cppwin /TensorflowTTSCppInference /Voice.cpp

vishred18

Upload 364 files

d5ee97c over 1 year ago

raw

history blame contribute delete

3.35 kB

	#include "Voice.h"

	#include <exception>

	#include "ext/ZCharScanner.h"



	std::vector<int32_t> Voice::PhonemesToID(const std::string & InTxt)
	{
	ZStringDelimiter Delim(InTxt);
	Delim.AddDelimiter(" ");

	std::vector<int32_t> VecPhones;
	VecPhones.reserve(Delim.szTokens());

	for (const auto& Pho : Delim.GetTokens())
	{
	size_t ArrID = 0;

	if (VoxUtil::FindInVec<std::string>(Pho, Phonemes, ArrID))
	VecPhones.push_back(PhonemeIDs[ArrID]);
	else
	std::cout << "Voice::PhonemesToID() WARNING: Unknown phoneme " << Pho << std::endl;



	}


	return VecPhones;

	}

	// Loads the phoneme table from a tab-separated file.
	//
	// Each useful line has the form "<phoneme>\t<integer id>". For every such
	// line the phoneme is appended to Phonemes and the id to PhonemeIDs, so the
	// two vectors stay index-aligned (PhonemesToID relies on that).
	//
	// Lines without a tab are skipped silently, as before. Lines that contain a
	// tab but yield fewer than two tokens, or whose id is not a parseable
	// integer, are skipped with a warning on std::cout instead of indexing past
	// the token list or leaving the two vectors with different lengths.
	// If the file cannot be opened the loop body never runs and nothing is added.
	void Voice::ReadPhonemes(const std::string &PhonemePath)
	{
		std::ifstream Phone(PhonemePath);

		std::string Line;
		while (std::getline(Phone, Line))
		{
			if (Line.find('\t') == std::string::npos)
				continue;

			ZStringDelimiter Deline(Line);
			Deline.AddDelimiter("\t");

			// A line such as "a\t" may not produce a second token.
			if (Deline.szTokens() < 2)
			{
				std::cout << "Voice::ReadPhonemes() WARNING: Malformed phoneme line " << Line << std::endl;
				continue;
			}

			// Parse the id BEFORE touching either vector: if the conversion
			// fails, neither vector is modified and they remain aligned.
			int ParsedID = 0;
			try
			{
				ParsedID = stoi(Deline[1]);
			}
			catch (const std::exception&)
			{
				std::cout << "Voice::ReadPhonemes() WARNING: Invalid phoneme ID in line " << Line << std::endl;
				continue;
			}

			Phonemes.push_back(Deline[0]);
			PhonemeIDs.push_back(ParsedID);
		}
	}

	void Voice::ReadSpeakers(const std::string &SpeakerPath)
	{
	Speakers = GetLinedFile(SpeakerPath);

	}

	void Voice::ReadEmotions(const std::string &EmotionPath)
	{
	Emotions = GetLinedFile(EmotionPath);

	}

	// Rebuilds ModelInfo from the file at ModelInfoPath.
	//
	// ModelInfo is emptied first, then every line returned by GetLinedFile is
	// appended followed by "\n". A file that cannot be opened therefore leaves
	// ModelInfo empty.
	void Voice::ReadModelInfo(const std::string &ModelInfoPath)
	{
		ModelInfo.clear();

		const std::vector<std::string> InfoLines = GetLinedFile(ModelInfoPath);
		for (const std::string& InfoLine : InfoLines)
		{
			ModelInfo.append(InfoLine);
			ModelInfo.append("\n");
		}
	}

	// Reads a text file and returns its non-empty lines in file order.
	//
	// Returns an empty vector when the file cannot be opened.
	// A single trailing '\r' is removed from each line so that CRLF files read
	// without newline translation give the same result as LF files. Lines that
	// are empty after that are skipped; every other line is kept, including
	// one-character lines (the previous "size() > 1" test dropped those).
	std::vector<std::string> Voice::GetLinedFile(const std::string &Path)
	{
		std::vector<std::string> RetLines;
		std::ifstream Fi(Path);

		if (!Fi.good()) // File does not exist or is unreadable: return empty vec
			return RetLines;

		std::string Line;
		while (std::getline(Fi, Line))
		{
			// Drop the carriage return left behind by CRLF line endings.
			if (!Line.empty() && Line.back() == '\r')
				Line.pop_back();

			if (!Line.empty())
				RetLines.push_back(Line);
		}

		return RetLines;
	}

	// Builds a voice from the directory VoxPath.
	//
	// In order: initializes MelPredictor from "<VoxPath>/melgen" and Vocoder from
	// "<VoxPath>/vocoder", hands InPhn to Processor when it is non-null, reads
	// "<VoxPath>/info.json" into VoxInfo, stores inName in Name, then loads
	// phonemes.txt, speakers.txt, emotions.txt and info.txt from the same
	// directory. When InPhn is null, Processor is not initialized here;
	// AddPhonemizer can supply one later.
	Voice::Voice(const std::string & VoxPath, const std::string &inName, Phonemizer *InPhn)
	{
		// Builds "<VoxPath>/<Leaf>" for one entry of the voice directory.
		const auto InVoxDir = [&VoxPath](const char* Leaf) -> std::string
		{
			return VoxPath + "/" + Leaf;
		};

		MelPredictor.Initialize(InVoxDir("melgen"));
		Vocoder.Initialize(InVoxDir("vocoder"));

		if (InPhn != nullptr)
		{
			Processor.Initialize(InPhn);
		}

		VoxInfo = VoxUtil::ReadModelJSON(InVoxDir("info.json"));
		Name = inName;

		ReadPhonemes(InVoxDir("phonemes.txt"));
		ReadSpeakers(InVoxDir("speakers.txt"));
		ReadEmotions(InVoxDir("emotions.txt"));
		ReadModelInfo(InVoxDir("info.txt"));
	}

	// Passes InPhn to Processor.Initialize. Unlike the constructor, no null
	// check is made here; InPhn is forwarded as given.
	void Voice::AddPhonemizer(Phonemizer *InPhn)
	{
		this->Processor.Initialize(InPhn);
	}


	std::vector<float> Voice::Vocalize(const std::string & Prompt, float Speed, int32_t SpeakerID, float Energy, float F0, int32_t EmotionID)
	{

	std::string PhoneticTxt = Processor.ProcessTextPhonetic(Prompt,Phonemes,(ETTSLanguage::Enum)VoxInfo.Language);

	TFTensor<float> Mel = MelPredictor.DoInference(PhonemesToID(PhoneticTxt), SpeakerID, Speed, Energy, F0,EmotionID);

	TFTensor<float> AuData = Vocoder.DoInference(Mel);


	int64_t Width = AuData.Shape[0];
	int64_t Height = AuData.Shape[1];
	int64_t Depth = AuData.Shape[2];
	//int z = 0;

	std::vector<float> AudioData;
	AudioData.resize(Height);

	// Code to access 1D array as if it were 3D
	for (int64_t x = 0; x < Width;x++)
	{
	for (int64_t z = 0;z < Depth;z++)
	{
	for (int64_t y = 0; y < Height;y++) {
	int64_t Index = x * Height * Depth + y * Depth + z;
	AudioData[(size_t)y] = AuData.Data[(size_t)Index];

	}

	}
	}


	return AudioData;
	}


	// Nothing to release by hand here; members are destroyed by their own
	// destructors.
	Voice::~Voice() = default;