import os
import sys
import requests
import json

from huggingface_hub import HfApi

# start xVASynth service (no HTTP)
import resources.app.no_server as xvaserver
from gr_client import BlocksDemo

# NVIDIA NeMo models: resolve the local HF-cache snapshot path for the
# latest commit so the model files can be addressed directly on disk.
hf_model_name = "Pendrokar/xvapitch_nvidia"
model_repo = HfApi()
commits = model_repo.list_repo_commits(repo_id=hf_model_name)
latest_commit_sha = commits[0].commit_id
hf_cache_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_nvidia/snapshots/{latest_commit_sha}/'
models_path = hf_cache_models_path

# Lojban model
commits = model_repo.list_repo_commits(repo_id='Pendrokar/xvasynth_lojban')
latest_commit_sha = commits[0].commit_id
hf_cache_lojban_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvasynth_lojban/snapshots/{latest_commit_sha}/'

# Robotic model — presumably a gated repo (needs HF_TOKEN); best effort,
# the app still works without it.
hf_cache_robotic_models_path = ''
try:
    commits = model_repo.list_repo_commits(repo_id='Pendrokar/xvasynth_cabal', token=os.getenv('HF_TOKEN'))
    latest_commit_sha = commits[0].commit_id
    hf_cache_robotic_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvasynth_cabal/snapshots/{latest_commit_sha}/'
except Exception:  # was a bare `except:` — would also swallow KeyboardInterrupt/SystemExit
    print('Robotic voice not loaded!')

# Currently-loaded model state, shared between load_model() and predict().
current_voice_model = None
current_voice_type = None
base_speaker_emb = ''


def load_model(voice_model_name):
    """Load a voice model into the embedded xVASynth server.

    Picks the model directory and architecture from the voice name, asks
    xvaserver to load it, and caches the base speaker embedding read from
    the model's sidecar ``<model>.json`` file.

    :param voice_model_name: file stem of the voice model (e.g. ``'cnc_cabal'``)
    :return: the base speaker embedding of the loaded model (unchanged
        previous value if loading failed)
    """
    global current_voice_model, current_voice_type, base_speaker_emb

    if voice_model_name == 'x_selpahi':
        # Lojban voice uses the older FastPitch 1.1 architecture
        model_path = hf_cache_lojban_models_path + voice_model_name
        model_type = 'FastPitch1.1'
    else:
        if voice_model_name == 'cnc_cabal':
            model_path = hf_cache_robotic_models_path + voice_model_name
        else:
            model_path = models_path + voice_model_name
        model_type = 'xVAPitch'

    language = 'en'  # seems to have no effect if generated text is from a different language

    data = {
        'outputs': None,
        'version': '3.0',
        'model': model_path,
        'modelType': model_type,
        'base_lang': language,
        'pluginsContext': '{}',
    }

    print('Loading voice model...')
    try:
        xvaserver.loadModel(data)
        current_voice_model = voice_model_name
        current_voice_type = model_type

        # The sidecar JSON next to the model carries the speaker embedding.
        with open(model_path + '.json', 'r', encoding='utf-8') as f:
            voice_model_json = json.load(f)

        if model_type == 'xVAPitch':
            base_speaker_emb = voice_model_json['games'][0]['base_speaker_emb']
        elif model_type == 'FastPitch1.1':
            base_speaker_emb = voice_model_json['games'][0]['resemblyzer']
    except requests.exceptions.RequestException as err:
        print(f'FAILED to load voice model: {err}')

    return base_speaker_emb


class LocalBlocksDemo(BlocksDemo):
    """Gradio demo wired to the in-process xVASynth server (no HTTP)."""

    def predict(
        self,
        input_text,
        voice,
        lang,
        pacing,
        pitch,
        energy,
        anger,
        happy,
        sad,
        surprise,
        use_deepmoji
    ):
        """Synthesize ``input_text`` with the requested voice and emotion mix.

        Returns ``[wav_path, arpabet_html, em_angry, em_happy, em_sad,
        em_surprise, json_data]``; ``wav_path`` is ``''`` when synthesis failed.
        ``pitch`` and ``energy`` are accepted for interface compatibility but
        are currently unused.
        """
        global current_voice_model, current_voice_type, base_speaker_emb

        # grab only the first 1000 characters
        input_text = input_text[:1000]

        # load voice model if not the current model
        if current_voice_model != voice:
            load_model(voice)

        model_type = current_voice_type
        pace = pacing if pacing else 1.0
        save_path = '/tmp/xvapitch_audio_sample.wav'
        language = lang
        use_sr = 0
        use_cleanup = 0

        # Negative emotion slider values are clamped to 0.
        pluginsContext = {}
        pluginsContext["mantella_settings"] = {
            "emAngry": (anger if anger > 0 else 0),
            "emHappy": (happy if happy > 0 else 0),
            "emSad": (sad if sad > 0 else 0),
            "emSurprise": (surprise if surprise > 0 else 0),
            "run_model": use_deepmoji
        }

        data = {
            'pluginsContext': json.dumps(pluginsContext),
            'modelType': model_type,
            # pad with whitespaces as a workaround to avoid cutoffs
            'sequence': input_text.center(len(input_text) + 2, ' '),
            'pace': pace,
            'outfile': save_path,
            'vocoder': 'n/a',
            'base_lang': language,
            'base_emb': base_speaker_emb,
            'useSR': use_sr,
            'useCleanup': use_cleanup,
        }

        print('Synthesizing...')
        try:
            json_data = xvaserver.synthesize(data)
        except requests.exceptions.RequestException as err:
            # Bug fix: the original message lacked the f-prefix, so the
            # error text was never interpolated.
            print(f'FAILED to synthesize: {err}')
            save_path = ''
            # Bug fix: fallback payload now matches what the rendering code
            # below reads — the original used key 'em_anger' (read as
            # 'em_angry'), scalar emotion values (read as lists), and a list
            # under 'arpabet' (rendered via str.split), all of which crashed
            # the failure path.
            json_data = {
                'arpabet': 'Failed',
                'durations': [0],
                'em_angry': [anger],
                'em_happy': [happy],
                'em_sad': [sad],
                'em_surprise': [surprise],
            }

        arpabet_html = ''
        if voice == 'x_selpahi':
            # Lojban/FastPitch voice: no ARPAbet or DeepMoji emotion data.
            em_angry = 0
            em_happy = 0
            em_sad = 0
            em_surprise = 0
        else:
            # NOTE(review): the exact HTML markup was destroyed when this file
            # was mangled (tags stripped); reconstructed as a table whose cell
            # widths are proportional to phoneme durations — confirm against
            # the live demo's rendering.
            arpabet_html = '<h6>ARPAbet &amp; Durations</h6>'
            arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'

            arpabet_symbols = json_data['arpabet'].split('|')
            wpad_len = len(arpabet_symbols)

            # Total duration of non-empty symbols, for proportional widths.
            total_dur_length = 0
            for symb_i in range(wpad_len):
                if arpabet_symbols[symb_i] == '':
                    continue
                total_dur_length += float(json_data['durations'][symb_i])

            for symb_i in range(wpad_len):
                if arpabet_symbols[symb_i] == '':
                    continue
                arpabet_length = float(json_data['durations'][symb_i])
                # Guard against a zero total (e.g. the failure fallback above),
                # which previously raised ZeroDivisionError.
                if total_dur_length:
                    cell_width = round(arpabet_length / total_dur_length * 100, 2)
                else:
                    cell_width = 0
                arpabet_html += f'<td class="arpabet" style="width: {cell_width}%">' \
                    + arpabet_symbols[symb_i] \
                    + '</td> '

            arpabet_html += '</tr></tbody></table>'

            if use_deepmoji:
                em_angry = round(json_data['em_angry'][0], 2)
                em_happy = round(json_data['em_happy'][0], 2)
                em_sad = round(json_data['em_sad'][0], 2)
                em_surprise = round(json_data['em_surprise'][0], 2)
            else:
                # DeepMoji disabled: echo the input sliders back unchanged.
                em_angry = anger
                em_happy = happy
                em_sad = sad
                em_surprise = surprise

        return [
            save_path,
            arpabet_html,
            em_angry,
            em_happy,
            em_sad,
            em_surprise,
            json_data
        ]


if __name__ == "__main__":
    print('running custom Gradio interface')
    demo = LocalBlocksDemo(models_path, hf_cache_lojban_models_path, hf_cache_robotic_models_path)
    demo.block.launch()