dkounadis
/

artificial-styletts2

@@ -34,41 +34,44 @@ import audiofile
 # ================================================ LIST OF VOICES
-# ROOT_DIR = '/data/dkounadis/mimic3-voices/'
-# foreign_voices = []
-# english_voices = []
-# for lang in os.listdir(ROOT_DIR + 'voices'):
-#         for voice in os.listdir(ROOT_DIR + 'voices/' + lang):
-#             if 'en_' in lang:
-#                 try:
-#                     with open(ROOT_DIR + 'voices/' + lang + '/' + voice + '/speakers.txt', 'r') as f:
-#                         for spk in f:
-#                             english_voices.append(lang + '/' + voice + '#' + spk.rstrip())
-#                         # voice_spk_string = lang + '/' + voice + '#' + spk.rstrip() for spk in f
-#                 except FileNotFoundError:
-#                     english_voices.append(lang + '/' + voice)
-#             else:
-#                 try:
-#                     with open(ROOT_DIR + 'voices/' + lang + '/' + voice + '/speakers.txt', 'r') as f:
-#                         for spk in f:
-#                             foreign_voices.append(lang + '/' + voice + '#' + spk.rstrip())
-#                 except FileNotFoundError:
-#                     foreign_voices.append(lang + '/' + voice)
-# #
-# [print(i) for i in foreign_voices]
-# print('\n_______________________________\n')
-# [print(i) for i in english_voices]
 # ====================================================== LIST Mimic-3 ALL VOICES
 list_voices = [
     'en_US/m-ailabs_low#mary_ann',
     'en_UK/apope_low',
     'de_DE/thorsten-emotion_low#neutral',  # is the 4x really interesting we can just write it in Section
-    'human',
     ]  # special - for human we load specific style file - no Mimic3 is run
@@ -293,7 +296,7 @@ for _id, _voice in enumerate(list_voices):
         total_audio_mimic3 = []
         total_audio_styletts2 = []
         ix = 0
-        for list_of_10 in harvard_individual_sentences[:1]:  # 77
             text = ' '.join(list_of_10['sentences'])
@@ -312,7 +315,7 @@ for _id, _voice in enumerate(list_voices):
                     f'<prosody rate=\'{rate}\'>'
                     f'<voice name=\'{_voice}\'>'
                     '<s>'
-                    f'{text}'
                     '</s>'
                     '</voice>'
                     '</prosody>'
@@ -353,7 +356,9 @@ for _id, _voice in enumerate(list_voices):
                 # style_path = '/cache/audb/msppodcast/2.4.0/fe182b91/Audios/MSP-PODCAST_0235_0053.wav'
                 # --
                 # MSP['emotion.test-1'].get().sort_values('valence').index[-1]
-                style_path = '/cache/audb/msppodcast/2.4.0/fe182b91/Audios/MSP-PODCAST_0220_0870.wav'
                 x, fs = audiofile.read(style_path)  # assure is not very short - equl harvard sent len
                 print(x.shape,' human')   # crop human to almost mimic-3 duration
             total_audio_mimic3.append(x)
@@ -426,7 +431,7 @@ for _id, _voice in enumerate(list_voices):
-raise SystemExit
 print('\nVisuals\n')
 # ===============================================================================
@@ -475,32 +480,39 @@ for vox1, vox2 in voice_pairs:  # 1 figure pro pair
         p.index = p.index.map(mapper = (lambda x: x.total_seconds()))
         vis_df[k] = p
     preds = vis_df
-    fig, ax = plt.subplots(nrows=10, ncols=2, figsize=(24, 24), gridspec_kw={'hspace': 0, 'wspace': .04})
     # ADV - subplots
-    time_stamp = preds[f'mimic3_{_str2}'].index.to_numpy()
     for j, dim in enumerate(['arousal',
                             'dominance',
                             'valence']):
         # MIMIC3
-        ax[j, 0].plot(time_stamp, preds[f'styletts2_{_str1}'][dim],
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
         ax[j, 0].fill_between(time_stamp,
                         preds[f'styletts2_{_str1}'][dim],
                         preds[f'mimic3_{_str1}'][dim],
-                        color=(.2,.2,.2),
-                        alpha=0.244)
         if j == 0:
-            ax[j, 0].legend([f'mimic3_{_str1}',
-                            f'StyleTTS2 using {_str1}'],
                             prop={'size': 10},
                             #  loc='lower right'
                             )
@@ -508,8 +520,6 @@ for vox1, vox2 in voice_pairs:  # 1 figure pro pair
         # TICK
         ax[j, 0].set_ylim([1e-7, .9999])
-        # ax[j, 0].set_yticks([.25, .5,.75])
-        # ax[j, 0].set_yticklabels(['0.25', '.5', '0.75'])
         ax[j, 0].set_xticklabels(['' for _ in ax[j, 0].get_xticklabels()])
         ax[j, 0].set_xlim([time_stamp[0], time_stamp[-1]])
@@ -517,21 +527,20 @@ for vox1, vox2 in voice_pairs:  # 1 figure pro pair
     # MIMIC3   4x speed
-        ax[j, 1].plot(time_stamp, preds[f'mimic3_{_str2}'][dim],
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
         ax[j, 1].fill_between(time_stamp,
-                        preds[f'styletts2_{_str2}'][dim],
                         preds[f'mimic3_{_str2}'][dim],
-                        color=(.2,.2,.2),
-                        alpha=0.244)
         if j == 0:
-            ax[j, 1].legend([f'mimic3_{_str2}',
-                            f'StyleTTS2 using {_str2}'],
-                            prop={'size': 10},
                             #  loc='lower right'
                             )
@@ -561,34 +570,25 @@ for vox1, vox2 in voice_pairs:  # 1 figure pro pair
     for j, dim in enumerate(['Angry',
                             'Sad',
                             'Happy',
-                            'Surprise',
                             'Fear',
                             'Disgust',
-                            'Contempt',
                             #  'Neutral'
                             ]):   # ASaHSuFDCN
         j = j + 3  # skip A/D/V suplt
         # MIMIC3
-        ax[j, 0].plot(time_stamp, preds[f'mimic3_{_str1}'][dim],
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
         ax[j, 0].fill_between(time_stamp,
-                        preds[f'mimic3_{_str2}'][dim],
                         preds[f'styletts2_{_str2}'][dim],
-                        color=(.2,.2,.2),
-                        alpha=0.244)
-        # ax[j, 0].legend(['StyleTTS2 style mimic3',
-        #                  'StyleTTS2 style crema-d'],
-        #                  prop={'size': 10},
-        #                 #  loc='upper left'
-        # )
         ax[j, 0].set_ylabel(dim.lower(), color=(.4, .4, .4), fontsize=14)
         # TICKS
@@ -601,7 +601,7 @@ for vox1, vox2 in voice_pairs:  # 1 figure pro pair
     # MIMIC3   4x speed
-        ax[j, 1].plot(time_stamp, preds[f'mimic3_{_str2}'][dim],
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
@@ -609,9 +609,8 @@ for vox1, vox2 in voice_pairs:  # 1 figure pro pair
                         preds[f'mimic3_{_str2}'][dim],
                         preds[f'styletts2_{_str2}'][dim],
-                        color=(.2,.2,.2),
-                        alpha=0.244)
         # ax[j, 1].legend(['StyleTTS2 style mimic3   4x speed',
         #                  'StyleTTS2 style crema-d'],
         #                  prop={'size': 10},

 # ================================================ LIST OF VOICES
+# Walk ROOT_DIR/voices/<lang>/<voice> and collect one identifier per speaker.
+# Identifiers are '<lang>/<voice>#<speaker>' when the voice directory holds a
+# speakers.txt (one speaker per line), otherwise just '<lang>/<voice>'.
+# Languages whose directory name contains 'en_' go to english_voices,
+# all others to foreign_voices. Both lists are printed for inspection.
+ROOT_DIR = '/data/dkounadis/mimic3-voices/'
+foreign_voices = []
+english_voices = []
+for lang in os.listdir(ROOT_DIR + 'voices'):
+    # Same collection logic for both groups - only the target list differs.
+    bucket = english_voices if 'en_' in lang else foreign_voices
+    for voice in os.listdir(ROOT_DIR + 'voices/' + lang):
+        voice_id = lang + '/' + voice
+        try:
+            with open(ROOT_DIR + 'voices/' + voice_id + '/speakers.txt', 'r') as f:
+                for spk in f:
+                    bucket.append(voice_id + '#' + spk.rstrip())
+        except FileNotFoundError:
+            # No speakers.txt: record the voice without a '#<speaker>' suffix.
+            bucket.append(voice_id)
+# Plain loops instead of list comprehensions: we only want the printing
+# side effect, not a throwaway list of None.
+for i in foreign_voices:
+    print(i)
+print('\n_______________________________\n')
+for i in english_voices:
+    print(i)
 # ====================================================== LIST Mimic-3 ALL VOICES
+# Voices iterated by the loops over list_voices; each entry is
+# '<lang>/<voice>' optionally followed by '#<speaker>'.
 list_voices = [
     'en_US/m-ailabs_low#mary_ann',
     'en_UK/apope_low',
     'de_DE/thorsten-emotion_low#neutral',  # is the 4x really interesting we can just write it in Section
+    # 'ko_KO/kss_low',
+    'fr_FR/m-ailabs_low#gilles_g_le_blanc',
+    # NOTE(review): 'human' is commented out below, so the remark on the
+    # closing bracket currently applies to no active entry.
+    #'human',
     ]  # special - for human we load specific style file - no Mimic3 is run
         total_audio_mimic3 = []
         total_audio_styletts2 = []
         ix = 0
+        for list_of_10 in harvard_individual_sentences[:4]:  # 77
             text = ' '.join(list_of_10['sentences'])
                     f'<prosody rate=\'{rate}\'>'
                     f'<voice name=\'{_voice}\'>'
                     '<s>'
+                    f'{text[:-1] + ", .. !!!"}'
                     '</s>'
                     '</voice>'
                     '</prosody>'
                 # style_path = '/cache/audb/msppodcast/2.4.0/fe182b91/Audios/MSP-PODCAST_0235_0053.wav'
                 # --
                 # MSP['emotion.test-1'].get().sort_values('valence').index[-1]
+                # style_path = '/cache/audb/msppodcast/2.4.0/fe182b91/Audios/MSP-PODCAST_0220_0870.wav'
+                # --
+                style_path = '/cache/audb/librispeech/3.1.0/fe182b91/test-clean/3575/170457/3575-170457-0024.wav'
                 x, fs = audiofile.read(style_path)  # assure is not very short - equl harvard sent len
                 print(x.shape,' human')   # crop human to almost mimic-3 duration
             total_audio_mimic3.append(x)
 print('\nVisuals\n')
 # ===============================================================================
         p.index = p.index.map(mapper = (lambda x: x.total_seconds()))
         vis_df[k] = p
     preds = vis_df
+    fig, ax = plt.subplots(nrows=8, ncols=2, figsize=(24, 19.2), gridspec_kw={'hspace': 0, 'wspace': .04})
     # ADV - subplots
+    time_stamp = preds[f'mimic3_{_str1}'].index.to_numpy()
     for j, dim in enumerate(['arousal',
                             'dominance',
                             'valence']):
         # MIMIC3
+        ax[j, 0].plot(time_stamp,
+                    #   np.ones_like(time_stamp) * .4,    --> to find the line on the legend
+                    preds[f'styletts2_{_str1}'][dim],   # THIS IS THE BLUE LINE VERIFIED
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
+        # ax[j, 0].plot(time_stamp, preds[f'styletts2_{_str1}'][dim],
+        #             color=(.2, .2, .2),
+        #             label='mean_1',
+        #             linewidth=2,
+        #             marker='o')
         ax[j, 0].fill_between(time_stamp,
                         preds[f'styletts2_{_str1}'][dim],
                         preds[f'mimic3_{_str1}'][dim],
+                        color=(.5,.5,.5),
+                        alpha=.4
+                        )
         if j == 0:
+            ax[j, 0].legend([f'StyleTTS2 using {_str1}',
+                             f'mimic3_{_str1}'],
                             prop={'size': 10},
                             #  loc='lower right'
                             )
         # TICK
         ax[j, 0].set_ylim([1e-7, .9999])
         ax[j, 0].set_xticklabels(['' for _ in ax[j, 0].get_xticklabels()])
         ax[j, 0].set_xlim([time_stamp[0], time_stamp[-1]])
     # MIMIC3   4x speed
+        ax[j, 1].plot(time_stamp, preds[f'styletts2_{_str2}'][dim],
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
         ax[j, 1].fill_between(time_stamp,
                         preds[f'mimic3_{_str2}'][dim],
+                        preds[f'styletts2_{_str2}'][dim],
+                        color=(.5,.5,.5),
+                        alpha=.4)
         if j == 0:
+            ax[j, 1].legend([
+                f'StyleTTS2 using {_str2}',
+                f'mimic3_{_str2}'],
+                prop={'size': 10},
                             #  loc='lower right'
                             )
     for j, dim in enumerate(['Angry',
                             'Sad',
                             'Happy',
+                            # 'Surprise',
                             'Fear',
                             'Disgust',
+                            # 'Contempt',
                             #  'Neutral'
                             ]):   # ASaHSuFDCN
         j = j + 3  # skip A/D/V suplt
         # MIMIC3
+        ax[j, 0].plot(time_stamp, preds[f'styletts2_{_str2}'][dim],
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
         ax[j, 0].fill_between(time_stamp,
                         preds[f'styletts2_{_str2}'][dim],
+                        preds[f'mimic3_{_str2}'][dim],
+                        color=(.5,.5,.5),
+                        alpha=.4)
         ax[j, 0].set_ylabel(dim.lower(), color=(.4, .4, .4), fontsize=14)
         # TICKS
     # MIMIC3   4x speed
+        ax[j, 1].plot(time_stamp, preds[f'styletts2_{_str2}'][dim],
                     color=(0,104/255,139/255),
                     label='mean_1',
                     linewidth=2)
                         preds[f'mimic3_{_str2}'][dim],
                         preds[f'styletts2_{_str2}'][dim],
+                        color=(.5,.5,.5),
+                        alpha=.4)
         # ax[j, 1].legend(['StyleTTS2 style mimic3   4x speed',
         #                  'StyleTTS2 style crema-d'],
         #                  prop={'size': 10},