Hev832 committed on
Commit
1f9caee
1 Parent(s): 4d1a0a6

Create hevrvc.py

Browse files
Files changed (1) hide show
  1. hevrvc.py +134 -0
hevrvc.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import subprocess
import sys
import traceback

import faiss
import gradio as gr
import numpy as np
from pydub import AudioSegment
from pytube import YouTube
from sklearn.cluster import MiniBatchKMeans
9
+
10
def calculate_audio_duration(file_path):
    """Return the duration, in seconds, of the audio file at *file_path*.

    The file is decoded with ``AudioSegment.from_file``; the segment's length
    (``len(segment)``) is divided by 1000 to obtain seconds.
    """
    segment = AudioSegment.from_file(file_path)
    length = len(segment)
    return length / 1000.0
13
+
14
def youtube_to_wav(url, dataset_folder):
    """Download the audio track of a YouTube video and convert it to WAV.

    The audio-only stream is downloaded into *dataset_folder*, moved to
    ``audio.mp4`` and converted by ffmpeg to ``audio.wav`` (``pcm_s16le``
    codec, 44100 Hz). The intermediate mp4 is always deleted afterwards.

    Args:
        url: Address of the YouTube video.
        dataset_folder: Directory that receives ``audio.wav``.

    Returns:
        A success message containing the WAV path, or ``"Error: <reason>"``
        if the download or the conversion failed.
    """
    mp4_path = os.path.join(dataset_folder, 'audio.mp4')
    wav_path = os.path.join(dataset_folder, 'audio.wav')
    try:
        downloaded = YouTube(url).streams.get_audio_only().download(output_path=dataset_folder)
        # os.replace overwrites a stale audio.mp4 on every platform.
        os.replace(downloaded, mp4_path)
        try:
            # An argument list (no shell) keeps paths with spaces or shell
            # metacharacters intact. "-y" and a closed stdin stop ffmpeg from
            # waiting on an overwrite prompt; check=True turns a failed
            # conversion into an error instead of a false success message.
            subprocess.run(
                ['ffmpeg', '-y', '-i', mp4_path,
                 '-acodec', 'pcm_s16le', '-ar', '44100', wav_path],
                check=True,
                stdin=subprocess.DEVNULL,
            )
        finally:
            # Remove the intermediate file even when the conversion failed.
            os.remove(mp4_path)
        return f'Audio downloaded and converted to WAV: {wav_path}'
    except Exception as e:
        # The UI shows the returned text, so report failures as a message.
        return f"Error: {e}"
25
+
26
def create_training_files(model_name, dataset_folder, youtube_link):
    """Optionally fetch audio from YouTube, then preprocess the dataset.

    Runs ``infer/modules/train/preprocess.py`` on *dataset_folder*, writing
    into ``./logs/<model_name>``, and decides success by looking for the text
    ``end preprocess`` in ``./logs/<model_name>/preprocess.log``.

    Args:
        model_name: Name of the experiment directory under ``./logs``.
        dataset_folder: Directory containing the training audio.
        youtube_link: Optional video URL; when non-empty its audio is
            downloaded into *dataset_folder* first.

    Returns:
        A human-readable status message for the UI.
    """
    if youtube_link:
        download_status = youtube_to_wav(youtube_link, dataset_folder)
        # youtube_to_wav reports failures as "Error: ..."; surface them
        # instead of silently continuing without the requested audio.
        if download_status.startswith("Error"):
            return download_status

    if not os.path.isdir(dataset_folder):
        return f"Dataset folder does not exist: {dataset_folder}"
    if not os.listdir(dataset_folder):
        return "Your dataset folder is empty."

    log_dir = f'./logs/{model_name}'
    os.makedirs(log_dir, exist_ok=True)

    # Argument list instead of a shell string: user-supplied names and paths
    # can no longer be interpreted by the shell or split on whitespace.
    subprocess.run(
        [sys.executable, 'infer/modules/train/preprocess.py',
         dataset_folder, '32000', '2', log_dir, 'False', '3.0'],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    try:
        with open(f'{log_dir}/preprocess.log', 'r') as f:
            succeeded = 'end preprocess' in f.read()
    except OSError:
        # No readable log means the preprocessing script never got that far.
        succeeded = False

    if succeeded:
        return "Preprocessing Success"
    return "Error preprocessing data... Make sure your dataset folder is correct."
42
+
43
def extract_features(model_name, f0method):
    """Run the f0 and feature extraction scripts for a preprocessed model.

    For ``f0method == "rmvpe_gpu"`` the script
    ``infer/modules/train/extract/extract_f0_rmvpe.py`` is used; any other
    method is forwarded to ``extract_f0_print.py``. Afterwards
    ``extract_feature_print.py`` is run. Success is decided by the presence of
    ``all-feature-done`` in ``./logs/<model_name>/extract_f0_feature.log``.

    Args:
        model_name: Name of the experiment directory under ``./logs``.
        f0method: Pitch extraction method selected in the UI.

    Returns:
        A human-readable status message for the UI.
    """
    log_dir = f'./logs/{model_name}'

    if f0method == "rmvpe_gpu":
        f0_command = [sys.executable, 'infer/modules/train/extract/extract_f0_rmvpe.py',
                      '1', '0', '0', log_dir, 'True']
    else:
        f0_command = [sys.executable, 'infer/modules/train/extract/extract_f0_print.py',
                      log_dir, '2', str(f0method)]

    # Argument lists (no shell) so the model name and method cannot be
    # interpreted as shell syntax. Exit codes are not checked here because
    # the log file below is what decides the outcome.
    subprocess.run(f0_command)
    subprocess.run(
        [sys.executable, 'infer/modules/train/extract_feature_print.py',
         'cuda:0', '1', '0', log_dir, 'v2', 'True']
    )

    try:
        with open(f'{log_dir}/extract_f0_feature.log', 'r') as f:
            succeeded = 'all-feature-done' in f.read()
    except OSError:
        # The log is absent when the scripts could not run at all.
        succeeded = False

    if succeeded:
        return "Feature Extraction Success"
    return "Error in feature extraction... Make sure your data was preprocessed."
53
+
54
def train_index(exp_dir1, version19):
    """Build a FAISS ``IVF<n>,Flat`` index from a model's extracted features.

    Every file in ``logs/<exp_dir1>/3_feature256`` (when ``version19`` is
    ``"v1"``) or ``logs/<exp_dir1>/3_feature768`` (otherwise) is loaded with
    ``np.load``, concatenated along axis 0 and shuffled. When more than
    200,000 rows result, they are replaced by 10,000 MiniBatchKMeans cluster
    centres. The rows are saved to ``total_fea.npy``; the index is written
    once after training (``trained_*.index``) and once after all rows have
    been added (``added_*.index``).

    Args:
        exp_dir1: Name of the experiment directory under ``logs``.
        version19: Feature version; ``"v1"`` selects 256-dimensional
            features, anything else 768-dimensional ones.

    Returns:
        A newline-joined progress log, a prompt to run feature extraction
        first when no features exist, or the log including a traceback when
        k-means fails.
    """
    exp_dir = f"logs/{exp_dir1}"
    os.makedirs(exp_dir, exist_ok=True)
    is_v1 = version19 == "v1"
    feature_dir = f"{exp_dir}/3_feature256" if is_v1 else f"{exp_dir}/3_feature768"
    if not os.path.exists(feature_dir):
        return "Please perform feature extraction first!"

    feature_files = sorted(os.listdir(feature_dir))
    if not feature_files:
        return "Please perform feature extraction first!"

    infos = []
    npys = [np.load(f"{feature_dir}/{name}") for name in feature_files]
    big_npy = np.concatenate(npys, 0)

    # Shuffle the rows before any clustering / training.
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]

    if big_npy.shape[0] > 2e5:
        infos.append(f"Trying k-means with {big_npy.shape[0]} to 10k centers.")
        try:
            big_npy = MiniBatchKMeans(
                n_clusters=10000,
                verbose=True,
                batch_size=256,
                compute_labels=False,
                init="random",
            ).fit(big_npy).cluster_centers_
        except Exception:
            # Report the failure to the UI; unlike a bare ``except`` this
            # lets KeyboardInterrupt / SystemExit propagate.
            infos.append(traceback.format_exc())
            return "\n".join(infos)

    np.save(f"{exp_dir}/total_fea.npy", big_npy)
    # With fewer than 39 rows the integer division yields 0, which would
    # produce the invalid factory string "IVF0,Flat"; keep at least one list.
    n_ivf = max(1, min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39))
    infos.append(f"{big_npy.shape},{n_ivf}")

    index = faiss.index_factory(256 if is_v1 else 768, f"IVF{n_ivf},Flat")
    infos.append("Training index")
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1
    index.train(big_npy)
    index_suffix = f"IVF{n_ivf}_Flat_nprobe_{index_ivf.nprobe}_{exp_dir1}_{version19}.index"
    faiss.write_index(index, f"{exp_dir}/trained_{index_suffix}")

    infos.append("Adding to index")
    batch_size_add = 8192
    # Add the rows in fixed-size batches.
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i: i + batch_size_add])
    faiss.write_index(index, f"{exp_dir}/added_{index_suffix}")

    infos.append(f"Successfully built index: added_{index_suffix}")
    return "\n".join(infos)
108
+
109
# Gradio front end: one tab with the three pipeline steps (preprocess the
# dataset, extract features, train the index), all reporting into one textbox.
with gr.Blocks() as demo:
    with gr.Tab("CREATE TRANING FILES - This will process the data, extract the features and create your index file for you!"):
        with gr.Row():
            model_name = gr.Textbox(label="Model Name", value="My-Voice")
            dataset_folder = gr.Textbox(label="Dataset Folder", value="/content/dataset")
            youtube_link = gr.Textbox(label="YouTube Link (optional)")
        with gr.Row():
            start_button = gr.Button("Create Training Files")
            f0method = gr.Dropdown(["pm", "harvest", "rmvpe", "rmvpe_gpu"], label="F0 Method", value="rmvpe_gpu")
            extract_button = gr.Button("Extract Features")
            train_button = gr.Button("Train Index")

        output = gr.Textbox(label="Output")

        # Event inputs must be Gradio components, so the fixed feature
        # version is supplied through a State component rather than the bare
        # string "v2".
        index_version = gr.State("v2")

        start_button.click(create_training_files, inputs=[model_name, dataset_folder, youtube_link], outputs=output)
        extract_button.click(extract_features, inputs=[model_name, f0method], outputs=output)
        train_button.click(train_index, inputs=[model_name, index_version], outputs=output)

demo.launch()
128
+
129
+
130
+
131
+
132
+
133
+
134
+ # beta state ......