smjain committed on
Commit
ca4b9a6
1 Parent(s): e5b72ea

Upload infer_serverless.py

Browse files
Files changed (1) hide show
  1. infer_serverless.py +679 -0
infer_serverless.py ADDED
@@ -0,0 +1,679 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch, os, traceback, sys, warnings, shutil, numpy as np
2
+ import gradio as gr
3
+ import librosa
4
+ import asyncio
5
+ import rarfile
6
+ import edge_tts
7
+ import yt_dlp
8
+ import ffmpeg
9
+ import gdown
10
+ import subprocess
11
+ import wave
12
+ import soundfile as sf
13
+ from scipy.io import wavfile
14
+ from datetime import datetime
15
+ from urllib.parse import urlparse
16
+ from mega import Mega
17
+ from flask import Flask, request, jsonify, send_file,session,render_template
18
+ import base64
19
+ import tempfile
20
+ import threading
21
+ import hashlib
22
+ import os
23
+ import werkzeug
24
+ from pydub import AudioSegment
25
+ import uuid
26
+ from threading import Semaphore
27
+ from threading import Lock
28
+ from multiprocessing import Process, SimpleQueue, set_start_method,get_context
29
+ from queue import Empty
30
+ from pydub import AudioSegment
31
+ from flask_dance.contrib.google import make_google_blueprint, google
32
+ import io
33
+ import runpod
34
+ import boto3
35
+
36
+
37
+
38
+
39
+
40
# ---------------------------------------------------------------------------
# Module-level application setup.
# ---------------------------------------------------------------------------
app = Flask(__name__)

# NOTE(review): the literal fallbacks below are credentials that were committed
# to source control. They are kept only so existing deployments keep working;
# rotate them and supply the real values through the environment instead.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", 'smjain_6789')

now_dir = os.getcwd()

# Cache of loaded model checkpoints, keyed by weight file name (see get_vc).
cpt = {}

# Scratch folder: wiped and recreated on every import of this module, then
# exported as TEMP for this process and the subprocesses it starts.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.environ["TEMP"] = tmp

# Demucs model name used when splitting vocals from the instrumental track.
split_model = "htdemucs"

convert_voice_lock = Lock()

# Define the maximum number of concurrent requests
MAX_CONCURRENT_REQUESTS = 2  # Adjust this number as needed

# Initialize the semaphore with the maximum number of concurrent requests
request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)

# unique_id -> {"status": str, "percentage": int}
task_status_tracker = {}

os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"  # ONLY FOR TESTING, REMOVE IN PRODUCTION
os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = "1"

# OAuth credentials: environment variables take precedence over the legacy
# in-source values (see the NOTE(review) above).
app.config["GOOGLE_OAUTH_CLIENT_ID"] = os.environ.get(
    "GOOGLE_OAUTH_CLIENT_ID",
    "144930881143-n3e3ubers3vkq7jc9doe4iirasgimdt2.apps.googleusercontent.com",
)
app.config["GOOGLE_OAUTH_CLIENT_SECRET"] = os.environ.get(
    "GOOGLE_OAUTH_CLIENT_SECRET",
    "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",
)
google_blueprint = make_google_blueprint(scope=["profile", "email"])
app.register_blueprint(google_blueprint, url_prefix="/login")

# Object-storage credentials used by upload_file / download_file.
ACCESS_ID = os.getenv('ACCESS_ID', '')
SECRET_KEY = os.getenv('SECRET_KEY', '')
65
+
66
+ #set_start_method('spawn', force=True)
67
+ from lib.infer_pack.models import (
68
+ SynthesizerTrnMs256NSFsid,
69
+ SynthesizerTrnMs256NSFsid_nono,
70
+ SynthesizerTrnMs768NSFsid,
71
+ SynthesizerTrnMs768NSFsid_nono,
72
+ )
73
+ from fairseq import checkpoint_utils
74
+ from vc_infer_pipeline import VC
75
+ from config import Config
76
# Runtime configuration object; ``config.device`` and ``config.is_half`` are
# read when models are loaded.
config = Config()

# Fetch the edge-tts voice list synchronously while the module is imported.
tts_voice_list = asyncio.get_event_loop().run_until_complete(
    edge_tts.list_voices()
)
# Display labels of the form "<ShortName>-<Gender>".
voices = [
    "{}-{}".format(voice['ShortName'], voice['Gender'])
    for voice in tts_voice_list
]

# Populated by load_hubert().
hubert_model = None

# Pitch-extraction methods offered by default, plus their description text.
f0method_mode = ["pm", "harvest", "crepe"]
f0method_info = "PM is fast, Harvest is good but extremely slow, and Crepe effect is good but requires GPU (Default: PM)"
85
+
86
+
87
def index():
    """Render and return the ``ui.html`` template.

    The earlier Google-login branch (choosing between logged-in and
    logged-out variants of ``index.html``) is disabled.
    """
    page = render_template("ui.html")
    return page
94
+
95
+
96
+
97
+
98
# When the rmvpe weights file sits in the working directory, offer "rmvpe" as
# an extra pitch method (placed before "crepe") and extend the help text.
if os.path.isfile("rmvpe.pt"):
    f0method_mode[2:2] = ["rmvpe"]
    f0method_info = "PM is fast, Harvest is good but extremely slow, Rvmpe is alternative to harvest (might be better), and Crepe effect is good but requires GPU (Default: PM)"
101
+
102
+
103
+
104
+
105
def load_hubert():
    """Load ``hubert_base.pt`` into the module-level ``hubert_model``.

    The first model of the loaded ensemble is moved to ``config.device``,
    cast to half or full precision according to ``config.is_half``, and
    switched to eval mode.
    """
    global hubert_model
    ensemble, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"], suffix=""
    )
    hubert_model = ensemble[0]
    hubert_model = hubert_model.to(config.device)
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
118
+
119
# Load the HuBERT feature extractor once, at import time.
load_hubert()

weight_root = "weights"
index_root = "weights/index"

# File names of every ``*.pth`` file found anywhere below ``weight_root``.
weights_model = [
    name
    for _, _, model_files in os.walk(weight_root)
    for name in model_files
    if name.endswith(".pth")
]

# ``*.index`` files below ``index_root`` whose name does not contain
# "trained"; each name is joined onto ``index_root``.
weights_index = [
    os.path.join(index_root, name)
    for _, _, index_files in os.walk(index_root)
    for name in index_files
    if name.endswith('.index') and "trained" not in name
]
133
+
134
def check_models():
    """Rescan the weight folders and build Gradio dropdown updates.

    Returns:
        A 2-tuple of ``gr.Dropdown.update`` objects: the first lists the
        ``*.pth`` file names under ``weight_root`` and selects the first one
        found, the second lists the usable ``*.index`` paths under
        ``index_root``. When no model file exists the selection is cleared
        (``""``) instead of raising ``IndexError``.
    """
    model_names = [
        name
        for _, _, model_files in os.walk(weight_root)
        for name in model_files
        if name.endswith(".pth")
    ]
    index_paths = [
        os.path.join(index_root, name)
        for _, _, index_files in os.walk(index_root)
        for name in index_files
        if name.endswith('.index') and "trained" not in name
    ]
    # Guard the empty case: indexing an empty list used to crash the refresh.
    selected_model = model_names[0] if model_names else ""
    return (
        gr.Dropdown.update(choices=sorted(model_names), value=selected_model),
        gr.Dropdown.update(choices=sorted(index_paths)),
    )
149
+
150
def clean():
    """Return Gradio updates that blank a dropdown and hide a slider."""
    cleared_dropdown = gr.Dropdown.update(value="")
    hidden_slider = gr.Slider.update(visible=False)
    return (cleared_dropdown, hidden_slider)
155
+ # Function to delete files
156
def cleanup_files(file_paths):
    """Delete every path in ``file_paths``, logging each outcome.

    A failure on one path is printed and does not stop the remaining
    deletions; nothing is returned.
    """
    for target in file_paths:
        try:
            os.remove(target)
            print(f"Deleted {target}")
        except Exception as err:
            print(f"Error deleting {target}: {err}")
163
+
164
+
165
+
166
+
167
+
168
def upload_file(local_file_path, bucket_name):
    """Upload a local file to a DigitalOcean Spaces bucket.

    The object key is the base name of ``local_file_path``.

    Args:
        local_file_path: Path of the file to upload.
        bucket_name: Name of the destination bucket.

    Returns:
        A Flask JSON response on success, or a ``(response, 500)`` tuple when
        the upload fails.
    """
    # Named ``s3_session`` so it does not shadow Flask's imported ``session``.
    s3_session = boto3.session.Session()
    client = s3_session.client(
        's3',
        region_name='nyc3',
        endpoint_url='https://nyc3.digitaloceanspaces.com',
        aws_access_key_id=ACCESS_ID,
        aws_secret_access_key=SECRET_KEY,
    )

    object_key = os.path.basename(local_file_path)

    status = 200
    try:
        client.upload_file(local_file_path, bucket_name, object_key)
        payload = {
            'success': True,
            'message': 'File uploaded successfully',
            'file_path': local_file_path,
        }
    except Exception as e:
        payload = {'error': str(e)}
        status = 500

    # jsonify needs an application context; this function is also called from
    # the serverless handler, where Flask has not pushed one.
    with app.app_context():
        response = jsonify(payload)
    if status != 200:
        return response, status
    return response
198
+
199
+
200
+
201
def download_file(filename, bucket_name):
    """Download ``filename`` from a DigitalOcean Spaces bucket into ``downloads/``.

    Args:
        filename: Object key in the bucket; also used as the relative path of
            the local copy under the ``downloads`` folder.
        bucket_name: Name of the source bucket.

    Returns:
        A Flask JSON response on success, or a ``(response, status)`` tuple
        with status 404 (object missing) or 500 (any other failure).
    """
    # NOTE(review): ``filename`` comes from the job input and is joined into a
    # local path unchecked — confirm callers cannot pass "../" components.
    # Named ``s3_session`` so it does not shadow Flask's imported ``session``.
    s3_session = boto3.session.Session()
    client = s3_session.client(
        's3',
        region_name='nyc3',
        endpoint_url='https://nyc3.digitaloceanspaces.com',
        aws_access_key_id=ACCESS_ID,
        aws_secret_access_key=SECRET_KEY,
    )

    object_key = f'{filename}'
    local_file_path = os.path.join('downloads', filename)

    # exist_ok avoids the check-then-create race between concurrent jobs.
    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

    status = 200
    try:
        client.download_file(bucket_name, object_key, local_file_path)
        payload = {
            'success': True,
            'message': 'File downloaded successfully',
            'file_path': local_file_path,
        }
    except client.exceptions.ClientError as e:
        # A missing object is reported as a ClientError carrying a 404 /
        # NoSuchKey error code, so inspect the code rather than the class.
        error_code = str(e.response.get('Error', {}).get('Code', ''))
        if error_code in ('404', 'NoSuchKey'):
            payload = {'error': 'File not found in the bucket'}
            status = 404
        else:
            payload = {'error': str(e)}
            status = 500
    except Exception as e:
        payload = {'error': str(e)}
        status = 500

    # jsonify needs an application context; this function is called from the
    # serverless handler, where Flask has not pushed one.
    with app.app_context():
        response = jsonify(payload)
    if status != 200:
        return response, status
    return response
233
+
234
+
235
def list_weights():
    """Return a JSON list of the file stems found directly in ``weights``.

    Only regular files are listed; sub-directories are skipped and each
    file's extension is removed.
    """
    directory = 'weights'
    filenames = []
    for entry in os.listdir(directory):
        if not os.path.isfile(os.path.join(directory, entry)):
            continue
        stem, _ = os.path.splitext(entry)
        filenames.append(stem)
    return jsonify(filenames)
241
+
242
+
243
def logout():
    """Clear the Flask session and redirect to the ``index`` endpoint.

    Returns:
        The redirect response produced by ``flask.redirect``.
    """
    # The module-level flask import does not bring in these two names, so the
    # original body raised NameError; import them where they are used.
    from flask import redirect, url_for

    session.clear()
    # NOTE(review): url_for("index") needs an endpoint named "index" to be
    # registered on the app — no route registration is visible in this file.
    return redirect(url_for("index"))
249
+
250
+
251
+
252
def get_status(audio_id):
    """Return the tracked status of task ``audio_id`` as a JSON response.

    Ids missing from ``task_status_tracker`` are reported with status
    "Unknown ID" and percentage 0.
    """
    print(audio_id)
    fallback = {"status": "Unknown ID", "percentage": 0}
    entry = task_status_tracker.get(audio_id, fallback)
    payload = {
        "audio_id": audio_id,
        "status": entry["status"],
        "percentage": entry["percentage"],
    }
    return jsonify(payload)
257
+
258
# Maps a task's unique_id to the path of its finished, mixed audio file
# (filled in by api_convert_voice, looked up by get_processed_audio).
processed_audio_storage = {}
259
+
260
def api_convert_voice(filename, spk_id1, unique_id):
    """Run the voice-conversion pipeline for one previously downloaded file.

    Steps: validate ``downloads/<filename>`` (size and duration limits), copy
    it into the temp folder, split vocals from the instrumental with
    ``cut_vocal_and_inst``, convert the vocals in a spawned ``worker``
    process, mix the result with ``combine_vocal_and_inst`` and upload the
    mix with ``upload_file``. Progress is published through
    ``task_status_tracker[unique_id]``.

    Args:
        filename: Name of the input file inside the ``downloads`` folder.
        spk_id1: Model name without the ``.pth`` extension.
        unique_id: Caller-supplied identifier used in file names and for
            status tracking.

    Returns:
        ``None`` on success; otherwise a ``(body, status_code)`` tuple
        describing the failure (429, 400 or 500).
    """
    # Local imports: neither name is imported at the top of the module.
    import json
    from flask import has_request_context

    if not request_semaphore.acquire(blocking=False):
        # jsonify needs an application context, which the serverless handler
        # does not provide.
        with app.app_context():
            return jsonify({"error": "Too many requests, please try again later"}), 429

    try:
        spk_id = spk_id1 + '.pth'
        print("speaker id path=", spk_id)
        voice_transform = 0
        local_file_path = os.path.join('downloads', filename)

        file_size = os.path.getsize(local_file_path)
        if file_size > 10 * 1024 * 1024:  # 10 MB limit
            return json.dumps({"error": "File size exceeds 10 MB"}), 400

        # The input is a file on disk rather than an uploaded form part, so
        # the decoder format is derived from the extension instead of a MIME
        # type. Unknown extensions default to 'mp3', as before.
        extension_format_map = {
            'mp3': 'mp3',
            'wav': 'wav',
            'mp4': 'mp4',
            'm4a': 'mp4',
        }
        extension = os.path.splitext(local_file_path)[1].lstrip('.').lower()
        audio_format = extension_format_map.get(extension, 'mp3')

        audio = AudioSegment.from_file(local_file_path, format=audio_format)

        # pydub reports the length in milliseconds.
        audio_length_minutes = len(audio) / 60000.0
        if audio_length_minutes > 5:
            return json.dumps({"error": "Audio length exceeds 5 minutes"}), 400

        print(unique_id)
        base_filename = os.path.basename(local_file_path)
        filename = werkzeug.utils.secure_filename(base_filename)
        input_audio_path = os.path.join(
            tmp, f"{spk_id}_input_audio_{unique_id}.{filename.split('.')[-1]}"
        )
        # Copy the downloaded file to the working location; the original code
        # called .save() on an undefined upload object here.
        shutil.copyfile(local_file_path, input_audio_path)

        # Split audio into vocals and accompaniment.
        task_status_tracker[unique_id] = {"status": "Processing: Step 1", "percentage": 30}
        cut_vocal_and_inst(input_audio_path, spk_id, unique_id)
        print("audio splitting performed")
        vocal_path = f"output/{spk_id}_{unique_id}/{split_model}/{spk_id}_input_audio_{unique_id}/vocals.wav"
        inst = f"output/{spk_id}_{unique_id}/{split_model}/{spk_id}_input_audio_{unique_id}/no_vocals.wav"
        print("*****before making call to convert ", unique_id)

        # Run the conversion in a freshly spawned process.
        ctx = get_context('spawn')
        output_queue = ctx.Queue()
        p = ctx.Process(
            target=worker,
            args=(spk_id, vocal_path, voice_transform, unique_id, output_queue,),
        )
        p.start()
        print("*******waiting for process to complete ")

        # Read the result BEFORE joining: joining a process that still has
        # unflushed queue data can deadlock.
        output_path = output_queue.get()
        p.join()

        # The worker sends an exception object on failure, and vc_single
        # returns a tuple when it fails; only a string is a usable path.
        if not isinstance(output_path, str):
            print("Exception in worker:", output_path)
            task_status_tracker[unique_id] = {"status": "Failed", "percentage": 0}
            return json.dumps({"error": f"Voice conversion failed: {output_path}"}), 500

        task_status_tracker[unique_id] = {"status": "Processing: Step 2", "percentage": 80}
        output_path1 = combine_vocal_and_inst(output_path, inst, unique_id)

        processed_audio_storage[unique_id] = output_path1
        # The Flask session only exists inside a request; skip it when running
        # as a serverless job.
        if has_request_context():
            session['processed_audio_id'] = unique_id
        task_status_tracker[unique_id] = {"status": "Finalizing", "percentage": 100}
        print(output_path1)
        # NOTE(review): the original call omitted the bucket name; "sing" is
        # the bucket the handler downloads from — confirm it is also the
        # intended upload target.
        upload_file(output_path1, "sing")
        print("file uploaded")
        task_status_tracker[unique_id]["status"] = "Completed"
    finally:
        request_semaphore.release()
365
+
366
def convert_voice_thread_safe(spk_id, vocal_path, voice_transform, unique_id):
    """Call ``convert_voice`` while holding ``convert_voice_lock``.

    Returns whatever ``convert_voice`` returns; the lock is released even if
    the call raises.
    """
    convert_voice_lock.acquire()
    try:
        return convert_voice(spk_id, vocal_path, voice_transform, unique_id)
    finally:
        convert_voice_lock.release()
369
+
370
+
371
+
372
def get_vc_safe(sid, to_return_protect0):
    """Call ``get_vc`` while holding ``convert_voice_lock``.

    Returns whatever ``get_vc`` returns; the lock is released even if the
    call raises.
    """
    convert_voice_lock.acquire()
    try:
        return get_vc(sid, to_return_protect0)
    finally:
        convert_voice_lock.release()
375
+
376
+
377
+
378
def get_processed_audio(audio_id):
    """Send the processed file registered for ``audio_id`` as an attachment.

    Returns a ``(json, 404)`` tuple when ``audio_id`` is not present in
    ``processed_audio_storage``.
    """
    if audio_id not in processed_audio_storage:
        return jsonify({"error": "File not found."}), 404
    stored_path = processed_audio_storage[audio_id]
    return send_file(stored_path, as_attachment=True)
384
+
385
def worker(spk_id, input_audio_path, voice_transform, unique_id, output_queue):
    """Process entry point: convert one file and report through a queue.

    Puts the value returned by ``convert_voice`` on ``output_queue``; if
    anything raises, the exception object is put on the queue instead so the
    parent process can inspect it.
    """
    try:
        converted = convert_voice(spk_id, input_audio_path, voice_transform, unique_id)
        print("output in worker for audio file", converted)
        output_queue.put(converted)
        print("added to output queue")
    except Exception as failure:
        print("exception in adding to queue")
        # Send the exception to the main process for debugging.
        output_queue.put(failure)
394
+
395
+
396
def convert_voice(spk_id, input_audio_path, voice_transform, unique_id):
    """Load model ``spk_id`` and convert ``input_audio_path`` with it.

    Calls ``get_vc`` to load the model, then ``vc_single`` with fixed
    settings (rmvpe pitch method, index rate 0.75, filter radius 3, no
    resampling, RMS mix 0.25, protect 0.33). Returns the value returned by
    ``vc_single``.
    """
    get_vc(spk_id, 0.5)
    print("*****before makinf call to vc ", unique_id)

    conversion_args = dict(
        sid=0,
        input_audio_path=input_audio_path,
        f0_up_key=voice_transform,  # voice_transform is used as the pitch shift
        f0_file=None,
        f0_method="rmvpe",
        file_index=spk_id,  # the model file name is passed as the index path
        index_rate=0.75,
        filter_radius=3,
        resample_sr=0,
        rms_mix_rate=0.25,
        protect=0.33,
        unique_id=unique_id,
    )
    output_audio_path = vc_single(**conversion_args)
    print(output_audio_path)
    return output_audio_path
417
+
418
def cut_vocal_and_inst(audio_path, spk_id, unique_id):
    """Split ``audio_path`` into vocals and accompaniment with demucs.

    Runs ``demucs --two-stems=vocals`` with the ``split_model`` model and
    writes its output below ``output/<spk_id>_<unique_id>``. A failing demucs
    run is only reported on stdout; nothing is returned.

    Args:
        audio_path: Path of the audio file to split.
        spk_id: Model file name, used in the output folder name.
        unique_id: Task identifier, used in the output folder name.
    """
    os.makedirs("output/result", exist_ok=True)
    print("before executing splitter")

    # Build the argument list directly (no str.split) so paths that contain
    # spaces stay a single argument.
    command = [
        "demucs",
        "--two-stems=vocals",
        "-n", split_model,
        audio_path,
        "-o", f"output/{spk_id}_{unique_id}",
    ]

    # Environment for the subprocess only: restrict it to CUDA device 0.
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = "0"

    # The prepared environment was previously built but never passed on.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        env=env,
    )
    if result.returncode != 0:
        print("Demucs process failed:", result.stderr)
    else:
        print("Demucs process completed successfully.")
    print("after executing splitter")

    print(result.stdout)
449
+
450
+
451
def combine_vocal_and_inst(vocal_path, inst_path, output_path):
    """Overlay the instrumental onto the vocal track and export an MP3.

    Args:
        vocal_path: Path of the vocal audio file.
        inst_path: Path of the instrumental audio file.
        output_path: Base name of the result; the file is written to
            ``output/result/<output_path>.mp3``.

    Returns:
        The path of the exported MP3 file.
    """
    os.makedirs("output/result", exist_ok=True)
    mixed_path = "output/result/{}.mp3".format(output_path)

    print(vocal_path)
    print(inst_path)

    vocal_track = AudioSegment.from_file(vocal_path)
    backing_track = AudioSegment.from_file(inst_path)

    # The vocal track is the base segment of the overlay.
    vocal_track.overlay(backing_track).export(mixed_path, format="mp3")
    return mixed_path
474
+
475
+
476
+
477
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    unique_id
):
    """Convert one audio file with the currently loaded model and save it.

    Uses the module globals set up by ``get_vc`` (``vc``, ``net_g``,
    ``tgt_sr``, ``version``) and the HuBERT model, loading the latter on
    demand.

    Args:
        sid: Identifier used in the output file name.
        input_audio_path: Audio file to convert; loaded as 16 kHz mono.
        f0_up_key: Pitch shift; converted with ``int()``.
        f0_file: Passed through to ``vc.pipeline``.
        f0_method: Pitch-extraction method name.
        file_index: Index path handed to ``vc.pipeline``.
        index_rate, filter_radius, rms_mix_rate, protect: Passed through to
            ``vc.pipeline``.
        resample_sr: When >= 16000 and different from ``tgt_sr`` it replaces
            the global ``tgt_sr`` used for writing the file.
        unique_id: Task identifier used in the output file name.

    Returns:
        The path of the written WAV file on success. On failure, the tuple
        ``(traceback_text, (None, None))`` — kept for backward compatibility;
        callers must check the type of the result.
    """
    global tgt_sr, net_g, vc, hubert_model, version, cpt
    print("***** in vc ", unique_id)

    try:
        print(f"Converting...")

        audio, sr = librosa.load(input_audio_path, sr=16000, mono=True)
        print(f"found audio ")
        f0_up_key = int(f0_up_key)
        times = [0, 0, 0]
        if hubert_model is None:
            load_hubert()
        print("loaded hubert")
        if_f0 = 1
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            0,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            f0_file=f0_file
        )

        if resample_sr >= 16000 and tgt_sr != resample_sr:
            tgt_sr = resample_sr

        print("writing to FS")
        print("***** before writing to file outout ", unique_id)
        output_file_path = os.path.join("output", f"converted_audio_{sid}_{unique_id}.wav")

        print("******* output file path ", output_file_path)
        os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
        print("create dir")
        # Save the audio file using the target sampling rate.
        sf.write(output_file_path, audio_opt, tgt_sr)

        print("wrote to FS")
        return output_file_path

    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit propagate; log the traceback so failures are visible.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
564
+
565
+
566
+
567
+
568
def get_vc(sid, to_return_protect0):
    """Load the model checkpoint ``sid`` into module globals.

    With a non-empty ``sid`` the checkpoint ``weight_root/<sid>`` is loaded
    into ``cpt[sid]``, a synthesizer matching its ``version`` / ``f0`` fields
    is built and stored in ``net_g``, a ``VC`` pipeline is stored in ``vc``,
    and ``tgt_sr``, ``n_spk``, ``version`` and ``weights_index`` are updated.
    With an empty ``sid`` the function tries to release the loaded models.

    Args:
        sid: Weight file name (e.g. ``"name.pth"``), or ``""`` / ``[]`` to
            unload.
        to_return_protect0: Value echoed back in the second element of the
            result when the model uses f0.

    Returns:
        A 4-tuple of Gradio update objects / update dicts (slider, protect
        setting, index dropdown, markdown title).
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, weights_index
    if sid == "" or sid == []:
        global hubert_model
        if hubert_model is not None:  # Because of polling, check whether sid switched from "model loaded" to "no model"
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
            hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            ### Without the extra steps below the cleanup is not thorough
            # NOTE(review): ``sid`` is "" or [] in this branch, so ``cpt[sid]``
            # looks like it raises (KeyError / unhashable list) — confirm
            # whether this branch is ever reached.
            if_f0 = cpt[sid].get("f0", 1)
            version = cpt[sid].get("version", "v1")
            if version == "v1":
                if if_f0 == 1:
                    net_g = SynthesizerTrnMs256NSFsid(
                        *cpt[sid]["config"], is_half=config.is_half
                    )
                else:
                    net_g = SynthesizerTrnMs256NSFsid_nono(*cpt[sid]["config"])
            elif version == "v2":
                if if_f0 == 1:
                    net_g = SynthesizerTrnMs768NSFsid(
                        *cpt[sid]["config"], is_half=config.is_half
                    )
                else:
                    net_g = SynthesizerTrnMs768NSFsid_nono(*cpt[sid]["config"])
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # NOTE(review): ``cpt`` becomes None here while the loading path
            # below assigns ``cpt[sid]`` — a later load would fail; verify.
            cpt = None
        return (
            gr.Slider.update(maximum=2333, visible=False),
            gr.Slider.update(visible=True),
            gr.Dropdown.update(choices=sorted(weights_index), value=""),
            gr.Markdown.update(value="# <center> No model selected")
        )
    print(f"Loading {sid} model...")
    # Drop the last four characters of the file name (the ".pth" suffix).
    selected_model = sid[:-4]
    cpt[sid] = torch.load(os.path.join(weight_root, sid), map_location="cpu")
    # The last config entry is used as the target sample rate.
    tgt_sr = cpt[sid]["config"][-1]
    # Overwrite config[-3] with the row count of the ``emb_g`` weight
    # (read back as ``n_spk`` below).
    cpt[sid]["config"][-3] = cpt[sid]["weight"]["emb_g.weight"].shape[0]
    if_f0 = cpt[sid].get("f0", 1)
    if if_f0 == 0:
        to_return_protect0 = {
            "visible": False,
            "value": 0.5,
            "__type__": "update",
        }
    else:
        to_return_protect0 = {
            "visible": True,
            "value": to_return_protect0,
            "__type__": "update",
        }
    # Pick the synthesizer class from the checkpoint's version and f0 flag.
    version = cpt[sid].get("version", "v1")
    if version == "v1":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs256NSFsid(*cpt[sid]["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt[sid]["config"])
    elif version == "v2":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs768NSFsid(*cpt[sid]["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt[sid]["config"])
    # Remove the ``enc_q`` attribute before loading weights
    # (presumably unused at inference time — TODO confirm).
    del net_g.enc_q
    print(net_g.load_state_dict(cpt[sid]["weight"], strict=False))
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt[sid]["config"][-3]
    # Rescan the index folder and pre-select the index whose path contains the
    # model name, falling back to the first index found.
    weights_index = []
    for _, _, index_files in os.walk(index_root):
        for file in index_files:
            if file.endswith('.index') and "trained" not in file:
                weights_index.append(os.path.join(index_root, file))
    if weights_index == []:
        selected_index = gr.Dropdown.update(value="")
    else:
        selected_index = gr.Dropdown.update(value=weights_index[0])
    for index, model_index in enumerate(weights_index):
        if selected_model in model_index:
            selected_index = gr.Dropdown.update(value=weights_index[index])
            break
    return (
        gr.Slider.update(maximum=n_spk, visible=True),
        to_return_protect0,
        selected_index,
        gr.Markdown.update(
            f'## <center> {selected_model}\n'+
            f'### <center> RVC {version} Model'
        )
    )
665
+
666
+
667
+
668
def handler(job):
    """RunPod job entry point: download the input file and convert it.

    Reads ``filename``, ``spk_id`` and ``unique_id`` from ``job["input"]``,
    downloads the file from the "sing" bucket and runs
    ``api_convert_voice``. The results of both calls are ignored and a fixed
    placeholder string is returned.
    """
    job_input = job["input"]  # Access the input from the request.
    filename, spk_id, unique_id = (
        job_input[key] for key in ("filename", "spk_id", "unique_id")
    )
    download_file(filename, "sing")
    api_convert_voice(filename, spk_id, unique_id)
    return "Your job results"
677
+
678
if __name__ == '__main__':
    # Hand the job handler to the RunPod serverless runtime (required).
    serverless_config = {"handler": handler}
    runpod.serverless.start(serverless_config)