Update infer-web.py

infer-web.py: +352, -347 (CHANGED)
@@ -69,6 +69,9 @@ import time
 import csv
 from shlex import quote as SQuote

+import torch
+cpu_flag = torch.cuda.is_available()
+
 logger = logging.getLogger(__name__)

 RQuote = lambda val: SQuote(str(val))
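The two added lines give the UI a module-level flag for GPU availability; the second hunk below uses it as `gr.Tabs(visible=cpu_flag)`, so the tab group it wraps (Train, UVR5, and the rest) is hidden when PyTorch cannot see a CUDA device. A minimal, self-contained sketch of that gating pattern (the demo names below are illustrative, not code from this file):

```python
import gradio as gr
import torch

# True when PyTorch can see a CUDA device; mirrors `cpu_flag` in the diff.
has_gpu = torch.cuda.is_available()

with gr.Blocks() as demo:
    gr.Markdown("Inference controls (always visible)")
    # Hiding the whole Tabs container removes every GPU-only tab at once.
    with gr.Tabs(visible=has_gpu):
        with gr.TabItem("Train"):
            gr.Markdown("Training controls (shown only when a GPU is available)")

if __name__ == "__main__":
    demo.launch()
```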
@@ -2451,384 +2454,386 @@ def GradioSetup():
                 outputs=[advanced_settings_batch],
             )


+        with gr.Tabs(visible=cpu_flag) as tabs:
+            with gr.TabItem(i18n("Train"), visible=False):
+
+                with gr.Accordion(label=i18n("Step 1: Processing data")):
+                    with gr.Row():
+                        with gr.Column():
+                            exp_dir1 = gr.Textbox(
+                                label=i18n("Enter the model name:"),
+                                value=i18n("Model_Name"),
+                            )
+                            if_f0_3 = gr.Checkbox(
+                                label=i18n("Whether the model has pitch guidance."),
+                                value=True,
+                                interactive=True,
+                            )
+                            sr2 = gr.Radio(
+                                label=i18n("Target sample rate:"),
+                                choices=["40k", "48k", "32k"],
+                                value="40k",
                                 interactive=True,
                             )
+                            version19 = gr.Radio(
+                                label=i18n("Version:"),
+                                choices=["v1", "v2"],
+                                value="v2",
                                 interactive=True,
+                                visible=True,
                             )
+
+                        with gr.Column():
+                            np7 = gr.Slider(
+                                minimum=1,
+                                maximum=config.n_cpu,
+                                step=1,
+                                label=i18n("Number of CPU processes:"),
+                                value=config.n_cpu,
+                                interactive=True,
+                            )
+                            spk_id5 = gr.Slider(
+                                minimum=0,
+                                maximum=4,
+                                step=1,
+                                label=i18n("Specify the model ID:"),
+                                value=0,
+                                interactive=True,
+                            )
+
+                    with gr.Row():
+                        with gr.Column():
+                            trainset_dir4 = gr.Dropdown(
+                                choices=sorted(datasets),
+                                label=i18n("Select your dataset:"),
+                                value=get_dataset(),
+                            )
+
+                            dataset_path = gr.Textbox(
+                                label=i18n("Or add your dataset path:"),
+                                interactive=True,
+                            )
+                            btn_update_dataset_list = gr.Button(
+                                i18n("Update list"), variant="primary"
+                            )
+
+                            btn_update_dataset_list.click(
+                                resources.update_dataset_list, [spk_id5], trainset_dir4
                             )
+                            but1 = gr.Button(i18n("Process data"), variant="primary")
+                            info1 = gr.Textbox(label=i18n("Output information:"), value="")
+                            but1.click(
+                                preprocess_dataset,
+                                [trainset_dir4, exp_dir1, sr2, np7, dataset_path],
+                                [info1],
+                                api_name="train_preprocess",
                             )
+
+                with gr.Accordion(label=i18n("Step 2: Extracting features")):
+                    with gr.Row():
+                        with gr.Column():
+                            gpus6 = gr.Textbox(
+                                label=i18n(
+                                    "Provide the GPU index(es) separated by '-', like 0-1-2 for using GPUs 0, 1, and 2:"
+                                ),
+                                value=gpus,
+                                interactive=True,
+                            )
+                            gpu_info9 = gr.Textbox(
+                                label=i18n("GPU Information:"),
+                                value=gpu_info,
+                                visible=F0GPUVisible,
+                            )
+                        with gr.Column():
+                            f0method8 = gr.Radio(
+                                label=i18n("Select the pitch extraction algorithm:"),
+                                choices=[
+                                    "pm",
+                                    "harvest",
+                                    "dio",
+                                    "crepe",
+                                    "mangio-crepe",
+                                    "rmvpe",
+                                    "rmvpe_gpu",
+                                ],
+                                value="rmvpe",
+                                interactive=True,
+                            )
+                            hop_length = gr.Slider(
+                                minimum=1,
+                                maximum=512,
+                                step=1,
+                                label=i18n(
+                                    "Hop Length (lower hop lengths take more time to infer but are more pitch accurate):"
+                                ),
+                                value=64,
+                                interactive=True,
+                            )
+
+                    with gr.Row():
+                        but2 = gr.Button(i18n("Feature extraction"), variant="primary")
+                        info2 = gr.Textbox(
+                            label=i18n("Output information:"),
+                            value="",
+                            max_lines=8,
+                            interactive=False,
                         )
+
+                    but2.click(
+                        extract_f0_feature,
+                        [
+                            gpus6,
+                            np7,
+                            f0method8,
+                            if_f0_3,
+                            exp_dir1,
+                            version19,
+                            hop_length,
+                        ],
+                        [info2],
+                        api_name="train_extract_f0_feature",
                     )
+
                 with gr.Row():
+                    with gr.Accordion(label=i18n("Step 3: Model training started")):
+                        with gr.Row():
+                            save_epoch10 = gr.Slider(
+                                minimum=1,
+                                maximum=100,
+                                step=1,
+                                label=i18n("Save frequency:"),
+                                value=10,
+                                interactive=True,
+                                visible=True,
+                            )
+                            total_epoch11 = gr.Slider(
+                                minimum=1,
+                                maximum=10000,
+                                step=2,
+                                label=i18n("Training epochs:"),
+                                value=750,
+                                interactive=True,
+                            )
+                            batch_size12 = gr.Slider(
+                                minimum=1,
+                                maximum=50,
+                                step=1,
+                                label=i18n("Batch size per GPU:"),
+                                value=default_batch_size,
+                                # value=20,
+                                interactive=True,
+                            )
+
+                        with gr.Row():
+                            if_save_latest13 = gr.Checkbox(
+                                label=i18n(
+                                    "Whether to save only the latest .ckpt file to save hard drive space"
+                                ),
+                                value=True,
+                                interactive=True,
+                            )
+                            if_cache_gpu17 = gr.Checkbox(
+                                label=i18n(
+                                    "Cache all training sets to GPU memory. Caching small datasets (less than 10 minutes) can speed up training"
+                                ),
+                                value=False,
+                                interactive=True,
+                            )
+                            if_save_every_weights18 = gr.Checkbox(
+                                label=i18n(
+                                    "Save a small final model to the 'weights' folder at each save point"
+                                ),
+                                value=True,
+                                interactive=True,
+                            )
+                        with gr.Column():
+                            with gr.Row():
+                                pretrained_G14 = gr.Textbox(
+                                    label=i18n("Load pre-trained base model G path:"),
+                                    value="assets/pretrained_v2/f0G40k.pth",
+                                    interactive=True,
+                                )
+                                pretrained_D15 = gr.Textbox(
+                                    label=i18n("Load pre-trained base model D path:"),
+                                    value="assets/pretrained_v2/f0D40k.pth",
+                                    interactive=True,
+                                )
+                            with gr.Row():
+                                gpus16 = gr.Textbox(
+                                    label=i18n(
+                                        "Provide the GPU index(es) separated by '-', like 0-1-2 for using GPUs 0, 1, and 2:"
+                                    ),
+                                    value=gpus,
+                                    interactive=True,
+                                )
+                            sr2.change(
+                                change_sr2,
+                                [sr2, if_f0_3, version19],
+                                [pretrained_G14, pretrained_D15],
+                            )
+                            version19.change(
+                                change_version19,
+                                [sr2, if_f0_3, version19],
+                                [pretrained_G14, pretrained_D15, sr2],
+                            )
+                            if_f0_3.change(
+                                fn=change_f0,
+                                inputs=[if_f0_3, sr2, version19],
+                                outputs=[f0method8, pretrained_G14, pretrained_D15],
+                            )
+                            with gr.Row():
+                                butstop = gr.Button(
+                                    i18n("Stop training"),
+                                    variant="primary",
+                                    visible=False,
+                                )
+                                but3 = gr.Button(
+                                    i18n("Train model"), variant="primary", visible=True
+                                )
+                                but3.click(
+                                    fn=stoptraining,
+                                    inputs=[gr.Number(value=0, visible=False)],
+                                    outputs=[but3, butstop],
+                                    api_name="train_stop",
+                                )
+                                butstop.click(
+                                    fn=stoptraining,
+                                    inputs=[gr.Number(value=1, visible=False)],
+                                    outputs=[but3, butstop],
+                                )
+                                info3 = gr.Textbox(
+                                    label=i18n("Output information:"),
+                                    value="",
+                                    lines=4,
+                                    max_lines=4,
+                                )
+
+                                with gr.Column():
+                                    save_action = gr.Dropdown(
+                                        label=i18n("Save type"),
+                                        choices=[
+                                            i18n("Save all"),
+                                            i18n("Save D and G"),
+                                            i18n("Save voice"),
+                                        ],
+                                        value=i18n("Choose the method"),
+                                        interactive=True,
+                                    )
+                                    but4 = gr.Button(
+                                        i18n("Train feature index"), variant="primary"
+                                    )
+
+                                    but7 = gr.Button(i18n("Save model"), variant="primary")
+
+                                if_save_every_weights18.change(
+                                    fn=lambda if_save_every_weights: (
+                                        {
+                                            "visible": if_save_every_weights,
+                                            "__type__": "update",
+                                        }
+                                    ),
+                                    inputs=[if_save_every_weights18],
+                                    outputs=[save_epoch10],
+                                )
+
+                            but3.click(
+                                click_train,
+                                [
+                                    exp_dir1,
+                                    sr2,
+                                    if_f0_3,
+                                    spk_id5,
+                                    save_epoch10,
+                                    total_epoch11,
+                                    batch_size12,
+                                    if_save_latest13,
+                                    pretrained_G14,
+                                    pretrained_D15,
+                                    gpus16,
+                                    if_cache_gpu17,
+                                    if_save_every_weights18,
+                                    version19,
                                 ],
+                                [info3, butstop, but3],
+                                api_name="train_start",
                             )
+
+                            but4.click(train_index, [exp_dir1, version19], info3)
+                            but7.click(resources.save_model, [exp_dir1, save_action], info3)
+
+            with gr.TabItem(i18n("UVR5")):  # UVR section
                 with gr.Row():
+                    with gr.Column():
+                        model_select = gr.Radio(
+                            label=i18n("Model Architecture:"),
+                            choices=["VR", "MDX", "Demucs (Beta)"],
+                            value="VR",
                             interactive=True,
                         )
+                        dir_wav_input = gr.Textbox(
                             label=i18n(
+                                "Enter the path of the audio folder to be processed:"
                             ),
+                            value=os.path.join(now_dir, "assets", "audios"),
                         )
+                        wav_inputs = gr.File(
+                            file_count="multiple",
                             label=i18n(
+                                "You can also input audio files in batches. Choose one of the two options. Priority is given to reading from the folder."
                             ),
                         )
+
                     with gr.Column():
+                        model_choose = gr.Dropdown(
+                            label=i18n("Model:"), choices=uvr5_names
                         )
+                        agg = gr.Slider(
+                            minimum=0,
+                            maximum=20,
+                            step=1,
+                            label="Vocal Extraction Aggressive",
+                            value=10,
+                            interactive=True,
                             visible=False,
                         )
+                        opt_vocal_root = gr.Textbox(
+                            label=i18n("Specify the output folder for vocals:"),
+                            value="assets/audios",
                         )
+                        opt_ins_root = gr.Textbox(
+                            label=i18n("Specify the output folder for accompaniment:"),
+                            value="assets/audios/audio-others",
                         )
+                        format0 = gr.Radio(
+                            label=i18n("Export file format:"),
+                            choices=["wav", "flac", "mp3", "m4a"],
+                            value="flac",
+                            interactive=True,
                         )
+                model_select.change(
+                    fn=update_model_choices,
+                    inputs=model_select,
+                    outputs=model_choose,
+                )
+                but2 = gr.Button(i18n("Convert"), variant="primary")
+                vc_output4 = gr.Textbox(label=i18n("Output information:"))
+                # wav_inputs.upload(fn=save_to_wav2_edited, inputs=[wav_inputs], outputs=[])
+                but2.click(
+                    uvr,
                     [
+                        model_choose,
+                        dir_wav_input,
+                        opt_vocal_root,
+                        wav_inputs,
+                        opt_ins_root,
+                        agg,
+                        format0,
+                        model_select,
                     ],
+                    [vc_output4],
+                    api_name="uvr_convert",
                 )
             with gr.TabItem(i18n("TTS")):
                 with gr.Column():
                     text_test = gr.Textbox(
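Several of the event bindings in this hunk register named API endpoints (`train_preprocess`, `train_extract_f0_feature`, `train_stop`, `train_start`, `uvr_convert`), so the same actions can be driven programmatically once the app is running. A hedged sketch using `gradio_client` (the URL and the argument value are placeholders, not taken from this repository):

```python
from gradio_client import Client

# Placeholder address; point this at wherever the Gradio app is actually served.
client = Client("http://127.0.0.1:7860/")

# Invoke the event registered above as api_name="train_stop".
# Its single input mirrors the hidden gr.Number component wired to that click handler.
result = client.predict(0, api_name="/train_stop")
print(result)
```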