Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -73,18 +73,15 @@ from models.soundstream_hubert_new import SoundStream 
     | 
|
| 73 | 
         
             
            device = "cuda:0"
         
     | 
| 74 | 
         | 
| 75 | 
         
             
            model = AutoModelForCausalLM.from_pretrained(
         
     | 
| 76 | 
         
            -
                "m-a-p/YuE-s1-7B-anneal-en-cot",
         
     | 
| 77 | 
         
             
                torch_dtype=torch.float16,
         
     | 
| 78 | 
         
             
                attn_implementation="flash_attention_2",
         
     | 
| 79 | 
         
            -
                 
     | 
| 80 | 
         
             
            ).to(device)
         
     | 
| 81 | 
         
             
            model.eval()
         
     | 
| 82 | 
         | 
| 83 | 
         
             
            basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
         
     | 
| 84 | 
         
             
            resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
         
     | 
| 85 | 
         
            -
            #config_path = './xcodec_mini_infer/decoders/config.yaml' # removed vocoder
         
     | 
| 86 | 
         
            -
            #vocal_decoder_path = './xcodec_mini_infer/decoders/decoder_131000.pth' # removed vocoder
         
     | 
| 87 | 
         
            -
            #inst_decoder_path = './xcodec_mini_infer/decoders/decoder_151000.pth' # removed vocoder
         
     | 
| 88 | 
         | 
| 89 | 
         
             
            mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
         
     | 
| 90 | 
         | 
| 
         @@ -94,18 +91,8 @@ model_config = OmegaConf.load(basic_model_config) 
     | 
|
| 94 | 
         
             
            codec_model = eval(model_config.generator.name)(**model_config.generator.config).to(device)
         
     | 
| 95 | 
         
             
            parameter_dict = torch.load(resume_path, map_location='cpu')
         
     | 
| 96 | 
         
             
            codec_model.load_state_dict(parameter_dict['codec_model'])
         
     | 
| 97 | 
         
            -
            # codec_model = torch.compile(codec_model)
         
     | 
| 98 | 
         
             
            codec_model.eval()
         
     | 
| 99 | 
         | 
| 100 | 
         
            -
            # Preload and compile vocoders # removed vocoder
         
     | 
| 101 | 
         
            -
            #vocal_decoder, inst_decoder = build_codec_model(config_path, vocal_decoder_path, inst_decoder_path)
         
     | 
| 102 | 
         
            -
            #vocal_decoder.to(device)
         
     | 
| 103 | 
         
            -
            #inst_decoder.to(device)
         
     | 
| 104 | 
         
            -
            #vocal_decoder = torch.compile(vocal_decoder)
         
     | 
| 105 | 
         
            -
            #inst_decoder = torch.compile(inst_decoder)
         
     | 
| 106 | 
         
            -
            #vocal_decoder.eval()
         
     | 
| 107 | 
         
            -
            #inst_decoder.eval()
         
     | 
| 108 | 
         
            -
             
     | 
| 109 | 
         | 
| 110 | 
         
             
            @spaces.GPU(duration=120)
         
     | 
| 111 | 
         
             
            def generate_music(
         
     | 
| 
         @@ -309,8 +296,8 @@ def generate_music( 
     | 
|
| 309 | 
         
             
                                    continue
         
     | 
| 310 | 
         
             
                                # mix
         
     | 
| 311 | 
         
             
                                recons_mix = os.path.join(recons_mix_dir, os.path.basename(inst_path).replace('instrumental', 'mixed'))
         
     | 
| 312 | 
         
            -
                                vocal_stem, sr = sf.read( 
     | 
| 313 | 
         
            -
                                instrumental_stem, _ = sf.read( 
     | 
| 314 | 
         
             
                                mix_stem = (vocal_stem + instrumental_stem) / 1
         
     | 
| 315 | 
         
             
                                return (sr, (mix_stem * 32767).astype(np.int16)), (sr, (vocal_stem * 32767).astype(np.int16)), (sr, (instrumental_stem * 32767).astype(np.int16))
         
     | 
| 316 | 
         
             
                        except Exception as e:
         
     | 
| 
         | 
|
| 73 | 
         
             
            device = "cuda:0"
         
     | 
| 74 | 
         | 
| 75 | 
         
             
            model = AutoModelForCausalLM.from_pretrained(
         
     | 
| 76 | 
         
            +
                "m-a-p/YuE-s1-7B-anneal-en-icl", # "m-a-p/YuE-s1-7B-anneal-en-cot",
         
     | 
| 77 | 
         
             
                torch_dtype=torch.float16,
         
     | 
| 78 | 
         
             
                attn_implementation="flash_attention_2",
         
     | 
| 79 | 
         
            +
                low_cpu_mem_usage=True,
         
     | 
| 80 | 
         
             
            ).to(device)
         
     | 
| 81 | 
         
             
            model.eval()
         
     | 
| 82 | 
         | 
| 83 | 
         
             
            basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
         
     | 
| 84 | 
         
             
            resume_path = './xcodec_mini_infer/final_ckpt/ckpt_00360000.pth'
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 85 | 
         | 
| 86 | 
         
             
            mmtokenizer = _MMSentencePieceTokenizer("./mm_tokenizer_v0.2_hf/tokenizer.model")
         
     | 
| 87 | 
         | 
| 
         | 
|
| 91 | 
         
             
            codec_model = eval(model_config.generator.name)(**model_config.generator.config).to(device)
         
     | 
| 92 | 
         
             
            parameter_dict = torch.load(resume_path, map_location='cpu')
         
     | 
| 93 | 
         
             
            codec_model.load_state_dict(parameter_dict['codec_model'])
         
     | 
| 
         | 
|
| 94 | 
         
             
            codec_model.eval()
         
     | 
| 95 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 96 | 
         | 
| 97 | 
         
             
            @spaces.GPU(duration=120)
         
     | 
| 98 | 
         
             
            def generate_music(
         
     | 
| 
         | 
|
| 296 | 
         
             
                                    continue
         
     | 
| 297 | 
         
             
                                # mix
         
     | 
| 298 | 
         
             
                                recons_mix = os.path.join(recons_mix_dir, os.path.basename(inst_path).replace('instrumental', 'mixed'))
         
     | 
| 299 | 
         
            +
                                vocal_stem, sr = sf.read(vocal_path)
         
     | 
| 300 | 
         
            +
                                instrumental_stem, _ = sf.read(inst_path)
         
     | 
| 301 | 
         
             
                                mix_stem = (vocal_stem + instrumental_stem) / 1
         
     | 
| 302 | 
         
             
                                return (sr, (mix_stem * 32767).astype(np.int16)), (sr, (vocal_stem * 32767).astype(np.int16)), (sr, (instrumental_stem * 32767).astype(np.int16))
         
     | 
| 303 | 
         
             
                        except Exception as e:
         
     |