# Spaces: FunAudioLLM/Fun-CineForge-Demo (Running on Zero)
# Upload 111 files
# 03022ee verified 10 days ago
# 959 Bytes

	model: FunCineForgeInferModel
	index_ds: FunCineForgeDS
	xvec_model: pretrained_models/funcineforge_zh_en/camplus.onnx
	model_conf: {}

	dataset_conf:
	# face is from the video, vocal is the reference audio, extract speaker ID and start-end timestamp from dialogue
	load_meta_data_key: "text,clue,face,dialogue,vocal,video"
	sos: 6561
	eos: 6562
	turn_of_speech: 6563
	fill_token: 6564
	ignore_id: -100
	startofclue_token: 151646
	endofclue_token: 151647
	frame_shift: 25 # ms
	timebook_size: 1500 # 60 * 25 = 1500
	pangbai: 1500
	dubai: 1501
	duihua: 1502
	duoren: 1503
	male: 1504
	female: 1505
	child: 1506
	youth: 1507
	adult: 1508
	middle: 1509
	elderly: 1510
	speaker_id_start: 1511


	sampling: ras
	lm_use_prompt: true
	fm_use_prompt: true
	use_llm_cache: true
	seed: 0
	max_length: 1500 # 60s * 25 fps
	min_length: 50 # 2s * 25 fps
	llm_dtype: fp32
	fm_dtype: fp32
	voc_dtype: fp32
	batch_size: 1