model:
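  # Pretrained backbones: the LLM, the Whisper speech encoder, and the BEATs
  # audio encoder (paths/IDs below refer to whatever the local setup provides).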
  llama_path: "DeepSeek-R1-Distill-Qwen-1.5B/"
  whisper_path: "distil-whisper/distil-large-v3/"
  beats_path: "BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt"
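
  # Trained checkpoint to load for resuming/evaluation; presumably contains the
  # Q-Former, projection, and LoRA weights saved during training.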
  ckpt: "tiny_all_tasks_319.pth"
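
  # Keep both audio encoders frozen; they act as fixed feature extractors.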
  freeze_whisper: True
  freeze_beats: True
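
  # Window-level Q-Former: the audio features are split into ~0.33 s windows and
  # each window is compressed into num_speech_query_token query embeddings.
  # With second_stride equal to second_per_window, the windows do not overlap.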
  use_speech_Qformer: True
  freeze_speech_QFormer: False
  window_level_Qformer: True
  num_speech_query_token: 1
  second_per_window: 0.333333
  second_stride: 0.333333
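
  # Speech-to-LLM projection layer; the empty path presumably means no
  # pretrained projection is loaded, and it is kept trainable (not frozen).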
  speech_llama_proj_model: ""
  freeze_speech_llama_proj: False
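
  # LoRA adapters for the LLM (rank / alpha / dropout hyperparameters).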
  lora: True
  lora_rank: 8
  lora_alpha: 32
  lora_dropout: 0.1
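
  # Prompting: task prompts are presumably sampled from the JSON files below and
  # wrapped in the USER/ASSISTANT template; end_sym terminates each target text.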
  multi_prompt: True
  prompt_template: "USER: {}\nASSISTANT:"
  prompt_path: "prompts/train_prompt.json"
  test_prompt_path: "prompts/test_prompt.json"
  max_txt_len: 300
  end_sym: "</s>"

generate:
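  # Decoding settings: beam search with 4 beams; since do_sample is False,
  # temperature and top_p typically have no effect here.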
  max_new_tokens: 200
  num_beams: 4
  do_sample: False
  min_length: 1
  temperature: 1.0
  top_p: 0.9
  repetition_penalty: 1.0
  length_penalty: 1.0