Audio-Text-to-Text
Transformers
Safetensors
English
Korean
symphony
feature-extraction
audio
text-generation
custom_code
Eval Results
Instructions to use okestro-ai-lab/SYMPHONY-ASR with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use okestro-ai-lab/SYMPHONY-ASR with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("okestro-ai-lab/SYMPHONY-ASR", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| from transformers import PretrainedConfig, AutoConfig | |
class SymphonySpeechEncoderConfig(PretrainedConfig):
    """Configuration container for the Symphony speech encoder.

    Each constructor argument is stored as an instance attribute of the same
    name. Any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    NOTE(review): the parameter names suggest Whisper-style encoder
    hyperparameters (mel bins, context length, hidden width, attention heads,
    layer count) -- confirm against the encoder implementation.

    Args:
        n_mels: Stored as ``self.n_mels``. Defaults to 128.
        n_ctx: Stored as ``self.n_ctx``. Defaults to 1500.
        n_state: Stored as ``self.n_state``. Defaults to 1280.
        n_head: Stored as ``self.n_head``. Defaults to 20.
        n_layer: Stored as ``self.n_layer``. Defaults to 32.
        stage_tokens: Sequence of ints stored (as a fresh list) in
            ``self.stage_tokens``. ``None`` selects the default
            ``[80, 80, 80]``.
        compression_size: Stored as ``self.compression_size``. Defaults to 50.
        **kwargs: Passed through to ``PretrainedConfig``.
    """

    model_type = "symphony_speech_encoder"

    def __init__(
        self,
        n_mels=128,
        n_ctx=1500,
        n_state=1280,
        n_head=20,
        n_layer=32,
        stage_tokens=None,
        compression_size=50,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.n_mels = n_mels
        self.n_ctx = n_ctx
        self.n_state = n_state
        self.n_head = n_head
        self.n_layer = n_layer
        # Build a new list per instance: a list literal used as the default
        # argument would be one object shared (and mutable) across every
        # config created without an explicit ``stage_tokens``.
        if stage_tokens is None:
            self.stage_tokens = [80, 80, 80]
        else:
            self.stage_tokens = list(stage_tokens)
        self.compression_size = compression_size
class SymphonyConfig(PretrainedConfig):
    """Top-level Symphony configuration combining an encoder config and an LLM config.

    Args:
        encoder_config: ``None`` (use a default ``SymphonySpeechEncoderConfig``),
            a dict of ``SymphonySpeechEncoderConfig`` keyword arguments, or an
            already-built config object (stored as given).
        llm_config: ``None`` (load the config of ``"Qwen/Qwen3-4B"`` via
            ``AutoConfig.from_pretrained``), a dict, or an already-built config
            object (stored as given). A dict containing ``"_name_or_path"`` is
            resolved with ``AutoConfig.from_pretrained`` using the dict entries
            as overrides; otherwise the dict must contain ``"model_type"`` and
            is instantiated with ``AutoConfig.for_model``.
        lora_r: Stored as ``self.lora_r``. Defaults to 16.
            NOTE(review): presumably the LoRA rank -- confirm with the model code.
        lora_a: Stored as ``self.lora_a``. Defaults to 64.
            NOTE(review): presumably the LoRA alpha -- confirm with the model code.
        llm_modules: List of module names stored as ``self.llm_modules``.
            ``None`` selects the seven projection names listed below.
        low_resource: Stored as ``self.low_resource``. Defaults to ``False``.
        **kwargs: Passed through to ``PretrainedConfig``.

    Raises:
        ValueError: If ``llm_config`` is a dict that has neither
            ``"_name_or_path"`` nor ``"model_type"``.
    """

    model_type = "symphony"

    def __init__(
        self,
        encoder_config=None,
        llm_config=None,
        lora_r=16,
        lora_a=64,
        llm_modules=None,
        low_resource=False,
        **kwargs
    ):
        # Default value for llm_modules.
        if llm_modules is None:
            llm_modules = ["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"]

        # LLM config handling: a dict is converted into a config object.
        if llm_config is None:
            llm_config = AutoConfig.from_pretrained("Qwen/Qwen3-4B")
        elif isinstance(llm_config, dict):
            if "_name_or_path" in llm_config:
                llm_config = AutoConfig.from_pretrained(llm_config["_name_or_path"], **llm_config)
            else:
                # AutoConfig does not provide ``from_dict``; the supported way
                # to build a config from plain values is ``for_model``, which
                # picks the concrete config class from ``model_type``.
                llm_params = dict(llm_config)  # copy so the caller's dict is untouched
                llm_model_type = llm_params.pop("model_type", None)
                if llm_model_type is None:
                    raise ValueError(
                        "llm_config dict must contain either '_name_or_path' or 'model_type'"
                    )
                llm_config = AutoConfig.for_model(llm_model_type, **llm_params)

        # Encoder config handling.
        if encoder_config is None:
            encoder_config = SymphonySpeechEncoderConfig()
        elif isinstance(encoder_config, dict):
            encoder_config = SymphonySpeechEncoderConfig(**encoder_config)

        self.llm_config = llm_config
        self.encoder_config = encoder_config
        self.lora_r = lora_r
        self.lora_a = lora_a
        self.llm_modules = llm_modules
        self.low_resource = low_resource
        super().__init__(**kwargs)