# Saved loader settings for the model entry keyed below.
# NOTE(review): the trailing `$` suggests the consumer treats this key as a
# regex matched against the model name -- confirm before renaming or quoting it.
TinyLlama_TinyLlama-1.1B-Chat-v1.0$:
  loader: Transformers

  # Device / memory placement.
  cpu_memory: 2048  # NOTE(review): unit not stated here (presumably MiB) -- confirm
  auto_devices: false
  disk: false
  cpu: true
  # NOTE(review): bf16 is enabled together with cpu: true -- confirm the
  # target CPU/backend supports bfloat16.
  bf16: true

  # Model / tokenizer loading flags.
  load_in_8bit: false
  trust_remote_code: false
  no_use_fast: false
  use_flash_attention_2: false

  # 4-bit quantization options.
  # NOTE(review): load_in_4bit is false, so compute_dtype, quant_type and
  # use_double_quant are presumably unused -- verify against the consumer.
  load_in_4bit: false
  compute_dtype: bfloat16
  quant_type: fp4
  use_double_quant: false

  # ExLlama kernel toggles.
  disable_exllama: false
  disable_exllamav2: false

  # Positional-embedding / RoPE scaling.
  # NOTE(review): 1 / 1 / 0 look like "no scaling" defaults -- confirm.
  compress_pos_emb: 1
  alpha_value: 1
  rope_freq_base: 0