###############################
# miqu-1-120b-attenuated.yaml #
###############################

# Use: mergekit-yaml --clone-tensors ./miqu-1-120b-attenuated.yaml ./miqu-1-120b-attenuated
# See: https://huggingface.co/wolfram/miqu-1-120b for original 'miqu-1-120b' layer ranges.
# See: https://github.com/arcee-ai/mergekit/issues/198 for discussion/reasoning behind this idea.

# ---

# The scale factor to use, e.g. solve x^2 = 1/2 --> x = 1/sqrt(2) ≈ 0.7071067812
# NOTE(review): this key only exists to carry the `&scale_factor` anchor;
# presumably mergekit ignores unknown top-level keys - confirm.
const_tag: &scale_factor 0.7071067812  # 1/sqrt(2)

# The filter parameters of a scaled block.
# Merged into a source entry with `<<: *attenuated_env`, this adds a
# `parameters.scale` list that scales q_proj and k_proj by the factor above.
# NOTE(review): like `const_tag`, this key only exists to carry an anchor.
attenuate-env: &attenuated_env
  parameters:
    scale:
      - filter: q_proj
        value: *scale_factor
      - filter: k_proj
        value: *scale_factor
      # Entry without a filter: value for every tensor not matched above
      # (1.0, i.e. left unscaled).
      - value: 1.0

# ---

slices:

  ############################
  # Block 1: miqu-1 [0, 20]  #
  ############################
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [0, 10]  # The first 10 layers of Block 1 are not duplicated
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [10, 20]  # The last 10 layers of Block 1 are duplicated twice
        <<: *attenuated_env

  ############################
  # Block 2: miqu-1 [10, 30] #
  ############################
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [10, 30]
        <<: *attenuated_env

  ############################
  # Block 3: miqu-1 [20, 40] #
  ############################
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [20, 40]
        <<: *attenuated_env

  ############################
  # Block 4: miqu-1 [30, 50] #
  ############################
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [30, 50]
        <<: *attenuated_env

  ############################
  # Block 5: miqu-1 [40, 60] #
  ############################
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [40, 60]
        <<: *attenuated_env

  ############################
  # Block 6: miqu-1 [50, 70] #
  ############################
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [50, 70]
        <<: *attenuated_env

  ############################
  # Block 7: miqu-1 [60, 80] #
  ############################
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [60, 70]  # The first 10 layers of Block 7 are duplicated twice
        <<: *attenuated_env
  - sources:
      - model: 152334H/miqu-1-70b-sf
        layer_range: [70, 80]  # The last 10 layers of Block 7 are not duplicated

# Slices are stacked in the order listed; no weights are blended between models.
merge_method: passthrough
dtype: float16