Edit model card

# final_model

This is a merge of pre-trained language models created using mergekit.

## Merge Details

### Merge Method

This model was merged with the `breadcrumbs_ties` merge method, using `./Yosegi-0603` as the base model.

### Models Merged

The following models were included in the merge:

* `./Ninja-2B_JP`
* `./Yosegi-0601`

### Configuration

The following YAML configuration was used to produce this model:

base_model: ./Yosegi-0603
dtype: bfloat16
merge_method: breadcrumbs_ties
parameters:
  int8_mask: 1.0
  normalize: 0.0
slices:
- sources:
  - layer_range: [0, 2]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.050387850856855765
      - filter: mlp
        value: -0.17075015661203768
      - value: -0.008041653902986862
      weight:
      - filter: self_attn
        value: 0.0999312941470471
      - filter: mlp
        value: 0.541727762184749
      - value: 0.6837012779994258
  - layer_range: [0, 2]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8218846237599902
      - filter: mlp
        value: 1.0
      - value: 0.9254078866667358
      gamma:
      - filter: self_attn
        value: -0.11213758231875963
      - filter: mlp
        value: 0.021586098873668948
      - value: -0.12827998218659437
      weight:
      - filter: self_attn
        value: 0.40391646444657003
      - filter: mlp
        value: 0.623121864641881
      - value: 0.5967833694632534
  - layer_range: [0, 2]
    model: ./Yosegi-0603
- sources:
  - layer_range: [2, 4]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8079479346300947
      - filter: mlp
        value: 1.0
      - value: 0.710146185559419
      gamma:
      - filter: self_attn
        value: 0.1383609589681566
      - filter: mlp
        value: 0.21188532059635062
      - value: 0.2994723556443468
      weight:
      - filter: self_attn
        value: 0.48107070906079974
      - filter: mlp
        value: 0.5848073552919492
      - value: 0.4583842493359253
  - layer_range: [2, 4]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.934378153535579
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.073192612278188
      - filter: mlp
        value: 0.07939126555063317
      - value: -0.06891845030175699
      weight:
      - filter: self_attn
        value: 0.32120386994101
      - filter: mlp
        value: 0.5001108459121922
      - value: 0.9138710221666694
  - layer_range: [2, 4]
    model: ./Yosegi-0603
- sources:
  - layer_range: [4, 6]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7237519222177541
      - value: 0.776951124863642
      gamma:
      - filter: self_attn
        value: -0.2265121048274062
      - filter: mlp
        value: -0.1757947421960496
      - value: -0.11401593728931929
      weight:
      - filter: self_attn
        value: 0.6448742737026658
      - filter: mlp
        value: 0.13809748641457986
      - value: 0.3950550285769662
  - layer_range: [4, 6]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9649359194114893
      - filter: mlp
        value: 0.916637032428399
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.16291684846287688
      - filter: mlp
        value: -0.19013548712121703
      - value: 0.038409066391918795
      weight:
      - filter: self_attn
        value: 0.1977358472772336
      - filter: mlp
        value: 0.22661167907612348
      - value: 0.6426575016448257
  - layer_range: [4, 6]
    model: ./Yosegi-0603
- sources:
  - layer_range: [6, 8]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8727809666891416
      - filter: mlp
        value: 1.0
      - value: 0.5160677785559116
      gamma:
      - filter: self_attn
        value: 0.14245180617134273
      - filter: mlp
        value: 0.08189992601998919
      - value: -0.1038827997670827
      weight:
      - filter: self_attn
        value: 0.23575676914257698
      - filter: mlp
        value: 0.4047231670507743
      - value: 0.34207794631274374
  - layer_range: [6, 8]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.576775501046583
      - filter: mlp
        value: -0.046028636298718645
      - value: -0.024161321403060265
      weight:
      - filter: self_attn
        value: 0.833089842843994
      - filter: mlp
        value: 0.5434667434613458
      - value: 0.2946693008513797
  - layer_range: [6, 8]
    model: ./Yosegi-0603
- sources:
  - layer_range: [8, 10]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9930269337531187
      gamma:
      - filter: self_attn
        value: 0.4549980941970383
      - filter: mlp
        value: 0.10362988739411173
      - value: -0.43800391668559174
      weight:
      - filter: self_attn
        value: 0.19663450954683193
      - filter: mlp
        value: 0.16783989984505265
      - value: 0.7465091417598162
  - layer_range: [8, 10]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.797370597380894
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.0665958634205702
      - filter: mlp
        value: -0.058297473060129834
      - value: -0.38206760673090134
      weight:
      - filter: self_attn
        value: 0.7015967347604024
      - filter: mlp
        value: 0.7733694864324641
      - value: 0.7636921732342238
  - layer_range: [8, 10]
    model: ./Yosegi-0603
- sources:
  - layer_range: [10, 12]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8047576867589878
      - filter: mlp
        value: 0.8852533319203653
      - value: 0.7707342647603538
      gamma:
      - filter: self_attn
        value: -0.054343999574509694
      - filter: mlp
        value: -0.3465154355167133
      - value: 0.022315854655582765
      weight:
      - filter: self_attn
        value: 0.4396484757291151
      - filter: mlp
        value: 0.34318396468602314
      - value: 0.8236034746664869
  - layer_range: [10, 12]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9058471193805165
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.1221058588826469
      - filter: mlp
        value: -0.4004985640890659
      - value: 0.3219195440395816
      weight:
      - filter: self_attn
        value: 0.3565443612269864
      - filter: mlp
        value: 0.2817057075232181
      - value: 0.5934890337808251
  - layer_range: [10, 12]
    model: ./Yosegi-0603
- sources:
  - layer_range: [12, 14]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.027897116191693133
      - filter: mlp
        value: -0.1765379388255607
      - value: 0.09108936063176161
      weight:
      - filter: self_attn
        value: 0.4499753137521779
      - filter: mlp
        value: 0.901296236087911
      - value: 0.3548680126954006
  - layer_range: [12, 14]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8973815150776497
      - filter: mlp
        value: 0.6029953465961999
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.10393082898402586
      - filter: mlp
        value: 0.15993577688878796
      - value: 0.011410411917833683
      weight:
      - filter: self_attn
        value: 0.2211644023056492
      - filter: mlp
        value: 0.5677387594231849
      - value: 0.1316535663010981
  - layer_range: [12, 14]
    model: ./Yosegi-0603
- sources:
  - layer_range: [14, 16]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9584597245055072
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.17789727632680347
      - filter: mlp
        value: 0.2182263440314275
      - value: 0.1449547656126498
      weight:
      - filter: self_attn
        value: 0.4551004762874224
      - filter: mlp
        value: 0.9182082826762857
      - value: 0.3736989395186422
  - layer_range: [14, 16]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7414465107848625
      - filter: mlp
        value: 1.0
      - value: 0.7894887419395906
      gamma:
      - filter: self_attn
        value: -0.07343933395880992
      - filter: mlp
        value: 0.250800731630588
      - value: -0.2948778134297542
      weight:
      - filter: self_attn
        value: 0.43125199001016495
      - filter: mlp
        value: 0.6182726353394477
      - value: 0.838902157446268
  - layer_range: [14, 16]
    model: ./Yosegi-0603
- sources:
  - layer_range: [16, 18]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9474287877268394
      - filter: mlp
        value: 1.0
      - value: 0.9613380133344519
      gamma:
      - filter: self_attn
        value: -0.08608895546593046
      - filter: mlp
        value: -0.07275416053291164
      - value: -0.5796137860399382
      weight:
      - filter: self_attn
        value: 0.5593420897751296
      - filter: mlp
        value: 0.7339447992880666
      - value: 0.5447558586689005
  - layer_range: [16, 18]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9321536960575384
      - filter: mlp
        value: 1.0
      - value: 0.9613033408813294
      gamma:
      - filter: self_attn
        value: 0.20610728738224296
      - filter: mlp
        value: 0.2002206706624053
      - value: -0.45349278793293785
      weight:
      - filter: self_attn
        value: 0.16162975594196963
      - filter: mlp
        value: 0.21262726992327483
      - value: 0.061213622827234075
  - layer_range: [16, 18]
    model: ./Yosegi-0603
- sources:
  - layer_range: [18, 20]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.03922456593148313
      - filter: mlp
        value: 0.3318035822806869
      - value: -0.10373990685028205
      weight:
      - filter: self_attn
        value: 0.8254441016674987
      - filter: mlp
        value: 0.4568039342431161
      - value: 0.3152648515747969
  - layer_range: [18, 20]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9807358937293073
      gamma:
      - filter: self_attn
        value: -0.22734036563128657
      - filter: mlp
        value: 0.26113222150270854
      - value: 0.17739039022957015
      weight:
      - filter: self_attn
        value: 0.33759130475641996
      - filter: mlp
        value: 0.616639215544168
      - value: 0.47560658618977714
  - layer_range: [18, 20]
    model: ./Yosegi-0603
- sources:
  - layer_range: [20, 22]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9394514442960196
      - filter: mlp
        value: 1.0
      - value: 0.9885037757465567
      gamma:
      - filter: self_attn
        value: -0.17365709450334324
      - filter: mlp
        value: 0.0712279381144505
      - value: 0.11809665485306464
      weight:
      - filter: self_attn
        value: 0.485610337254665
      - filter: mlp
        value: 0.8406593173801935
      - value: 0.5024102481819739
  - layer_range: [20, 22]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.09980202641768818
      - filter: mlp
        value: 0.051454493742856926
      - value: 0.14619126408666103
      weight:
      - filter: self_attn
        value: 0.54772456079406
      - filter: mlp
        value: 0.3440893571099615
      - value: 0.3747271233512448
  - layer_range: [20, 22]
    model: ./Yosegi-0603
- sources:
  - layer_range: [22, 24]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9474712362889293
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.16020032978118146
      - filter: mlp
        value: -0.025085248873309034
      - value: 0.06046174910893976
      weight:
      - filter: self_attn
        value: 0.8654189362345427
      - filter: mlp
        value: 0.6344956382288498
      - value: 0.6383979001549549
  - layer_range: [22, 24]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8240762427167851
      - filter: mlp
        value: 1.0
      - value: 0.9004913821398048
      gamma:
      - filter: self_attn
        value: -0.12224186789525764
      - filter: mlp
        value: -0.25877585460700525
      - value: 0.35149388360871714
      weight:
      - filter: self_attn
        value: 0.4294356408713786
      - filter: mlp
        value: 0.3920647298630233
      - value: 0.795891295390721
  - layer_range: [22, 24]
    model: ./Yosegi-0603
- sources:
  - layer_range: [24, 26]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.16915580088030202
      - filter: mlp
        value: 0.2602652727555053
      - value: 0.16985672723305376
      weight:
      - filter: self_attn
        value: 0.420377024485687
      - filter: mlp
        value: 0.3401141209432324
      - value: 0.4953511256159331
  - layer_range: [24, 26]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7290652609253236
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.1039167464696765
      - filter: mlp
        value: -0.18476572570059685
      - value: 0.1221387313921081
      weight:
      - filter: self_attn
        value: 0.2925002157134928
      - filter: mlp
        value: 0.3854740639588027
      - value: 0.555448110317977
  - layer_range: [24, 26]
    model: ./Yosegi-0603
- sources:
  - layer_range: [26, 28]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9104496350690235
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.24831264214235005
      - filter: mlp
        value: -0.03903149241855605
      - value: 0.14189425093398259
      weight:
      - filter: self_attn
        value: 0.7685811138035815
      - filter: mlp
        value: 0.06535011571274918
      - value: 0.696502559577317
  - layer_range: [26, 28]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9236218028490522
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.2451400735890047
      - filter: mlp
        value: -0.21555851418482214
      - value: 0.020418471695148876
      weight:
      - filter: self_attn
        value: 0.451368534421561
      - filter: mlp
        value: 0.27412879847687055
      - value: 0.18339776770537336
  - layer_range: [26, 28]
    model: ./Yosegi-0603
- sources:
  - layer_range: [28, 30]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8590812961904566
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.06934549536310654
      - filter: mlp
        value: -0.28464693250998063
      - value: -0.0588491947891552
      weight:
      - filter: self_attn
        value: 0.26716389671655294
      - filter: mlp
        value: 0.8228280162386532
      - value: 0.24197568479527135
  - layer_range: [28, 30]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7277181780542642
      - filter: mlp
        value: 0.74166025738732
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.1772650150670655
      - filter: mlp
        value: 0.06545031487123437
      - value: -0.28681451125993446
      weight:
      - filter: self_attn
        value: 0.5781944040541174
      - filter: mlp
        value: 0.2288692970435767
      - value: 0.689751088930503
  - layer_range: [28, 30]
    model: ./Yosegi-0603
- sources:
  - layer_range: [30, 32]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8177341862620365
      - filter: mlp
        value: 0.8875629677599377
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.06572527259889459
      - filter: mlp
        value: -0.18979543285938766
      - value: -0.24122036571646263
      weight:
      - filter: self_attn
        value: 0.5818433594657613
      - filter: mlp
        value: 0.36676821100234736
      - value: 0.3580688869263428
  - layer_range: [30, 32]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8306036003344672
      - filter: mlp
        value: 0.6993970248745297
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.20599853236581384
      - filter: mlp
        value: -0.2001187634455465
      - value: -0.07654635090020837
      weight:
      - filter: self_attn
        value: 0.37120677279712305
      - filter: mlp
        value: 0.13105486609905853
      - value: 0.7204857820148367
  - layer_range: [30, 32]
    model: ./Yosegi-0603
tokenizer_source: union
Downloads last month: 11
Safetensors · Model size: 7.24B params · Tensor type: BF16
Inference Examples
This model does not have enough activity to be deployed to the Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for Local-Novel-LLM-project/Yosegi-0604

Merges: 2 models