diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4423 @@ +{ + "metadata": { + "ParamSize": 324, + "ParamBytes": 31412968448.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 419430400, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 102400, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419430400, + "byteOffset": 0 + } + ], + "md5sum": "8efb052ccaeb3fe0dbaea6f9c829b02e" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 419430400, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 102400, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419430400, + "byteOffset": 0 + } + ], + "md5sum": "4a8627915485ce09d766748c2eaf73cb" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 89653248, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 21888, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 89653248, + "byteOffset": 0 + } + ], + "md5sum": "7dd211cf2b4ba021dc88957da62efa8b" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 44826624, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2048, + 10944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 44826624, + "byteOffset": 0 + } + ], + "md5sum": "1635cdc76da038d13d988f8ace5fdad5" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27538432, + "records": [ + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 4096 + }, + { + "name": "model.layers.0.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 12587008 + }, + { + "name": "model.layers.0.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 14946304 + }, + { + "name": "model.layers.0.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14947328 + }, + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 19141632 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27530240 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27534336 + } + ], + "md5sum": "94604460dd83a6fb5270a48f573f6752" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 23068672, + "records": [ + { + "name": "model.layers.1.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 0 + } + ], + "md5sum": "1d2cfb5622ce34e22e5ae4957ee157de" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27788288, + "records": [ + { + "name": "model.layers.1.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 14942208 + }, + { + "name": "model.layers.1.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14943232 + }, + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 19137536 + }, + { + "name": "model.layers.1.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 27526144 + } + ], + "md5sum": "62b69b1ec7025b08abe78b71605dcef0" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.1.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "0d385052823dda4d013a321974973bc5" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.1.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "7879f962cef2ab4a381f9661261610da" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.1.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.2.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.2.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.2.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.2.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "ae252989f064e2d6262194cda9d72ae7" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.2.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "9e753070cd9932604081fd8d4f8d6bb7" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.2.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "ed63f9bddb6b846adb976f22f6849c57" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.2.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "8e02769b1751a12b2d6fe0ec659e411e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.2.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.3.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.3.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.3.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.3.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "4c9a575ac29f0912d0e8ffdebbd0a0a7" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "ac951ff044083478e8003210422bb563" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.3.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "af3e86be02579b3ebd5d5bb2c29363dc" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.3.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "532d5d8699b182ab3946c316e2d5263b" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.3.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.4.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.4.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.4.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.4.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "6181ef48c120f471179c405ecea2d745" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.4.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "9be38bc3c7592a96b3a9e2db17f41194" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.4.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "fbea4f36bbacd5f21d6ba0b21e94ad3a" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.4.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "77dd723ae26c53f97e89686ab1a78619" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.4.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.5.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.5.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.5.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.5.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "e06ba2c520e9e8e88e9281a0f3e8d36c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "a95303fe48fb1ac354f0b892f72442ad" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.5.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "2183838b88d8e2dbf42fd5e66a85cc13" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.5.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "765277f302d8375301976b81a0190f56" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.5.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.6.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.6.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.6.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.6.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "f5ac7d4e7504426086402bb9e56e4f2d" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.6.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "f03fed09dbb39c9cc027d956a073238b" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.6.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "1b71f705ecf9afb9d9a20c160ed01309" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.6.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "5eae4096c3981cec9178b6439b6af1c7" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.6.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.7.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.7.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.7.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.7.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "50e2ea4e7b468d3a097a32eed4eb9640" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.7.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "de7ac6b69157d9e17fbd0f154f1be493" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.7.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "f3bad9a2d2bb3c12ccb9fb8e808ff753" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.7.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "5aa5f6beb538b72b11a361cff10f5218" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.7.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.8.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.8.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.8.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.8.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "61a8df6e9ff40b668dc98854175a1905" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.8.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "bde648537a1cab7e836ce4b10abf976a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.8.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "91fcb48dc0b555b2377fef861deae02a" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.8.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "60201e91d1e9379301be2b0848e22ebd" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.8.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.9.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.9.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.9.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.9.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "162b5f3e8940473be617c3931a32b13a" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "433d0b07f9c2918a6c9ca699783811f4" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.9.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "3a57e4347113d8dd6c121e8b2338a11c" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.9.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "f1e598f998ce420c612a30ef5b91e3ed" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.9.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.10.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.10.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.10.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.10.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "ead6c9e151f9070da55594a35e1fdf28" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "64d1f10447145207fa6d5558be3d81f0" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.10.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "4f7679c3c359dff9b6b3664d056853a9" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.10.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "a19d8185410a2952c77f81499606cf1e" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.10.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.11.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.11.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.11.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.11.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "7682e0bf1ceedf464967c6775d0a6440" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.11.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "7d055e48551b487086938e176cfd5933" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.11.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "e7b76dc145f8beaca07dbf23adf94db5" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.11.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "aa6952d761db9bb571ba5b5ac0e26d09" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.11.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.12.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.12.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.12.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.12.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "64df32b84faa940dc5f062df9b0816e1" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.12.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "97d842c41752052c5f22f39a2bce18c9" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.12.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "920fed6b8d41bcf759c27b202bc20779" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.12.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "e8593dd181fec8ae648689db895ea6bf" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.12.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.13.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.13.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.13.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.13.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "76022278d2c3ae428e6aa302a66268f0" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "385c7e55d460c8958465b8b6ce74555f" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.13.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "6df68257505e91417cc9b5363fa8f6db" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.13.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "c35f40ea0a1a49170c2956a1c7daf0ef" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.13.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.14.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.14.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.14.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.14.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "7d1cacd90f7fa4807bfd98bae5cac89f" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.14.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "021f29d8ab1ad91e84c1fb23a09c97e5" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.14.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "b6571ad7b66464514a0092c4935d33b3" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.14.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "38fc228fe8fb956b00c9f7a4cd3d0763" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.14.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.15.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.15.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.15.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.15.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "172abfa0a744c2c47066e3c287a5e181" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.15.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "cd644fc4266fcb1cb8ee6641c40ada1e" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.15.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "024de581071a63e743015499c6e939c9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.15.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "872beb9a9c644270aa00e969f811b6cf" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.15.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.16.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.16.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.16.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.16.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "1be2d13a1256429d28b3294c4d94cf14" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.16.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "62f2913b05acb718ed1632b043cd299d" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.16.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "a0d4f4b1c69bbaf0b945cfb7e24c5f0d" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.16.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "8b0496751189d06bcb09dc93b2cdfa18" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.16.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.17.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.17.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.17.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.17.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "28449c374e5b0112dcec0039c65a2da4" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.17.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "fdd75774fd1dd316eebb728424e6582b" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.17.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "e792edb4693a3797d3dbd321e34edc93" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.17.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "55cdebbe031e9a038b2f4ac15095a554" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.17.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.18.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.18.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.18.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.18.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "c98b63f62cbf058c2e2614ff592a241c" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.18.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "a0d61627ed864bcccbc2f7e9476ea4ed" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.18.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "5497deb6d47fcf900567b190e66d8635" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.18.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "ac51832091092bb06f38b4a93d189c2f" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.18.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.19.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.19.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.19.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.19.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "fafa0b88ba9041d96b889a2e11240fc6" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.19.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "e8460ae3840a171387ceb2a0ba408434" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.19.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "d4f82454b023c30c682459c321510125" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.19.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "bf016bb75ed2adbd7465f9a581af78ca" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.19.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.20.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.20.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.20.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.20.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "dc9f12206fee09fdda705c7f5ca48d31" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.20.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "b93b5e7bc89ac4f1865b9ff7f2962a5e" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.20.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "ff720d5a40509556cfb6f7cc265a86e5" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.20.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "a56acdfc6ac05f89ef92e6e76c7e5e56" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.20.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.21.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.21.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.21.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.21.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "1a98106be8c67ca6e81f60734fc7828f" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.21.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "0072c307e38b1d9b1a540a9e783ee063" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.21.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "1318eb0ebaf36823959598d23d43de7e" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.21.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "d26c2e179b658530d9a17379c1c9987d" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.21.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.22.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.22.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.22.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.22.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "e1802f9d6d19d317d9b12de13ab6dd1b" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "5e2e6062cacb3e83133f96cd2cc88715" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.22.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "b5fec625999a5637bd8362448162e620" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.22.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "f0aec2278df22d15ce0cd31aa5cb06fb" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.22.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.23.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.23.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.23.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.23.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "ab50dbb1d774a03ef3b9bd45a1f27576" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.23.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "d3441884a61a1d9ef456c4d37e01e519" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.23.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "270f2fca80123e3c2e03dafb4d11945d" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.23.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "74a8ff695a17ed0c5bdca287e3a9f69d" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.23.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.24.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.24.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.24.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.24.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "495aa2d785f69996a95d601383696e0d" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.24.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "e37b643bca4a30f6523a706310e92570" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.24.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "fad9bef60135f36949d730f44b2f88ea" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.24.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "85224dab3597f298836b4089343c3119" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.24.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.25.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.25.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.25.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.25.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "3eaf56b119050efac9e8f15ba69b8122" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.25.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "e11d8cb9d5680883bf6e28de0413d6fd" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.25.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "e1767425d3e359642a15db736f843d53" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.25.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "d421d28b021fc111a859140e195fc585" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 30680064, + "records": [ + { + "name": "model.layers.25.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + }, + { + "name": "model.layers.26.self_attn.q_proj.weight", + "shape": [ + 3072, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 11542528 + }, + { + "name": "model.layers.26.self_attn.kv_a_proj_with_mqa.weight", + "shape": [ + 576, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 24125440 + }, + { + "name": "model.layers.26.self_attn.kv_a_layernorm.weight", + "shape": [ + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1024, + "byteOffset": 26484736 + }, + { + "name": "model.layers.26.self_attn.kv_b_proj.weight", + "shape": [ + 4096, + 512 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 26485760 + } + ], + "md5sum": "00aa0d9b27e135e66991d5df5e59fd7c" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 31719424, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate.weight", + "shape": [ + 64, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.mlp.shared_experts.gate_up_proj.weight", + "shape": [ + 5632, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23068672, + "byteOffset": 8650752 + } + ], + "md5sum": "bd9499d319f006f5994ac1b2824ae015" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 738197504, + "records": [ + { + "name": "model.layers.26.mlp.moe_gate_up_proj.weight", + "shape": [ + 64, + 2816, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 738197504, + "byteOffset": 0 + } + ], + "md5sum": "1c2686eaa97ce37a79a2c7e62f13da8e" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 369098752, + "records": [ + { + "name": "model.layers.26.mlp.moe_down_proj.weight", + "shape": [ + 64, + 2048, + 1408 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 369098752, + "byteOffset": 0 + } + ], + "md5sum": "6d98de740a18641d2e2da76b56cdcc88" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 11542528, + "records": [ + { + "name": "model.layers.26.mlp.shared_experts.down_proj.weight", + "shape": [ + 2048, + 2816 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11534336, + "byteOffset": 0 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11534336 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 11538432 + } + ], + "md5sum": "a4a21384a05a39109863995203cef2a0" + } + ] +} \ No newline at end of file