{ "add_tail_layers": false, "architectures": [ "Ernie4_5_VLMoeForConditionalGeneration" ], "attention_probs_dropout_prob": 0.0, "auto_map": { "AutoConfig": "configuration_ernie4_5_vl.Ernie4_5_VLMoEConfig", "AutoImageProcessor": "processing_ernie4_5_vl.Ernie4_5_VLImageProcessor", "AutoModel": "modeling_ernie4_5_vl.Ernie4_5_VLMoeForConditionalGeneration", "AutoModelForCausalLM": "modeling_ernie4_5_vl.Ernie4_5_VLMoeForConditionalGeneration", "AutoProcessor": "processing_ernie4_5_vl.Ernie4_5_VLProcessor" }, "bos_token_id": 1, "cachekv_quant": false, "compression_ratio": 1.0, "disable_ffn_model_parallel": false, "dpo_config": null, "torch_dtype": "bfloat16", "enable_delay_scale_loss": true, "eos_token_id": 2, "freq_allocation": 20, "fuse_attn_ffn": true, "fuse_gate_detach_matmul": false, "fuse_linear": false, "fuse_ln": false, "fuse_rms_norm": false, "fuse_rope": false, "fuse_softmax_mask": false, "fuse_swiglu": false, "global_aux_loss": false, "hidden_act": "silu", "hidden_dropout_prob": 0.0, "hidden_size": 2560, "ignored_index": -100, "im_patch_id": 100295, "image_end_token_id": 101305, "image_start_token_id": 101304, "initializer_range": 0.02, "intermediate_size": 12288, "max_position_embeddings": 131072, "max_sequence_length": null, "max_text_id": null, "micro_batch_size": -1, "mm_vocab_size": 0, "modality_detach": false, "model_type": "ernie4_5_moe_vl", "moe_all_to_all_dropout": 0.0, "moe_aux_loss_lambda": 0.01, "moe_capacity": [ 128, 128, 128 ], "moe_dense_experts_token_type_id": 3, "moe_dropout_prob": 0.0, "moe_fuse_experts": false, "moe_gate": "topk", "moe_gate_act": "softmax", "moe_group": "world", "moe_group_experts": false, "moe_group_orthogonal_loss": true, "moe_intermediate_size": [ 1536, 512 ], "moe_k": 6, "moe_layer_end_index": [ 29, 28 ], "moe_layer_feed_fake_token": false, "moe_layer_interval": 1, "moe_layer_start_index": [ 1, 1 ], "moe_multimodal_dispatch_use_allgather": "v2-alltoall-unpad-text", "moe_norm_gate_logits": true, "moe_num_attn_experts": false, "moe_num_experts": [ 64, 64 ], "moe_num_shared_experts": 2, "moe_orthogonal_loss_lambda": 0.01, "moe_reverse_token_drop": false, "moe_use_aux_free": true, "moe_use_hard_gate": true, "moe_use_size_all2all": false, "moe_use_token_type_bias": false, "moe_z_loss_lambda": 0.0001, "num_acc_steps": 1, "num_attention_heads": 20, "num_hidden_layers": 28, "num_key_value_heads": 4, "output_attentions": false, "pad_token_id": 0, "pixel_hidden_size": 1280, "pp_seg_method": "layer:ErnieDecoderLayer|EmptyLayer", "recompute": false, "recompute_granularity": "core_attn", "recompute_use_reentrant": false, "refined_recompute": {}, "resampler_fuse_rms_norm": false, "rms_norm_eps": 1e-05, "rope_3d": true, "rope_scaling": { "mrope_section": [ 22, 22, 20 ], "type": "default" }, "rope_theta": 500000, "sinkhorn_2gate": true, "sinkhorn_temp": 0.03, "skip_recompute_ops": {}, "spatial_conv_size": 2, "temporal_conv_size": 2, "tensor_parallel_degree": 1, "tie_word_embeddings": true, "token_balance_loss": false, "token_balance_seqlen": false, "transformers_version": null, "unsloth_fixed": true, "use_bias": false, "use_cache": true, "use_ep_comm_overlap": false, "use_fast_ln": false, "use_flash_attention": true, "use_fused_head_and_loss_fn": false, "use_recompute_lm_head": false, "use_recompute_loss_fn": false, "use_recompute_moe": false, "use_recompute_resampler": false, "use_rmsnorm": true, "use_sparse_flash_attn": true, "use_sparse_head_and_loss_fn": false, "use_temporal_conv": true, "use_var_len_flash_attn": false, "using_precision_check": false, "video_end_token_id": 101307, "video_start_token_id": 101306, "vision_config": { "attn_implementation": "eager", "attn_sep": true, "depth": 32, "embed_dim": 1280, "hidden_act": "quick_gelu", "hidden_size": 1280, "in_channels": 3, "in_chans": 3, "mlp_ratio": 4, "model_type": "DFNRope_vision_transformer", "num_heads": 16, "patch_size": 14, "pp_data_balance": false, "recompute": false, "spatial_merge_size": 2, "spatial_patch_size": 14, "vit_first_fwd_bsz": 128, "vit_num_recompute_layers": 10000 }, "vocab_size": 103424, "weight_share_add_bias": true }