{
  "_class_name": "UNetSpatioTemporalConditionModel",
  "_diffusers_version": "0.32.2",
  "addition_time_embed_dim": 1,
  "block_out_channels": [
    128,
    256,
    256,
    512
  ],
  "cross_attention_dim": 1,
  "decay": 0.9999,
  "down_block_types": [
    "CrossAttnDownBlockSpatioTemporal",
    "CrossAttnDownBlockSpatioTemporal",
    "CrossAttnDownBlockSpatioTemporal",
    "DownBlockSpatioTemporal"
  ],
  "in_channels": 8,
  "inv_gamma": 1.0,
  "layers_per_block": 2,
  "min_decay": 0.0,
  "num_attention_heads": [
    8,
    16,
    16,
    32
  ],
  "num_frames": 64,
  "optimization_step": 100000,
  "out_channels": 4,
  "power": 0.6666666666666666,
  "projection_class_embeddings_input_dim": 1,
  "sample_size": 14,
  "transformer_layers_per_block": 1,
  "up_block_types": [
    "UpBlockSpatioTemporal",
    "CrossAttnUpBlockSpatioTemporal",
    "CrossAttnUpBlockSpatioTemporal",
    "CrossAttnUpBlockSpatioTemporal"
  ],
  "update_after_step": 0,
  "use_ema_warmup": false
}