Upload 5 files

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,3 +1,18 @@
 ---
 license: mit
 ---

 ---
 license: mit
 ---
+# iVideoGPT (Pre-trained on Open X-Embodiment, 256x256 resolution, action-free)
+See https://github.com/thuml/iVideoGPT for examples of how to use this model.
+## Citation
+```
+@inproceedings{wu2024ivideogpt,
+    title={iVideoGPT: Interactive VideoGPTs are Scalable World Models},
+    author={Jialong Wu and Shaofeng Yin and Ningya Feng and Xu He and Dong Li and Jianye Hao and Mingsheng Long},
+    booktitle={Advances in Neural Information Processing Systems},
+    year={2024}
+}
+```

tokenizer/config.json ADDED Viewed

+{
+  "_class_name": "CompressiveVQModel",
+  "_diffusers_version": "0.27.0.dev0",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    256,
+    512,
+    768
+  ],
+  "context_length": 2,
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": true,
+  "in_channels": 3,
+  "latent_channels": 64,
+  "layers_per_block": 2,
+  "lookup_from_codebook": true,
+  "max_att_resolution": 32,
+  "mid_block_add_attention": false,
+  "norm_num_groups": 32,
+  "norm_type": "group",
+  "num_dyn_embeddings": 8192,
+  "num_vq_embeddings": 8192,
+  "out_channels": 3,
+  "patch_size": 4,
+  "resolution": 256,
+  "sample_size": 32,
+  "scaling_factor": 0.18215,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "vq_embed_dim": null
+}

tokenizer/diffusion_pytorch_model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e6489654fe40ee9859a5c6d65b42646093d611e09cf74af17efbfb15333c688
+size 1241963544

transformer/config.json ADDED Viewed

+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.1,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "hidden_act": "silu",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "max_length": 1024,
+  "max_position_embeddings": 1024,
+  "model_type": "llama",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 12,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "vocab_size": 16386
+}

transformer/model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f4356285238a819219447d466aad246c3c7dc24a903a2c09ccc292b57a9601f
+size 553749536