added_tokens.json CHANGED
@@ -2,6 +2,273 @@
2
  "</think>": 151668,
3
  "</tool_call>": 151658,
4
  "</tool_response>": 151666,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "<think>": 151667,
6
  "<tool_call>": 151657,
7
  "<tool_response>": 151665,
 
2
  "</think>": 151668,
3
  "</tool_call>": 151658,
4
  "</tool_response>": 151666,
5
+ "<reserved_0>": 151669,
6
+ "<reserved_100>": 151769,
7
+ "<reserved_101>": 151770,
8
+ "<reserved_102>": 151771,
9
+ "<reserved_103>": 151772,
10
+ "<reserved_104>": 151773,
11
+ "<reserved_105>": 151774,
12
+ "<reserved_106>": 151775,
13
+ "<reserved_107>": 151776,
14
+ "<reserved_108>": 151777,
15
+ "<reserved_109>": 151778,
16
+ "<reserved_10>": 151679,
17
+ "<reserved_110>": 151779,
18
+ "<reserved_111>": 151780,
19
+ "<reserved_112>": 151781,
20
+ "<reserved_113>": 151782,
21
+ "<reserved_114>": 151783,
22
+ "<reserved_115>": 151784,
23
+ "<reserved_116>": 151785,
24
+ "<reserved_117>": 151786,
25
+ "<reserved_118>": 151787,
26
+ "<reserved_119>": 151788,
27
+ "<reserved_11>": 151680,
28
+ "<reserved_120>": 151789,
29
+ "<reserved_121>": 151790,
30
+ "<reserved_122>": 151791,
31
+ "<reserved_123>": 151792,
32
+ "<reserved_124>": 151793,
33
+ "<reserved_125>": 151794,
34
+ "<reserved_126>": 151795,
35
+ "<reserved_127>": 151796,
36
+ "<reserved_128>": 151797,
37
+ "<reserved_129>": 151798,
38
+ "<reserved_12>": 151681,
39
+ "<reserved_130>": 151799,
40
+ "<reserved_131>": 151800,
41
+ "<reserved_132>": 151801,
42
+ "<reserved_133>": 151802,
43
+ "<reserved_134>": 151803,
44
+ "<reserved_135>": 151804,
45
+ "<reserved_136>": 151805,
46
+ "<reserved_137>": 151806,
47
+ "<reserved_138>": 151807,
48
+ "<reserved_139>": 151808,
49
+ "<reserved_13>": 151682,
50
+ "<reserved_140>": 151809,
51
+ "<reserved_141>": 151810,
52
+ "<reserved_142>": 151811,
53
+ "<reserved_143>": 151812,
54
+ "<reserved_144>": 151813,
55
+ "<reserved_145>": 151814,
56
+ "<reserved_146>": 151815,
57
+ "<reserved_147>": 151816,
58
+ "<reserved_148>": 151817,
59
+ "<reserved_149>": 151818,
60
+ "<reserved_14>": 151683,
61
+ "<reserved_150>": 151819,
62
+ "<reserved_151>": 151820,
63
+ "<reserved_152>": 151821,
64
+ "<reserved_153>": 151822,
65
+ "<reserved_154>": 151823,
66
+ "<reserved_155>": 151824,
67
+ "<reserved_156>": 151825,
68
+ "<reserved_157>": 151826,
69
+ "<reserved_158>": 151827,
70
+ "<reserved_159>": 151828,
71
+ "<reserved_15>": 151684,
72
+ "<reserved_160>": 151829,
73
+ "<reserved_161>": 151830,
74
+ "<reserved_162>": 151831,
75
+ "<reserved_163>": 151832,
76
+ "<reserved_164>": 151833,
77
+ "<reserved_165>": 151834,
78
+ "<reserved_166>": 151835,
79
+ "<reserved_167>": 151836,
80
+ "<reserved_168>": 151837,
81
+ "<reserved_169>": 151838,
82
+ "<reserved_16>": 151685,
83
+ "<reserved_170>": 151839,
84
+ "<reserved_171>": 151840,
85
+ "<reserved_172>": 151841,
86
+ "<reserved_173>": 151842,
87
+ "<reserved_174>": 151843,
88
+ "<reserved_175>": 151844,
89
+ "<reserved_176>": 151845,
90
+ "<reserved_177>": 151846,
91
+ "<reserved_178>": 151847,
92
+ "<reserved_179>": 151848,
93
+ "<reserved_17>": 151686,
94
+ "<reserved_180>": 151849,
95
+ "<reserved_181>": 151850,
96
+ "<reserved_182>": 151851,
97
+ "<reserved_183>": 151852,
98
+ "<reserved_184>": 151853,
99
+ "<reserved_185>": 151854,
100
+ "<reserved_186>": 151855,
101
+ "<reserved_187>": 151856,
102
+ "<reserved_188>": 151857,
103
+ "<reserved_189>": 151858,
104
+ "<reserved_18>": 151687,
105
+ "<reserved_190>": 151859,
106
+ "<reserved_191>": 151860,
107
+ "<reserved_192>": 151861,
108
+ "<reserved_193>": 151862,
109
+ "<reserved_194>": 151863,
110
+ "<reserved_195>": 151864,
111
+ "<reserved_196>": 151865,
112
+ "<reserved_197>": 151866,
113
+ "<reserved_198>": 151867,
114
+ "<reserved_199>": 151868,
115
+ "<reserved_19>": 151688,
116
+ "<reserved_1>": 151670,
117
+ "<reserved_200>": 151869,
118
+ "<reserved_201>": 151870,
119
+ "<reserved_202>": 151871,
120
+ "<reserved_203>": 151872,
121
+ "<reserved_204>": 151873,
122
+ "<reserved_205>": 151874,
123
+ "<reserved_206>": 151875,
124
+ "<reserved_207>": 151876,
125
+ "<reserved_208>": 151877,
126
+ "<reserved_209>": 151878,
127
+ "<reserved_20>": 151689,
128
+ "<reserved_210>": 151879,
129
+ "<reserved_211>": 151880,
130
+ "<reserved_212>": 151881,
131
+ "<reserved_213>": 151882,
132
+ "<reserved_214>": 151883,
133
+ "<reserved_215>": 151884,
134
+ "<reserved_216>": 151885,
135
+ "<reserved_217>": 151886,
136
+ "<reserved_218>": 151887,
137
+ "<reserved_219>": 151888,
138
+ "<reserved_21>": 151690,
139
+ "<reserved_220>": 151889,
140
+ "<reserved_221>": 151890,
141
+ "<reserved_222>": 151891,
142
+ "<reserved_223>": 151892,
143
+ "<reserved_224>": 151893,
144
+ "<reserved_225>": 151894,
145
+ "<reserved_226>": 151895,
146
+ "<reserved_227>": 151896,
147
+ "<reserved_228>": 151897,
148
+ "<reserved_229>": 151898,
149
+ "<reserved_22>": 151691,
150
+ "<reserved_230>": 151899,
151
+ "<reserved_231>": 151900,
152
+ "<reserved_232>": 151901,
153
+ "<reserved_233>": 151902,
154
+ "<reserved_234>": 151903,
155
+ "<reserved_235>": 151904,
156
+ "<reserved_236>": 151905,
157
+ "<reserved_237>": 151906,
158
+ "<reserved_238>": 151907,
159
+ "<reserved_239>": 151908,
160
+ "<reserved_23>": 151692,
161
+ "<reserved_240>": 151909,
162
+ "<reserved_241>": 151910,
163
+ "<reserved_242>": 151911,
164
+ "<reserved_243>": 151912,
165
+ "<reserved_244>": 151913,
166
+ "<reserved_245>": 151914,
167
+ "<reserved_246>": 151915,
168
+ "<reserved_247>": 151916,
169
+ "<reserved_248>": 151917,
170
+ "<reserved_249>": 151918,
171
+ "<reserved_24>": 151693,
172
+ "<reserved_250>": 151919,
173
+ "<reserved_251>": 151920,
174
+ "<reserved_252>": 151921,
175
+ "<reserved_253>": 151922,
176
+ "<reserved_254>": 151923,
177
+ "<reserved_255>": 151924,
178
+ "<reserved_256>": 151925,
179
+ "<reserved_257>": 151926,
180
+ "<reserved_258>": 151927,
181
+ "<reserved_259>": 151928,
182
+ "<reserved_25>": 151694,
183
+ "<reserved_260>": 151929,
184
+ "<reserved_261>": 151930,
185
+ "<reserved_262>": 151931,
186
+ "<reserved_263>": 151932,
187
+ "<reserved_264>": 151933,
188
+ "<reserved_265>": 151934,
189
+ "<reserved_266>": 151935,
190
+ "<reserved_26>": 151695,
191
+ "<reserved_27>": 151696,
192
+ "<reserved_28>": 151697,
193
+ "<reserved_29>": 151698,
194
+ "<reserved_2>": 151671,
195
+ "<reserved_30>": 151699,
196
+ "<reserved_31>": 151700,
197
+ "<reserved_32>": 151701,
198
+ "<reserved_33>": 151702,
199
+ "<reserved_34>": 151703,
200
+ "<reserved_35>": 151704,
201
+ "<reserved_36>": 151705,
202
+ "<reserved_37>": 151706,
203
+ "<reserved_38>": 151707,
204
+ "<reserved_39>": 151708,
205
+ "<reserved_3>": 151672,
206
+ "<reserved_40>": 151709,
207
+ "<reserved_41>": 151710,
208
+ "<reserved_42>": 151711,
209
+ "<reserved_43>": 151712,
210
+ "<reserved_44>": 151713,
211
+ "<reserved_45>": 151714,
212
+ "<reserved_46>": 151715,
213
+ "<reserved_47>": 151716,
214
+ "<reserved_48>": 151717,
215
+ "<reserved_49>": 151718,
216
+ "<reserved_4>": 151673,
217
+ "<reserved_50>": 151719,
218
+ "<reserved_51>": 151720,
219
+ "<reserved_52>": 151721,
220
+ "<reserved_53>": 151722,
221
+ "<reserved_54>": 151723,
222
+ "<reserved_55>": 151724,
223
+ "<reserved_56>": 151725,
224
+ "<reserved_57>": 151726,
225
+ "<reserved_58>": 151727,
226
+ "<reserved_59>": 151728,
227
+ "<reserved_5>": 151674,
228
+ "<reserved_60>": 151729,
229
+ "<reserved_61>": 151730,
230
+ "<reserved_62>": 151731,
231
+ "<reserved_63>": 151732,
232
+ "<reserved_64>": 151733,
233
+ "<reserved_65>": 151734,
234
+ "<reserved_66>": 151735,
235
+ "<reserved_67>": 151736,
236
+ "<reserved_68>": 151737,
237
+ "<reserved_69>": 151738,
238
+ "<reserved_6>": 151675,
239
+ "<reserved_70>": 151739,
240
+ "<reserved_71>": 151740,
241
+ "<reserved_72>": 151741,
242
+ "<reserved_73>": 151742,
243
+ "<reserved_74>": 151743,
244
+ "<reserved_75>": 151744,
245
+ "<reserved_76>": 151745,
246
+ "<reserved_77>": 151746,
247
+ "<reserved_78>": 151747,
248
+ "<reserved_79>": 151748,
249
+ "<reserved_7>": 151676,
250
+ "<reserved_80>": 151749,
251
+ "<reserved_81>": 151750,
252
+ "<reserved_82>": 151751,
253
+ "<reserved_83>": 151752,
254
+ "<reserved_84>": 151753,
255
+ "<reserved_85>": 151754,
256
+ "<reserved_86>": 151755,
257
+ "<reserved_87>": 151756,
258
+ "<reserved_88>": 151757,
259
+ "<reserved_89>": 151758,
260
+ "<reserved_8>": 151677,
261
+ "<reserved_90>": 151759,
262
+ "<reserved_91>": 151760,
263
+ "<reserved_92>": 151761,
264
+ "<reserved_93>": 151762,
265
+ "<reserved_94>": 151763,
266
+ "<reserved_95>": 151764,
267
+ "<reserved_96>": 151765,
268
+ "<reserved_97>": 151766,
269
+ "<reserved_98>": 151767,
270
+ "<reserved_99>": 151768,
271
+ "<reserved_9>": 151678,
272
  "<think>": 151667,
273
  "<tool_call>": 151657,
274
  "<tool_response>": 151665,
config.json CHANGED
@@ -1,4 +1,12 @@
1
  {
 
 
 
 
 
 
 
 
2
  "architectures": [
3
  "IsaacForConditionalGeneration"
4
  ],
@@ -55,18 +63,87 @@
55
  "num_key_value_heads": 8,
56
  "pixel_shuffle_scale": 2,
57
  "rms_norm_eps": 1e-06,
58
- "rope_scaling": {
59
- "mrope_interleaved": true,
60
- "mrope_section": null,
61
- "rope_type": "default"
62
- },
63
  "rope_theta": 1000000.0,
64
  "sliding_window": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  "tie_word_embeddings": false,
66
- "transformers_version": "4.56.1",
67
  "use_cache": true,
68
  "use_sliding_window": false,
69
- "video_patch_size": 16,
70
  "vision_config": {
71
  "attention_dropout": 0.0,
72
  "hidden_act": "gelu_pytorch_tanh",
@@ -74,7 +151,7 @@
74
  "image_size": 256,
75
  "intermediate_size": 4304,
76
  "layer_norm_eps": 1e-06,
77
- "model_type": "pixel_shuffle_siglip2",
78
  "num_attention_heads": 16,
79
  "num_channels": 3,
80
  "num_hidden_layers": 27,
@@ -83,7 +160,19 @@
83
  "pixel_shuffle_scale_factor": 2
84
  },
85
  "vision_max_num_patches": 6144,
 
 
 
 
 
86
  "vision_min_num_patches": 256,
 
 
 
 
 
 
 
87
  "vision_token": "<image>",
88
  "vocab_size": 151936
89
  }
 
1
  {
2
+ "_rope_parameters": {
3
+ "rope_theta": 1000000,
4
+ "rope_type": "default"
5
+ },
6
+ "_rope_scaling": {
7
+ "rope_theta": 1000000,
8
+ "rope_type": "default"
9
+ },
10
  "architectures": [
11
  "IsaacForConditionalGeneration"
12
  ],
 
63
  "num_key_value_heads": 8,
64
  "pixel_shuffle_scale": 2,
65
  "rms_norm_eps": 1e-06,
 
 
 
 
 
66
  "rope_theta": 1000000.0,
67
  "sliding_window": null,
68
+ "text_config": {
69
+ "architectures": [
70
+ "IsaacForConditionalGeneration"
71
+ ],
72
+ "attention_bias": false,
73
+ "attention_dropout": 0.0,
74
+ "bos_token_id": 151643,
75
+ "dtype": "float32",
76
+ "eos_token_id": 151645,
77
+ "head_dim": 128,
78
+ "hidden_act": "silu",
79
+ "hidden_size": 2048,
80
+ "initializer_range": 0.02,
81
+ "intermediate_size": 6144,
82
+ "layer_types": [
83
+ "full_attention",
84
+ "full_attention",
85
+ "full_attention",
86
+ "full_attention",
87
+ "full_attention",
88
+ "full_attention",
89
+ "full_attention",
90
+ "full_attention",
91
+ "full_attention",
92
+ "full_attention",
93
+ "full_attention",
94
+ "full_attention",
95
+ "full_attention",
96
+ "full_attention",
97
+ "full_attention",
98
+ "full_attention",
99
+ "full_attention",
100
+ "full_attention",
101
+ "full_attention",
102
+ "full_attention",
103
+ "full_attention",
104
+ "full_attention",
105
+ "full_attention",
106
+ "full_attention",
107
+ "full_attention",
108
+ "full_attention",
109
+ "full_attention",
110
+ "full_attention"
111
+ ],
112
+ "max_position_embeddings": 40960,
113
+ "max_window_layers": 28,
114
+ "model_type": "qwen3",
115
+ "num_attention_heads": 16,
116
+ "num_hidden_layers": 28,
117
+ "num_key_value_heads": 8,
118
+ "pixel_shuffle_scale": 2,
119
+ "rms_norm_eps": 1e-06,
120
+ "rope_parameters": {
121
+ "rope_theta": 1000000,
122
+ "rope_type": "default"
123
+ },
124
+ "rope_theta": 1000000.0,
125
+ "sliding_window": null,
126
+ "use_cache": true,
127
+ "use_sliding_window": false,
128
+ "vision_max_num_patches": 6144,
129
+ "vision_mean": [
130
+ 0.5,
131
+ 0.5,
132
+ 0.5
133
+ ],
134
+ "vision_min_num_patches": 256,
135
+ "vision_patch_size": 16,
136
+ "vision_std": [
137
+ 0.5,
138
+ 0.5,
139
+ 0.5
140
+ ],
141
+ "vocab_size": 151936
142
+ },
143
  "tie_word_embeddings": false,
144
+ "transformers_version": "5.0.0.dev0",
145
  "use_cache": true,
146
  "use_sliding_window": false,
 
147
  "vision_config": {
148
  "attention_dropout": 0.0,
149
  "hidden_act": "gelu_pytorch_tanh",
 
151
  "image_size": 256,
152
  "intermediate_size": 4304,
153
  "layer_norm_eps": 1e-06,
154
+ "model_type": "isaac_vision",
155
  "num_attention_heads": 16,
156
  "num_channels": 3,
157
  "num_hidden_layers": 27,
 
160
  "pixel_shuffle_scale_factor": 2
161
  },
162
  "vision_max_num_patches": 6144,
163
+ "vision_mean": [
164
+ 0.5,
165
+ 0.5,
166
+ 0.5
167
+ ],
168
  "vision_min_num_patches": 256,
169
+ "vision_patch_size": 16,
170
+ "vision_rescale_factor": 0.00392156862745098,
171
+ "vision_std": [
172
+ 0.5,
173
+ 0.5,
174
+ 0.5
175
+ ],
176
  "vision_token": "<image>",
177
  "vocab_size": 151936
178
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 151643,
4
  "eos_token_id": 151645,
5
- "transformers_version": "4.56.1"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 151643,
4
  "eos_token_id": 151645,
5
+ "transformers_version": "5.0.0.dev0"
6
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d31217bf5365162ae38b4e6a5b27acff8481ef892e9803874cbb49476d0f501
3
- size 4969539560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb45f8ee129e542e12ea62717345ce39118f3f26971a082410b70f898aad3f3
3
+ size 4969541832
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e133442cabfd18ed5ba13cd21527d0220c78e2989a2778b8849e5835e0995c75
3
- size 4054187824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd03019a8b436cad6445923d117afea6b5925be937cafedb86c65bc76f37624b
3
+ size 4054193816
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
modular_isaac.py CHANGED
The diff for this file is too large to render. See raw diff
 
processor_config.json CHANGED
@@ -2,208 +2,46 @@
2
  "auto_map": {
3
  "AutoProcessor": "modular_isaac.IsaacProcessor"
4
  },
5
- "config": {
6
- "_name_or_path": "",
7
- "add_cross_attention": false,
8
- "architectures": [
9
- "IsaacForConditionalGeneration"
10
- ],
11
- "attention_bias": false,
12
- "attention_dropout": 0.0,
13
  "auto_map": {
14
- "AutoModelForCausalLM": "modular_isaac.IsaacForConditionalGeneration"
15
- },
16
- "bad_words_ids": null,
17
- "begin_suppress_tokens": null,
18
- "bos_token_id": 151643,
19
- "chunk_size_feed_forward": 0,
20
- "cross_attention_hidden_size": null,
21
- "decoder_start_token_id": null,
22
- "diversity_penalty": 0.0,
23
- "do_sample": false,
24
- "dtype": "float32",
25
- "early_stopping": false,
26
- "encoder_no_repeat_ngram_size": 0,
27
- "eos_token_id": 151645,
28
- "exponential_decay_length_penalty": null,
29
- "finetuning_task": null,
30
- "forced_bos_token_id": null,
31
- "forced_eos_token_id": null,
32
- "head_dim": 128,
33
- "hidden_act": "silu",
34
- "hidden_size": 2048,
35
- "id2label": {
36
- "0": "LABEL_0",
37
- "1": "LABEL_1"
38
- },
39
- "initializer_range": 0.02,
40
- "intermediate_size": 6144,
41
- "is_decoder": false,
42
- "is_encoder_decoder": false,
43
- "label2id": {
44
- "LABEL_0": 0,
45
- "LABEL_1": 1
46
  },
47
- "layer_types": [
48
- "full_attention",
49
- "full_attention",
50
- "full_attention",
51
- "full_attention",
52
- "full_attention",
53
- "full_attention",
54
- "full_attention",
55
- "full_attention",
56
- "full_attention",
57
- "full_attention",
58
- "full_attention",
59
- "full_attention",
60
- "full_attention",
61
- "full_attention",
62
- "full_attention",
63
- "full_attention",
64
- "full_attention",
65
- "full_attention",
66
- "full_attention",
67
- "full_attention",
68
- "full_attention",
69
- "full_attention",
70
- "full_attention",
71
- "full_attention",
72
- "full_attention",
73
- "full_attention",
74
- "full_attention",
75
- "full_attention"
76
  ],
77
- "length_penalty": 1.0,
78
- "max_length": 20,
79
- "max_position_embeddings": 40960,
80
- "max_sequence_length": 16384,
81
- "max_window_layers": 28,
82
- "min_length": 0,
83
- "model_type": "isaac",
84
- "no_repeat_ngram_size": 0,
85
- "num_attention_heads": 16,
86
- "num_beam_groups": 1,
87
- "num_beams": 1,
88
- "num_hidden_layers": 28,
89
- "num_key_value_heads": 8,
90
- "num_return_sequences": 1,
91
- "output_attentions": false,
92
- "output_hidden_states": false,
93
- "output_scores": false,
94
- "pad_token_id": null,
95
  "pixel_shuffle_scale": 2,
96
- "prefix": null,
97
- "problem_type": null,
98
- "pruned_heads": {},
99
- "remove_invalid_values": false,
100
- "repetition_penalty": 1.0,
101
- "return_dict": true,
102
- "return_dict_in_generate": false,
103
- "rms_norm_eps": 1e-06,
104
- "rope_scaling": {
105
- "mrope_interleaved": true,
106
- "mrope_section": null,
107
- "rope_type": "default"
108
- },
109
- "rope_theta": 1000000.0,
110
- "sep_token_id": null,
111
- "sliding_window": null,
112
- "suppress_tokens": null,
113
- "task_specific_params": null,
114
- "temperature": 1.0,
115
- "tf_legacy_loss": false,
116
- "tie_encoder_decoder": false,
117
- "tie_word_embeddings": false,
118
- "tokenizer_class": null,
119
- "top_k": 50,
120
- "top_p": 1.0,
121
- "torchscript": false,
122
- "transformers_version": "4.56.1",
123
- "typical_p": 1.0,
124
- "use_bfloat16": false,
125
- "use_cache": true,
126
- "use_sliding_window": false,
127
- "video_patch_size": 16,
128
- "vision_config": {
129
- "_name_or_path": "",
130
- "add_cross_attention": false,
131
- "architectures": null,
132
- "attention_dropout": 0.0,
133
- "bad_words_ids": null,
134
- "begin_suppress_tokens": null,
135
- "bos_token_id": null,
136
- "chunk_size_feed_forward": 0,
137
- "cross_attention_hidden_size": null,
138
- "decoder_start_token_id": null,
139
- "diversity_penalty": 0.0,
140
- "do_sample": false,
141
- "dtype": null,
142
- "early_stopping": false,
143
- "encoder_no_repeat_ngram_size": 0,
144
- "eos_token_id": null,
145
- "exponential_decay_length_penalty": null,
146
- "finetuning_task": null,
147
- "forced_bos_token_id": null,
148
- "forced_eos_token_id": null,
149
- "hidden_act": "gelu_pytorch_tanh",
150
- "hidden_size": 1152,
151
- "id2label": {
152
- "0": "LABEL_0",
153
- "1": "LABEL_1"
154
- },
155
- "image_size": 256,
156
- "intermediate_size": 4304,
157
- "is_decoder": false,
158
- "is_encoder_decoder": false,
159
- "label2id": {
160
- "LABEL_0": 0,
161
- "LABEL_1": 1
162
- },
163
- "layer_norm_eps": 1e-06,
164
- "length_penalty": 1.0,
165
- "max_length": 20,
166
- "min_length": 0,
167
- "model_type": "pixel_shuffle_siglip2",
168
- "no_repeat_ngram_size": 0,
169
- "num_attention_heads": 16,
170
- "num_beam_groups": 1,
171
- "num_beams": 1,
172
- "num_channels": 3,
173
- "num_hidden_layers": 27,
174
- "num_patches": 256,
175
- "num_return_sequences": 1,
176
- "output_attentions": false,
177
- "output_hidden_states": false,
178
- "output_scores": false,
179
- "pad_token_id": null,
180
- "patch_size": 16,
181
- "pixel_shuffle_scale_factor": 2,
182
- "prefix": null,
183
- "problem_type": null,
184
- "pruned_heads": {},
185
- "remove_invalid_values": false,
186
- "repetition_penalty": 1.0,
187
- "return_dict": true,
188
- "return_dict_in_generate": false,
189
- "sep_token_id": null,
190
- "suppress_tokens": null,
191
- "task_specific_params": null,
192
- "temperature": 1.0,
193
- "tf_legacy_loss": false,
194
- "tie_encoder_decoder": false,
195
- "tie_word_embeddings": true,
196
- "tokenizer_class": null,
197
- "top_k": 50,
198
- "top_p": 1.0,
199
- "torchscript": false,
200
- "typical_p": 1.0,
201
- "use_bfloat16": false
202
- },
203
- "vision_max_num_patches": 6144,
204
- "vision_min_num_patches": 256,
205
- "vision_token": "<image>",
206
- "vocab_size": 151936
207
  },
208
- "processor_class": "IsaacProcessor"
 
 
209
  }
 
2
  "auto_map": {
3
  "AutoProcessor": "modular_isaac.IsaacProcessor"
4
  },
5
+ "config": null,
6
+ "image_processor": {
 
 
 
 
 
 
7
  "auto_map": {
8
+ "AutoProcessor": "modular_isaac.IsaacProcessor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  },
10
+ "crop_size": null,
11
+ "data_format": "channels_first",
12
+ "device": null,
13
+ "disable_grouping": false,
14
+ "do_center_crop": false,
15
+ "do_convert_rgb": true,
16
+ "do_normalize": true,
17
+ "do_pad": false,
18
+ "do_rescale": true,
19
+ "do_resize": true,
20
+ "image_mean": [
21
+ 0.5,
22
+ 0.5,
23
+ 0.5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ],
25
+ "image_processor_type": "IsaacImageProcessorFast",
26
+ "image_seq_length": null,
27
+ "image_std": [
28
+ 0.5,
29
+ 0.5,
30
+ 0.5
31
+ ],
32
+ "input_data_format": null,
33
+ "max_num_patches": 6144,
34
+ "min_num_patches": 256,
35
+ "pad_size": null,
36
+ "patch_size": 16,
 
 
 
 
 
 
37
  "pixel_shuffle_scale": 2,
38
+ "processor_class": "IsaacProcessor",
39
+ "resample": 2,
40
+ "rescale_factor": 0.00392156862745098,
41
+ "return_tensors": null,
42
+ "size": null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  },
44
+ "max_sequence_length": 16384,
45
+ "processor_class": "IsaacProcessor",
46
+ "vision_token": "<image>"
47
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
- size 11422654
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c670a45d54b226b4213f50c920332be152acff8fafaabdafd5586e772c3d500
3
+ size 11473541
tokenizer_config.json CHANGED
@@ -1,217 +1,16 @@
1
  {
2
  "add_bos_token": false,
3
  "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "151643": {
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "151644": {
14
- "content": "<|im_start|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "151645": {
22
- "content": "<|im_end|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "151646": {
30
- "content": "<|object_ref_start|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "151647": {
38
- "content": "<|object_ref_end|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "151648": {
46
- "content": "<|box_start|>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "151649": {
54
- "content": "<|box_end|>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "151650": {
62
- "content": "<|quad_start|>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "151651": {
70
- "content": "<|quad_end|>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
- },
77
- "151652": {
78
- "content": "<|vision_start|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "151653": {
86
- "content": "<|vision_end|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "151654": {
94
- "content": "<|vision_pad|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "151655": {
102
- "content": "<|image_pad|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "151656": {
110
- "content": "<|video_pad|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "151657": {
118
- "content": "<tool_call>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "151658": {
126
- "content": "</tool_call>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "151659": {
134
- "content": "<|fim_prefix|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "151660": {
142
- "content": "<|fim_middle|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "151661": {
150
- "content": "<|fim_suffix|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "151662": {
158
- "content": "<|fim_pad|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "151663": {
166
- "content": "<|repo_name|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "151664": {
174
- "content": "<|file_sep|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "151665": {
182
- "content": "<tool_response>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "151666": {
190
- "content": "</tool_response>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "151667": {
198
- "content": "<think>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": false
204
- },
205
- "151668": {
206
- "content": "</think>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": false
212
- }
213
  },
214
- "additional_special_tokens": [
 
 
 
 
 
215
  "<|im_start|>",
216
  "<|im_end|>",
217
  "<|object_ref_start|>",
@@ -226,14 +25,7 @@
226
  "<|image_pad|>",
227
  "<|video_pad|>"
228
  ],
229
- "auto_map": {
230
- "AutoProcessor": "modular_isaac.IsaacProcessor"
231
- },
232
- "bos_token": null,
233
- "clean_up_tokenization_spaces": false,
234
- "eos_token": "<|im_end|>",
235
- "errors": "replace",
236
- "extra_special_tokens": {},
237
  "model_max_length": 131072,
238
  "pad_token": "<|endoftext|>",
239
  "processor_class": "IsaacProcessor",
 
1
  {
2
  "add_bos_token": false,
3
  "add_prefix_space": false,
4
+ "additional_special_tokens": null,
5
+ "auto_map": {
6
+ "AutoProcessor": "modular_isaac.IsaacProcessor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  },
8
+ "backend": "tokenizers",
9
+ "bos_token": null,
10
+ "clean_up_tokenization_spaces": false,
11
+ "eos_token": "<|im_end|>",
12
+ "errors": "replace",
13
+ "extra_special_tokens": [
14
  "<|im_start|>",
15
  "<|im_end|>",
16
  "<|object_ref_start|>",
 
25
  "<|image_pad|>",
26
  "<|video_pad|>"
27
  ],
28
+ "is_local": true,
 
 
 
 
 
 
 
29
  "model_max_length": 131072,
30
  "pad_token": "<|endoftext|>",
31
  "processor_class": "IsaacProcessor",