| { | |
| "architectures": [ | |
| "SiglipForImageClassification" | |
| ], | |
| "id2label": { | |
| "0": "barn", | |
| "1": "baseball_bat", | |
| "2": "basket", | |
| "3": "beach", | |
| "4": "bear", | |
| "5": "beard", | |
| "6": "bee", | |
| "7": "bird", | |
| "8": "blueberry", | |
| "9": "bowtie", | |
| "10": "bracelet", | |
| "11": "brain", | |
| "12": "bread", | |
| "13": "broccoli", | |
| "14": "bus", | |
| "15": "butterfly", | |
| "16": "circle", | |
| "17": "cloud", | |
| "18": "cruise_ship", | |
| "19": "dolphin", | |
| "20": "dumbbell", | |
| "21": "elephant", | |
| "22": "eye", | |
| "23": "eyeglasses", | |
| "24": "feather", | |
| "25": "fish", | |
| "26": "flower", | |
| "27": "foot", | |
| "28": "frog", | |
| "29": "giraffe", | |
| "30": "goatee", | |
| "31": "golf_club", | |
| "32": "grapes", | |
| "33": "grass", | |
| "34": "guitar", | |
| "35": "hamburger", | |
| "36": "hand", | |
| "37": "hat", | |
| "38": "headphones", | |
| "39": "helicopter", | |
| "40": "hexagon", | |
| "41": "hockey_stick", | |
| "42": "horse", | |
| "43": "hourglass", | |
| "44": "house", | |
| "45": "ice_cream", | |
| "46": "jacket", | |
| "47": "ladder", | |
| "48": "leg", | |
| "49": "lipstick", | |
| "50": "megaphone", | |
| "51": "monkey", | |
| "52": "moon", | |
| "53": "mushroom", | |
| "54": "necklace", | |
| "55": "owl", | |
| "56": "panda", | |
| "57": "pear", | |
| "58": "peas", | |
| "59": "penguin", | |
| "60": "pig", | |
| "61": "pillow", | |
| "62": "pineapple", | |
| "63": "pizza", | |
| "64": "pool", | |
| "65": "popsicle", | |
| "66": "rabbit", | |
| "67": "rhinoceros", | |
| "68": "rifle", | |
| "69": "river", | |
| "70": "sailboat", | |
| "71": "sandwich", | |
| "72": "sea_turtle", | |
| "73": "shark", | |
| "74": "shoe", | |
| "75": "skyscraper", | |
| "76": "snorkel", | |
| "77": "snowman", | |
| "78": "soccer_ball", | |
| "79": "speedboat", | |
| "80": "spider", | |
| "81": "spoon", | |
| "82": "square", | |
| "83": "squirrel", | |
| "84": "stethoscope", | |
| "85": "strawberry", | |
| "86": "streetlight", | |
| "87": "submarine", | |
| "88": "suitcase", | |
| "89": "sun", | |
| "90": "sweater", | |
| "91": "sword", | |
| "92": "table", | |
| "93": "teapot", | |
| "94": "teddy-bear", | |
| "95": "telephone", | |
| "96": "tent", | |
| "97": "The_Eiffel_Tower", | |
| "98": "The_Great_Wall_of_China", | |
| "99": "The_Mona_Lisa", | |
| "100": "tiger", | |
| "101": "toaster", | |
| "102": "tooth", | |
| "103": "tornado", | |
| "104": "tractor", | |
| "105": "train", | |
| "106": "tree", | |
| "107": "triangle", | |
| "108": "trombone", | |
| "109": "truck", | |
| "110": "trumpet", | |
| "111": "umbrella", | |
| "112": "vase", | |
| "113": "violin", | |
| "114": "watermelon", | |
| "115": "whale", | |
| "116": "windmill", | |
| "117": "wine_glass", | |
| "118": "yoga", | |
| "119": "zebra", | |
| "120": "zigzag" | |
| }, | |
| "initializer_factor": 1.0, | |
| "label2id": { | |
| "The_Eiffel_Tower": 97, | |
| "The_Great_Wall_of_China": 98, | |
| "The_Mona_Lisa": 99, | |
| "barn": 0, | |
| "baseball_bat": 1, | |
| "basket": 2, | |
| "beach": 3, | |
| "bear": 4, | |
| "beard": 5, | |
| "bee": 6, | |
| "bird": 7, | |
| "blueberry": 8, | |
| "bowtie": 9, | |
| "bracelet": 10, | |
| "brain": 11, | |
| "bread": 12, | |
| "broccoli": 13, | |
| "bus": 14, | |
| "butterfly": 15, | |
| "circle": 16, | |
| "cloud": 17, | |
| "cruise_ship": 18, | |
| "dolphin": 19, | |
| "dumbbell": 20, | |
| "elephant": 21, | |
| "eye": 22, | |
| "eyeglasses": 23, | |
| "feather": 24, | |
| "fish": 25, | |
| "flower": 26, | |
| "foot": 27, | |
| "frog": 28, | |
| "giraffe": 29, | |
| "goatee": 30, | |
| "golf_club": 31, | |
| "grapes": 32, | |
| "grass": 33, | |
| "guitar": 34, | |
| "hamburger": 35, | |
| "hand": 36, | |
| "hat": 37, | |
| "headphones": 38, | |
| "helicopter": 39, | |
| "hexagon": 40, | |
| "hockey_stick": 41, | |
| "horse": 42, | |
| "hourglass": 43, | |
| "house": 44, | |
| "ice_cream": 45, | |
| "jacket": 46, | |
| "ladder": 47, | |
| "leg": 48, | |
| "lipstick": 49, | |
| "megaphone": 50, | |
| "monkey": 51, | |
| "moon": 52, | |
| "mushroom": 53, | |
| "necklace": 54, | |
| "owl": 55, | |
| "panda": 56, | |
| "pear": 57, | |
| "peas": 58, | |
| "penguin": 59, | |
| "pig": 60, | |
| "pillow": 61, | |
| "pineapple": 62, | |
| "pizza": 63, | |
| "pool": 64, | |
| "popsicle": 65, | |
| "rabbit": 66, | |
| "rhinoceros": 67, | |
| "rifle": 68, | |
| "river": 69, | |
| "sailboat": 70, | |
| "sandwich": 71, | |
| "sea_turtle": 72, | |
| "shark": 73, | |
| "shoe": 74, | |
| "skyscraper": 75, | |
| "snorkel": 76, | |
| "snowman": 77, | |
| "soccer_ball": 78, | |
| "speedboat": 79, | |
| "spider": 80, | |
| "spoon": 81, | |
| "square": 82, | |
| "squirrel": 83, | |
| "stethoscope": 84, | |
| "strawberry": 85, | |
| "streetlight": 86, | |
| "submarine": 87, | |
| "suitcase": 88, | |
| "sun": 89, | |
| "sweater": 90, | |
| "sword": 91, | |
| "table": 92, | |
| "teapot": 93, | |
| "teddy-bear": 94, | |
| "telephone": 95, | |
| "tent": 96, | |
| "tiger": 100, | |
| "toaster": 101, | |
| "tooth": 102, | |
| "tornado": 103, | |
| "tractor": 104, | |
| "train": 105, | |
| "tree": 106, | |
| "triangle": 107, | |
| "trombone": 108, | |
| "truck": 109, | |
| "trumpet": 110, | |
| "umbrella": 111, | |
| "vase": 112, | |
| "violin": 113, | |
| "watermelon": 114, | |
| "whale": 115, | |
| "windmill": 116, | |
| "wine_glass": 117, | |
| "yoga": 118, | |
| "zebra": 119, | |
| "zigzag": 120 | |
| }, | |
| "model_type": "siglip", | |
| "problem_type": "single_label_classification", | |
| "text_config": { | |
| "attention_dropout": 0.0, | |
| "hidden_act": "gelu_pytorch_tanh", | |
| "hidden_size": 768, | |
| "intermediate_size": 3072, | |
| "layer_norm_eps": 1e-06, | |
| "max_position_embeddings": 64, | |
| "model_type": "siglip_text_model", | |
| "num_attention_heads": 12, | |
| "num_hidden_layers": 12, | |
| "projection_size": 768, | |
| "torch_dtype": "float32", | |
| "vocab_size": 256000 | |
| }, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.51.0.dev0", | |
| "vision_config": { | |
| "attention_dropout": 0.0, | |
| "hidden_act": "gelu_pytorch_tanh", | |
| "hidden_size": 768, | |
| "image_size": 224, | |
| "intermediate_size": 3072, | |
| "layer_norm_eps": 1e-06, | |
| "model_type": "siglip_vision_model", | |
| "num_attention_heads": 12, | |
| "num_channels": 3, | |
| "num_hidden_layers": 12, | |
| "patch_size": 16, | |
| "torch_dtype": "float32" | |
| } | |
| } | |