KeyError: 'CodeXEmbedConfig'

#12
by kangkang227 - opened

When I used your example of sentence_transformers, the following error occurred. I downloaded the model to my local device and then loaded it from the local location. Could you please tell me where my mistake lies?

Traceback (most recent call last):
File "/media/dell/205datadisk_1/home/zcj/zcj_paper/dataset/bigvul/test.py", line 14, in
model = SentenceTransformer('/media/dell/205datadisk_1/home/zcj/zcj_paper/nano-graphrag/models/models--Salesforce--SFR-Embedding-Code-2B_R/snapshots/c73d8631a005876ed5abde34db514b1fb6566973', trust_remote_code=True)
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py", line 287, in init
modules = self._load_sbert_model(
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py", line 1487, in _load_sbert_model
module = Transformer(model_name_or_path, cache_dir=cache_folder, **kwargs)
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/sentence_transformers/models/Transformer.py", line 54, in init
self._load_model(model_name_or_path, config, cache_dir, **model_args)
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/sentence_transformers/models/Transformer.py", line 85, in _load_model
self.auto_model = AutoModel.from_pretrained(
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 559, in from_pretrained
return model_class.from_pretrained(
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/transformers/modeling_utils.py", line 262, in _wrapper
return func(*args, **kwargs)
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/transformers/modeling_utils.py", line 4185, in from_pretrained
model = cls(config, *model_args, **model_kwargs)
File "/media/dell/205datadisk_1/home/zcj/.cache/huggingface/modules/transformers_modules/c73d8631a005876ed5abde34db514b1fb6566973/modeling_gemma2.py", line 1347, in init
self.tokenizer = AutoTokenizer.from_pretrained(config._name_or_path, trust_remote_code=True, device_map="auto")
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 960, in from_pretrained
tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]
File "/media/dell/205datadisk_1/home/zcj/miniconda3/envs/nanoG/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 760, in getitem
model_type = self._reverse_config_mapping[key.name]

Install the transformers and accelerate packages:

pip install transformers==4.49.0
pip install accelerate

Then run this program:

"""Embedding demo for Salesforce/SFR-Embedding-Code-2B_R.

Loads the tokenizer and model (both with trust_remote_code=True), embeds
one query plus two code passages, and prints scaled similarity scores.
"""
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

MODEL_ID = 'Salesforce/SFR-Embedding-Code-2B_R'

# Load the tokenizer first, then the model. NOTE(review): per this thread,
# fetching the tokenizer before the model appears to sidestep the
# KeyError raised by the repo's remote code — confirm against the model card.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True)

# Each query is paired with an instruction describing the retrieval task;
# passages need no instruction.
query_instruction_example = "Given Code or Text, retrieval relevant content"
queries = ["how to implement quick sort in Python?"]

passages = [
    "def quick_sort(arr):\n    if len(arr) <= 1:\n        return arr\n    pivot = arr[len(arr) // 2]\n    left = [x for x in arr if x < pivot]\n    middle = [x for x in arr if x == pivot]\n    right = [x for x in arr if x > pivot]\n    return quick_sort(left) + middle + quick_sort(right)",
    "def bubble_sort(arr):\n    n = len(arr)\n    for i in range(n):\n        for j in range(0, n-i-1):\n            if arr[j] > arr[j+1]:\n                arr[j], arr[j+1] = arr[j+1], arr[j]\n    return arr",
]

# Compute embeddings via the repo-provided helpers, then L2-normalize them.
max_length = 32768
query_embeddings = F.normalize(
    model.encode_queries(queries, instruction=query_instruction_example, max_length=max_length),
    p=2,
    dim=1,
)
passage_embeddings = F.normalize(
    model.encode_corpus(passages, max_length=max_length),
    p=2,
    dim=1,
)

# Scaled cosine similarity between every query/passage pair.
scores = (query_embeddings @ passage_embeddings.T) * 100
print(scores.tolist())

Sign up or log in to comment