| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | """CodeBLEU metric.""" |
| |
|
| | import evaluate |
| | import datasets |
| |
|
| | from .my_codebleu import calc_codebleu |
| |
|
| |
|
| | |
| | _CITATION = """\ |
| | @InProceedings{huggingface:module, |
| | title = {CodeBLEU: A Metric for Evaluating Code Generation}, |
| | authors={Sedykh, Ivan}, |
| | year={2022} |
| | } |
| | """ |
| |
|
| | |
| | _DESCRIPTION = """\ |
| | This new module is an adaptation of the original CodeBLEU metric from CodexGLUE benchmark |
| | for evaluating code generation. |
| | """ |
| |
|
| |
|
| | |
| | _KWARGS_DESCRIPTION = """ |
| | Calculates how good are predictions given some references, using certain scores |
| | Args: |
| | predictions: list of predictions to score. Each predictions |
| | should be a string with tokens separated by spaces. |
| | references: list of lists of references. Each list |
| | should contain len(predictions) items. |
| | lang: programming language in ['java','js','c_sharp','php','go','python','ruby'] |
| | tokenizer: tokenizer function str -> List[str], Defaults to lambda s: s.split() |
| | params: str, weights for averaging(see CodeBLEU paper). |
| | Defaults to equal weights "0.25,0.25,0.25,0.25". |
| | Returns: |
| | CodeBLEU: resulting score, |
| | ngram_match_score: See paper CodeBLEU, |
| | weighted_ngram_match_score: See paper CodeBLEU, |
| | syntax_match_score: See paper CodeBLEU, |
| | dataflow_match_score: See paper CodeBLEU, |
| | Examples: |
| | |
| | >>> codebleu = evaluate.load("my_new_module") |
| | >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1]) |
| | >>> print(results) |
| | {'accuracy': 1.0} |
| | """ |
| |
|
| | |
| | |
| |
|
| |
|
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class codebleu(evaluate.Metric):
    """CodeBLEU metric from CodeXGLUE, packaged as an `evaluate.Metric`."""

    def _info(self):
        """Build the `evaluate.MetricInfo` record describing this metric."""
        # Per-example input schema: one prediction string and a sequence of
        # reference strings.
        input_schema = datasets.Features(
            {
                "predictions": datasets.Value("string"),
                "references": datasets.Sequence(datasets.Value("string")),
            }
        )
        # Upstream evaluator implementation and the CodeBLEU paper.
        related_links = [
            "https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/code-to-code-trans/evaluator",
            "https://arxiv.org/abs/2009.10297",
        ]
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=input_schema,
            homepage="",
            codebase_urls=[],
            reference_urls=related_links,
        )

    def _download_and_prepare(self, dl_manager):
        """Do nothing: this hook is intentionally a no-op for this metric."""
        return None

    def _compute(
        self,
        predictions,
        references,
        lang,
        tokenizer=None,
        params="0.25,0.25,0.25,0.25",
    ):
        """Forward all arguments to `calc_codebleu` and return its result as-is.

        Args:
            predictions: predictions to score.
            references: references to score against.
            lang: programming language identifier, passed through unchanged.
            tokenizer: optional tokenizer, passed through unchanged
                (``None`` by default).
            params: comma-separated weight string, passed through unchanged.

        Returns:
            Whatever `calc_codebleu` returns.
        """
        return calc_codebleu(
            predictions=predictions,
            references=references,
            lang=lang,
            tokenizer=tokenizer,
            params=params,
        )
| |
|