| | |
| | from mmengine.config import read_base |
| | import os.path as osp |
| | from opencompass.runners import LocalRunner, VOLCRunner |
| | from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner |
| | from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask |
| |
|
# Pull the benchmark, model and summarizer-group definitions into this
# module's namespace.  The aggregation statements later in this file collect
# them purely by variable-name suffix (``_datasets``, ``_model``,
# ``_summary_groups``), so both the imported names and their order matter:
# do not rename or reorder these imports casually.
# NOTE(review): indentation was lost in this copy of the file; all imports
# below are assumed to belong to the ``read_base()`` body — confirm.
with read_base():
    # ---- Datasets ----
    # BigCodeBench (full and hard splits, instruct prompts)
    from opencompass.configs.datasets.bigcodebench.bigcodebench_full_instruct_gen import (
        bigcodebench_full_instruct_datasets
    )
    from opencompass.configs.datasets.bigcodebench.bigcodebench_hard_instruct_gen import (
        bigcodebench_hard_instruct_datasets
    )

    # LiveCodeBench (time-split variant)
    from opencompass.configs.datasets.livecodebench.livecodebench_time_split_gen_a4f90b import (
        LCB_datasets
    )

    # HumanEval family
    from opencompass.configs.datasets.humaneval.humaneval_openai_sample_evals_gen_dcae0e import (
        humaneval_datasets
    )
    from opencompass.configs.datasets.humaneval_pro.humaneval_pro_gen import (
        humanevalpro_datasets
    )
    from opencompass.configs.datasets.humanevalx.humanevalx_gen_620cfa import (
        humanevalx_datasets
    )
    from opencompass.configs.datasets.humaneval_plus.humaneval_plus_gen import (
        humaneval_plus_datasets
    )

    # MBPP family
    from opencompass.configs.datasets.mbpp.mbpp_gen import (
        mbpp_datasets
    )
    from opencompass.configs.datasets.mbpp_pro.mbpp_pro_gen import (
        mbpppro_datasets
    )

    # MultiPL-E
    from opencompass.configs.datasets.multipl_e.multiple_gen import (
        multiple_datasets
    )

    # DS-1000 (service-evaluated variant)
    from opencompass.configs.datasets.ds1000.ds1000_service_eval_gen_cbc84f import (
        ds1000_datasets
    )

    # ---- Models ----
    # Aliased so the name ends in ``_model`` and is found by the suffix scan.
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import (
        models as lmdeploy_qwen2_5_7b_instruct_model,
    )

    # ---- Summary groups ----
    from opencompass.configs.summarizers.groups.ds1000 import (
        ds1000_summary_groups,
    )
    from opencompass.configs.summarizers.groups.multipl_e import (
        multiple_summary_groups,
    )
    from opencompass.configs.summarizers.groups.humanevalx import (
        humanevalx_summary_groups,
    )
| |
|
| | |
# Flatten every model list whose variable name ends with ``_model`` into a
# single ``models`` list.  At module level ``locals()`` is the module
# namespace, so this picks up whatever ``*_model`` names exist at this point.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])

# Apply the same sequence-length and output-length limits to every model.
# The entries are mutated in place, so the originally imported lists see the
# change as well.
for model in models:
    model['max_seq_len'] = 16384
    model['max_out_len'] = 8192
| |
|
| | |
# Flatten every dataset list whose variable name ends with ``_datasets`` into
# a single ``datasets`` list, in namespace (i.e. definition) order.  The
# entries are the same objects as in the source lists, so later in-place
# edits to either side are visible through both.
datasets = sum(
    (v for k, v in locals().items() if k.endswith('_datasets')),
    [],
)
| |
|
# Redirect the HumanEval-X and DS-1000 evaluators to the
# ``codeeval.opencompass.org.cn`` endpoints and blank out the port.
# NOTE(review): presumably these evaluators submit predictions to a remote
# code-evaluation service at ``ip_address``/``port`` — confirm against the
# evaluator implementations, which are not visible in this file.
for item in humanevalx_datasets:
    item['eval_cfg']['evaluator']['ip_address'] = (
        'codeeval.opencompass.org.cn/humanevalx'
    )
    item['eval_cfg']['evaluator']['port'] = ''
for item in ds1000_datasets:
    item['eval_cfg']['evaluator']['ip_address'] = (
        'codeeval.opencompass.org.cn/ds1000'
    )
    item['eval_cfg']['evaluator']['port'] = ''
| |
|
| |
|
# Set the inferencer output-length limit of every dataset to 8192, the same
# value this file assigns to each model's ``max_out_len``.
for dataset in datasets:
    dataset['infer_cfg']['inferencer']['max_out_len'] = 8192
| |
|
| |
|
| | |
# Flatten every list whose variable name ends with ``_summary_groups`` into
# one list of group definitions.
summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], []
)

# Add an explicit ``humanevalx`` group covering four languages.
# NOTE(review): if one of the collected group lists already defines a group
# named ``humanevalx``, the list will hold two entries with that name —
# confirm how the summarizer resolves duplicates.
summary_groups.append(
    {'name': 'humanevalx',
     'subsets': ['humanevalx-python', 'humanevalx-cpp', 'humanevalx-java', 'humanevalx-js']}
)

# Report layout.  ``[abbr, metric]`` pairs select a specific metric for a
# dataset or group; bare strings name a dataset by abbreviation only; empty
# strings sit between sections (presumably rendered as separators — confirm).
summarizer = dict(
    dataset_abbrs=[
        # Headline results, one row per benchmark / group.
        ['bigcodebench_hard_instruct', 'pass@1'],
        ['bigcodebench_full_instruct', 'pass@1'],
        ['lcb_code_generation', 'pass@1'],
        ['openai_humaneval', 'humaneval_pass@1'],
        ['mbpp', 'score'],
        ['humaneval_pro', 'pass@1'],
        ['mbpp_pro', 'pass@1'],
        ['humaneval_plus', 'humaneval_plus_pass@1'],
        ['multiple', 'naive_average'],
        ['humanevalx', 'naive_average'],
        ['ds1000', 'naive_average'],
        '',
        # HumanEval-X per-language breakdown.
        'humanevalx-python',
        'humanevalx-cpp',
        'humanevalx-java',
        'humanevalx-js',
        '',
        # DS-1000 per-library breakdown.
        'ds1000_Pandas',
        'ds1000_Numpy',
        'ds1000_Tensorflow',
        'ds1000_Scipy',
        'ds1000_Sklearn',
        'ds1000_Pytorch',
        'ds1000_Matplotlib',
        '',
        # MultiPL-E (HumanEval) per-language breakdown.
        'humaneval-multiple-cpp',
        'humaneval-multiple-cs',
        'humaneval-multiple-go',
        'humaneval-multiple-java',
        'humaneval-multiple-rb',
        'humaneval-multiple-js',
        'humaneval-multiple-php',
        'humaneval-multiple-r',
        'humaneval-multiple-rs',
        'humaneval-multiple-sh',
        '',
        # MultiPL-E (MBPP) per-language breakdown.
        'mbpp-multiple-cpp',
        'mbpp-multiple-cs',
        'mbpp-multiple-go',
        'mbpp-multiple-java',
        'mbpp-multiple-rb',
        'mbpp-multiple-js',
        'mbpp-multiple-php',
        'mbpp-multiple-r',
        'mbpp-multiple-rs',
        'mbpp-multiple-sh',
    ],
    summary_groups=summary_groups,
)
| |
|
# Relative output path for this config.
# NOTE(review): presumably the directory the evaluation run writes its
# results under — the consumer of ``work_dir`` is not visible in this file.
work_dir = 'outputs/code'
| |
|