| | import os.path as osp |
| | from copy import deepcopy |
| |
|
| | from mmengine.config import read_base |
| |
|
| | from opencompass.models import (HuggingFacewithChatTemplate, |
| | TurboMindModelwithChatTemplate) |
| | from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner |
| | from opencompass.runners import DLCRunner, LocalRunner |
| | from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask |
| |
|
| | |
| | |
| | |
with read_base():
    # Dataset suites: longbench, plus needlebench and ruler at each of the
    # 8k / 32k / 128k context lengths.
    from opencompass.configs.datasets.longbench.longbench import (
        longbench_datasets)
    from opencompass.configs.datasets.needlebench.needlebench_8k.needlebench_8k import (
        needlebench_datasets as needlebench_8k_datasets)
    from opencompass.configs.datasets.needlebench.needlebench_32k.needlebench_32k import (
        needlebench_datasets as needlebench_32k_datasets)
    from opencompass.configs.datasets.needlebench.needlebench_128k.needlebench_128k import (
        needlebench_datasets as needlebench_128k_datasets)
    from opencompass.configs.datasets.ruler.ruler_8k_gen import (
        ruler_datasets as ruler_8k_datasets)
    from opencompass.configs.datasets.ruler.ruler_32k_gen import (
        ruler_datasets as ruler_32k_datasets)
    from opencompass.configs.datasets.ruler.ruler_128k_gen import (
        ruler_datasets as ruler_128k_datasets)

    # Models under test; the `*_model` suffix is what the locals() scan
    # further down keys on when assembling `models`.
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import (
        models as lmdeploy_internlm2_5_7b_1m_chat_model)
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import (
        models as llama3_1_8b_instruct_model)
    from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import (
        models as lmdeploy_qwen2_7b_instruct_model)

    # Summary groups / summarizers consumed by the `summarizer` dict below.
    from opencompass.configs.summarizers.groups.longbench import (
        longbench_summary_groups)
    from opencompass.configs.summarizers.groups.ruler import (
        ruler_summary_groups)
    from opencompass.configs.summarizers.needlebench import (
        needlebench_8k_summarizer, needlebench_32k_summarizer,
        needlebench_128k_summarizer)
| |
|
| | |
| | |
| | |
| | |
| | datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) |
| |
|
| | |
| | |
| | |
# Unpack the per-context-length summary groups out of the imported
# needlebench summarizers; the `*_summary_groups` names make them visible
# to the locals() scan inside the summarizer definition below.
(needlebench_8k_summary_groups,
 needlebench_32k_summary_groups,
 needlebench_128k_summary_groups) = (
     cfg['summary_groups']
     for cfg in (needlebench_8k_summarizer,
                 needlebench_32k_summarizer,
                 needlebench_128k_summarizer))
| |
|
| | |
# Report layout: headline long-context metrics first, then (after the
# blank-separator entry '') the longbench per-category averages.
summarizer = dict(
    dataset_abbrs=[
        ['ruler_8k', 'naive_average'],
        ['ruler_32k', 'naive_average'],
        ['ruler_128k', 'naive_average'],
        ['NeedleBench-Overall-Score-8K', 'weighted_average'],
        ['NeedleBench-Overall-Score-32K', 'weighted_average'],
        ['NeedleBench-Overall-Score-128K', 'weighted_average'],
        ['longbench', 'naive_average'],
        ['longbench_zh', 'naive_average'],
        ['longbench_en', 'naive_average'],
        '',
        'longbench_single-document-qa',
        'longbench_multi-document-qa',
        'longbench_summarization',
        'longbench_few-shot-learning',
        'longbench_synthetic-tasks',
        'longbench_code-completion',
    ],
    # Merge every `*_summary_groups` list currently defined at module level
    # (longbench, ruler, and the three needlebench extractions above).
    summary_groups=sum(
        (v for k, v in locals().items() if k.endswith('_summary_groups')), []),
)
| |
|
| | |
| | |
| | |
| |
|
def _patch_for_1m_context(model_cfgs, num_gpus=4):
    """Enable 1M-token long-context inference on a model config, in place.

    The original file repeated the same five assignments verbatim for each
    model; this helper keeps both models' patches identical by construction.
    It raises the model/session length to 1048576 (= 1024 * 1024) tokens,
    shards the engine over ``num_gpus`` GPUs (tensor parallel), and sets the
    rope scaling factor used for the extended context.

    Args:
        model_cfgs (list[dict]): a `models` list as imported from the
            opencompass configs. Only the first entry is patched, matching
            the original per-model assignments.
        num_gpus (int): tensor-parallel degree and GPUs per task
            (defaults to 4, as in the original assignments).
    """
    cfg = model_cfgs[0]
    cfg['max_seq_len'] = 1048576
    cfg['engine_config']['session_len'] = 1048576
    cfg['engine_config']['tp'] = num_gpus
    cfg['engine_config']['rope_scaling_factor'] = 4
    cfg['run_cfg']['num_gpus'] = num_gpus


_patch_for_1m_context(lmdeploy_qwen2_7b_instruct_model)
_patch_for_1m_context(llama3_1_8b_instruct_model)
| |
|
| | models = sum([v for k, v in locals().items() if k.endswith('_model')], []) |
| |
|
| | |
| | |
| | |
| |
|
| | |
# Inference stage: split each dataset across 8 workers, run up to 16
# inference tasks concurrently on the local machine.
_infer_task = dict(type=OpenICLInferTask)
infer = dict(
    partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                retry=0,  # fail fast: no automatic retries of crashed tasks
                task=_infer_task),
)
| |
|
| | |
# Evaluation stage: score finished predictions locally, up to 16 tasks at
# once. NOTE: `eval` intentionally shadows the builtin — opencompass looks
# this config variable up by name.
_eval_task = dict(type=OpenICLEvalTask)
eval = dict(
    partitioner=dict(type=NaivePartitioner, n=10),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=_eval_task),
)
| |
|
| | |
| | |
| | |
# All artifacts for this config land under outputs/corebench/long_context;
# the subdirectory keeps long-context results apart from other corebench
# runs that share base_exp_dir.
base_exp_dir = 'outputs/corebench/'
work_dir = osp.join(base_exp_dir, 'long_context')
| |
|