from mmengine.config import read_base

# Imports placed inside ``read_base()`` pull variables from another config
# file into this one; here that is the HelloBench dataset list.
with read_base():
    from opencompass.configs.datasets.subjective.hellobench.hellobench import hellobench_datasets

from opencompass.models import HuggingFacewithChatTemplate, OpenAI
from opencompass.partitioners import NaivePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.summarizers import DefaultSubjectiveSummarizer
from opencompass.tasks import OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask

# Conversation template passed to the API judge model as ``meta_template``.
# One round is a HUMAN turn followed by a BOT turn; only the BOT turn is
# marked ``generate=True``.
api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])

# Models under evaluation (inference stage).
# Sampling is enabled (do_sample=True, temperature=0.8), so generations may
# differ between runs.
models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='glm-4-9b-chat-hf',
        path='THUDM/glm-4-9b-chat',
        max_out_len=16384,
        generation_kwargs=dict(
            temperature=0.8,
            do_sample=True,
        ),
        model_kwargs=dict(
            device_map='auto',
            trust_remote_code=True,
        ),
        batch_size=1,
        run_cfg=dict(num_gpus=2, num_procs=1),
        stop_words=['<|endoftext|>', '<|user|>', '<|observation|>'],
    )
]

# Datasets to run: a fresh list holding every HelloBench dataset config
# imported under ``read_base()`` at the top of the file.
datasets = [*hellobench_datasets]

# Inference stage: NaivePartitioner creates the tasks and LocalRunner
# executes them as OpenICLInferTask with up to 16 workers.
infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        task=dict(type=OpenICLInferTask),
    ),
)

# Judge model(s) that score the generations in the evaluation stage.
judge_models = [
    dict(
        abbr='GPT4o',
        type=OpenAI,
        path='gpt-4o',
        # Placeholder value: replace with a real API key before running.
        key='xxxx',
        meta_template=api_meta_template,
        query_per_second=16,
        max_out_len=4096,
        batch_size=1,
        temperature=0.8,
        seed=42,
    )
]

# Evaluation stage: SubjectiveNaivePartitioner is given both the evaluated
# models and the judge models; LocalRunner executes SubjectiveEvalTask with
# up to 16 workers.
# NOTE: ``eval`` shadows the Python builtin inside this config module; the
# name is kept because it is this config's public key.
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        models=models,
        judge_models=judge_models,
    ),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        task=dict(type=SubjectiveEvalTask),
    ),
)

# Summarizer used to aggregate the judge results.
summarizer = dict(type=DefaultSubjectiveSummarizer)
# Output directory for this run (relative path).
work_dir = 'outputs/hellobench/'