Update README.md
Browse files
README.md
CHANGED
|
@@ -27,7 +27,7 @@ model-index:
|
|
| 27 |
num_few_shot: 0
|
| 28 |
metrics:
|
| 29 |
- type: inst_level_strict_acc and prompt_level_strict_acc
|
| 30 |
-
value:
|
| 31 |
name: strict accuracy
|
| 32 |
source:
|
| 33 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
@@ -42,7 +42,7 @@ model-index:
|
|
| 42 |
num_few_shot: 3
|
| 43 |
metrics:
|
| 44 |
- type: acc_norm
|
| 45 |
-
value:
|
| 46 |
name: normalized accuracy
|
| 47 |
source:
|
| 48 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
@@ -57,7 +57,7 @@ model-index:
|
|
| 57 |
num_few_shot: 4
|
| 58 |
metrics:
|
| 59 |
- type: exact_match
|
| 60 |
-
value:
|
| 61 |
name: exact match
|
| 62 |
source:
|
| 63 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
@@ -72,7 +72,7 @@ model-index:
|
|
| 72 |
num_few_shot: 0
|
| 73 |
metrics:
|
| 74 |
- type: acc_norm
|
| 75 |
-
value:
|
| 76 |
name: acc_norm
|
| 77 |
source:
|
| 78 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
@@ -87,7 +87,7 @@ model-index:
|
|
| 87 |
num_few_shot: 0
|
| 88 |
metrics:
|
| 89 |
- type: acc_norm
|
| 90 |
-
value:
|
| 91 |
name: acc_norm
|
| 92 |
source:
|
| 93 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
@@ -104,7 +104,7 @@ model-index:
|
|
| 104 |
num_few_shot: 5
|
| 105 |
metrics:
|
| 106 |
- type: acc
|
| 107 |
-
value:
|
| 108 |
name: accuracy
|
| 109 |
source:
|
| 110 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
@@ -153,11 +153,11 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
|
|
| 153 |
|
| 154 |
| Metric |Value|
|
| 155 |
|-------------------|----:|
|
| 156 |
-
|Avg. |
|
| 157 |
-
|IFEval (0-Shot) |
|
| 158 |
-
|BBH (3-Shot) |
|
| 159 |
-
|MATH Lvl 5 (4-Shot)|
|
| 160 |
-
|GPQA (0-shot) |
|
| 161 |
-
|MuSR (0-shot) |
|
| 162 |
-
|MMLU-PRO (5-shot) |
|
| 163 |
|
|
|
|
| 27 |
num_few_shot: 0
|
| 28 |
metrics:
|
| 29 |
- type: inst_level_strict_acc and prompt_level_strict_acc
|
| 30 |
+
value: 56.04
|
| 31 |
name: strict accuracy
|
| 32 |
source:
|
| 33 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
|
|
| 42 |
num_few_shot: 3
|
| 43 |
metrics:
|
| 44 |
- type: acc_norm
|
| 45 |
+
value: 9.41
|
| 46 |
name: normalized accuracy
|
| 47 |
source:
|
| 48 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
|
|
| 57 |
num_few_shot: 4
|
| 58 |
metrics:
|
| 59 |
- type: exact_match
|
| 60 |
+
value: 5.06
|
| 61 |
name: exact match
|
| 62 |
source:
|
| 63 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
|
|
| 72 |
num_few_shot: 0
|
| 73 |
metrics:
|
| 74 |
- type: acc_norm
|
| 75 |
+
value: 1.23
|
| 76 |
name: acc_norm
|
| 77 |
source:
|
| 78 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
|
|
| 87 |
num_few_shot: 0
|
| 88 |
metrics:
|
| 89 |
- type: acc_norm
|
| 90 |
+
value: 1.11
|
| 91 |
name: acc_norm
|
| 92 |
source:
|
| 93 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
|
|
| 104 |
num_few_shot: 5
|
| 105 |
metrics:
|
| 106 |
- type: acc
|
| 107 |
+
value: 9.04
|
| 108 |
name: accuracy
|
| 109 |
source:
|
| 110 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.5B-chat
|
|
|
|
| 153 |
|
| 154 |
| Metric |Value|
|
| 155 |
|-------------------|----:|
|
| 156 |
+
|Avg. |13.65|
|
| 157 |
+
|IFEval (0-Shot) |56.04|
|
| 158 |
+
|BBH (3-Shot) | 9.41|
|
| 159 |
+
|MATH Lvl 5 (4-Shot)| 5.06|
|
| 160 |
+
|GPQA (0-shot) | 1.23|
|
| 161 |
+
|MuSR (0-shot) | 1.11|
|
| 162 |
+
|MMLU-PRO (5-shot) | 9.04|
|
| 163 |
|