HeshamHaroon committed on
Commit
a1bac93
·
verified ·
1 Parent(s): 566d03e

Initial release: Arabic Function Calling Leaderboard

Browse files
Files changed (2) hide show
  1. afcl/app.py +18 -84
  2. data/leaderboard.json +371 -84
afcl/app.py CHANGED
@@ -36,6 +36,7 @@ The **Arabic Function Calling Leaderboard (AFCL)** evaluates Large Language Mode
36
  LEADERBOARD_COLUMNS = {
37
  "rank": {"label": "المرتبة", "label_en": "Rank", "type": "number"},
38
  "model": {"label": "النموذج", "label_en": "Model", "type": "str"},
 
39
  "overall": {"label": "الدقة الكلية", "label_en": "Overall", "type": "number"},
40
  "simple": {"label": "بسيط", "label_en": "Simple", "type": "number"},
41
  "multiple": {"label": "متعدد", "label_en": "Multiple", "type": "number"},
@@ -43,88 +44,11 @@ LEADERBOARD_COLUMNS = {
43
  "parallel_multiple": {"label": "متوازي متعدد", "label_en": "Parallel Multiple", "type": "number"},
44
  "irrelevance": {"label": "اللا صلة", "label_en": "Irrelevance", "type": "number"},
45
  "dialect_handling": {"label": "اللهجات", "label_en": "Dialects", "type": "number"},
 
46
  }
47
 
48
- # Sample leaderboard data (will be replaced with actual results)
49
- SAMPLE_LEADERBOARD = [
50
- {
51
- "rank": 1,
52
- "model": "GPT-4o",
53
- "overall": 78.5,
54
- "simple": 85.2,
55
- "multiple": 80.1,
56
- "parallel": 75.3,
57
- "parallel_multiple": 72.4,
58
- "irrelevance": 82.0,
59
- "dialect_handling": 70.5,
60
- },
61
- {
62
- "rank": 2,
63
- "model": "Claude 3.5 Sonnet",
64
- "overall": 76.2,
65
- "simple": 83.5,
66
- "multiple": 78.8,
67
- "parallel": 73.2,
68
- "parallel_multiple": 70.1,
69
- "irrelevance": 80.5,
70
- "dialect_handling": 68.2,
71
- },
72
- {
73
- "rank": 3,
74
- "model": "Jais-30B",
75
- "overall": 72.8,
76
- "simple": 78.5,
77
- "multiple": 74.2,
78
- "parallel": 70.8,
79
- "parallel_multiple": 68.5,
80
- "irrelevance": 75.2,
81
- "dialect_handling": 72.0,
82
- },
83
- {
84
- "rank": 4,
85
- "model": "ALLaM-7B",
86
- "overall": 68.5,
87
- "simple": 75.2,
88
- "multiple": 70.5,
89
- "parallel": 65.8,
90
- "parallel_multiple": 62.3,
91
- "irrelevance": 70.8,
92
- "dialect_handling": 68.5,
93
- },
94
- {
95
- "rank": 5,
96
- "model": "Qwen2.5-72B",
97
- "overall": 74.1,
98
- "simple": 80.5,
99
- "multiple": 76.2,
100
- "parallel": 72.5,
101
- "parallel_multiple": 69.8,
102
- "irrelevance": 77.5,
103
- "dialect_handling": 65.2,
104
- },
105
- {
106
- "rank": 6,
107
- "model": "SILMA-9B",
108
- "overall": 65.2,
109
- "simple": 72.8,
110
- "multiple": 68.5,
111
- "parallel": 62.1,
112
- "parallel_multiple": 58.5,
113
- "irrelevance": 68.2,
114
- "dialect_handling": 62.8,
115
- },
116
- {
117
- "rank": 7,
118
- "model": "Llama-3.1-70B",
119
- "overall": 71.5,
120
- "simple": 78.2,
121
- "multiple": 73.5,
122
- "parallel": 69.8,
123
- "parallel_multiple": 66.2,
124
- "irrelevance": 74.5,
125
- "dialect_handling": 62.5,
126
- },
127
- ]
128
 
129
 
130
  def get_leaderboard_data() -> List[Dict]:
@@ -142,6 +66,11 @@ def format_leaderboard_dataframe(data: List[Dict], use_arabic: bool = True) -> p
142
  """Convert leaderboard data to pandas DataFrame."""
143
  df = pd.DataFrame(data)
144
 
 
 
 
 
 
145
  # Rename columns based on language preference
146
  column_mapping = {}
147
  for col, info in LEADERBOARD_COLUMNS.items():
@@ -151,10 +80,15 @@ def format_leaderboard_dataframe(data: List[Dict], use_arabic: bool = True) -> p
151
 
152
  df = df.rename(columns=column_mapping)
153
 
154
- # Format numeric columns
155
  for col in df.columns:
156
  if df[col].dtype in ['float64', 'float32']:
157
- df[col] = df[col].apply(lambda x: f"{x:.1f}%")
 
 
 
 
 
158
 
159
  return df
160
 
@@ -375,12 +309,12 @@ def create_app():
375
  gr.Markdown(f"""
376
  <div style="text-align: center; padding: 15px; background: #f5f5f5; border-radius: 8px;">
377
  <div style="font-size: 2rem; font-weight: bold; color: #1a5f2a;">{len(data)}</div>
378
- <div style="color: #666;">Models Evaluated | النماذج المقيّمة</div>
379
  </div>
380
  """)
381
  gr.Markdown("""
382
  <div style="text-align: center; padding: 15px; background: #f5f5f5; border-radius: 8px;">
383
- <div style="font-size: 2rem; font-weight: bold; color: #1a5f2a;">1,470+</div>
384
  <div style="color: #666;">Test Samples | عينات الاختبار</div>
385
  </div>
386
  """)
 
36
  LEADERBOARD_COLUMNS = {
37
  "rank": {"label": "المرتبة", "label_en": "Rank", "type": "number"},
38
  "model": {"label": "النموذج", "label_en": "Model", "type": "str"},
39
+ "organization": {"label": "المنظمة", "label_en": "Organization", "type": "str"},
40
  "overall": {"label": "الدقة الكلية", "label_en": "Overall", "type": "number"},
41
  "simple": {"label": "بسيط", "label_en": "Simple", "type": "number"},
42
  "multiple": {"label": "متعدد", "label_en": "Multiple", "type": "number"},
 
44
  "parallel_multiple": {"label": "متوازي متعدد", "label_en": "Parallel Multiple", "type": "number"},
45
  "irrelevance": {"label": "اللا صلة", "label_en": "Irrelevance", "type": "number"},
46
  "dialect_handling": {"label": "اللهجات", "label_en": "Dialects", "type": "number"},
47
+ "status": {"label": "الحالة", "label_en": "Status", "type": "str"},
48
  }
49
 
50
+ # Empty sample - will load from file
51
+ SAMPLE_LEADERBOARD = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
 
54
  def get_leaderboard_data() -> List[Dict]:
 
66
  """Convert leaderboard data to pandas DataFrame."""
67
  df = pd.DataFrame(data)
68
 
69
+ # Select columns to display
70
+ display_cols = ["rank", "model", "organization", "overall", "simple", "multiple",
71
+ "parallel", "parallel_multiple", "irrelevance", "dialect_handling", "status"]
72
+ df = df[[c for c in display_cols if c in df.columns]]
73
+
74
  # Rename columns based on language preference
75
  column_mapping = {}
76
  for col, info in LEADERBOARD_COLUMNS.items():
 
80
 
81
  df = df.rename(columns=column_mapping)
82
 
83
+ # Format numeric columns (show as percentage, but mark 0.0 as "Pending")
84
  for col in df.columns:
85
  if df[col].dtype in ['float64', 'float32']:
86
+ df[col] = df[col].apply(lambda x: "⏳ Pending" if x == 0.0 else f"{x:.1f}%")
87
+
88
+ # Format status column
89
+ status_col = "الحالة" if use_arabic else "Status"
90
+ if status_col in df.columns:
91
+ df[status_col] = df[status_col].apply(lambda x: "⏳ قيد التقييم" if x == "pending" else "✅ مكتمل" if use_arabic else "⏳ Pending" if x == "pending" else "✅ Completed")
92
 
93
  return df
94
 
 
309
  gr.Markdown(f"""
310
  <div style="text-align: center; padding: 15px; background: #f5f5f5; border-radius: 8px;">
311
  <div style="font-size: 2rem; font-weight: bold; color: #1a5f2a;">{len(data)}</div>
312
+ <div style="color: #666;">Models to Evaluate | النماذج للتقييم</div>
313
  </div>
314
  """)
315
  gr.Markdown("""
316
  <div style="text-align: center; padding: 15px; background: #f5f5f5; border-radius: 8px;">
317
+ <div style="font-size: 2rem; font-weight: bold; color: #1a5f2a;">1,470</div>
318
  <div style="color: #666;">Test Samples | عينات الاختبار</div>
319
  </div>
320
  """)
data/leaderboard.json CHANGED
@@ -1,107 +1,394 @@
1
  [
2
  {
3
  "rank": 1,
4
- "model": "GPT-4o",
5
- "overall": 78.5,
6
- "simple": 85.2,
7
- "multiple": 80.1,
8
- "parallel": 75.3,
9
- "parallel_multiple": 72.4,
10
- "irrelevance": 82.0,
11
- "dialect_handling": 70.5,
12
- "java": 76.8,
13
- "javascript": 74.2,
14
- "rest": 79.5,
15
- "sql": 77.3
16
  },
17
  {
18
  "rank": 2,
19
- "model": "Claude 3.5 Sonnet",
20
- "overall": 76.2,
21
- "simple": 83.5,
22
- "multiple": 78.8,
23
- "parallel": 73.2,
24
- "parallel_multiple": 70.1,
25
- "irrelevance": 80.5,
26
- "dialect_handling": 68.2,
27
- "java": 75.2,
28
- "javascript": 72.8,
29
- "rest": 78.2,
30
- "sql": 76.5
31
  },
32
  {
33
  "rank": 3,
34
- "model": "Qwen2.5-72B",
35
- "overall": 74.1,
36
- "simple": 80.5,
37
- "multiple": 76.2,
38
- "parallel": 72.5,
39
- "parallel_multiple": 69.8,
40
- "irrelevance": 77.5,
41
- "dialect_handling": 65.2,
42
- "java": 72.5,
43
- "javascript": 70.8,
44
- "rest": 75.2,
45
- "sql": 73.8
46
  },
47
  {
48
  "rank": 4,
49
- "model": "Jais-30B",
50
- "overall": 72.8,
51
- "simple": 78.5,
52
- "multiple": 74.2,
53
- "parallel": 70.8,
54
- "parallel_multiple": 68.5,
55
- "irrelevance": 75.2,
56
- "dialect_handling": 72.0,
57
- "java": 68.5,
58
- "javascript": 66.2,
59
- "rest": 71.8,
60
- "sql": 69.5
61
  },
62
  {
63
  "rank": 5,
64
- "model": "Llama-3.1-70B",
65
- "overall": 71.5,
66
- "simple": 78.2,
67
- "multiple": 73.5,
68
- "parallel": 69.8,
69
- "parallel_multiple": 66.2,
70
- "irrelevance": 74.5,
71
- "dialect_handling": 62.5,
72
- "java": 70.2,
73
- "javascript": 68.5,
74
- "rest": 73.5,
75
- "sql": 71.2
76
  },
77
  {
78
  "rank": 6,
79
- "model": "ALLaM-7B",
80
- "overall": 68.5,
81
- "simple": 75.2,
82
- "multiple": 70.5,
83
- "parallel": 65.8,
84
- "parallel_multiple": 62.3,
85
- "irrelevance": 70.8,
86
- "dialect_handling": 68.5,
87
- "java": 62.5,
88
- "javascript": 60.2,
89
- "rest": 66.8,
90
- "sql": 64.5
91
  },
92
  {
93
  "rank": 7,
94
- "model": "SILMA-9B",
95
- "overall": 65.2,
96
- "simple": 72.8,
97
- "multiple": 68.5,
98
- "parallel": 62.1,
99
- "parallel_multiple": 58.5,
100
- "irrelevance": 68.2,
101
- "dialect_handling": 62.8,
102
- "java": 58.5,
103
- "javascript": 56.2,
104
- "rest": 63.2,
105
- "sql": 60.8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
  ]
 
1
  [
2
  {
3
  "rank": 1,
4
+ "model": "Jais-30B-Chat",
5
+ "model_url": "https://huggingface.co/inceptionai/jais-30b-chat-v3",
6
+ "organization": "Inception AI",
7
+ "overall": 0.0,
8
+ "simple": 0.0,
9
+ "multiple": 0.0,
10
+ "parallel": 0.0,
11
+ "parallel_multiple": 0.0,
12
+ "irrelevance": 0.0,
13
+ "dialect_handling": 0.0,
14
+ "status": "pending"
 
15
  },
16
  {
17
  "rank": 2,
18
+ "model": "ALLaM-7B-Instruct",
19
+ "model_url": "https://huggingface.co/sdaia/allam-1-7b-instruct",
20
+ "organization": "SDAIA",
21
+ "overall": 0.0,
22
+ "simple": 0.0,
23
+ "multiple": 0.0,
24
+ "parallel": 0.0,
25
+ "parallel_multiple": 0.0,
26
+ "irrelevance": 0.0,
27
+ "dialect_handling": 0.0,
28
+ "status": "pending"
 
29
  },
30
  {
31
  "rank": 3,
32
+ "model": "SILMA-9B-Instruct",
33
+ "model_url": "https://huggingface.co/silma-ai/SILMA-9B-Instruct-v1.0",
34
+ "organization": "Silma AI",
35
+ "overall": 0.0,
36
+ "simple": 0.0,
37
+ "multiple": 0.0,
38
+ "parallel": 0.0,
39
+ "parallel_multiple": 0.0,
40
+ "irrelevance": 0.0,
41
+ "dialect_handling": 0.0,
42
+ "status": "pending"
 
43
  },
44
  {
45
  "rank": 4,
46
+ "model": "Fanar-Star-1.2B",
47
+ "model_url": "https://huggingface.co/QatarComputing/fanar-star-1.2b",
48
+ "organization": "Qatar Computing Research Institute",
49
+ "overall": 0.0,
50
+ "simple": 0.0,
51
+ "multiple": 0.0,
52
+ "parallel": 0.0,
53
+ "parallel_multiple": 0.0,
54
+ "irrelevance": 0.0,
55
+ "dialect_handling": 0.0,
56
+ "status": "pending"
 
57
  },
58
  {
59
  "rank": 5,
60
+ "model": "Yehia-7B-Preview",
61
+ "model_url": "https://huggingface.co/Kira-Arabic/Yehia-7B-preview",
62
+ "organization": "Kira Arabic",
63
+ "overall": 0.0,
64
+ "simple": 0.0,
65
+ "multiple": 0.0,
66
+ "parallel": 0.0,
67
+ "parallel_multiple": 0.0,
68
+ "irrelevance": 0.0,
69
+ "dialect_handling": 0.0,
70
+ "status": "pending"
 
71
  },
72
  {
73
  "rank": 6,
74
+ "model": "ArabianGPT-1B",
75
+ "model_url": "https://huggingface.co/ahmed-samir/arabiangpt-1b",
76
+ "organization": "Ahmed Samir",
77
+ "overall": 0.0,
78
+ "simple": 0.0,
79
+ "multiple": 0.0,
80
+ "parallel": 0.0,
81
+ "parallel_multiple": 0.0,
82
+ "irrelevance": 0.0,
83
+ "dialect_handling": 0.0,
84
+ "status": "pending"
 
85
  },
86
  {
87
  "rank": 7,
88
+ "model": "Qwen2.5-72B-Instruct",
89
+ "model_url": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
90
+ "organization": "Alibaba Qwen",
91
+ "overall": 0.0,
92
+ "simple": 0.0,
93
+ "multiple": 0.0,
94
+ "parallel": 0.0,
95
+ "parallel_multiple": 0.0,
96
+ "irrelevance": 0.0,
97
+ "dialect_handling": 0.0,
98
+ "status": "pending"
99
+ },
100
+ {
101
+ "rank": 8,
102
+ "model": "Qwen2.5-32B-Instruct",
103
+ "model_url": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct",
104
+ "organization": "Alibaba Qwen",
105
+ "overall": 0.0,
106
+ "simple": 0.0,
107
+ "multiple": 0.0,
108
+ "parallel": 0.0,
109
+ "parallel_multiple": 0.0,
110
+ "irrelevance": 0.0,
111
+ "dialect_handling": 0.0,
112
+ "status": "pending"
113
+ },
114
+ {
115
+ "rank": 9,
116
+ "model": "Qwen2.5-7B-Instruct",
117
+ "model_url": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
118
+ "organization": "Alibaba Qwen",
119
+ "overall": 0.0,
120
+ "simple": 0.0,
121
+ "multiple": 0.0,
122
+ "parallel": 0.0,
123
+ "parallel_multiple": 0.0,
124
+ "irrelevance": 0.0,
125
+ "dialect_handling": 0.0,
126
+ "status": "pending"
127
+ },
128
+ {
129
+ "rank": 10,
130
+ "model": "Llama-3.1-70B-Instruct",
131
+ "model_url": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
132
+ "organization": "Meta",
133
+ "overall": 0.0,
134
+ "simple": 0.0,
135
+ "multiple": 0.0,
136
+ "parallel": 0.0,
137
+ "parallel_multiple": 0.0,
138
+ "irrelevance": 0.0,
139
+ "dialect_handling": 0.0,
140
+ "status": "pending"
141
+ },
142
+ {
143
+ "rank": 11,
144
+ "model": "Llama-3.1-8B-Instruct",
145
+ "model_url": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
146
+ "organization": "Meta",
147
+ "overall": 0.0,
148
+ "simple": 0.0,
149
+ "multiple": 0.0,
150
+ "parallel": 0.0,
151
+ "parallel_multiple": 0.0,
152
+ "irrelevance": 0.0,
153
+ "dialect_handling": 0.0,
154
+ "status": "pending"
155
+ },
156
+ {
157
+ "rank": 12,
158
+ "model": "Gemma-2-27B-IT",
159
+ "model_url": "https://huggingface.co/google/gemma-2-27b-it",
160
+ "organization": "Google",
161
+ "overall": 0.0,
162
+ "simple": 0.0,
163
+ "multiple": 0.0,
164
+ "parallel": 0.0,
165
+ "parallel_multiple": 0.0,
166
+ "irrelevance": 0.0,
167
+ "dialect_handling": 0.0,
168
+ "status": "pending"
169
+ },
170
+ {
171
+ "rank": 13,
172
+ "model": "Gemma-2-9B-IT",
173
+ "model_url": "https://huggingface.co/google/gemma-2-9b-it",
174
+ "organization": "Google",
175
+ "overall": 0.0,
176
+ "simple": 0.0,
177
+ "multiple": 0.0,
178
+ "parallel": 0.0,
179
+ "parallel_multiple": 0.0,
180
+ "irrelevance": 0.0,
181
+ "dialect_handling": 0.0,
182
+ "status": "pending"
183
+ },
184
+ {
185
+ "rank": 14,
186
+ "model": "Aya-Expanse-32B",
187
+ "model_url": "https://huggingface.co/CohereForAI/aya-expanse-32b",
188
+ "organization": "Cohere For AI",
189
+ "overall": 0.0,
190
+ "simple": 0.0,
191
+ "multiple": 0.0,
192
+ "parallel": 0.0,
193
+ "parallel_multiple": 0.0,
194
+ "irrelevance": 0.0,
195
+ "dialect_handling": 0.0,
196
+ "status": "pending"
197
+ },
198
+ {
199
+ "rank": 15,
200
+ "model": "Aya-Expanse-8B",
201
+ "model_url": "https://huggingface.co/CohereForAI/aya-expanse-8b",
202
+ "organization": "Cohere For AI",
203
+ "overall": 0.0,
204
+ "simple": 0.0,
205
+ "multiple": 0.0,
206
+ "parallel": 0.0,
207
+ "parallel_multiple": 0.0,
208
+ "irrelevance": 0.0,
209
+ "dialect_handling": 0.0,
210
+ "status": "pending"
211
+ },
212
+ {
213
+ "rank": 16,
214
+ "model": "c4ai-command-r7b-arabic",
215
+ "model_url": "https://huggingface.co/CohereForAI/c4ai-command-r7b-arabic-02-2025",
216
+ "organization": "Cohere For AI",
217
+ "overall": 0.0,
218
+ "simple": 0.0,
219
+ "multiple": 0.0,
220
+ "parallel": 0.0,
221
+ "parallel_multiple": 0.0,
222
+ "irrelevance": 0.0,
223
+ "dialect_handling": 0.0,
224
+ "status": "pending"
225
+ },
226
+ {
227
+ "rank": 17,
228
+ "model": "Falcon-180B-Chat",
229
+ "model_url": "https://huggingface.co/tiiuae/falcon-180B-chat",
230
+ "organization": "TII UAE",
231
+ "overall": 0.0,
232
+ "simple": 0.0,
233
+ "multiple": 0.0,
234
+ "parallel": 0.0,
235
+ "parallel_multiple": 0.0,
236
+ "irrelevance": 0.0,
237
+ "dialect_handling": 0.0,
238
+ "status": "pending"
239
+ },
240
+ {
241
+ "rank": 18,
242
+ "model": "Falcon-40B-Instruct",
243
+ "model_url": "https://huggingface.co/tiiuae/falcon-40b-instruct",
244
+ "organization": "TII UAE",
245
+ "overall": 0.0,
246
+ "simple": 0.0,
247
+ "multiple": 0.0,
248
+ "parallel": 0.0,
249
+ "parallel_multiple": 0.0,
250
+ "irrelevance": 0.0,
251
+ "dialect_handling": 0.0,
252
+ "status": "pending"
253
+ },
254
+ {
255
+ "rank": 19,
256
+ "model": "DeepSeek-V3",
257
+ "model_url": "https://huggingface.co/deepseek-ai/DeepSeek-V3",
258
+ "organization": "DeepSeek",
259
+ "overall": 0.0,
260
+ "simple": 0.0,
261
+ "multiple": 0.0,
262
+ "parallel": 0.0,
263
+ "parallel_multiple": 0.0,
264
+ "irrelevance": 0.0,
265
+ "dialect_handling": 0.0,
266
+ "status": "pending"
267
+ },
268
+ {
269
+ "rank": 20,
270
+ "model": "Mistral-Large-Instruct",
271
+ "model_url": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2411",
272
+ "organization": "Mistral AI",
273
+ "overall": 0.0,
274
+ "simple": 0.0,
275
+ "multiple": 0.0,
276
+ "parallel": 0.0,
277
+ "parallel_multiple": 0.0,
278
+ "irrelevance": 0.0,
279
+ "dialect_handling": 0.0,
280
+ "status": "pending"
281
+ },
282
+ {
283
+ "rank": 21,
284
+ "model": "Mixtral-8x22B-Instruct",
285
+ "model_url": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1",
286
+ "organization": "Mistral AI",
287
+ "overall": 0.0,
288
+ "simple": 0.0,
289
+ "multiple": 0.0,
290
+ "parallel": 0.0,
291
+ "parallel_multiple": 0.0,
292
+ "irrelevance": 0.0,
293
+ "dialect_handling": 0.0,
294
+ "status": "pending"
295
+ },
296
+ {
297
+ "rank": 22,
298
+ "model": "Phi-4",
299
+ "model_url": "https://huggingface.co/microsoft/phi-4",
300
+ "organization": "Microsoft",
301
+ "overall": 0.0,
302
+ "simple": 0.0,
303
+ "multiple": 0.0,
304
+ "parallel": 0.0,
305
+ "parallel_multiple": 0.0,
306
+ "irrelevance": 0.0,
307
+ "dialect_handling": 0.0,
308
+ "status": "pending"
309
+ },
310
+ {
311
+ "rank": 23,
312
+ "model": "BLOOM-176B",
313
+ "model_url": "https://huggingface.co/bigscience/bloom",
314
+ "organization": "BigScience",
315
+ "overall": 0.0,
316
+ "simple": 0.0,
317
+ "multiple": 0.0,
318
+ "parallel": 0.0,
319
+ "parallel_multiple": 0.0,
320
+ "irrelevance": 0.0,
321
+ "dialect_handling": 0.0,
322
+ "status": "pending"
323
+ },
324
+ {
325
+ "rank": 24,
326
+ "model": "BLOOMZ-7B1",
327
+ "model_url": "https://huggingface.co/bigscience/bloomz-7b1",
328
+ "organization": "BigScience",
329
+ "overall": 0.0,
330
+ "simple": 0.0,
331
+ "multiple": 0.0,
332
+ "parallel": 0.0,
333
+ "parallel_multiple": 0.0,
334
+ "irrelevance": 0.0,
335
+ "dialect_handling": 0.0,
336
+ "status": "pending"
337
+ },
338
+ {
339
+ "rank": 25,
340
+ "model": "AceGPT-13B-Chat",
341
+ "model_url": "https://huggingface.co/FreedomIntelligence/AceGPT-13B-chat",
342
+ "organization": "FreedomIntelligence",
343
+ "overall": 0.0,
344
+ "simple": 0.0,
345
+ "multiple": 0.0,
346
+ "parallel": 0.0,
347
+ "parallel_multiple": 0.0,
348
+ "irrelevance": 0.0,
349
+ "dialect_handling": 0.0,
350
+ "status": "pending"
351
+ },
352
+ {
353
+ "rank": 26,
354
+ "model": "AraGPT2-Mega",
355
+ "model_url": "https://huggingface.co/aubmindlab/aragpt2-mega",
356
+ "organization": "AUB MIND Lab",
357
+ "overall": 0.0,
358
+ "simple": 0.0,
359
+ "multiple": 0.0,
360
+ "parallel": 0.0,
361
+ "parallel_multiple": 0.0,
362
+ "irrelevance": 0.0,
363
+ "dialect_handling": 0.0,
364
+ "status": "pending"
365
+ },
366
+ {
367
+ "rank": 27,
368
+ "model": "Arabic-Llama-3.1-8B",
369
+ "model_url": "https://huggingface.co/Ammar-Arabi/Arabic-Llama-3.1-8B-Instruct",
370
+ "organization": "Ammar Arabi",
371
+ "overall": 0.0,
372
+ "simple": 0.0,
373
+ "multiple": 0.0,
374
+ "parallel": 0.0,
375
+ "parallel_multiple": 0.0,
376
+ "irrelevance": 0.0,
377
+ "dialect_handling": 0.0,
378
+ "status": "pending"
379
+ },
380
+ {
381
+ "rank": 28,
382
+ "model": "Llama3-8B-Arabic-Instruct",
383
+ "model_url": "https://huggingface.co/MahmoudAshraf/Llama3-8B-Arabic-instruct",
384
+ "organization": "Mahmoud Ashraf",
385
+ "overall": 0.0,
386
+ "simple": 0.0,
387
+ "multiple": 0.0,
388
+ "parallel": 0.0,
389
+ "parallel_multiple": 0.0,
390
+ "irrelevance": 0.0,
391
+ "dialect_handling": 0.0,
392
+ "status": "pending"
393
  }
394
  ]