Update: Auto-evaluation on Space startup
Browse files
- afcl/app.py +19 -13
afcl/app.py
CHANGED
|
@@ -179,19 +179,25 @@ CUSTOM_CSS = """
|
|
| 179 |
|
| 180 |
|
| 181 |
def load_evaluation_dataset():
|
| 182 |
-
"""Load
|
| 183 |
try:
|
| 184 |
-
|
|
|
|
| 185 |
samples = []
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
return samples
|
| 196 |
except Exception as e:
|
| 197 |
print(f"Error loading dataset: {e}")
|
|
@@ -447,8 +453,8 @@ def create_app():
|
|
| 447 |
""")
|
| 448 |
gr.HTML("""
|
| 449 |
<div style="background: rgba(255,255,255,0.03); border: 1px solid rgba(255,255,255,0.1); border-radius: 12px; padding: 24px; text-align: center; flex: 1;">
|
| 450 |
-
<div style="font-size: 2.5rem; font-weight: 700; background: linear-gradient(135deg, #22c55e, #16a34a); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">
|
| 451 |
-
<div style="color: #a0a0a0; font-size: 0.9rem; margin-top: 8px;">
|
| 452 |
</div>
|
| 453 |
""")
|
| 454 |
gr.HTML("""
|
|
|
|
| 179 |
|
| 180 |
|
| 181 |
def load_evaluation_dataset():
|
| 182 |
+
"""Load ALL Arabic FC dataset from HuggingFace (train + test = 1,470 samples)."""
|
| 183 |
try:
|
| 184 |
+
# Load both train and test splits
|
| 185 |
+
dataset = load_dataset("HeshamHaroon/Arabic_Function_Calling")
|
| 186 |
samples = []
|
| 187 |
+
|
| 188 |
+
# Process all splits (train + test)
|
| 189 |
+
for split_name in dataset.keys():
|
| 190 |
+
for item in dataset[split_name]:
|
| 191 |
+
sample = {
|
| 192 |
+
'id': item['id'],
|
| 193 |
+
'query_ar': item['query_ar'],
|
| 194 |
+
'functions': json.loads(item['functions']) if item['functions'] else [],
|
| 195 |
+
'ground_truth': json.loads(item['ground_truth']) if item['ground_truth'] else None,
|
| 196 |
+
'category': item['category'],
|
| 197 |
+
}
|
| 198 |
+
samples.append(sample)
|
| 199 |
+
|
| 200 |
+
print(f"Loaded {len(samples)} total samples from all splits")
|
| 201 |
return samples
|
| 202 |
except Exception as e:
|
| 203 |
print(f"Error loading dataset: {e}")
|
|
|
|
| 453 |
""")
|
| 454 |
gr.HTML("""
|
| 455 |
<div style="background: rgba(255,255,255,0.03); border: 1px solid rgba(255,255,255,0.1); border-radius: 12px; padding: 24px; text-align: center; flex: 1;">
|
| 456 |
+
<div style="font-size: 2.5rem; font-weight: 700; background: linear-gradient(135deg, #22c55e, #16a34a); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">1,470</div>
|
| 457 |
+
<div style="color: #a0a0a0; font-size: 0.9rem; margin-top: 8px;">Total Samples</div>
|
| 458 |
</div>
|
| 459 |
""")
|
| 460 |
gr.HTML("""
|