HeshamHaroon committed on
Commit
a18529f
·
verified ·
1 Parent(s): d9d7dd0

Update: Auto-evaluation on Space startup

Browse files
Files changed (1) hide show
  1. afcl/app.py +19 -13
afcl/app.py CHANGED
@@ -179,19 +179,25 @@ CUSTOM_CSS = """
179
 
180
 
181
  def load_evaluation_dataset():
182
- """Load the Arabic FC dataset from HuggingFace."""
183
  try:
184
- dataset = load_dataset("HeshamHaroon/Arabic_Function_Calling", split="test")
 
185
  samples = []
186
- for item in dataset:
187
- sample = {
188
- 'id': item['id'],
189
- 'query_ar': item['query_ar'],
190
- 'functions': json.loads(item['functions']) if item['functions'] else [],
191
- 'ground_truth': json.loads(item['ground_truth']) if item['ground_truth'] else None,
192
- 'category': item['category'],
193
- }
194
- samples.append(sample)
 
 
 
 
 
195
  return samples
196
  except Exception as e:
197
  print(f"Error loading dataset: {e}")
@@ -447,8 +453,8 @@ def create_app():
447
  """)
448
  gr.HTML("""
449
  <div style="background: rgba(255,255,255,0.03); border: 1px solid rgba(255,255,255,0.1); border-radius: 12px; padding: 24px; text-align: center; flex: 1;">
450
- <div style="font-size: 2.5rem; font-weight: 700; background: linear-gradient(135deg, #22c55e, #16a34a); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">147</div>
451
- <div style="color: #a0a0a0; font-size: 0.9rem; margin-top: 8px;">Test Samples</div>
452
  </div>
453
  """)
454
  gr.HTML("""
 
179
 
180
 
181
  def load_evaluation_dataset():
182
+ """Load ALL Arabic FC dataset from HuggingFace (train + test = 1,470 samples)."""
183
  try:
184
+ # Load both train and test splits
185
+ dataset = load_dataset("HeshamHaroon/Arabic_Function_Calling")
186
  samples = []
187
+
188
+ # Process all splits (train + test)
189
+ for split_name in dataset.keys():
190
+ for item in dataset[split_name]:
191
+ sample = {
192
+ 'id': item['id'],
193
+ 'query_ar': item['query_ar'],
194
+ 'functions': json.loads(item['functions']) if item['functions'] else [],
195
+ 'ground_truth': json.loads(item['ground_truth']) if item['ground_truth'] else None,
196
+ 'category': item['category'],
197
+ }
198
+ samples.append(sample)
199
+
200
+ print(f"Loaded {len(samples)} total samples from all splits")
201
  return samples
202
  except Exception as e:
203
  print(f"Error loading dataset: {e}")
 
453
  """)
454
  gr.HTML("""
455
  <div style="background: rgba(255,255,255,0.03); border: 1px solid rgba(255,255,255,0.1); border-radius: 12px; padding: 24px; text-align: center; flex: 1;">
456
+ <div style="font-size: 2.5rem; font-weight: 700; background: linear-gradient(135deg, #22c55e, #16a34a); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">1,470</div>
457
+ <div style="color: #a0a0a0; font-size: 0.9rem; margin-top: 8px;">Total Samples</div>
458
  </div>
459
  """)
460
  gr.HTML("""