Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Script to populate the Hugging Face dataset with mock data | |
| """ | |
| from datasets import Dataset | |
| from datetime import datetime, timedelta | |
| import random | |
# Configuration
DATASET_ID = "siro1/amd-hackathon"
HF_TOKEN = None  # Set this if needed for private repos

# Team names that each mock entry is randomly attributed to.
TEAMS = ["Team Alpha", "Team Beta", "Team Gamma", "Team Delta", "Team Epsilon"]

# Zero-padded, most-significant-field-first layout: sorting these strings
# lexicographically is the same as sorting them chronologically.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def generate_mock_data(num_entries=10, seed=None, base_date=None):
    """Build a list of mock leaderboard entries.

    Each entry is a dict with the keys ``team``, ``timestamp``, ``ttft``,
    ``tpot``, ``itl``, ``e2e``, ``throughput``, ``bits_per_byte``,
    ``byte_perplexity`` and ``word_perplexity``. Numeric values are a fixed
    baseline plus uniform noise, rounded to two decimals.

    Args:
        num_entries: Number of entries to generate. Entry ``i`` is stamped
            ``i / 2`` days after ``base_date`` (i.e. one entry per 12 hours).
        seed: Optional seed for a private ``random.Random`` instance. Pass a
            value to get reproducible output; ``None`` seeds from the OS.
        base_date: ``datetime`` of the first entry. Defaults to seven days
            before the current (naive, local) time.

    Returns:
        The list of entry dicts, sorted by their ``timestamp`` string.
    """
    # A private generator keeps this function from perturbing (or depending
    # on) the state of the module-level ``random`` generator.
    rng = random.Random(seed)
    if base_date is None:
        base_date = datetime.now() - timedelta(days=7)

    mock_data = []
    for i in range(num_entries):
        team = rng.choice(TEAMS)
        timestamp = (base_date + timedelta(days=i / 2)).strftime(TIMESTAMP_FORMAT)

        # Vary configurations
        input_length = rng.choice([128, 256, 512])
        output_length = rng.choice([128, 256, 512])
        concurrent_requests = rng.choice([8, 16, 32, 64])

        # Generate performance metrics with some variance
        base_ttft = 40 + rng.uniform(-10, 10)
        base_tpot = 11 + rng.uniform(-2, 2)
        base_itl = 10 + rng.uniform(-2, 2)
        # e2e grows with the total sequence length, throughput with the
        # number of concurrent requests.
        base_e2e = 1500 + (input_length + output_length) * 2 + rng.uniform(-200, 200)
        base_throughput = 2000 + concurrent_requests * 20 + rng.uniform(-200, 200)
        bits_per_byte = 0.54 + rng.uniform(-0.02, 0.02)
        byte_perplexity = 1.45 + rng.uniform(-0.02, 0.02)
        word_perplexity = 4.13 + rng.uniform(-0.02, 0.02)

        mock_data.append(
            {
                "team": team,
                "timestamp": timestamp,
                "ttft": round(base_ttft, 2),
                "tpot": round(base_tpot, 2),
                "itl": round(base_itl, 2),
                "e2e": round(base_e2e, 2),
                "throughput": round(base_throughput, 2),
                "bits_per_byte": round(bits_per_byte, 2),
                "byte_perplexity": round(byte_perplexity, 2),
                "word_perplexity": round(word_perplexity, 2),
            }
        )

    # Sort by timestamp
    mock_data.sort(key=lambda x: x["timestamp"])
    return mock_data


def main():
    """Generate the mock entries, push them to the Hub, and print a sample."""
    mock_data = generate_mock_data()

    # Create dataset and push to hub
    print(f"Creating dataset with {len(mock_data)} entries...")
    dataset = Dataset.from_list(mock_data)
    print(f"Pushing to Hugging Face Hub: {DATASET_ID}")
    dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)
    print("Dataset populated successfully!")

    print("\nSample entries:")
    for entry in mock_data[:3]:
        print(
            f"- {entry['team']} at {entry['timestamp']}: throughput={entry['throughput']}"
        )


# Only upload when executed as a script; importing this module must not
# trigger a network push.
if __name__ == "__main__":
    main()