Spaces:
Sleeping
Sleeping
import os
from datetime import datetime
from typing import Optional

import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset
# Configuration
DATASET_ID = "siro1/amd-hackathon"  # Hub dataset backing the leaderboard
HF_TOKEN = os.environ.get("HF_TOKEN")  # Optional: for write access

# Module-level cache of the leaderboard table; populated by load_dataframe().
# Annotated Optional because it starts out unset (the original annotation
# claimed a plain pd.DataFrame while assigning None).
dataframe: Optional[pd.DataFrame] = None
def load_dataframe(dataset=None) -> pd.DataFrame:
    """Load the leaderboard into a sorted DataFrame and cache it globally.

    Args:
        dataset: Optional pre-loaded ``datasets.Dataset``. When ``None``,
            the dataset is re-downloaded from the Hub with the local cache
            bypassed so the latest submissions are always shown.

    Returns:
        DataFrame with human-readable column names, sorted by throughput
        in descending order. Also stored in the module-level ``dataframe``.
    """
    global dataframe
    if dataset is None:
        # Force download of latest data by disabling the cache.
        dataset = load_dataset(
            DATASET_ID, split="train", download_mode="force_redownload"
        )
    print(f"Loaded dataset: {len(dataset)}")
    # Rename raw schema fields to the display column names.
    # NOTE: the original passed batch_size=64 without batched=True, which
    # Dataset.map silently ignores (per-example mapping); dropped here.
    results = dataset.map(
        lambda item: {
            "Team": item["team"],
            "Timestamp": item["timestamp"],
            "TTFT (ms)": item["ttft"],
            "TPOT (ms)": item["tpot"],
            "ITL (ms)": item["itl"],
            "E2E Latency (ms)": item["e2e"],
            "Throughput (tokens/s)": item["throughput"],
            "Bits per Byte": item["bits_per_byte"],
            "Byte Perplexity": item["byte_perplexity"],
            "Word Perplexity": item["word_perplexity"],
        },
        remove_columns=dataset.column_names,
    )
    df = results.to_pandas()
    df = df.sort_values("Throughput (tokens/s)", ascending=False)
    dataframe = df
    return df
def update_data(
    team_name,
    ttft,
    tpot,
    itl,
    e2e,
    throughput,
    bits_per_byte,
    byte_perplexity,
    word_perplexity,
):
    """Insert a new benchmark row into the in-memory leaderboard.

    Builds a row from the submitted metrics, appends it to the cached
    global ``dataframe``, re-sorts by throughput (descending), updates the
    cache, and returns the new DataFrame.

    Note: only the in-memory cache is mutated; pushing to the Hub is
    handled separately by ``refresh_leaderboard``.

    (Fixes: the original placed this docstring after the ``global``
    statement — making it a dead string expression — and crashed with
    AttributeError when the cache was still None.)
    """
    global dataframe
    # Tolerate a not-yet-loaded cache instead of crashing on None.
    existing_data = [] if dataframe is None else dataframe.to_dict(orient="records")
    print(f"Current data length: {len(existing_data)}")
    new_entry = {
        "Team": team_name,
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "TTFT (ms)": float(ttft),
        "TPOT (ms)": float(tpot),
        "ITL (ms)": float(itl),
        "E2E Latency (ms)": float(e2e),
        "Throughput (tokens/s)": float(throughput),
        "Bits per Byte": float(bits_per_byte),
        "Byte Perplexity": float(byte_perplexity),
        "Word Perplexity": float(word_perplexity),
    }
    existing_data.append(new_entry)
    df = pd.DataFrame(existing_data)
    df = df.sort_values("Throughput (tokens/s)", ascending=False)
    dataframe = df
    print(f"New data length: {len(dataframe)}")
    return df
def api_submit_results(
    team_name: str,
    ttft: float,
    tpot: float,
    itl: float,
    e2e: float,
    throughput: float,
    bits_per_byte: float,
    byte_perplexity: float,
    word_perplexity: float,
) -> str:
    """API endpoint: record one benchmark submission.

    Returns ["Success", <updated dataframe>] on success, or
    ["Failed: <reason>", None] on any error — never raises, so the Gradio
    API caller always receives a well-formed [status, table] pair.
    """
    try:
        updated_df = update_data(
            team_name=team_name,
            ttft=ttft,
            tpot=tpot,
            itl=itl,
            e2e=e2e,
            throughput=throughput,
            bits_per_byte=bits_per_byte,
            byte_perplexity=byte_perplexity,
            word_perplexity=word_perplexity,
        )
        # Keep the converted dataset in sync without pushing to the Hub.
        refresh_leaderboard(push_to_hub=False)
    except Exception as e:
        return [f"Failed: {e}", None]
    return ["Success", updated_df]
def refresh_leaderboard(push_to_hub: bool = True):
    """Convert the cached leaderboard back to the raw dataset schema and
    optionally push it to the Hugging Face Hub.

    Args:
        push_to_hub: When True, the converted dataset is uploaded to
            ``DATASET_ID`` using ``HF_TOKEN``.

    (Fixes: the original ran the full ``map`` conversion even when
    ``push_to_hub`` was False and then discarded the result, and crashed
    if the cache was still None.)
    """
    global dataframe
    if dataframe is None or not push_to_hub:
        # Nothing to persist, or the result would be discarded anyway.
        return
    dataset = Dataset.from_pandas(dataframe)
    # Map display column names back to the raw storage schema.
    dataset = dataset.map(
        lambda item: {
            "team": item["Team"],
            "timestamp": item["Timestamp"],
            "ttft": item["TTFT (ms)"],
            "tpot": item["TPOT (ms)"],
            "itl": item["ITL (ms)"],
            "e2e": item["E2E Latency (ms)"],
            "throughput": item["Throughput (tokens/s)"],
            "bits_per_byte": item["Bits per Byte"],
            "byte_perplexity": item["Byte Perplexity"],
            "word_perplexity": item["Word Perplexity"],
        },
        remove_columns=dataset.column_names,
    )
    dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)
def get_leaderboard():
    """Return the cached leaderboard DataFrame (used by the UI auto-refresh)."""
    # Read-only access to the module-level cache; no `global` needed.
    print(f"Getting leaderboard: {len(dataframe)}")
    return dataframe
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks app for the leaderboard.

    Layout: a single "Leaderboard" tab with the benchmark table, plus a
    hidden column of widgets that exists only to back the
    ``/submit_results`` API endpoint. Two 15-second timers keep the page
    current: one re-runs ``refresh_leaderboard``, the other re-renders the
    table from the in-memory dataframe.

    (Fixes: the original comments claimed a 30-second interval while
    ``gr.Timer(15)`` is 15 s, and declared an unused ``global dataframe``.)
    """
    with gr.Blocks(title="AMD vLLM Benchmark Leaderboard") as demo:
        gr.Markdown("# AMD vLLM Benchmark Leaderboard")
        gr.Markdown(
            "Track and compare performance and accuracy metrics for vLLM benchmarks"
        )
        with gr.Tab("Leaderboard"):
            # Initial load of the table from the Hub.
            leaderboard_table = gr.DataFrame(
                value=load_dataframe(),
                label="Benchmark Results",
                interactive=False,
            )
            # Hidden widgets: never shown in the UI; they give the
            # submit_results API endpoint typed inputs and outputs.
            with gr.Column(visible=False):
                team_input = gr.Textbox()
                ttft_input = gr.Number()
                tpot_input = gr.Number()
                itl_input = gr.Number()
                e2e_input = gr.Number()
                throughput_input = gr.Number()
                bits_input = gr.Number()
                byte_perp_input = gr.Number()
                word_perp_input = gr.Number()
                submit_output = gr.Textbox()
                submit_btn = gr.Button("Submit")
                submit_btn.click(
                    fn=api_submit_results,
                    inputs=[
                        team_input,
                        ttft_input,
                        tpot_input,
                        itl_input,
                        e2e_input,
                        throughput_input,
                        bits_input,
                        byte_perp_input,
                        word_perp_input,
                    ],
                    outputs=[submit_output, leaderboard_table],
                    api_name="submit_results",
                    concurrency_limit=10,
                    show_progress="full",
                )
            refresh_btn = gr.Button("Refresh Leaderboard")
            refresh_btn.click(
                fn=refresh_leaderboard,
            )
            # Auto-refresh every 15 seconds.
            timer = gr.Timer(15)
            timer.tick(
                fn=refresh_leaderboard,
            )
            data_timer = gr.Timer(15)
            data_timer.tick(
                fn=get_leaderboard,
                outputs=leaderboard_table,
            )
    return demo
# Create and launch the app
if __name__ == "__main__":
    leaderboard_app = create_interface()
    leaderboard_app.queue(max_size=100)
    leaderboard_app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        ssr_mode=False,
    )