| | import os, tempfile |
| | import numpy as np |
| | import pandas as pd |
| | import matplotlib.pyplot as plt |
| | from pandas.api.types import is_datetime64_any_dtype as is_datetime |
| | from sklearn.model_selection import train_test_split |
| | from sklearn.metrics import mean_absolute_error, r2_score |
| | from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor |
| | import gradio as gr |
| |
|
| | |
# Column names recognised as the prediction target, in priority order.
_DEFAULT_TARGET_CANDIDATES = ("power_usage_kwh", "energy_kwh", "power_kwh", "energy")


def infer_target_column(df: pd.DataFrame, candidates=_DEFAULT_TARGET_CANDIDATES):
    """Return the name of the first candidate target column present in ``df``.

    Args:
        df: Frame whose column labels are searched.
        candidates: Column names to try, in priority order. Defaults to the
            four energy/power names this module has always recognised.

    Returns:
        The first entry of ``candidates`` that is a column of ``df``.

    Raises:
        ValueError: If none of the candidates is a column of ``df``.
    """
    for name in candidates:
        if name in df.columns:
            return name
    # Rendered as ['a','b',...] so the default message is unchanged.
    expected = ",".join(repr(name) for name in candidates)
    raise ValueError(f"Target column not found. Expected one of: [{expected}].")
| |
|
def ensure_datetime_naive(df: pd.DataFrame, tz_target: str = "Asia/Dubai"):
    """Normalise the ``timestamp`` column to timezone-naive local datetimes.

    Values are parsed as UTC (unparseable entries become ``NaT``), converted
    to ``tz_target`` and then stripped of their timezone, so downstream
    ``.dt`` accessors see local wall-clock time.

    Args:
        df: Input frame. It is never modified in place.
        tz_target: Timezone name whose wall-clock time should be kept.

    Returns:
        ``df`` itself when it has no ``timestamp`` column; otherwise a copy
        whose ``timestamp`` column is a naive datetime column.
    """
    import warnings

    if "timestamp" not in df.columns:
        return df

    # utc=True always yields a tz-aware (UTC) series, so the only expected
    # failure below is an unusable ``tz_target``.
    ts = pd.to_datetime(df["timestamp"], errors="coerce", utc=True)
    try:
        ts = ts.dt.tz_convert(tz_target)
    except (KeyError, ValueError, TypeError) as exc:
        # Best-effort fallback: keep UTC wall-clock time, but say so, because
        # every hour-of-day feature derived later will be shifted.
        warnings.warn(
            f"Could not convert timestamps to {tz_target!r} ({exc}); "
            "keeping UTC wall-clock times.",
            RuntimeWarning,
            stacklevel=2,
        )
    ts = ts.dt.tz_localize(None)

    out = df.copy()
    out["timestamp"] = ts
    return out
| |
|
def feature_engineer(df: pd.DataFrame) -> pd.DataFrame:
    """Build the model-ready frame: cleaned inputs plus engineered features.

    Steps, in order:
      1. Normalise ``timestamp`` to naive Asia/Dubai time.
      2. Forward/backward fill gaps in numeric columns.
      3. Add calendar features (hour, weekday, month, cyclic encodings), or
         zero placeholders when no usable timestamp exists.
      4. Add domain features from cooling efficiency, server load and
         ambient temperature when those columns are present.
      5. Add lagged / rolling statistics of the target.
      6. Fill whatever gaps the previous steps introduced.

    Every feature derived from the target uses only the *previous* row's
    value, so a row's features never contain the value being predicted.
    Lag and rolling features follow row order; rows are assumed to be in
    chronological order (NOTE(review): not enforced here - confirm upstream).

    Args:
        df: Raw input frame. It is not modified.

    Returns:
        A new frame with the original columns plus the engineered ones.

    Raises:
        ValueError: If no recognised target column is present.
    """
    df = ensure_datetime_naive(df.copy(), tz_target="Asia/Dubai")

    # Fill numeric gaps first so derived features are not seeded with NaN.
    num_cols = df.select_dtypes(include=[np.number]).columns
    df[num_cols] = df[num_cols].ffill().bfill()

    if "timestamp" in df.columns and is_datetime(df["timestamp"]):
        stamp = df["timestamp"].dt
        df["hour"] = stamp.hour
        df["dayofweek"] = stamp.dayofweek
        df["is_weekend"] = (df["dayofweek"] >= 5).astype(int)
        df["month"] = stamp.month
        df["dayofyear"] = stamp.dayofyear
        # Cyclic encodings keep 23:00 next to 00:00 and Sunday next to Monday.
        df["hour_sin"] = np.sin(2 * np.pi * df["hour"] / 24)
        df["hour_cos"] = np.cos(2 * np.pi * df["hour"] / 24)
        df["dow_sin"] = np.sin(2 * np.pi * df["dayofweek"] / 7)
        df["dow_cos"] = np.cos(2 * np.pi * df["dayofweek"] / 7)
    else:
        # Keep the feature schema stable when there is no timestamp.
        calendar_cols = ("hour", "dayofweek", "is_weekend", "month", "dayofyear",
                         "hour_sin", "hour_cos", "dow_sin", "dow_cos")
        for col in calendar_cols:
            if col not in df.columns:
                df[col] = 0

    tgt = infer_target_column(df)
    # Previous row's target: the only target information a row may see.
    prev_target = df[tgt].shift(1)

    if "cooling_eff_pct" in df.columns:
        df["cooling_ineff_pct"] = 100 - df["cooling_eff_pct"]
    if "server_load_pct" in df.columns:
        # Ratio of the previous row's energy to the previous row's load
        # (floored at 1 to avoid dividing by zero). Using the current row
        # would let the model recover the target as ratio * load.
        prev_load = df["server_load_pct"].shift(1)
        df["energy_per_load"] = prev_target / np.maximum(prev_load, 1)
    if "ambient_temp_c" in df.columns and "server_load_pct" in df.columns:
        df["temp_load_interaction"] = df["ambient_temp_c"] * df["server_load_pct"]

    # Rolling means are taken over the shifted series so each window ends at
    # the previous row and excludes the value being predicted.
    df["target_lag1"] = prev_target
    df["target_roll3"] = prev_target.rolling(3, min_periods=1).mean()
    df["target_roll24"] = prev_target.rolling(24, min_periods=1).mean()

    # The first row has no history; it is back-filled from the second row.
    df = df.ffill().bfill()
    return df
| |
|
def get_model(name: str):
    """Instantiate the regressor selected by ``name``.

    ``"Gradient Boosting"`` yields a ``GradientBoostingRegressor``; every
    other value yields a 300-tree ``RandomForestRegressor``. Both are seeded
    with ``random_state=42``.
    """
    if name == "Gradient Boosting":
        return GradientBoostingRegressor(random_state=42)
    # Any other name falls back to the random forest.
    return RandomForestRegressor(n_estimators=300, random_state=42)
| |
|
def feature_target_split(df: pd.DataFrame):
    """Split an engineered frame into a numeric feature matrix and target.

    Returns:
        ``(X, y, y_col)`` where ``X`` is a copy of the numeric columns of
        ``df`` without the target and ``timestamp``, ``y`` is the target
        cast to float, and ``y_col`` is the target's column name.

    Raises:
        ValueError: If no recognised target column is present.
    """
    target_name = infer_target_column(df)

    # Only columns that actually exist are dropped.
    to_drop = [col for col in (target_name, "timestamp") if col in df.columns]
    remaining = df.drop(columns=to_drop, errors="ignore")
    features = remaining.select_dtypes(include=[np.number]).copy()

    target = df[target_name].astype(float)
    return features, target, target_name
| |
|
| | |
# Static Markdown shown above the KPIs after a successful run.
_EXPLAINER_MD = (
    "### 🧠 What this app does\n"
    "This AI-driven dashboard learns the relationship between **server load**, **ambient temperature**, "
    "**cooling efficiency**, and time features to **predict power usage**. "
    "Use it to quantify drivers of energy consumption, monitor deviations, and surface optimization levers.\n\n"
    "### Why it matters\n"
    "- Reduces **OPEX** by forecasting and optimizing energy usage\n"
    "- Identifies high-impact drivers (feature importance)\n"
    "- Enables proactive actions (e.g., workload shaping, cooling set-point tuning)\n\n"
    "### ⚙️ How it works (high-level)\n"
    "1) Cleans and engineers features (diurnal/weekly cycles, rolling stats, domain signals)\n"
    "2) Trains a tree ensemble (Gradient Boosting or Random Forest)\n"
    "3) Evaluates on a holdout split and produces predictions for the entire dataset\n"
    "4) Visualizes time series, accuracy scatter, residuals, and top feature importance\n"
)


def _new_axes(figsize):
    """Return a ``(figure, axes)`` pair that is not registered with pyplot."""
    # A bare Figure is garbage-collected once the caller drops it. Figures
    # made with plt.figure() stay alive until plt.close() and share global
    # state between concurrent requests.
    from matplotlib.figure import Figure

    fig = Figure(figsize=figsize)
    return fig, fig.subplots()


def _plot_time_series(times, actual, predicted, y_label):
    """Line chart of actual vs predicted target over time."""
    fig, ax = _new_axes((9, 3.6))
    ax.plot(times, actual, label="Actual")
    ax.plot(times, predicted, label="Predicted")
    ax.set_title("Time Series: Actual vs Predicted")
    ax.set_xlabel("Time")
    ax.set_ylabel(y_label)
    ax.legend()
    fig.tight_layout()
    return fig


def _plot_holdout_scatter(y_true, y_pred):
    """Scatter of holdout actual vs predicted with the y = x reference line."""
    fig, ax = _new_axes((4.6, 3.8))
    ax.scatter(y_true, y_pred, alpha=0.6)
    lo = min(y_true.min(), y_pred.min())
    hi = max(y_true.max(), y_pred.max())
    ax.plot([lo, hi], [lo, hi], linestyle="--")
    ax.set_title("Holdout: Actual vs Predicted")
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    fig.tight_layout()
    return fig


def _plot_residuals(residuals):
    """Histogram of holdout residuals."""
    fig, ax = _new_axes((4.6, 3.6))
    ax.hist(residuals, bins=30)
    ax.set_title("Holdout Residuals (Actual - Predicted)")
    ax.set_xlabel("Residual")
    ax.set_ylabel("Count")
    fig.tight_layout()
    return fig


def _plot_importances(feature_names, importances, top_n=12):
    """Horizontal bar chart of the ``top_n`` largest feature importances."""
    top = (pd.DataFrame({"feature": feature_names, "importance": importances})
           .sort_values("importance", ascending=False).head(top_n))
    fig, ax = _new_axes((6.2, 3.8))
    # Reversed so the most important feature is drawn at the top.
    ax.barh(top["feature"][::-1], top["importance"][::-1])
    ax.set_title("Top Feature Importances")
    fig.tight_layout()
    return fig


def run_pipeline(file_path, model_name):
    """Train, evaluate and visualise an energy model for an uploaded CSV.

    Args:
        file_path: Path of the uploaded CSV; falsy when nothing was uploaded.
        model_name: Model label passed to ``get_model``.

    Returns:
        A 9-tuple, one item per output component wired to this function:
        ``(title, explainer_md, kpi_md, preview_df, ts_fig, scatter_fig,
        residual_fig, importance_fig, csv_path)``. When the run cannot
        complete, the second item carries the message and the last seven
        are ``None``. Exceptions are caught and reported this way rather
        than raised.
    """
    title = "⚡ AI-Driven Data Center Energy Optimization Dashboard"

    def _status(message):
        # Every exit path must yield exactly nine values; a shorter tuple
        # does not match the nine outputs wired to this function.
        return (title, message) + (None,) * 7

    try:
        if not file_path:
            return _status("Please upload a CSV file.")

        df = feature_engineer(pd.read_csv(file_path))
        if len(df) < 10:
            return _status("Not enough rows to train a model (need >= 10).")

        X, y, y_col = feature_target_split(df)

        # NOTE(review): this is a shuffled split; with lagged features a
        # chronological split may give a more honest holdout - confirm intent.
        test_size = 0.25 if len(df) >= 25 else 0.2
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )
        model = get_model(model_name)
        model.fit(X_train, y_train)

        y_pred_all = model.predict(X)
        y_pred_test = model.predict(X_test)

        mae = mean_absolute_error(y_test, y_pred_test)
        r2 = r2_score(y_test, y_pred_test)
        avg_actual = float(np.mean(y))
        avg_pred = float(np.mean(y_pred_all))

        pred_col = f"{y_col}_pred"
        out_df = df.copy()
        out_df[pred_col] = y_pred_all

        ts_plot = None
        if "timestamp" in df.columns and is_datetime(df["timestamp"]):
            # Reuse the predictions already made; only the order changes.
            ordered = out_df.sort_values("timestamp")
            ts_plot = _plot_time_series(
                ordered["timestamp"], ordered[y_col], ordered[pred_col], y_col
            )

        sc_plot = _plot_holdout_scatter(y_test, y_pred_test)
        resid_plot = _plot_residuals(y_test - y_pred_test)

        fi_plot = None
        if hasattr(model, "feature_importances_"):
            fi_plot = _plot_importances(X.columns, model.feature_importances_)

        # Reserve a unique path, close the handle, then write. The file is
        # kept (delete=False) so it can be offered as a download.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            csv_path = tmp.name
        out_df.to_csv(csv_path, index=False)

        kpis = (
            f"**Model:** {model_name}\n\n"
            f"**Target:** {y_col}\n"
            f"**Avg {y_col} (actual):** {avg_actual:,.2f}\n"
            f"**Avg {y_col} (predicted):** {avg_pred:,.2f}\n"
            f"**Rows:** {len(df):,}\n\n"
            f"**Holdout MAE:** {mae:,.2f} | **R²:** {r2:,.3f}"
        )

        return (
            title,
            _EXPLAINER_MD,
            kpis,
            out_df.head(10),
            ts_plot,
            sc_plot,
            resid_plot,
            fi_plot,
            csv_path,
        )

    except Exception as e:
        # UI boundary: show the failure in the dashboard instead of raising.
        return _status(f"**Error:** {type(e).__name__}: {e}")
| |
|
| | |
# Close any Gradio apps still running from earlier executions of this script
# (e.g. re-running a notebook cell) before starting a new one.
import gradio
gradio.close_all()

# UI layout: upload + model picker, a run button, then the nine outputs that
# run_pipeline returns, in the same order as the `outputs` list below.
with gr.Blocks(title="AI-Driven Data Center Energy Optimization") as demo:
    gr.Markdown("## ⚡ AI-Driven Data Center Energy Optimization Dashboard")

    with gr.Row():
        fpath = gr.File(label="Upload Dataset (CSV)", file_types=[".csv"], type="filepath")
        model_name = gr.Dropdown(
            choices=["Gradient Boosting", "Random Forest"],
            value="Gradient Boosting",
            label="Select Model"
        )

    run_btn = gr.Button("▶️ Run")

    title_out = gr.Markdown()
    explainer_out = gr.Markdown()
    kpi_out = gr.Markdown()
    table_out = gr.Dataframe(label="Sample (+ Predictions)", wrap=True, row_count=("fixed", 10))

    gr.Markdown("### Visual Insights")
    ts_plot = gr.Plot(label="Time Series: Actual vs Predicted")
    sc_plot = gr.Plot(label="Holdout: Actual vs Predicted")
    resid_plot = gr.Plot(label="Residuals (Histogram)")
    fi_plot = gr.Plot(label="Top Feature Importances")

    dl = gr.File(label="📥 Download Data (+ Predictions)")

    # The order of `outputs` must match the tuple returned by run_pipeline.
    run_btn.click(
        fn=run_pipeline,
        inputs=[fpath, model_name],
        outputs=[title_out, explainer_out, kpi_out, table_out, ts_plot, sc_plot, resid_plot, fi_plot, dl]
    )

# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)
| |
|