Spaces:

decodingdatascience
/

AIEnergyOptimizationDashboard

Sleeping

App Files Files Community

AIEnergyOptimizationDashboard / app.py

decodingdatascience

Create app.py

74a8924 verified 4 months ago

raw

history blame contribute delete

9.98 kB

	import os, tempfile
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from pandas.api.types import is_datetime64_any_dtype as is_datetime
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import mean_absolute_error, r2_score
	from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
	import gradio as gr

	# ---------- Helpers ----------
	def infer_target_column(df: pd.DataFrame) -> str:
	    """Return the name of the energy target column present in ``df``.

	    Candidate names are checked in priority order and the first one found
	    in ``df.columns`` wins.

	    Args:
	        df: Input frame to inspect.

	    Returns:
	        The matching column name.

	    Raises:
	        ValueError: If none of the candidate columns exists in ``df``.
	    """
	    for c in ["power_usage_kwh", "energy_kwh", "power_kwh", "energy"]:
	        if c in df.columns:
	            return c
	    raise ValueError("Target column not found. Expected one of: "
	                     "['power_usage_kwh','energy_kwh','power_kwh','energy'].")

	def ensure_datetime_naive(df: pd.DataFrame, tz_target: str = "Asia/Dubai") -> pd.DataFrame:
	    """Return ``df`` with ``timestamp`` parsed to timezone-naive datetimes.

	    Values are parsed as UTC (unparseable entries become ``NaT``), converted
	    to ``tz_target`` wall-clock time, and then stripped of timezone info.

	    Args:
	        df: Input frame. It is never mutated.
	        tz_target: Timezone name to express the wall-clock time in.

	    Returns:
	        ``df`` itself when it has no ``timestamp`` column; otherwise a copy
	        whose ``timestamp`` column holds naive datetimes.
	    """
	    if "timestamp" not in df.columns:
	        return df

	    # Parse robustly with UTC so mixed or missing offsets end up on one axis.
	    ts = pd.to_datetime(df["timestamp"], errors="coerce", utc=True)
	    try:
	        ts = ts.dt.tz_convert(tz_target)
	    except Exception:
	        # Deliberate best effort: if the target zone cannot be resolved,
	        # keep the UTC wall-clock time instead of failing the whole run.
	        pass
	    # Drop tz info; the time features only need naive local timestamps.
	    ts = ts.dt.tz_localize(None)

	    df = df.copy()
	    df["timestamp"] = ts
	    return df

	def feature_engineer(df: pd.DataFrame) -> pd.DataFrame:
	    """Return a copy of ``df`` with time, domain and target-history features.

	    Steps, in order:
	      1. Normalise ``timestamp`` to naive datetimes (Asia/Dubai wall clock).
	      2. Forward/backward fill numeric columns.
	      3. Add calendar features plus sine/cosine encodings of hour and weekday;
	         when no usable timestamp exists, any missing one is created as 0.
	      4. Add domain features from cooling efficiency, server load and
	         ambient temperature when those columns are present.
	      5. Add a 1-step lag and 3/24-row rolling means of the target.
	      6. Forward/backward fill whatever NaNs remain.

	    Args:
	        df: Raw input frame. It is never mutated.

	    Returns:
	        A new frame with the engineered columns appended.

	    Raises:
	        ValueError: If no known target column is present.
	    """
	    # Copy up front: ensure_datetime_naive returns its input unchanged when
	    # there is no timestamp column, and the caller's frame must stay intact.
	    df = df.copy()
	    df = ensure_datetime_naive(df, tz_target="Asia/Dubai")

	    # Light numeric imputation
	    num_cols = df.select_dtypes(include=[np.number]).columns
	    df[num_cols] = df[num_cols].ffill().bfill()

	    # Time features
	    if "timestamp" in df.columns and is_datetime(df["timestamp"]):
	        df["hour"] = df["timestamp"].dt.hour
	        df["dayofweek"] = df["timestamp"].dt.dayofweek
	        df["is_weekend"] = (df["dayofweek"] >= 5).astype(int)
	        df["month"] = df["timestamp"].dt.month
	        df["dayofyear"] = df["timestamp"].dt.dayofyear
	        # Cyclical encodings so hour 23 is close to hour 0, Sunday to Monday.
	        df["hour_sin"] = np.sin(2 * np.pi * df["hour"] / 24)
	        df["hour_cos"] = np.cos(2 * np.pi * df["hour"] / 24)
	        df["dow_sin"] = np.sin(2 * np.pi * df["dayofweek"] / 7)
	        df["dow_cos"] = np.cos(2 * np.pi * df["dayofweek"] / 7)
	    else:
	        # No usable timestamp: keep the column schema stable with zeros.
	        for c in ["hour", "dayofweek", "is_weekend", "month", "dayofyear",
	                  "hour_sin", "hour_cos", "dow_sin", "dow_cos"]:
	            if c not in df.columns:
	                df[c] = 0

	    # Domain features
	    tgt = infer_target_column(df)
	    if "cooling_eff_pct" in df.columns:
	        df["cooling_ineff_pct"] = 100 - df["cooling_eff_pct"]
	    if "server_load_pct" in df.columns:
	        # Floor the load at 1 to avoid division by zero.
	        # NOTE(review): this column is computed from the target itself, so
	        # using it as a model input leaks the label - confirm this is intended.
	        df["energy_per_load"] = df[tgt] / np.maximum(df["server_load_pct"], 1)
	    if "ambient_temp_c" in df.columns and "server_load_pct" in df.columns:
	        df["temp_load_interaction"] = df["ambient_temp_c"] * df["server_load_pct"]

	    # Target lags/rollings
	    # NOTE(review): the rolling windows include the current row's target
	    # (no shift), unlike target_lag1 - confirm this is intended.
	    df["target_lag1"] = df[tgt].shift(1)
	    df["target_roll3"] = df[tgt].rolling(3, min_periods=1).mean()
	    df["target_roll24"] = df[tgt].rolling(24, min_periods=1).mean()

	    # Fill NaNs from shifts
	    df = df.ffill().bfill()
	    return df

	def get_model(name: str):
	    """Return an unfitted regressor for the given display name.

	    Args:
	        name: ``"Gradient Boosting"`` selects a gradient boosting regressor;
	            any other value selects a 300-tree random forest.

	    Returns:
	        A scikit-learn regressor seeded with ``random_state=42``.
	    """
	    if name == "Gradient Boosting":
	        return GradientBoostingRegressor(random_state=42)
	    return RandomForestRegressor(n_estimators=300, random_state=42)

	def feature_target_split(df: pd.DataFrame):
	    """Split an engineered frame into numeric features and the target.

	    Args:
	        df: Frame containing a known target column.

	    Returns:
	        A tuple ``(X, y, y_col)`` where ``X`` holds every numeric column
	        except the target and ``timestamp``, ``y`` is the target cast to
	        float, and ``y_col`` is the target column name.

	    Raises:
	        ValueError: If no known target column is present.
	    """
	    y_col = infer_target_column(df)
	    # errors="ignore" already tolerates a missing "timestamp" column.
	    X = df.drop(columns=[y_col, "timestamp"], errors="ignore")
	    X = X.select_dtypes(include=[np.number]).copy()
	    y = df[y_col].astype(float)
	    return X, y, y_col

	# ---------- Core pipeline ----------
	def run_pipeline(file_path, model_name):
	    """Train a model on an uploaded CSV and build every dashboard output.

	    Args:
	        file_path: Path of the uploaded CSV, or a falsy value if none.
	        model_name: Display name of the model to train.

	    Returns:
	        A 9-tuple: ``(title, explainer_or_message, kpis, preview_df,
	        time_series_fig, scatter_fig, residual_fig, importance_fig,
	        predictions_csv_path)``. For message-only outcomes (no file, too few
	        rows, or any error) every slot after the message is ``None``.
	    """
	    title = "⚡ AI-Driven Data Center Energy Optimization Dashboard"

	    try:
	        if not file_path:
	            return _message_result(title, "Please upload a CSV file.")

	        df_raw = pd.read_csv(file_path)
	        df = feature_engineer(df_raw)

	        # Guardrail
	        if len(df) < 10:
	            return _message_result(title, "Not enough rows to train a model (need >= 10).")

	        X, y, y_col = feature_target_split(df)

	        # Split, train, predict
	        # NOTE(review): this is a shuffled split; with lag/rolling features on
	        # time-ordered data the holdout metrics may be optimistic - confirm.
	        test_size = 0.25 if len(df) >= 25 else 0.2
	        X_train, X_test, y_train, y_test = train_test_split(
	            X, y, test_size=test_size, random_state=42
	        )
	        model = get_model(model_name)
	        model.fit(X_train, y_train)

	        y_pred_all = model.predict(X)
	        y_pred_test = model.predict(X_test)

	        mae = mean_absolute_error(y_test, y_pred_test)
	        r2 = r2_score(y_test, y_pred_test)
	        avg_actual = float(np.mean(y))
	        avg_pred = float(np.mean(y_pred_all))

	        # ------ Visualizations ------
	        ts_plot = None
	        if "timestamp" in df.columns and is_datetime(df["timestamp"]):
	            # Reuse the full-data predictions (row-aligned with df) instead of
	            # predicting a second time on the sorted frame.
	            plot_df = (df[["timestamp", y_col]]
	                       .assign(predicted=y_pred_all)
	                       .sort_values("timestamp"))
	            ts_plot = _plot_time_series(
	                plot_df["timestamp"], plot_df[y_col], plot_df["predicted"], y_col
	            )

	        sc_plot = _plot_holdout_scatter(y_test, y_pred_test)
	        resid_plot = _plot_residuals(y_test - y_pred_test)

	        fi_plot = None
	        if hasattr(model, "feature_importances_"):
	            fi_plot = _plot_feature_importances(X.columns, model.feature_importances_)

	        # Save predictions for download
	        out_df = df.copy()
	        out_df[f"{y_col}_pred"] = y_pred_all
	        # Reserve a path, close the handle, then write: an open handle leaks
	        # a descriptor and blocks reopening the file on Windows.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
	            out_path = tmp.name
	        out_df.to_csv(out_path, index=False)

	        # --------- Copy text (explainer + KPIs) ---------
	        explainer = (
	            "### 🧠 What this app does\n"
	            "This AI-driven dashboard learns the relationship between server load, ambient temperature, "
	            "cooling efficiency, and time features to predict power usage. "
	            "Use it to quantify drivers of energy consumption, monitor deviations, and surface optimization levers.\n\n"
	            "### 🔎 Why it matters\n"
	            "- Reduces OPEX by forecasting and optimizing energy usage\n"
	            "- Identifies high-impact drivers (feature importance)\n"
	            "- Enables proactive actions (e.g., workload shaping, cooling set-point tuning)\n\n"
	            "### ⚙️ How it works (high-level)\n"
	            "1) Cleans and engineers features (diurnal/weekly cycles, rolling stats, domain signals)\n"
	            "2) Trains a tree ensemble (Gradient Boosting or Random Forest)\n"
	            "3) Evaluates on a holdout split and produces predictions for the entire dataset\n"
	            "4) Visualizes time series, accuracy scatter, residuals, and top feature importance\n"
	        )

	        kpis = (
	            f"Model: {model_name}\n\n"
	            f"Target: {y_col}\n"
	            f"Avg {y_col} (actual): {avg_actual:,.2f}\n"
	            f"Avg {y_col} (predicted): {avg_pred:,.2f}\n"
	            f"Rows: {len(df):,}\n\n"
	            f"Holdout MAE: {mae:,.2f} | R²: {r2:,.3f}"
	        )

	        # Sample preview table
	        preview = out_df.head(10)

	        return (
	            title,
	            explainer,
	            kpis,
	            preview,
	            ts_plot,
	            sc_plot,
	            resid_plot,
	            fi_plot,
	            out_path,
	        )

	    except Exception as e:
	        # UI boundary: surface any failure as a message instead of crashing.
	        err = f"❌ Error: {type(e).__name__}: {e}"
	        return _message_result(title, err)


	# Number of values run_pipeline returns; must equal the number of output
	# components the UI wires to it.
	_N_OUTPUTS = 9


	def _message_result(title, message):
	    """Return the full-width result tuple for a message-only outcome."""
	    return (title, message) + (None,) * (_N_OUTPUTS - 2)


	def _plot_time_series(timestamps, actual, predicted, y_label):
	    """Return a line chart of actual vs predicted values over time."""
	    fig, ax = plt.subplots(figsize=(9, 3.6))
	    ax.plot(timestamps, actual, label="Actual")
	    ax.plot(timestamps, predicted, label="Predicted")
	    ax.set_title("Time Series: Actual vs Predicted")
	    ax.set_xlabel("Time")
	    ax.set_ylabel(y_label)
	    ax.legend()
	    fig.tight_layout()
	    # Unregister from pyplot so figures do not accumulate across requests;
	    # the Figure object itself stays usable for rendering.
	    plt.close(fig)
	    return fig


	def _plot_holdout_scatter(y_true, y_pred):
	    """Return a holdout scatter of actual vs predicted with a y = x guide."""
	    fig, ax = plt.subplots(figsize=(4.6, 3.8))
	    ax.scatter(y_true, y_pred, alpha=0.6)
	    lo = min(y_true.min(), y_pred.min())
	    hi = max(y_true.max(), y_pred.max())
	    ax.plot([lo, hi], [lo, hi], linestyle="--")
	    ax.set_title("Holdout: Actual vs Predicted")
	    ax.set_xlabel("Actual")
	    ax.set_ylabel("Predicted")
	    fig.tight_layout()
	    plt.close(fig)
	    return fig


	def _plot_residuals(residuals):
	    """Return a histogram of holdout residuals (actual minus predicted)."""
	    fig, ax = plt.subplots(figsize=(4.6, 3.6))
	    ax.hist(residuals, bins=30)
	    ax.set_title("Holdout Residuals (Actual − Predicted)")
	    ax.set_xlabel("Residual")
	    ax.set_ylabel("Count")
	    fig.tight_layout()
	    plt.close(fig)
	    return fig


	def _plot_feature_importances(feature_names, importances, top_n=12):
	    """Return a horizontal bar chart of the ``top_n`` most important features."""
	    fi = (pd.DataFrame({"feature": feature_names, "importance": importances})
	          .sort_values("importance", ascending=False)
	          .head(top_n))
	    fig, ax = plt.subplots(figsize=(6.2, 3.8))
	    # Reverse so the most important feature is drawn at the top.
	    ax.barh(fi["feature"][::-1], fi["importance"][::-1])
	    ax.set_title("Top Feature Importances")
	    fig.tight_layout()
	    plt.close(fig)
	    return fig

	# ---------- Gradio UI ----------
	gr.close_all()  # avoid port conflicts in Colab

	# Layout: file upload + model picker in one row, then the run button, the
	# text/table outputs, the four plots and the download link stacked below.
	# NOTE(review): the Run button is placed outside the Row - confirm layout.
	with gr.Blocks(title="AI-Driven Data Center Energy Optimization") as demo:
	    gr.Markdown("## ⚡ AI-Driven Data Center Energy Optimization Dashboard")

	    with gr.Row():
	        fpath = gr.File(label="📁 Upload Dataset (CSV)", file_types=[".csv"], type="filepath")
	        model_name = gr.Dropdown(
	            choices=["Gradient Boosting", "Random Forest"],
	            value="Gradient Boosting",
	            label="🔍 Select Model"
	        )

	    run_btn = gr.Button("▶️ Run")

	    title_out = gr.Markdown()
	    explainer_out = gr.Markdown()
	    kpi_out = gr.Markdown()
	    table_out = gr.Dataframe(label="📋 Sample (+ Predictions)", wrap=True, row_count=("fixed", 10))

	    gr.Markdown("### 📈 Visual Insights")
	    ts_plot = gr.Plot(label="Time Series: Actual vs Predicted")
	    sc_plot = gr.Plot(label="Holdout: Actual vs Predicted")
	    resid_plot = gr.Plot(label="Residuals (Histogram)")
	    fi_plot = gr.Plot(label="Top Feature Importances")

	    dl = gr.File(label="📥 Download Data (+ Predictions)")

	    # run_pipeline must return one value per component listed in outputs.
	    run_btn.click(
	        fn=run_pipeline,
	        inputs=[fpath, model_name],
	        outputs=[title_out, explainer_out, kpi_out, table_out, ts_plot, sc_plot, resid_plot, fi_plot, dl]
	    )

	# Launch only when run as a script or notebook cell, so importing this
	# module (e.g. from tests) does not start a server.
	if __name__ == "__main__":
	    demo.launch(share=True)