# Source: Forgekit / forgekit/kaggle_runner.py
# Uploaded by AIencoder, commit ae0b5a7 (verified): "Create forgekit/kaggle_runner.py"
"""Kaggle integration — push and run merge notebooks on free T4 GPUs."""
import json
import os
import tempfile
import requests
from typing import Optional
# Base URL of Kaggle's REST API (v1); the functions below append endpoint
# paths such as "/kernels/push" and "/kernels/status" to it.
KAGGLE_API_URL = "https://www.kaggle.com/api/v1"
def _kaggle_headers(username: str, api_key: str) -> dict:
"""Create auth headers for Kaggle API (Basic auth)."""
import base64
creds = base64.b64encode(f"{username}:{api_key}".encode()).decode()
return {
"Authorization": f"Basic {creds}",
"Content-Type": "application/json",
}
def push_and_run_kernel(
    notebook_json: str,
    kernel_title: str,
    kaggle_username: str,
    kaggle_key: str,
    enable_gpu: bool = True,
    enable_internet: bool = True,
) -> dict:
    """Push a notebook to Kaggle and auto-run it.

    Missing credentials, an unusable title, HTTP errors, timeouts and other
    request failures are reported through the returned dict instead of being
    raised.

    Args:
        notebook_json: The notebook content as JSON string
        kernel_title: Title for the Kaggle kernel; also the basis of its slug
        kaggle_username: Kaggle username
        kaggle_key: Kaggle API key
        enable_gpu: Enable T4 GPU (free tier)
        enable_internet: Enable internet access (needed for HF downloads)

    Returns:
        dict with a boolean ``success`` key. On success it also carries
        ``url``, ``edit_url``, ``message`` (Markdown), ``ref`` and
        ``version``; on failure it carries ``error`` (text or Markdown).
    """
    if not kaggle_username or not kaggle_key:
        return {
            "success": False,
            "error": (
                "**Kaggle credentials required**\n\n"
                "1. Go to [kaggle.com/settings](https://www.kaggle.com/settings)\n"
                "2. Scroll to **API** section\n"
                "3. Click **Create New Token** (downloads `kaggle.json`)\n"
                "4. Copy your username and key from that file"
            ),
        }

    # Clean the title into a slug: lowercase, spaces become hyphens, every
    # other non-alphanumeric character is dropped, capped at 50 characters.
    title = kernel_title or ""
    slug = title.lower().replace(" ", "-")
    slug = "".join(c for c in slug if c.isalnum() or c == "-")[:50]
    if not slug.strip("-"):
        # With no letter or digit left, the kernel id would be "<user>/" (or
        # hyphens only) and the code file ".ipynb" -- fail early and clearly
        # instead of sending a request that cannot name a kernel.
        return {
            "success": False,
            "error": "Kernel title must contain at least one letter or digit.",
        }

    kernel_slug = f"{kaggle_username}/{slug}"
    headers = _kaggle_headers(kaggle_username, kaggle_key)

    # Kernel push payload; the notebook source travels as a string.
    # NOTE(review): these field names mirror kernel-metadata.json; the REST
    # push request may expect different names (e.g. "text", "newTitle") --
    # TODO confirm against the Kaggle API reference.
    push_data = {
        "id": kernel_slug,
        "title": title[:50],
        "code_file_name": f"{slug}.ipynb",
        "code_file_content": notebook_json,
        "language": "python",
        "kernel_type": "notebook",
        "is_private": True,
        "enable_gpu": enable_gpu,
        "enable_internet": enable_internet,
        "dataset_sources": [],
        "competition_sources": [],
        "kernel_sources": [],
        "category_ids": [],
    }

    try:
        # Push kernel (this also triggers execution)
        resp = requests.post(
            f"{KAGGLE_API_URL}/kernels/push",
            headers=headers,
            json=push_data,
            timeout=30,
        )

        if resp.status_code == 200:
            result = resp.json()

            # NOTE(review): the push response is understood to carry an
            # "error" field that can be set even on HTTP 200 -- confirm
            # against the Kaggle API reference. Treat it as a failure
            # rather than announcing a kernel that never started.
            push_error = result.get("error")
            if push_error:
                return {
                    "success": False,
                    "error": f"Kaggle rejected the kernel: {push_error}",
                }

            kernel_url = f"https://www.kaggle.com/code/{kernel_slug}"
            return {
                "success": True,
                "url": kernel_url,
                "edit_url": f"{kernel_url}/edit",
                "message": (
                    "**Kernel pushed and running!**\n\n"
                    "Your merge is now executing on Kaggle's free T4 GPU.\n\n"
                    f"- **View & Edit:** [{kernel_slug}]({kernel_url}/edit)\n"
                    f"- **Status:** [Check output]({kernel_url})\n\n"
                    "The kernel will run automatically. Check back in ~15-30 min for 7B models.\n\n"
                    "*Tip: Kaggle gives you 30 hours/week of free GPU time.*"
                ),
                "ref": result.get("ref", ""),
                "version": result.get("versionNumber", 1),
            }

        if resp.status_code == 401:
            return {
                "success": False,
                "error": "Invalid Kaggle credentials. Check your username and API key.",
            }

        if resp.status_code == 403:
            return {
                "success": False,
                "error": "Kaggle API access forbidden. Make sure your API token has kernel permissions.",
            }

        # Any other status: prefer the API's own message, otherwise fall back
        # to the first 200 characters of the raw body.
        try:
            error_detail = resp.json().get("message", resp.text[:200])
        except Exception:
            error_detail = resp.text[:200]
        return {
            "success": False,
            "error": f"Kaggle API error ({resp.status_code}): {error_detail}",
        }
    except requests.exceptions.Timeout:
        return {"success": False, "error": "Request timed out. Try again."}
    except Exception as e:
        # Boundary for the caller: any other failure becomes an error dict.
        return {"success": False, "error": f"Error: {str(e)}"}
def check_kernel_status(
    kernel_slug: str,
    kaggle_username: str,
    kaggle_key: str,
) -> dict:
    """Check the execution status of a Kaggle kernel.

    Args:
        kernel_slug: Full kernel slug (username/kernel-name)
        kaggle_username: Kaggle username
        kaggle_key: Kaggle API key

    Returns:
        dict with a boolean ``success`` key. On success it also carries
        ``status``, ``display`` (emoji plus upper-cased status, Markdown)
        and ``failure_message``; on failure it carries ``error``.
    """
    # Split once and validate up front so a malformed slug produces a clear
    # message instead of a bare "list index out of range".
    parts = kernel_slug.split("/") if isinstance(kernel_slug, str) else []
    if len(parts) < 2 or not parts[0] or not parts[1]:
        return {
            "success": False,
            "error": f"Invalid kernel slug {kernel_slug!r}: expected 'username/kernel-name'.",
        }
    owner, kernel_name = parts[0], parts[1]

    headers = _kaggle_headers(kaggle_username, kaggle_key)
    try:
        resp = requests.get(
            f"{KAGGLE_API_URL}/kernels/status",
            headers=headers,
            params={"userName": owner, "kernelSlug": kernel_name},
            timeout=15,
        )
        if resp.status_code != 200:
            return {"success": False, "error": f"API error: {resp.status_code}"}

        data = resp.json()
        # "or" also covers an explicit null status, which .get()'s default
        # would let through and which would then break .upper() below.
        status = data.get("status") or "unknown"
        status_emoji = {
            "queued": "⏳",
            "running": "🔄",
            "complete": "✅",
            "error": "❌",
            "cancelAcknowledged": "🚫",
        }.get(status, "❓")
        return {
            "success": True,
            "status": status,
            "display": f"{status_emoji} **{status.upper()}**",
            "failure_message": data.get("failureMessage", ""),
        }
    except Exception as e:
        # Boundary for the caller: network/parse failures become an error dict.
        return {"success": False, "error": str(e)}
def generate_kaggle_notebook(
    merge_notebook: dict,
    hf_token_secret: bool = True,
) -> str:
    """Adapt a merge notebook for Kaggle execution.

    Works on a deep copy, so ``merge_notebook`` itself is left untouched.
    The copy is modified to:
    - Add a setup cell as the second cell (skipped for a notebook with no
      cells) that verifies the GPU and, if enabled, loads the HF token
      from Kaggle Secrets
    - Replace any code cell that uses ``notebook_login`` with a
      token-based ``login`` reading ``HF_TOKEN`` from the environment
    - Set the ``kaggle`` metadata block (GPU and internet enabled)

    Args:
        merge_notebook: The notebook dict from notebook_generator
        hf_token_secret: If True, use Kaggle Secrets for HF token; if False,
            the setup cell omits the Kaggle Secrets lookup

    Returns:
        Notebook as JSON string
    """
    nb = json.loads(json.dumps(merge_notebook))  # deep copy via JSON round-trip

    setup_source = [
        "# Kaggle Environment Setup\n",
        "import os\n",
        "\n",
    ]
    if hf_token_secret:
        setup_source += [
            "# Use Kaggle Secrets for HF token (add in Kaggle Settings > Secrets)\n",
            "from kaggle_secrets import UserSecretsClient\n",
            "try:\n",
            "    secrets = UserSecretsClient()\n",
            "    hf_token = secrets.get_secret('HF_TOKEN')\n",
            "    os.environ['HF_TOKEN'] = hf_token\n",
            "    os.environ['HUGGING_FACE_HUB_TOKEN'] = hf_token\n",
            "    print('✅ HF Token loaded from Kaggle Secrets')\n",
            "except Exception:\n",
            "    print('⚠️ No HF_TOKEN secret found. Add it in Settings > Secrets if needed.')\n",
            "\n",
        ]
    setup_source += [
        "# Verify GPU\n",
        "import torch\n",
        "if torch.cuda.is_available():\n",
        "    print(f'✅ GPU: {torch.cuda.get_device_name(0)}')\n",
        # The device-properties attribute is `total_memory`; `total_mem`
        # does not exist and made this cell raise AttributeError.
        "    print(f'   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB')\n",
        "else:\n",
        "    print('⚠️ No GPU detected. Enable GPU in kernel settings.')\n",
    ]
    kaggle_setup = {
        "cell_type": "code",
        "metadata": {},
        "source": setup_source,
        "outputs": [],
        "execution_count": None,
    }

    # Tolerate a notebook dict that lacks the "cells" key.
    cells = nb.setdefault("cells", [])

    # Insert at index 1, i.e. right after the first cell (expected to be the
    # markdown header).
    if cells:
        cells.insert(1, kaggle_setup)

    # Replace the HF login cell (notebook_login doesn't work on Kaggle)
    for cell in cells:
        if cell.get("cell_type") != "code":
            continue
        raw_source = cell.get("source") or ""
        # Notebook sources may be stored as a list of lines or a single string.
        source = "".join(raw_source) if isinstance(raw_source, list) else raw_source
        if "notebook_login" in source:
            cell["source"] = [
                "# HF Authentication (using Kaggle Secrets)\n",
                "from huggingface_hub import login\n",
                "import os\n",
                "\n",
                "hf_token = os.environ.get('HF_TOKEN', '')\n",
                "if hf_token:\n",
                "    login(token=hf_token)\n",
                "    print('✅ Logged in to HuggingFace Hub')\n",
                "else:\n",
                "    print('⚠️ No HF token. Add HF_TOKEN to Kaggle Secrets for gated models.')\n",
            ]

    # Update metadata for Kaggle (create the metadata dict if it is missing)
    nb.setdefault("metadata", {})["kaggle"] = {
        "accelerator": "gpu",
        "dataSources": [],
        "isGpuEnabled": True,
        "isInternetEnabled": True,
    }
    return json.dumps(nb, indent=2, ensure_ascii=False)