File size: 9,247 Bytes
ae0b5a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
"""Kaggle integration β€” push and run merge notebooks on free T4 GPUs."""

import json
import os
import tempfile
import requests
from typing import Optional


# Base URL of Kaggle's REST API (v1); endpoint paths such as
# "/kernels/push" and "/kernels/status" are appended to it below.
KAGGLE_API_URL = "https://www.kaggle.com/api/v1"


def _kaggle_headers(username: str, api_key: str) -> dict:
    """Create auth headers for Kaggle API (Basic auth)."""
    import base64
    creds = base64.b64encode(f"{username}:{api_key}".encode()).decode()
    return {
        "Authorization": f"Basic {creds}",
        "Content-Type": "application/json",
    }


def push_and_run_kernel(
    notebook_json: str,
    kernel_title: str,
    kaggle_username: str,
    kaggle_key: str,
    enable_gpu: bool = True,
    enable_internet: bool = True,
) -> dict:
    """Push a notebook to Kaggle and auto-run it.

    This function never raises for expected failures; every outcome is
    reported through the returned dict so a UI can render it directly.

    Args:
        notebook_json: The notebook content as JSON string
        kernel_title: Title for the Kaggle kernel. It is also reduced to a
            slug (lowercase, spaces to hyphens, other punctuation dropped,
            at most 50 characters) that forms the kernel id.
        kaggle_username: Kaggle username
        kaggle_key: Kaggle API key
        enable_gpu: Enable T4 GPU (free tier)
        enable_internet: Enable internet access (needed for HF downloads)

    Returns:
        dict with ``success`` (bool). On success it also has ``url``,
        ``edit_url``, ``message``, ``ref`` and ``version``; on failure it
        has a human-readable ``error`` string.
    """
    if not kaggle_username or not kaggle_key:
        return {
            "success": False,
            "error": (
                "**Kaggle credentials required**\n\n"
                "1. Go to [kaggle.com/settings](https://www.kaggle.com/settings)\n"
                "2. Scroll to **API** section\n"
                "3. Click **Create New Token** (downloads `kaggle.json`)\n"
                "4. Copy your username and key from that file"
            ),
        }

    # Clean the title into a slug: lowercase, spaces -> hyphens, drop
    # everything that is neither alphanumeric nor a hyphen, cap at 50 chars.
    kernel_title = kernel_title or ""
    slug = kernel_title.lower().replace(" ", "-")
    slug = "".join(c for c in slug if c.isalnum() or c == "-")[:50]

    # A title made only of punctuation/whitespace leaves nothing usable and
    # would otherwise produce an id like "user/" — fail early, before any
    # network traffic, with an actionable message.
    if not slug.strip("-"):
        return {
            "success": False,
            "error": "Kernel title must contain at least one letter or digit.",
        }

    kernel_slug = f"{kaggle_username}/{slug}"

    headers = _kaggle_headers(kaggle_username, kaggle_key)

    # Prepare kernel push payload
    # Kaggle API expects the notebook source as a string
    # NOTE(review): the official Kaggle client appears to send camelCase keys
    # (`slug`, `newTitle`, `text`, `kernelType`, ...) — confirm that this
    # payload shape is accepted by the endpoint.
    push_data = {
        "id": kernel_slug,
        "title": kernel_title[:50],
        "code_file_name": f"{slug}.ipynb",
        "code_file_content": notebook_json,
        "language": "python",
        "kernel_type": "notebook",
        "is_private": True,
        "enable_gpu": enable_gpu,
        "enable_internet": enable_internet,
        "dataset_sources": [],
        "competition_sources": [],
        "kernel_sources": [],
        "category_ids": [],
    }

    try:
        # Push kernel (this also triggers execution)
        resp = requests.post(
            f"{KAGGLE_API_URL}/kernels/push",
            headers=headers,
            json=push_data,
            timeout=30,
        )

        if resp.status_code == 200:
            result = resp.json()
            if not isinstance(result, dict):
                result = {}

            # The push endpoint can answer 200 while still rejecting the
            # kernel; in that case the body carries an "error" field.
            push_error = result.get("error")
            if push_error:
                return {
                    "success": False,
                    "error": f"Kaggle rejected the kernel: {push_error}",
                }

            kernel_url = f"https://www.kaggle.com/code/{kernel_slug}"
            return {
                "success": True,
                "url": kernel_url,
                "edit_url": f"{kernel_url}/edit",
                "message": (
                    "**Kernel pushed and running!**\n\n"
                    "Your merge is now executing on Kaggle's free T4 GPU.\n\n"
                    f"- **View & Edit:** [{kernel_slug}]({kernel_url}/edit)\n"
                    f"- **Status:** [Check output]({kernel_url})\n\n"
                    "The kernel will run automatically. Check back in ~15-30 min for 7B models.\n\n"
                    "*Tip: Kaggle gives you 30 hours/week of free GPU time.*"
                ),
                "ref": result.get("ref", ""),
                "version": result.get("versionNumber", 1),
            }

        if resp.status_code == 401:
            return {
                "success": False,
                "error": "Invalid Kaggle credentials. Check your username and API key.",
            }

        if resp.status_code == 403:
            return {
                "success": False,
                "error": "Kaggle API access forbidden. Make sure your API token has kernel permissions.",
            }

        # Any other status: surface the server's message when the body is
        # JSON, otherwise the first 200 characters of the raw text.
        try:
            error_detail = resp.json().get("message", resp.text[:200])
        except Exception:
            error_detail = resp.text[:200]
        return {
            "success": False,
            "error": f"Kaggle API error ({resp.status_code}): {error_detail}",
        }

    except requests.exceptions.Timeout:
        return {"success": False, "error": "Request timed out. Try again."}
    except Exception as e:
        # Boundary handler: callers expect a result dict, never an exception.
        return {"success": False, "error": f"Error: {str(e)}"}


def check_kernel_status(
    kernel_slug: str,
    kaggle_username: str,
    kaggle_key: str,
) -> dict:
    """Check the execution status of a Kaggle kernel.

    Args:
        kernel_slug: Full kernel slug (username/kernel-name)
        kaggle_username: Kaggle username
        kaggle_key: Kaggle API key

    Returns:
        dict with ``success`` (bool). On success it also has ``status``
        (the raw status string), ``display`` (emoji + upper-cased status)
        and ``failure_message``; on failure it has an ``error`` string.
    """
    # Validate the slug up front so a malformed value yields a clear message
    # instead of an opaque "list index out of range".
    parts = kernel_slug.split("/") if isinstance(kernel_slug, str) else []
    if len(parts) < 2 or not parts[0] or not parts[1]:
        return {
            "success": False,
            "error": "Invalid kernel slug. Expected the form 'username/kernel-name'.",
        }
    owner, kernel_name = parts[0], parts[1]

    headers = _kaggle_headers(kaggle_username, kaggle_key)

    try:
        resp = requests.get(
            f"{KAGGLE_API_URL}/kernels/status",
            headers=headers,
            params={"userName": owner, "kernelSlug": kernel_name},
            timeout=15,
        )

        if resp.status_code != 200:
            return {"success": False, "error": f"API error: {resp.status_code}"}

        data = resp.json()
        # Treat a missing or null status the same way.
        status = data.get("status") or "unknown"

        status_emoji = {
            "queued": "⏳",
            "running": "🔄",
            "complete": "✅",
            "error": "❌",
            "cancelAcknowledged": "🚫",
        }.get(status, "❓")

        return {
            "success": True,
            "status": status,
            "display": f"{status_emoji} **{str(status).upper()}**",
            "failure_message": data.get("failureMessage") or "",
        }

    except Exception as e:
        # Boundary handler: callers expect a result dict, never an exception.
        return {"success": False, "error": str(e)}


def generate_kaggle_notebook(
    merge_notebook: dict,
    hf_token_secret: bool = True,
) -> str:
    """Adapt a merge notebook for Kaggle execution.

    Works on a deep copy; ``merge_notebook`` itself is never modified.

    Modifies the notebook to:
    - Insert a setup cell (at index 1, i.e. after the header cell) that
      verifies the GPU and, if enabled, loads the HF token from Kaggle
      Secrets
    - Replace any code cell that mentions ``notebook_login`` with a
      token-based ``huggingface_hub.login`` reading ``HF_TOKEN`` from the
      environment
    - Set the ``kaggle`` entry of the notebook metadata (GPU + internet)

    Args:
        merge_notebook: The notebook dict from notebook_generator
        hf_token_secret: If True, the setup cell reads ``HF_TOKEN`` from
            Kaggle Secrets and exports it to the environment. If False,
            that snippet is omitted.

    Returns:
        Notebook as JSON string
    """
    nb = json.loads(json.dumps(merge_notebook))  # deep copy

    setup_source = [
        "# Kaggle Environment Setup\n",
        "import os\n",
        "\n",
    ]

    if hf_token_secret:
        setup_source += [
            "# Use Kaggle Secrets for HF token (add in Kaggle Settings > Secrets)\n",
            "from kaggle_secrets import UserSecretsClient\n",
            "try:\n",
            "    secrets = UserSecretsClient()\n",
            "    hf_token = secrets.get_secret('HF_TOKEN')\n",
            "    os.environ['HF_TOKEN'] = hf_token\n",
            "    os.environ['HUGGING_FACE_HUB_TOKEN'] = hf_token\n",
            "    print('✅ HF Token loaded from Kaggle Secrets')\n",
            "except Exception:\n",
            "    print('⚠️ No HF_TOKEN secret found. Add it in Settings > Secrets if needed.')\n",
            "\n",
        ]

    setup_source += [
        "# Verify GPU\n",
        "import torch\n",
        "if torch.cuda.is_available():\n",
        "    print(f'✅ GPU: {torch.cuda.get_device_name(0)}')\n",
        # The device-properties attribute is `total_memory` (bytes).
        "    print(f'   VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB')\n",
        "else:\n",
        "    print('⚠️ No GPU detected. Enable GPU in kernel settings.')\n",
    ]

    # Add Kaggle environment setup cell at the beginning (after the header)
    kaggle_setup = {
        "cell_type": "code",
        "metadata": {},
        "source": setup_source,
        "outputs": [],
        "execution_count": None,
    }

    cells = nb.setdefault("cells", [])

    # Insert after the first cell (header); an empty notebook is left empty.
    if cells:
        cells.insert(1, kaggle_setup)

    # Replace the HF login cell (notebook_login doesn't work on Kaggle)
    for cell in cells:
        if cell.get("cell_type") != "code":
            continue
        raw_source = cell.get("source", "")
        # Notebook sources may be a list of lines or a single string.
        source = "".join(raw_source) if isinstance(raw_source, list) else raw_source
        if "notebook_login" in source:
            cell["source"] = [
                "# HF Authentication (using Kaggle Secrets)\n",
                "from huggingface_hub import login\n",
                "import os\n",
                "\n",
                "hf_token = os.environ.get('HF_TOKEN', '')\n",
                "if hf_token:\n",
                "    login(token=hf_token)\n",
                "    print('✅ Logged in to HuggingFace Hub')\n",
                "else:\n",
                "    print('⚠️ No HF token. Add HF_TOKEN to Kaggle Secrets for gated models.')\n",
            ]

    # Update metadata for Kaggle (create the metadata dict if it is missing)
    nb.setdefault("metadata", {})["kaggle"] = {
        "accelerator": "gpu",
        "dataSources": [],
        "isGpuEnabled": True,
        "isInternetEnabled": True,
    }

    return json.dumps(nb, indent=2, ensure_ascii=False)