"""ForgeKit — Forge your perfect AI model, no code required.

Main Gradio application with seven tabs:

1. Merge Builder — Visual merge configuration + notebook generation
2. Model Explorer — Search and discover HF models
3. GGUF Quantizer — Generate quantization notebooks
4. Deploy — Generate deployment files for HF Spaces
5. AI Advisor — Merge recommendations, predictions, and config explanations
6. Run on Kaggle — Push merge notebooks to Kaggle's free GPUs
7. Leaderboard — Community merge rankings
"""

import os
import tempfile

import gradio as gr

from forgekit.model_info import fetch_model_info, search_models
from forgekit.compatibility import check_compatibility, quick_check
from forgekit.config_generator import (
    MergeConfig, generate_yaml,
    MERGE_METHODS, PRESETS,
)
from forgekit.notebook_generator import generate_merge_notebook, save_notebook
from forgekit.ai_advisor import merge_advisor, model_describer, config_explainer
from forgekit.kaggle_runner import push_and_run_kernel, check_kernel_status, generate_kaggle_notebook


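# Dark "forge" theme: amber-to-orange primary buttons on a near-black canvas.
# Light and dark variants are pinned to the same values so the UI renders
# identically regardless of the visitor's system color scheme.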
theme = gr.themes.Base(
    primary_hue=gr.themes.colors.amber,
    secondary_hue=gr.themes.colors.purple,
    neutral_hue=gr.themes.colors.gray,
    font=gr.themes.GoogleFont("Inter"),
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
).set(
    body_background_fill="#0a0a0f",
    body_background_fill_dark="#0a0a0f",
    body_text_color="#e5e5e5",
    body_text_color_dark="#e5e5e5",
    block_background_fill="#111118",
    block_background_fill_dark="#111118",
    block_border_color="#1f1f2e",
    block_border_color_dark="#1f1f2e",
    block_label_text_color="#9ca3af",
    block_label_text_color_dark="#9ca3af",
    block_title_text_color="#e5e5e5",
    block_title_text_color_dark="#e5e5e5",
    input_background_fill="#16161f",
    input_background_fill_dark="#16161f",
    input_border_color="#2a2a3a",
    input_border_color_dark="#2a2a3a",
    button_primary_background_fill="linear-gradient(to right, #f59e0b, #f97316)",
    button_primary_background_fill_dark="linear-gradient(to right, #f59e0b, #f97316)",
    button_primary_text_color="#ffffff",
    button_primary_text_color_dark="#ffffff",
    button_secondary_background_fill="#1f1f2e",
    button_secondary_background_fill_dark="#1f1f2e",
    button_secondary_text_color="#e5e5e5",
    button_secondary_text_color_dark="#e5e5e5",
)

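# Custom CSS: gradient wordmark in the header, colored status classes, and
# hiding Gradio's default footer.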
CSS = """ |
|
|
.forgekit-header { text-align: center; padding: 1.5rem 0 1rem; } |
|
|
.forgekit-header h1 { font-size: 2.5rem; font-weight: 800; margin: 0; |
|
|
background: linear-gradient(135deg, #a855f7, #ec4899, #f59e0b); |
|
|
-webkit-background-clip: text; -webkit-text-fill-color: transparent; } |
|
|
.forgekit-header p { color: #9ca3af; font-size: 1rem; margin-top: 0.25rem; } |
|
|
.status-ok { color: #4ade80; font-weight: 600; } |
|
|
.status-warn { color: #fbbf24; font-weight: 600; } |
|
|
.status-err { color: #f87171; font-weight: 600; } |
|
|
.method-card { border: 1px solid #2a2a3a; border-radius: 12px; padding: 1rem; margin: 0.25rem 0; } |
|
|
footer { display: none !important; } |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
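# Callback helpers shared by the tabs below. Each takes raw UI strings and
# returns plain text or markdown that Gradio renders directly. Example call
# (hypothetical model IDs):
#
#   report_md, status = check_models("org/model-a\norg/model-b", token="")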
def check_models(models_text: str, token: str) -> tuple[str, str]:
    """Check model compatibility and return report + quick status."""
    models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
    if len(models) < 2:
        return "⚠️ Add at least 2 models (one per line)", ""

    tok = token.strip() if token else None
    report = check_compatibility(models, token=tok)
    quick = quick_check(models, token=tok)
    return report.to_markdown(), quick


def generate_config(
    models_text: str, method: str, base_model: str,
    weights_text: str, densities_text: str,
    tokenizer_src: str, dtype: str,
    slerp_t: float, int8_mask: bool, normalize: bool,
) -> str:
    """Generate YAML config from UI inputs."""
    models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
    if not models:
        return "# Add models first"

    weights = []
    if weights_text.strip():
        try:
            weights = [float(w.strip()) for w in weights_text.split(",")]
        except ValueError:
            return "# Invalid weights — use comma-separated numbers"

    densities = []
    if densities_text.strip():
        try:
            densities = [float(d.strip()) for d in densities_text.split(",")]
        except ValueError:
            return "# Invalid densities — use comma-separated numbers"

    config = MergeConfig(
        method=method,
        models=models,
        base_model=base_model.strip(),
        weights=weights,
        densities=densities,
        tokenizer_source=tokenizer_src.strip(),
        dtype=dtype,
        slerp_t=slerp_t,
        int8_mask=int8_mask,
        normalize=normalize,
    )

    return generate_yaml(config)


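# generate_yaml(config) emits a mergekit-style YAML document; the pre-filled
# sample in the "Config Explainer" tab below shows the expected shape.

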
def apply_preset(preset_name: str, models_text: str) -> tuple[str, str]:
    """Apply a preset and return weights + densities strings."""
    models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
    if not models:
        return "", ""

    preset = PRESETS.get(preset_name)
    if not preset:
        return "", ""

    weights, densities = preset.apply(models)
    return ", ".join(str(w) for w in weights), ", ".join(str(d) for d in densities)


def generate_notebook_file(
    models_text: str, method: str, base_model: str,
    weights_text: str, densities_text: str,
    tokenizer_src: str, dtype: str,
    slerp_t: float, int8_mask: bool, normalize: bool,
    output_name: str, hf_user: str,
    inc_quantize: bool, inc_deploy: bool,
    quant_types_text: str,
) -> str | None:
    """Generate and save a Colab notebook, return file path."""
    models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
    if not models:
        return None

    weights = []
    if weights_text.strip():
        try:
            weights = [float(w.strip()) for w in weights_text.split(",")]
        except ValueError:
            pass  # unparsable weights are silently ignored; defaults apply

    densities = []
    if densities_text.strip():
        try:
            densities = [float(d.strip()) for d in densities_text.split(",")]
        except ValueError:
            pass  # same for densities

    quant_types = [q.strip() for q in quant_types_text.split(",") if q.strip()]
    if not quant_types:
        quant_types = ["Q5_K_M", "Q4_K_M"]

    config = MergeConfig(
        method=method,
        models=models,
        base_model=base_model.strip(),
        weights=weights,
        densities=densities,
        tokenizer_source=tokenizer_src.strip(),
        dtype=dtype,
        slerp_t=slerp_t,
        int8_mask=int8_mask,
        normalize=normalize,
    )

    name = output_name.strip() or "ForgeKit-Merged-Model"
    user = hf_user.strip()

    nb = generate_merge_notebook(
        config,
        output_model_name=name,
        hf_username=user,
        include_quantize=inc_quantize,
        include_deploy=inc_deploy,
        quant_types=quant_types,
    )

    path = os.path.join(tempfile.gettempdir(), f"{name}_merge.ipynb")
    save_notebook(nb, path)
    return path


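# Both notebook generators above and below write to tempfile.gettempdir();
# gr.File serves the returned path as a browser download.

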
def search_hf_models(query: str, arch_filter: str, sort_by: str) -> str:
    """Search HF Hub and return formatted results."""
    if not query.strip():
        return "Enter a search query"

    results = search_models(
        query=query.strip(),
        architecture=arch_filter if arch_filter != "Any" else "",
        limit=15,
        sort=sort_by.lower(),
    )

    if not results:
        return "No models found"

    lines = ["| Model | Architecture | Downloads |", "|-------|-------------|-----------|"]
    for r in results:
        mid = r.get("model_id", "")
        mtype = r.get("model_type", "—")
        dl = r.get("downloads", 0)
        dl_str = f"{dl:,}" if dl else "—"
        lines.append(f"| `{mid}` | {mtype} | {dl_str} |")

    return "\n".join(lines)


def fetch_model_details(model_id: str) -> str:
    """Fetch and display detailed model info."""
    if not model_id.strip():
        return "Enter a model ID"

    info = fetch_model_info(model_id.strip())
    if info.error:
        return f"❌ {info.error}"

    return f"""### {info.model_id}

| Property | Value |
|----------|-------|
| **Architecture** | `{info.model_type}` |
| **Hidden Size** | {info.hidden_size} |
| **Layers** | {info.num_hidden_layers} |
| **Vocab Size** | {info.vocab_size:,} |
| **Intermediate** | {info.intermediate_size} |
| **Attention Heads** | {info.num_attention_heads} |
| **KV Heads** | {info.num_key_value_heads} |
| **Max Position** | {info.max_position_embeddings:,} |
| **dtype** | {info.torch_dtype} |
| **Downloads** | {info.downloads:,} |
| **Likes** | {info.likes} |
| **Params (est.)** | {info.param_estimate} |
| **RAM for merge** | {info.ram_estimate_gb} GB |
| **Gated** | {'Yes' if info.gated else 'No'} |
| **trust_remote_code** | {'Required' if info.trust_remote_code else 'No'} |"""


def suggest_base(models_text: str, token: str) -> tuple[str, str]:
    """Auto-suggest base model and tokenizer from compatibility check."""
    models = [m.strip() for m in models_text.strip().split("\n") if m.strip()]
    if len(models) < 2:
        return "", ""
    tok = token.strip() if token else None
    report = check_compatibility(models, token=tok)
    return report.suggested_base, report.suggested_tokenizer


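# Seed entries for the community leaderboard. UI submissions are appended to
# this in-process list only, so they are lost whenever the app restarts.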
LEADERBOARD = [
    {
        "name": "Qwen2.5CMR-7B", "author": "AIencoder",
        "method": "DARE-TIES", "base": "Qwen2.5-7B-Instruct",
        "models": "Coder-7B + Math-7B", "likes": 0,
        "link": "https://huggingface.co/AIencoder/Qwen2.5CMR",
    },
    {
        "name": "Logic-Coder-7B", "author": "AIencoder",
        "method": "DARE-TIES", "base": "Mistral-7B",
        "models": "OpenHermes + CodeInstruct", "likes": 1,
        "link": "https://huggingface.co/AIencoder/Logic-Coder-7B",
    },
    {
        "name": "HermesMath-7B-TIES", "author": "AIencoder",
        "method": "TIES", "base": "Mistral-7B",
        "models": "Hermes + MetaMath", "likes": 1,
        "link": "https://huggingface.co/AIencoder/HermesMath-7B-TIES",
    },
    {
        "name": "Hermes-2-Pro-GodCoder", "author": "AIencoder",
        "method": "DARE-TIES", "base": "Mistral-7B",
        "models": "Hermes-2-Pro + CodeModels", "likes": 1,
        "link": "https://huggingface.co/AIencoder/Hermes-2-Pro-Mistral-7B-GodCoder",
    },
]


def get_leaderboard() -> str:
    """Return leaderboard as markdown table."""
    lines = [
        "| # | Model | Author | Method | Source Models | Likes |",
        "|---|-------|--------|--------|---------------|-------|",
    ]
    sorted_lb = sorted(LEADERBOARD, key=lambda x: -x["likes"])
    for i, entry in enumerate(sorted_lb, 1):
        name = f"[{entry['name']}]({entry['link']})"
        lines.append(
            f"| {i} | {name} | {entry['author']} | {entry['method']} | "
            f"{entry['models']} | {entry['likes']} |"
        )
    return "\n".join(lines)


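# Application layout. Everything below runs at import time and builds the
# Blocks graph; event handlers are wired tab by tab, right after the
# components they connect.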
with gr.Blocks(theme=theme, css=CSS, title="ForgeKit — Model Merging Platform") as demo:

    gr.HTML("""
    <div class="forgekit-header">
        <h1>🔥 ForgeKit</h1>
        <p>Forge your perfect AI model — no code required</p>
    </div>
    """)

    with gr.Tabs():

with gr.Tab("⚒️ Merge Builder", id="builder"): |
|
|
gr.Markdown("### Build your merge configuration and generate a ready-to-run Colab notebook") |
|
|
|
|
|
with gr.Row(): |
|
|
|
|
|
with gr.Column(scale=3): |
|
|
models_input = gr.Textbox( |
|
|
label="Models to Merge (one per line)", |
|
|
placeholder="Qwen/Qwen2.5-Coder-7B-Instruct\nQwen/Qwen2.5-Math-7B-Instruct", |
|
|
lines=5, |
|
|
) |
|
|
hf_token = gr.Textbox( |
|
|
label="HF Token (optional — for gated models)", |
|
|
type="password", |
|
|
placeholder="hf_...", |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
check_btn = gr.Button("🔍 Check Compatibility", variant="secondary") |
|
|
suggest_btn = gr.Button("💡 Auto-Suggest Base", variant="secondary") |
|
|
|
|
|
compat_status = gr.Textbox(label="Quick Status", interactive=False, max_lines=2) |
|
|
compat_report = gr.Markdown(label="Compatibility Report") |
|
|
|
|
|
|
|
|
with gr.Column(scale=3): |
|
|
method_dd = gr.Dropdown( |
|
|
choices=list(MERGE_METHODS.keys()), |
|
|
value="dare_ties", |
|
|
label="Merge Method", |
|
|
) |
|
|
method_info_md = gr.Markdown( |
|
|
value=f"**DARE-TIES** — {MERGE_METHODS['dare_ties']['description']}" |
|
|
) |
|
|
base_model = gr.Textbox( |
|
|
label="Base Model", |
|
|
placeholder="Qwen/Qwen2.5-7B-Instruct", |
|
|
) |
|
|
tokenizer_src = gr.Textbox( |
|
|
label="Tokenizer Source", |
|
|
placeholder="Same as base model (leave blank to auto-fill)", |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
weights_input = gr.Textbox(label="Weights (comma-separated)", placeholder="0.5, 0.5") |
|
|
densities_input = gr.Textbox(label="Densities (comma-separated)", placeholder="0.7, 0.6") |
|
|
|
|
|
with gr.Row(): |
|
|
preset_dd = gr.Dropdown( |
|
|
choices=list(PRESETS.keys()), |
|
|
label="Apply Preset", |
|
|
scale=2, |
|
|
) |
|
|
preset_btn = gr.Button("Apply", variant="secondary", scale=1) |
|
|
|
|
|
with gr.Row(): |
|
|
dtype_dd = gr.Dropdown(choices=["bfloat16", "float16", "float32"], value="bfloat16", label="dtype") |
|
|
slerp_t = gr.Slider(0, 1, value=0.5, step=0.05, label="SLERP t", visible=False) |
|
|
|
|
|
with gr.Row(): |
|
|
int8_mask = gr.Checkbox(label="int8_mask", value=True) |
|
|
normalize_cb = gr.Checkbox(label="normalize", value=True) |
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("### Output") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=3): |
|
|
yaml_output = gr.Code(label="Generated YAML Config", language="yaml", lines=15) |
|
|
gen_yaml_btn = gr.Button("📋 Generate YAML", variant="primary", size="lg") |
|
|
|
|
|
with gr.Column(scale=3): |
|
|
gr.Markdown("#### Notebook Settings") |
|
|
output_name = gr.Textbox(label="Model Name", placeholder="My-Merged-7B") |
|
|
hf_username = gr.Textbox(label="HF Username", placeholder="AIencoder") |
|
|
with gr.Row(): |
|
|
inc_quant = gr.Checkbox(label="Include GGUF Quantization", value=True) |
|
|
inc_deploy = gr.Checkbox(label="Include HF Deployment", value=True) |
|
|
quant_types = gr.Textbox(label="Quant Types", value="Q5_K_M, Q4_K_M") |
|
|
gen_nb_btn = gr.Button("🚀 Generate Colab Notebook", variant="primary", size="lg") |
|
|
nb_file = gr.File(label="Download Notebook") |
|
|
|
|
|
|
|
|
check_btn.click( |
|
|
check_models, [models_input, hf_token], [compat_report, compat_status] |
|
|
) |
|
|
suggest_btn.click( |
|
|
suggest_base, [models_input, hf_token], [base_model, tokenizer_src] |
|
|
) |
|
|
preset_btn.click( |
|
|
apply_preset, [preset_dd, models_input], [weights_input, densities_input] |
|
|
) |
|
|
gen_yaml_btn.click( |
|
|
generate_config, |
|
|
[models_input, method_dd, base_model, weights_input, densities_input, |
|
|
tokenizer_src, dtype_dd, slerp_t, int8_mask, normalize_cb], |
|
|
yaml_output, |
|
|
) |
|
|
gen_nb_btn.click( |
|
|
generate_notebook_file, |
|
|
[models_input, method_dd, base_model, weights_input, densities_input, |
|
|
tokenizer_src, dtype_dd, slerp_t, int8_mask, normalize_cb, |
|
|
output_name, hf_username, inc_quant, inc_deploy, quant_types], |
|
|
nb_file, |
|
|
) |
|
|
|
|
|
|
|
|
def on_method_change(m): |
|
|
info = MERGE_METHODS.get(m, {}) |
|
|
desc = f"**{info.get('name', m)}** — {info.get('description', '')}" |
|
|
show_slerp = m == "slerp" |
|
|
return desc, gr.update(visible=show_slerp) |
|
|
|
|
|
method_dd.change(on_method_change, method_dd, [method_info_md, slerp_t]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Tab("🔍 Model Explorer", id="explorer"): |
|
|
gr.Markdown("### Search and discover models on HuggingFace Hub") |
|
|
|
|
|
with gr.Row(): |
|
|
search_query = gr.Textbox(label="Search", placeholder="qwen coder instruct", scale=3) |
|
|
arch_filter = gr.Dropdown( |
|
|
choices=["Any", "llama", "qwen2", "mistral", "gemma2", "phi3", "starcoder2"], |
|
|
value="Any", label="Architecture", scale=1, |
|
|
) |
|
|
sort_dd = gr.Dropdown(choices=["Downloads", "Likes", "Modified"], value="Downloads", label="Sort", scale=1) |
|
|
search_btn = gr.Button("🔍 Search", variant="primary", scale=1) |
|
|
|
|
|
search_results = gr.Markdown(label="Results") |
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("### Model Details") |
|
|
with gr.Row(): |
|
|
detail_input = gr.Textbox(label="Model ID", placeholder="Qwen/Qwen2.5-Coder-7B-Instruct", scale=3) |
|
|
detail_btn = gr.Button("📋 Fetch Details", variant="secondary", scale=1) |
|
|
detail_output = gr.Markdown() |
|
|
|
|
|
search_btn.click(search_hf_models, [search_query, arch_filter, sort_dd], search_results) |
|
|
detail_btn.click(fetch_model_details, detail_input, detail_output) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Tab("📦 GGUF Quantizer", id="quantizer"): |
|
|
gr.Markdown("""### Generate a quantization notebook for any HF model |
|
|
Convert any HuggingFace model to GGUF format for use with llama.cpp, Ollama, LM Studio, etc.""") |
|
|
|
|
|
q_model = gr.Textbox(label="Model ID", placeholder="AIencoder/Qwen2.5CMR-7B") |
|
|
q_username = gr.Textbox(label="Your HF Username", placeholder="AIencoder") |
|
|
|
|
|
gr.Markdown("#### Quantization Levels") |
|
|
gr.Markdown(""" |
|
|
| Type | Size (7B) | Quality | Best For | |
|
|
|------|----------|---------|----------| |
|
|
| Q8_0 | ~7.5 GB | Best | Maximum quality | |
|
|
| Q6_K | ~5.5 GB | Great | Good balance | |
|
|
| **Q5_K_M** | **~5 GB** | **Good** | **Recommended** | |
|
|
| Q4_K_M | ~4 GB | Decent | Memory-constrained | |
|
|
| IQ4_XS | ~3.5 GB | Fair | Extreme compression | |
|
|
""") |
|
|
q_types = gr.Textbox(label="Quant Types (comma-separated)", value="Q8_0, Q5_K_M, Q4_K_M") |
|
|
|
|
|
q_btn = gr.Button("📦 Generate Quantization Notebook", variant="primary", size="lg") |
|
|
q_file = gr.File(label="Download Notebook") |
|
|
|
|
|
def gen_quant_notebook(model_id, username, qtypes_text): |
|
|
if not model_id.strip(): |
|
|
return None |
|
|
qtypes = [q.strip() for q in qtypes_text.split(",") if q.strip()] |
|
|
name = model_id.strip().split("/")[-1] |
|
|
config = MergeConfig(method="linear", models=[model_id.strip()]) |
|
|
nb = generate_merge_notebook( |
|
|
config, |
|
|
output_model_name=name, |
|
|
hf_username=username.strip(), |
|
|
include_quantize=True, |
|
|
include_deploy=False, |
|
|
quant_types=qtypes, |
|
|
) |
|
|
|
|
|
path = os.path.join(tempfile.gettempdir(), f"{name}_quantize.ipynb") |
|
|
save_notebook(nb, path) |
|
|
return path |
|
|
|
|
|
q_btn.click(gen_quant_notebook, [q_model, q_username, q_types], q_file) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Tab("🚀 Deploy", id="deploy"): |
|
|
gr.Markdown("""### Deploy your merged model to a HuggingFace Space |
|
|
|
|
|
After merging and (optionally) quantizing, deploy a chat interface for your model.""") |
|
|
|
|
|
d_model = gr.Textbox(label="Model Repo ID", placeholder="AIencoder/Qwen2.5CMR-7B") |
|
|
d_type = gr.Dropdown( |
|
|
choices=["Gradio Chat (transformers)", "Docker + llama.cpp (GGUF)"], |
|
|
value="Gradio Chat (transformers)", label="Deployment Type", |
|
|
) |
|
|
d_btn = gr.Button("📋 Generate Deployment Files", variant="primary") |
|
|
d_output = gr.Code(label="app.py", language="python", lines=20) |
|
|
d_readme = gr.Code(label="README.md (Space metadata)", language="markdown", lines=8) |
|
|
|
|
|
def gen_deploy(model_id, deploy_type): |
|
|
mid = model_id.strip() |
|
|
if not mid: |
|
|
return "# Enter a model ID first", "" |
|
|
|
|
|
if "Gradio" in deploy_type: |
|
|
app = f'''import gradio as gr |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer |
|
|
import torch |
|
|
from threading import Thread |
|
|
|
|
|
MODEL_ID = "{mid}" |
|
|
|
|
|
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) |
|
|
model = AutoModelForCausalLM.from_pretrained( |
|
|
MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True |
|
|
) |
|
|
|
|
|
def chat(message, history): |
|
|
messages = [] |
|
|
for h in history: |
|
|
messages.append({{"role": "user", "content": h[0]}}) |
|
|
if h[1]: |
|
|
messages.append({{"role": "assistant", "content": h[1]}}) |
|
|
messages.append({{"role": "user", "content": message}}) |
|
|
|
|
|
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) |
|
|
inputs = tokenizer(text, return_tensors="pt").to(model.device) |
|
|
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True) |
|
|
|
|
|
thread = Thread(target=model.generate, kwargs={{ |
|
|
**inputs, "max_new_tokens": 512, "streamer": streamer, |
|
|
"do_sample": True, "temperature": 0.7, |
|
|
}}) |
|
|
thread.start() |
|
|
|
|
|
response = "" |
|
|
for token in streamer: |
|
|
response += token |
|
|
yield response |
|
|
|
|
|
demo = gr.ChatInterface(chat, title="{mid.split('/')[-1]}", description="Merged with ForgeKit") |
|
|
demo.launch()''' |
|
|
readme = f"""--- |
|
|
title: {mid.split('/')[-1]} Chat |
|
|
emoji: 🔥 |
|
|
colorFrom: amber |
|
|
colorTo: orange |
|
|
sdk: gradio |
|
|
sdk_version: 5.12.0 |
|
|
app_file: app.py |
|
|
pinned: false |
|
|
license: apache-2.0 |
|
|
---""" |
|
|
else: |
|
|
app = f'''# Docker deployment with llama.cpp |
|
|
# Dockerfile for serving GGUF models |
|
|
|
|
|
FROM ghcr.io/ggerganov/llama.cpp:server |
|
|
|
|
|
# Download the GGUF model |
|
|
ADD https://huggingface.co/{mid}/resolve/main/*Q5_K_M*.gguf /models/model.gguf |
|
|
|
|
|
EXPOSE 7860 |
|
|
|
|
|
CMD ["/llama-server", \\ |
|
|
"--model", "/models/model.gguf", \\ |
|
|
"--host", "0.0.0.0", \\ |
|
|
"--port", "7860", \\ |
|
|
"--ctx-size", "4096", \\ |
|
|
"--n-gpu-layers", "99"]''' |
|
|
readme = f"""--- |
|
|
title: {mid.split('/')[-1]} |
|
|
emoji: 🔥 |
|
|
colorFrom: amber |
|
|
colorTo: orange |
|
|
sdk: docker |
|
|
pinned: false |
|
|
license: apache-2.0 |
|
|
---""" |
|
|
|
|
|
return app, readme |
|
|
|
|
|
d_btn.click(gen_deploy, [d_model, d_type], [d_output, d_readme]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Tab("🤖 AI Advisor", id="ai"): |
|
|
gr.Markdown("""### AI-Powered Merge Intelligence |
|
|
Get smart recommendations, capability predictions, and plain-English explanations — powered by **Llama 3.3 70B** on Groq (free, blazing fast).""") |
|
|
|
|
|
groq_key = gr.Textbox( |
|
|
label="Groq API Key (free at console.groq.com)", |
|
|
type="password", |
|
|
placeholder="gsk_... (free, no credit card needed)", |
|
|
) |
|
|
|
|
|
with gr.Tabs(): |
|
|
|
|
|
with gr.Tab("💡 Merge Advisor"): |
|
|
gr.Markdown("**Tell the AI what models you want to merge and it'll recommend the best strategy.**") |
|
|
ai_models = gr.Textbox( |
|
|
label="Models (one per line)", |
|
|
placeholder="Qwen/Qwen2.5-Coder-7B-Instruct\nQwen/Qwen2.5-Math-7B-Instruct", |
|
|
lines=4, |
|
|
) |
|
|
ai_goal = gr.Textbox( |
|
|
label="What do you want the merged model to do?", |
|
|
placeholder="I want a model that's great at both coding and math reasoning", |
|
|
) |
|
|
ai_advise_btn = gr.Button("💡 Get Recommendation", variant="primary") |
|
|
ai_advise_out = gr.Markdown() |
|
|
|
|
|
ai_advise_btn.click( |
|
|
merge_advisor, [ai_models, ai_goal, groq_key], ai_advise_out |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Tab("🔮 Capability Predictor"): |
|
|
gr.Markdown("**Predict what your merged model will be good (and bad) at.**") |
|
|
desc_models = gr.Textbox( |
|
|
label="Models (one per line)", |
|
|
placeholder="Qwen/Qwen2.5-Coder-7B-Instruct\nQwen/Qwen2.5-Math-7B-Instruct", |
|
|
lines=4, |
|
|
) |
|
|
desc_method = gr.Textbox(label="Merge Method", placeholder="dare_ties") |
|
|
desc_weights = gr.Textbox(label="Weights", placeholder="0.5, 0.5") |
|
|
desc_btn = gr.Button("🔮 Predict Capabilities", variant="primary") |
|
|
desc_out = gr.Markdown() |
|
|
|
|
|
desc_btn.click( |
|
|
model_describer, [desc_models, desc_method, desc_weights, groq_key], desc_out |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Tab("📖 Config Explainer"): |
|
|
gr.Markdown("**Paste any mergekit YAML config and get a plain-English explanation.**") |
|
|
explain_yaml = gr.Code( |
|
|
label="Paste YAML Config", |
|
|
language="yaml", |
|
|
lines=12, |
|
|
value="""merge_method: dare_ties |
|
|
base_model: Qwen/Qwen2.5-7B-Instruct |
|
|
models: |
|
|
- model: Qwen/Qwen2.5-Coder-7B-Instruct |
|
|
parameters: |
|
|
weight: 0.5 |
|
|
density: 0.7 |
|
|
- model: Qwen/Qwen2.5-Math-7B-Instruct |
|
|
parameters: |
|
|
weight: 0.5 |
|
|
density: 0.6 |
|
|
parameters: |
|
|
int8_mask: true |
|
|
normalize: true |
|
|
dtype: bfloat16""", |
|
|
) |
|
|
explain_btn = gr.Button("📖 Explain This Config", variant="primary") |
|
|
explain_out = gr.Markdown() |
|
|
|
|
|
explain_btn.click( |
|
|
config_explainer, [explain_yaml, groq_key], explain_out |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
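        # Kaggle flow: build the same merge notebook as the Colab path, adapt
        # it for Kaggle with generate_kaggle_notebook, then push it through
        # the Kaggle API with GPU and internet enabled.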
with gr.Tab("🚀 Run on Kaggle", id="kaggle"): |
|
|
gr.Markdown("""### Run Your Merge on Kaggle's Free GPU |
|
|
Push your merge notebook directly to Kaggle and run it on a free T4 GPU — no local setup needed. |
|
|
|
|
|
**You need:** A [Kaggle account](https://www.kaggle.com) with an API token. Go to *Settings > API > Create New Token*.""") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
kg_username = gr.Textbox(label="Kaggle Username", placeholder="your_kaggle_username") |
|
|
kg_key = gr.Textbox(label="Kaggle API Key", type="password", placeholder="From kaggle.json") |
|
|
with gr.Column(): |
|
|
kg_hf_note = gr.Markdown("""**Important:** Add your HF token as a Kaggle Secret: |
|
|
1. Go to your kernel's **Settings** tab |
|
|
2. Under **Secrets**, add `HF_TOKEN` with your HuggingFace token |
|
|
3. This lets the kernel download gated models and upload results""") |
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("#### Configure Merge (or use settings from Merge Builder tab)") |
|
|
|
|
|
with gr.Row(): |
|
|
kg_models = gr.Textbox( |
|
|
label="Models (one per line)", lines=4, |
|
|
placeholder="Qwen/Qwen2.5-Coder-7B-Instruct\nQwen/Qwen2.5-Math-7B-Instruct", |
|
|
) |
|
|
with gr.Column(): |
|
|
kg_method = gr.Dropdown(choices=list(MERGE_METHODS.keys()), value="dare_ties", label="Method") |
|
|
kg_base = gr.Textbox(label="Base Model", placeholder="Qwen/Qwen2.5-7B-Instruct") |
|
|
kg_weights = gr.Textbox(label="Weights", placeholder="0.5, 0.5") |
|
|
kg_densities = gr.Textbox(label="Densities", placeholder="0.7, 0.6") |
|
|
|
|
|
with gr.Row(): |
|
|
kg_output_name = gr.Textbox(label="Output Model Name", placeholder="My-Merged-7B", value="ForgeKit-Merge") |
|
|
kg_hf_user = gr.Textbox(label="HF Username (for upload)", placeholder="AIencoder") |
|
|
|
|
|
kg_run_btn = gr.Button("🚀 Push & Run on Kaggle", variant="primary", size="lg") |
|
|
kg_status = gr.Markdown() |
|
|
|
|
|
def run_on_kaggle( |
|
|
username, key, models_text, method, base, weights_text, densities_text, |
|
|
output_name, hf_user, |
|
|
): |
|
|
|
|
|
models = [m.strip() for m in models_text.strip().split("\n") if m.strip()] |
|
|
if len(models) < 2: |
|
|
return "Add at least 2 models." |
|
|
|
|
|
weights = [] |
|
|
if weights_text.strip(): |
|
|
try: |
|
|
weights = [float(w.strip()) for w in weights_text.split(",")] |
|
|
except ValueError: |
|
|
return "Invalid weights." |
|
|
|
|
|
densities = [] |
|
|
if densities_text.strip(): |
|
|
try: |
|
|
densities = [float(d.strip()) for d in densities_text.split(",")] |
|
|
except ValueError: |
|
|
return "Invalid densities." |
|
|
|
|
|
config = MergeConfig( |
|
|
method=method, |
|
|
models=models, |
|
|
base_model=base.strip(), |
|
|
weights=weights, |
|
|
densities=densities, |
|
|
) |
|
|
|
|
|
name = output_name.strip() or "ForgeKit-Merge" |
|
|
|
|
|
|
|
|
nb = generate_merge_notebook( |
|
|
config, |
|
|
output_model_name=name, |
|
|
hf_username=hf_user.strip(), |
|
|
include_quantize=True, |
|
|
include_deploy=False, |
|
|
quant_types=["Q5_K_M", "Q4_K_M"], |
|
|
) |
|
|
|
|
|
|
|
|
kaggle_nb_json = generate_kaggle_notebook(nb) |
|
|
|
|
|
|
|
|
result = push_and_run_kernel( |
|
|
notebook_json=kaggle_nb_json, |
|
|
kernel_title=f"ForgeKit-{name}", |
|
|
kaggle_username=username.strip(), |
|
|
kaggle_key=key.strip(), |
|
|
enable_gpu=True, |
|
|
enable_internet=True, |
|
|
) |
|
|
|
|
|
if result["success"]: |
|
|
return result["message"] |
|
|
else: |
|
|
return result["error"] |
|
|
|
|
|
kg_run_btn.click( |
|
|
run_on_kaggle, |
|
|
[kg_username, kg_key, kg_models, kg_method, kg_base, kg_weights, kg_densities, |
|
|
kg_output_name, kg_hf_user], |
|
|
kg_status, |
|
|
) |
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("#### Check Kernel Status") |
|
|
with gr.Row(): |
|
|
kg_check_slug = gr.Textbox(label="Kernel Slug", placeholder="username/forgekit-my-merged-7b") |
|
|
kg_check_btn = gr.Button("🔍 Check Status", variant="secondary") |
|
|
kg_check_out = gr.Markdown() |
|
|
|
|
|
def check_status(slug, username, key): |
|
|
if not slug.strip(): |
|
|
return "Enter a kernel slug (username/kernel-name)" |
|
|
result = check_kernel_status(slug.strip(), username.strip(), key.strip()) |
|
|
if result["success"]: |
|
|
msg = result["display"] |
|
|
if result.get("failure_message"): |
|
|
msg += f"\n\nError: {result['failure_message']}" |
|
|
return msg |
|
|
return result["error"] |
|
|
|
|
|
kg_check_btn.click(check_status, [kg_check_slug, kg_username, kg_key], kg_check_out) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Tab("🏆 Leaderboard", id="leaderboard"): |
|
|
gr.Markdown("""### Community Merge Leaderboard |
|
|
See what others have built with ForgeKit. Submit your own merge to get featured!""") |
|
|
|
|
|
lb_md = gr.Markdown(value=get_leaderboard()) |
|
|
lb_refresh = gr.Button("🔄 Refresh", variant="secondary") |
|
|
lb_refresh.click(lambda: get_leaderboard(), outputs=lb_md) |
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("### Submit Your Merge") |
|
|
with gr.Row(): |
|
|
sub_name = gr.Textbox(label="Model Name", placeholder="My-Awesome-Merge-7B") |
|
|
sub_author = gr.Textbox(label="Author", placeholder="Your HF username") |
|
|
sub_method = gr.Textbox(label="Merge Method", placeholder="DARE-TIES") |
|
|
with gr.Row(): |
|
|
sub_models = gr.Textbox(label="Source Models (short)", placeholder="Coder-7B + Math-7B") |
|
|
sub_link = gr.Textbox(label="HF Model Link", placeholder="https://huggingface.co/...") |
|
|
sub_btn = gr.Button("📤 Submit", variant="primary") |
|
|
sub_status = gr.Markdown() |
|
|
|
|
|
def submit_merge(name, author, method, models, link): |
|
|
if not all([name, author, method, models, link]): |
|
|
return "⚠️ Please fill in all fields" |
|
|
LEADERBOARD.append({ |
|
|
"name": name, "author": author, "method": method, |
|
|
"base": "", "models": models, "likes": 0, "link": link, |
|
|
}) |
|
|
return f"✅ **{name}** submitted! It will appear on the leaderboard." |
|
|
|
|
|
sub_btn.click(submit_merge, [sub_name, sub_author, sub_method, sub_models, sub_link], sub_status) |
|
|
|
|
|
|
|
|
gr.Markdown(""" |
|
|
--- |
|
|
<center> |
|
|
|
|
|
**ForgeKit** v0.1.0 — Built by [AIencoder](https://huggingface.co/AIencoder) | [Portfolio](https://aiencoder-portfolio.static.hf.space) | [GitHub](https://github.com/Ary5272) |
|
|
|
|
|
</center> |
|
|
""") |
|
|
|
|
|
|
|
|
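# 0.0.0.0:7860 is the host/port combination Hugging Face Spaces expects.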
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)