#!/usr/bin/env python3
"""
GPT-OSS Model Push Script

Specialized script for pushing GPT-OSS models to the Hugging Face Hub.
Handles LoRA weight merging and model card generation.
"""
import argparse
import os
import shutil
import sys
from datetime import datetime

from huggingface_hub import HfApi
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

def merge_lora_weights(checkpoint_path, base_model_name, output_path):
    """Merge LoRA weights into the base model for inference."""
    print(f"Loading base model: {base_model_name}")

    # Load base model; device_map="auto" lets accelerate place the weights,
    # so no explicit .cuda() call is needed (it would conflict with the dispatch)
    model_kwargs = {
        "attn_implementation": "eager",
        "torch_dtype": "auto",
        "use_cache": True,
        "device_map": "auto",
    }
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, **model_kwargs)

    print(f"Loading LoRA weights from: {checkpoint_path}")
    # Load the adapter and fold its weights into the base model
    model = PeftModel.from_pretrained(base_model, checkpoint_path)
    model = model.merge_and_unload()

    print(f"Saving merged model to: {output_path}")
    model.save_pretrained(output_path)

    # Save the tokenizer alongside the merged weights
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    tokenizer.save_pretrained(output_path)

    return model, tokenizer
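
# Example call (paths are illustrative):
#   model, tokenizer = merge_lora_weights(
#       checkpoint_path="./outputs/checkpoint-1000",
#       base_model_name="openai/gpt-oss-20b",
#       output_path="/tmp/gpt_oss_merged",
#   )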

def create_gpt_oss_model_card(model_name, experiment_name, trackio_url, dataset_repo, author_name, model_description):
    """Create a comprehensive model card for GPT-OSS models."""
    card_content = f"""---
language:
- en
- es
- fr
- it
- de
- zh
- hi
- ja
- ko
- ar
license: mit
tags:
- gpt-oss
- multilingual
- reasoning
- chain-of-thought
- fine-tuned
---

# {model_name}

## Model Description

{model_description}

This model is a fine-tuned version of OpenAI's GPT-OSS-20B model, optimized for multilingual reasoning tasks. It has been trained on the Multilingual-Thinking dataset to generate chain-of-thought reasoning in multiple languages.

## Training Details

- **Base Model**: openai/gpt-oss-20b
- **Training Dataset**: HuggingFaceH4/Multilingual-Thinking
- **Training Method**: LoRA (Low-Rank Adaptation)
- **Quantization**: MXFP4
- **Experiment**: {experiment_name}
- **Monitoring**: {trackio_url}

## Usage

### Basic Usage

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("{model_name}")
model = AutoModelForCausalLM.from_pretrained("{model_name}")

# Example: reasoning in Spanish
messages = [
    {{"role": "system", "content": "reasoning language: Spanish"}},
    {{"role": "user", "content": "What is the capital of Australia?"}}
]

input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt"
).to(model.device)

output_ids = model.generate(input_ids, max_new_tokens=512)
response = tokenizer.batch_decode(output_ids)[0]
print(response)
```

### Multilingual Reasoning

The model supports reasoning in multiple languages:

- English
- Spanish (Español)
- French (Français)
- Italian (Italiano)
- German (Deutsch)
- Chinese (中文)
- Hindi (हिन्दी)
- Japanese (日本語)
- Korean (한국어)
- Arabic (العربية)

### System Prompt Format

To control the reasoning language, use the system prompt:

```
reasoning language: [LANGUAGE]
```

Example:

```
reasoning language: German
```
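
A full message list following this format might look like the following sketch (mirroring the usage example above):

```python
messages = [
    {{"role": "system", "content": "reasoning language: German"}},
    {{"role": "user", "content": "What is the capital of Australia?"}}
]
```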

## Training Configuration

- **LoRA Rank**: 8
- **LoRA Alpha**: 16
- **Target Modules**: all-linear
- **Learning Rate**: 2e-4
- **Batch Size**: 4
- **Sequence Length**: 2048
- **Mixed Precision**: bf16
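
A `peft` LoRA configuration matching the values above might look like the following sketch (illustrative; the exact training code may differ):

```python
from peft import LoraConfig

# Sketch of a LoRA config consistent with the settings listed above
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules="all-linear",
    task_type="CAUSAL_LM",
)
```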

## Dataset Information

The model was trained on the Multilingual-Thinking dataset, which contains 1,000 examples of chain-of-thought reasoning translated into multiple languages.

## Limitations

- The model is designed for reasoning tasks and may not perform optimally on other tasks
- Reasoning quality may vary across languages
- The model inherits limitations from the base GPT-OSS-20B model

## Citation

If you use this model in your research, please cite:

```bibtex
@misc{{{model_name.replace("/", "_").replace("-", "_")},
  author = {{{author_name}}},
  title = {{{model_name}}},
  year = {{{datetime.now().year}}},
  publisher = {{Hugging Face}},
  journal = {{Hugging Face repository}},
  howpublished = {{\\url{{https://huggingface.co/{model_name}}}}}
}}
```

## License

This model is licensed under the MIT License.

## Training Resources

- **Training Dataset**: https://huggingface.co/datasets/{dataset_repo}
- **Training Monitoring**: {trackio_url}
- **Base Model**: https://huggingface.co/openai/gpt-oss-20b

## Model Information

- **Architecture**: GPT-OSS-20B with LoRA adapters
- **Parameters**: 20B base + LoRA adapters
- **Context Length**: 2048 tokens
- **Languages**: 10+ languages supported
- **Task**: Multilingual reasoning and chain-of-thought generation
"""
    return card_content

def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experiment_name, dataset_repo, author_name, model_description):
    """Push a GPT-OSS model to the Hugging Face Hub."""
    print("=== GPT-OSS Model Push Pipeline ===")
    print(f"Checkpoint: {checkpoint_path}")
    print(f"Repository: {repo_name}")
    print(f"Experiment: {experiment_name}")
    print(f"Author: {author_name}")

    # Validate checkpoint path
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(f"Checkpoint path not found: {checkpoint_path}")

    # Create temporary directory for the merged model
    temp_output = f"/tmp/gpt_oss_merged_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    os.makedirs(temp_output, exist_ok=True)

    try:
        # Merge LoRA weights with the base model
        print("Merging LoRA weights with base model...")
        model, tokenizer = merge_lora_weights(
            checkpoint_path=checkpoint_path,
            base_model_name="openai/gpt-oss-20b",
            output_path=temp_output,
        )

        # Create and save the model card
        print("Creating model card...")
        model_card_content = create_gpt_oss_model_card(
            model_name=repo_name,
            experiment_name=experiment_name,
            trackio_url=trackio_url,
            dataset_repo=dataset_repo,
            author_name=author_name,
            model_description=model_description,
        )
        model_card_path = os.path.join(temp_output, "README.md")
        with open(model_card_path, "w", encoding="utf-8") as f:
            f.write(model_card_content)

        # Push to the Hugging Face Hub; pass the token explicitly rather
        # than relying on an environment variable
        print(f"Pushing model to: {repo_name}")
        api = HfApi(token=hf_token)

        # Create repository if it doesn't exist
        try:
            api.create_repo(repo_name, private=False, exist_ok=True)
        except Exception as e:
            print(f"Warning: Could not create repository: {e}")

        # Upload all files from the temporary directory
        print("Uploading model files...")
        api.upload_folder(
            folder_path=temp_output,
            repo_id=repo_name,
            repo_type="model",
        )

        print("✅ GPT-OSS model pushed successfully!")
        print(f"Model URL: https://huggingface.co/{repo_name}")
        return True
    except Exception as e:
        print(f"❌ Error pushing GPT-OSS model: {e}")
        return False
    finally:
        # Clean up the temporary directory on both success and error paths
        if os.path.exists(temp_output):
            shutil.rmtree(temp_output)

def main():
    parser = argparse.ArgumentParser(description="Push GPT-OSS model to Hugging Face Hub")
    parser.add_argument("checkpoint_path", help="Path to model checkpoint")
    parser.add_argument("repo_name", help="Hugging Face repository name")
    parser.add_argument("--token", required=True, help="Hugging Face token")
    parser.add_argument("--trackio-url", help="Trackio URL for the model card")
    parser.add_argument("--experiment-name", help="Experiment name")
    parser.add_argument("--dataset-repo", help="Dataset repository")
    parser.add_argument("--author-name", help="Author name")
    parser.add_argument("--model-description", help="Model description")
    args = parser.parse_args()

    # Fill in defaults for optional arguments
    experiment_name = args.experiment_name or "gpt_oss_finetune"
    dataset_repo = args.dataset_repo or "HuggingFaceH4/Multilingual-Thinking"
    author_name = args.author_name or "GPT-OSS Fine-tuner"
    model_description = args.model_description or "A fine-tuned version of OpenAI's GPT-OSS-20B model for multilingual reasoning tasks."

    success = push_gpt_oss_model(
        checkpoint_path=args.checkpoint_path,
        repo_name=args.repo_name,
        hf_token=args.token,
        trackio_url=args.trackio_url,
        experiment_name=experiment_name,
        dataset_repo=dataset_repo,
        author_name=author_name,
        model_description=model_description,
    )
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()