| | """ |
| | PyPilot Training Manager - Advanced distributed training with monitoring |
| | """ |
| | import torch |
| | import torch.nn as nn |
| | from torch.utils.data import DataLoader, Dataset |
| | from transformers import TrainingArguments, Trainer, EarlyStoppingCallback |
| | import wandb |
| | import numpy as np |
| | import time |
| | from datetime import datetime |
| | import os |
| |
|
class CodeDataset(Dataset):
    def __init__(self, tokenized_data):
        self.data = tokenized_data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
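
# CodeDataset expects pre-tokenized examples, e.g. a list of dicts with
# "input_ids" / "attention_mask" / "labels" keys. A minimal sketch, assuming a
# hypothetical encode_corpus() preprocessing helper (not part of this file):
#
#   tokenized = [{"input_ids": ids, "attention_mask": mask, "labels": ids}
#                for ids, mask in encode_corpus(raw_files)]
#   dataset = CodeDataset(tokenized)
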
class PyPilotTrainingManager:
    def __init__(self, model, model_name="PyPilot"):
        self.model = model
        self.model_name = model_name
        self.training_history = []
        self.best_loss = float('inf')

    def setup_distributed_training(self, use_fp16=True, use_gradient_checkpointing=True):
        """Configure distributed training options"""
        training_args = TrainingArguments(
            output_dir="./pypilot-checkpoints",
            overwrite_output_dir=True,
            num_train_epochs=10,
            per_device_train_batch_size=4,
            per_device_eval_batch_size=4,
            gradient_accumulation_steps=8,  # effective batch size: 4 * 8 = 32 per device
            learning_rate=5e-5,
            weight_decay=0.01,
            warmup_steps=1000,
            logging_dir="./logs",
            logging_steps=500,
            evaluation_strategy="steps",  # required so eval_steps takes effect
            eval_steps=1000,
            save_steps=2000,  # must be a multiple of eval_steps when loading the best model
            save_total_limit=5,
            load_best_model_at_end=True,   # required by EarlyStoppingCallback
            metric_for_best_model="loss",
            prediction_loss_only=False,    # compute_metrics needs the raw logits
            remove_unused_columns=False,
            fp16=use_fp16,
            dataloader_pin_memory=False,
            gradient_checkpointing=use_gradient_checkpointing,
            report_to=["wandb"],
            run_name=f"pypilot-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
        )
        return training_args

    def setup_wandb_monitoring(self, project_name="pypilot"):
        """Initialize Weights & Biases for experiment tracking"""
        wandb.init(
            project=project_name,
            name=f"pypilot-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
            config={
                "architecture": "Transformer",
                "dataset": "GitHub Code",
                "epochs": 10,
                "batch_size": 32,  # effective: 4 per device * 8 accumulation steps
            }
        )

    def create_advanced_callbacks(self):
        """Create callbacks for training optimization"""
        callbacks = [
            # Requires load_best_model_at_end=True and metric_for_best_model
            # in TrainingArguments (set in setup_distributed_training)
            EarlyStoppingCallback(early_stopping_patience=3),
        ]
        return callbacks

    def compute_metrics(self, eval_pred):
        """Compute advanced metrics for code generation"""
        predictions, labels = eval_pred
        predictions = torch.tensor(predictions)
        labels = torch.tensor(labels)

        # Shift by one position, assuming the usual causal-LM convention
        # (logits at position i predict the token at position i+1)
        predictions = predictions[..., :-1, :].contiguous()
        labels = labels[..., 1:].contiguous()

        # CrossEntropyLoss ignores the -100 padding label by default
        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(predictions.view(-1, predictions.size(-1)), labels.view(-1))
        perplexity = torch.exp(loss)

        # Token-level accuracy over non-padding positions only
        preds = torch.argmax(predictions, dim=-1)
        mask = labels != -100
        accuracy = (preds[mask] == labels[mask]).float().mean()

        return {
            "perplexity": perplexity.item(),
            "accuracy": accuracy.item(),
            "loss": loss.item()
        }

    def train_with_advanced_features(self, train_dataset, eval_dataset=None):
        """Start advanced training with all features"""
        print("🚀 Starting Advanced PyPilot Training...")

        self.setup_wandb_monitoring()

        training_args = self.setup_distributed_training()
        callbacks = self.create_advanced_callbacks()

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            compute_metrics=self.compute_metrics,
            callbacks=callbacks,
        )

        print("🎯 Training started with advanced features:")
        print("   - FP16 Precision: Enabled")
        print("   - Gradient Checkpointing: Enabled")
        print("   - Early Stopping: Enabled")
        print("   - W&B Monitoring: Enabled")

        trainer.train()

        trainer.save_model("./pypilot-final-model")
        print("✅ Training completed and model saved!")

        return trainer

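    def update_model_hyperparams(self, params):
        """Apply a hyperparameter combination before a search run.

        Minimal placeholder (assumed implementation, not in the original):
        the searched values here (batch_size, learning_rate) are consumed by
        TrainingArguments rather than by the model itself, so this just
        records the combination under test. Model-side knobs such as dropout
        could be applied here if PyPilotConfig exposes them.
        """
        self.current_params = params
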
    def hyperparameter_search(self, train_dataset, param_combinations):
        """Perform hyperparameter search"""
        best_params = None

        for i, params in enumerate(param_combinations):
            print(f"🔍 Testing hyperparameter combination {i+1}/{len(param_combinations)}")

            self.update_model_hyperparams(params)

            # Short one-epoch run to rank this combination
            quick_trainer = Trainer(
                model=self.model,
                args=TrainingArguments(
                    output_dir=f"./hparam-search-{i}",
                    num_train_epochs=1,
                    per_device_train_batch_size=params['batch_size'],
                    learning_rate=params['learning_rate'],
                ),
                train_dataset=train_dataset,
            )

            results = quick_trainer.train()

            if results.training_loss < self.best_loss:
                self.best_loss = results.training_loss
                best_params = params

        print(f"🎯 Best hyperparameters: {best_params}")
        return best_params

if __name__ == "__main__":
    from modeling_pypilot import PyPilotModel, PyPilotConfig

    config = PyPilotConfig()
    model = PyPilotModel(config)

    manager = PyPilotTrainingManager(model)
    print("✅ Training Manager ready!")
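
    # Example end-to-end run (tokenized_train / tokenized_eval are placeholders,
    # assumed to come from a separate preprocessing step):
    # train_dataset = CodeDataset(tokenized_train)
    # eval_dataset = CodeDataset(tokenized_eval)
    # manager.train_with_advanced_features(train_dataset, eval_dataset)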