ml-expert

Machine Learning Expert

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy this and send it to your AI assistant to learn

Install skill "ml-expert" with this command: npx skills add personamanagmentlayer/pcl/personamanagmentlayer-pcl-ml-expert

Machine Learning Expert

Expert guidance for machine learning systems, deep learning, model training, deployment, and MLOps practices.

Core Concepts

Machine Learning Fundamentals

  • Supervised learning (classification, regression)

  • Unsupervised learning (clustering, dimensionality reduction)

  • Reinforcement learning

  • Feature engineering

  • Model evaluation and validation

  • Hyperparameter tuning

Deep Learning

  • Neural networks (CNNs, RNNs, Transformers)

  • Transfer learning

  • Fine-tuning pre-trained models

  • Attention mechanisms

  • GANs (Generative Adversarial Networks)

  • Autoencoders

MLOps

  • Model versioning and tracking

  • Experiment management

  • Model deployment and serving

  • Monitoring and retraining

  • CI/CD for ML pipelines

  • A/B testing for models

Supervised Learning

# Imports for the supervised-learning pipeline (originally collapsed onto one
# line by extraction, which is a syntax error). One import per line, grouped
# third-party, per PEP 8.
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler

class MLPipeline:
    """End-to-end tabular ML pipeline: split/scale data, train a random
    forest, evaluate it, and persist the fitted artifacts together.

    The extracted original had a collapsed class header with ``def init``;
    the constructor is restored as ``__init__`` and the methods re-attached
    to the class.
    """

    def __init__(self):
        # Scaler is fit on the training split only (see prepare_data) to
        # avoid leaking test-set statistics into training.
        self.scaler = StandardScaler()
        self.model = None
        self.feature_names = None

    def prepare_data(self, X: pd.DataFrame, y: pd.Series, test_size: float = 0.2):
        """Split into stratified train/test sets and standardize features.

        Returns ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The
        test split is transformed with the training-set statistics only.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42, stratify=y
        )

        # Fit on train, transform test — never fit on the test split.
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        self.feature_names = X.columns.tolist()

        return X_train_scaled, X_test_scaled, y_train, y_test

    def train_classifier(self, X_train, y_train, n_estimators: int = 100):
        """Train a random forest; return 5-fold CV stats and feature importances."""
        self.model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=10,
            random_state=42,
            n_jobs=-1,
        )

        self.model.fit(X_train, y_train)

        # Cross-validation on the training split only.
        cv_scores = cross_val_score(self.model, X_train, y_train, cv=5)

        return {
            "cv_mean": cv_scores.mean(),
            "cv_std": cv_scores.std(),
            "feature_importance": dict(zip(
                self.feature_names,
                self.model.feature_importances_,
            )),
        }

    def evaluate(self, X_test, y_test) -> dict:
        """Evaluate the fitted model on held-out data.

        Returns predictions, class probabilities, the confusion matrix
        (as nested lists), and the sklearn classification report as a dict.
        """
        y_pred = self.model.predict(X_test)
        y_proba = self.model.predict_proba(X_test)

        return {
            "predictions": y_pred,
            "probabilities": y_proba,
            "confusion_matrix": confusion_matrix(y_test, y_pred).tolist(),
            "classification_report": classification_report(
                y_test, y_pred, output_dict=True
            ),
        }

    def save_model(self, path: str):
        """Persist model, scaler, and feature names in one bundle so the
        three artifacts can never drift out of sync on disk."""
        joblib.dump({
            "model": self.model,
            "scaler": self.scaler,
            "feature_names": self.feature_names,
        }, path)

Deep Learning with PyTorch

# PyTorch imports (originally collapsed onto one line by extraction).
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

class NeuralNetwork(nn.Module):
    """Three-layer MLP classifier: input -> hidden -> hidden//2 -> classes,
    with ReLU activations and dropout after the first layer.

    Restored from a collapsed header: ``init`` -> ``__init__`` and
    ``super().init()`` -> ``super().__init__()``.
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, num_classes)

    def forward(self, x):
        """Return raw class logits (no softmax — pair with CrossEntropyLoss)."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

class Trainer:
    """Training/evaluation harness for a classification model.

    Restored from a collapsed class header (``def init`` -> ``__init__``).
    Uses CrossEntropyLoss, so the model must return raw logits.
    """

    def __init__(self, model, device='cuda' if torch.cuda.is_available() else 'cpu'):
        self.model = model.to(device)
        self.device = device
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(model.parameters(), lr=0.001)

    def train_epoch(self, dataloader: DataLoader) -> float:
        """Run one optimization pass over the loader; return mean batch loss."""
        self.model.train()
        total_loss = 0

        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = self.criterion(output, target)

            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()

        return total_loss / len(dataloader)

    def evaluate(self, dataloader: DataLoader) -> dict:
        """Compute top-1 accuracy (percent) over the loader, gradient-free."""
        self.model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for data, target in dataloader:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                # Predicted class = argmax over the logits dimension.
                _, predicted = torch.max(output.data, 1)
                total += target.size(0)
                correct += (predicted == target).sum().item()

        return {
            "accuracy": 100 * correct / total,
            "total_samples": total,
        }

    def train(self, train_loader: DataLoader, val_loader: DataLoader,
              epochs: int = 10):
        """Full training loop; returns per-epoch loss/accuracy history."""
        history = {"train_loss": [], "val_acc": []}

        for epoch in range(epochs):
            train_loss = self.train_epoch(train_loader)
            val_metrics = self.evaluate(val_loader)

            history["train_loss"].append(train_loss)
            history["val_acc"].append(val_metrics["accuracy"])

            print(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss:.4f} - Val Acc: {val_metrics['accuracy']:.2f}%")

        return history

Model Deployment

# Serving-layer imports (originally collapsed onto one line by extraction).
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

app = FastAPI()


class PredictionRequest(BaseModel):
    """Request payload: a flat feature vector, in training feature order."""
    # presumably must match the length the model was trained on — TODO confirm
    features: list[float]


class PredictionResponse(BaseModel):
    """Response: predicted class, top-class confidence, serving model version."""
    prediction: int
    probability: float
    model_version: str

class ModelServer:
    """Loads a persisted model bundle and serves single-row predictions.

    Restored from a collapsed class header (``def init`` -> ``__init__``).
    """

    def __init__(self, model_path: str):
        # NOTE(review): joblib.load unpickles arbitrary objects — only load
        # bundles from trusted sources.
        self.model_data = joblib.load(model_path)
        self.model = self.model_data["model"]
        self.scaler = self.model_data["scaler"]
        self.version = "1.0.0"

    def predict(self, features: np.ndarray) -> dict:
        """Scale one feature vector and return class, confidence, and version."""
        # Reshape to a single-row 2-D array, as the scaler/model expect.
        features_scaled = self.scaler.transform(features.reshape(1, -1))

        prediction = self.model.predict(features_scaled)[0]
        # Confidence = probability of the most likely class.
        probability = self.model.predict_proba(features_scaled)[0].max()

        return {
            "prediction": int(prediction),
            "probability": float(probability),
            "model_version": self.version,
        }

# Global model instance, created once at import time so the bundle is loaded
# before the first request arrives.
model_server = ModelServer("model.pkl")


@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    """Score a single feature vector with the loaded model."""
    try:
        features = np.array(request.features)
        result = model_server.predict(features)
        return PredictionResponse(**result)
    except Exception as e:
        # Surface failures as a 500 with the message, not a raw stack trace.
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/health")
async def health():
    """Liveness probe; also reports which model version is being served."""
    return {"status": "healthy", "model_version": model_server.version}

MLOps with MLflow

# MLflow imports (originally collapsed onto one line by extraction).
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

class MLflowExperiment:
    """Thin wrapper around MLflow tracking: log runs, register models, and
    promote registry versions.

    Restored from a collapsed class header (``def init`` -> ``__init__``).
    """

    def __init__(self, experiment_name: str):
        # set_experiment creates the experiment if it does not exist yet.
        mlflow.set_experiment(experiment_name)
        self.client = MlflowClient()

    def log_training_run(self, model, X_train, y_train, X_test, y_test,
                         params: dict):
        """Train *model*, log params/metrics/artifacts, and return the run id."""
        with mlflow.start_run():
            # Log hyperparameters first so a failed run is still attributable.
            mlflow.log_params(params)

            model.fit(X_train, y_train)

            train_score = model.score(X_train, y_train)
            test_score = model.score(X_test, y_test)

            mlflow.log_metric("train_accuracy", train_score)
            mlflow.log_metric("test_accuracy", test_score)

            # Persist the fitted model as a run artifact.
            mlflow.sklearn.log_model(model, "model")

            # Tree-based models expose feature_importances_; others are skipped.
            if hasattr(model, 'feature_importances_'):
                # NOTE(review): keys are ints and values may be numpy floats;
                # confirm mlflow.log_dict serializes these as intended.
                feature_importance = dict(enumerate(model.feature_importances_))
                mlflow.log_dict(feature_importance, "feature_importance.json")

            run_id = mlflow.active_run().info.run_id
            return run_id

    def register_model(self, run_id: str, model_name: str):
        """Register the run's logged model in the MLflow model registry."""
        model_uri = f"runs:/{run_id}/model"
        mlflow.register_model(model_uri, model_name)

    def promote_to_production(self, model_name: str, version: int):
        """Transition the given registry version to the Production stage."""
        self.client.transition_model_version_stage(
            name=model_name,
            version=version,
            stage="Production"
        )

Best Practices

Data Preparation

  • Handle missing values appropriately

  • Scale/normalize features

  • Encode categorical variables properly

  • Split data before any preprocessing

  • Use stratified splits for imbalanced data

  • Create validation set for hyperparameter tuning

Model Training

  • Start with simple baselines

  • Use cross-validation

  • Monitor training and validation metrics

  • Implement early stopping

  • Save best model checkpoints

  • Track experiments systematically

Deployment

  • Version models and datasets

  • Monitor model performance in production

  • Implement model A/B testing

  • Set up retraining pipelines

  • Log predictions for analysis

  • Implement fallback mechanisms

Anti-Patterns

❌ Training on test data (data leakage)

❌ No validation set for hyperparameter tuning

❌ Ignoring class imbalance

❌ Not scaling features

❌ Overfitting to training data

❌ No model versioning

❌ Missing monitoring in production

Resources

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

finance-expert

No summary provided by upstream source.

Repository Source — Needs Review
General

trading-expert

No summary provided by upstream source.

Repository Source — Needs Review
General

dart-expert

No summary provided by upstream source.

Repository Source — Needs Review
General

postgresql-expert

No summary provided by upstream source.

Repository Source — Needs Review