graceful-shutdown

Clean shutdown without data loss.

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy this and send it to your AI assistant to learn

Install skill "graceful-shutdown" with this command: npx skills add dadbodgeoff/drift/dadbodgeoff-drift-graceful-shutdown

Graceful Shutdown

Clean shutdown without data loss.

When to Use This Skill

  • Running background workers

  • Processing queues or streams

  • Buffering data before persistence

  • Any long-running process that handles state

Core Concepts

  • Signal handlers - Catch SIGTERM/SIGINT

  • In-flight tracking - Know what's still running

  • Buffer draining - Flush before exit

  • Cleanup callbacks - Close connections properly

Shutdown Flow

SIGTERM received
       │
       ▼
Stop accepting new work
       │
       ▼
Wait for in-flight jobs
       │
       ▼
Drain buffers
       │
       ▼
Run cleanup callbacks
       │
       ▼
Process exits

TypeScript Implementation

// graceful-shutdown.ts

/** Cleanup hook run once during shutdown, after all buffers have been drained. */
type ShutdownCallback = () => Promise<void>;

/** Buffer-flush hook; reports how many items were flushed and how many were dropped. */
type DrainCallback = () => Promise<{ flushed: number; dropped: number }>;

/** Bookkeeping record for a unit of work that has been started but not yet completed. */
interface InFlightJob {
  id: string;
  workerName: string;
  startedAt: Date;
  /** Age (ms since `startedAt`) after which shutdown stops waiting for this job. */
  timeoutMs: number;
}

/**
 * Coordinates an orderly process shutdown:
 * wait for in-flight jobs -> drain buffers -> run cleanup callbacks -> exit.
 */
class GracefulShutdown {
  private isShuttingDown = false;
  private shutdownPromise: Promise<void> | null = null;
  private callbacks: ShutdownCallback[] = [];
  private drainCallbacks: DrainCallback[] = [];
  private inFlightJobs = new Map<string, InFlightJob>();
  /** Upper bound on how long shutdown waits for in-flight jobs. */
  private shutdownTimeoutMs = 30000;

  /** Installs SIGTERM and SIGINT handlers that start the shutdown sequence. */
  registerSignals(): void {
    const handler = (signal: string): void => {
      console.log(`[Shutdown] Received ${signal}`);
      // The promise must not float: an unexpected rejection is logged and
      // turned into a non-zero exit instead of an unhandled rejection.
      this.shutdown(`Signal: ${signal}`).catch((err: unknown) => {
        console.error('[Shutdown] Failed:', err);
        process.exit(1);
      });
    };

    process.on('SIGTERM', () => handler('SIGTERM'));
    process.on('SIGINT', () => handler('SIGINT'));
  }

  /** Registers a cleanup callback; callbacks run sequentially in registration order. */
  onShutdown(callback: ShutdownCallback): void {
    this.callbacks.push(callback);
  }

  /** Registers a buffer-drain callback; drains run before any cleanup callback. */
  onDrain(callback: DrainCallback): void {
    this.drainCallbacks.push(callback);
  }

  /**
   * Starts tracking a job so shutdown waits for it.
   * Ignored once shutdown has begun, so late jobs cannot extend the wait.
   */
  trackJob(id: string, workerName: string, timeoutMs = 60000): void {
    if (this.isShuttingDown) return;
    this.inFlightJobs.set(id, { id, workerName, startedAt: new Date(), timeoutMs });
  }

  /** Marks a tracked job as finished; unknown ids are ignored. */
  completeJob(id: string): void {
    this.inFlightJobs.delete(id);
  }

  /** Returns true once `shutdown` has been called. */
  isShutdownInProgress(): boolean {
    return this.isShuttingDown;
  }

  /**
   * Runs the shutdown sequence. Idempotent: repeated calls share the first
   * run, so the sequence (and the final `process.exit`) happens only once.
   *
   * @param reason Human-readable trigger, used for logging only.
   */
  async shutdown(reason: string): Promise<void> {
    if (this.shutdownPromise) return this.shutdownPromise;

    this.isShuttingDown = true;
    console.log(`[Shutdown] Starting: ${reason}`);

    this.shutdownPromise = this.performShutdown();
    return this.shutdownPromise;
  }

  /** Executes the three shutdown phases in order, then exits with code 0. */
  private async performShutdown(): Promise<void> {
    const startTime = Date.now();

    await this.waitForInFlightJobs(startTime);
    await this.drainBuffers();
    await this.runCleanupCallbacks();

    console.log(`[Shutdown] Complete in ${Date.now() - startTime}ms`);
    process.exit(0);
  }

  /** Phase 1: polls until no jobs remain or the overall timeout elapses. */
  private async waitForInFlightJobs(startTime: number): Promise<void> {
    console.log(`[Shutdown] Waiting for ${this.inFlightJobs.size} jobs...`);

    while (this.inFlightJobs.size > 0) {
      if (Date.now() - startTime > this.shutdownTimeoutMs) {
        console.log(`[Shutdown] Timeout! ${this.inFlightJobs.size} jobs still running`);
        break;
      }

      // Stop tracking jobs that outlived their own timeout. This only removes
      // the bookkeeping entry; the underlying work is not cancelled.
      const now = Date.now();
      for (const [id, job] of this.inFlightJobs) {
        if (now - job.startedAt.getTime() > job.timeoutMs) {
          console.log(`[Shutdown] Force-completing stuck job: ${id}`);
          this.inFlightJobs.delete(id);
        }
      }

      await this.sleep(100);
    }
  }

  /** Phase 2: runs each drain callback (10 s limit each) and logs the totals. */
  private async drainBuffers(): Promise<void> {
    if (this.drainCallbacks.length === 0) return;

    console.log(`[Shutdown] Draining ${this.drainCallbacks.length} buffers...`);

    let totalFlushed = 0;
    let totalDropped = 0;

    for (const drain of this.drainCallbacks) {
      try {
        const result = await this.withTimeout(drain(), 10000, () => {
          // A timed-out drain contributes nothing to the totals, but say so
          // instead of silently reporting zero.
          console.error('[Shutdown] Drain timed out after 10000ms');
          return { flushed: 0, dropped: 0 };
        });
        totalFlushed += result.flushed;
        totalDropped += result.dropped;
      } catch (err) {
        console.error('[Shutdown] Drain error:', err);
      }
    }

    console.log(`[Shutdown] Drained: ${totalFlushed} flushed, ${totalDropped} dropped`);
  }

  /** Phase 3: runs each cleanup callback (5 s limit each); failures are logged, not fatal. */
  private async runCleanupCallbacks(): Promise<void> {
    console.log(`[Shutdown] Running ${this.callbacks.length} cleanup callbacks...`);

    for (const callback of this.callbacks) {
      try {
        await this.withTimeout(callback(), 5000, () => {
          throw new Error('Callback timeout');
        });
      } catch (err) {
        console.error('[Shutdown] Callback error:', err);
      }
    }
  }

  /**
   * Races `work` against a timer. If the timer fires first, the result of
   * `onTimeout` (or the error it throws) is used instead. The timer is always
   * cleared, so a fast `work` leaves no pending timeout behind.
   */
  private async withTimeout<T>(work: Promise<T>, ms: number, onTimeout: () => T): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<T>((resolve, reject) => {
      timer = setTimeout(() => {
        try {
          resolve(onTimeout());
        } catch (err) {
          reject(err);
        }
      }, ms);
    });

    try {
      return await Promise.race([work, timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

// Singleton
let instance: GracefulShutdown | null = null;

/** Returns the process-wide shutdown handler, creating it on first use. */
export function getShutdownHandler(): GracefulShutdown {
  if (instance === null) instance = new GracefulShutdown();
  return instance;
}

Python Implementation

# graceful_shutdown.py

import asyncio
import signal
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Callable, Awaitable, Dict, List, Optional, Set


@dataclass
class InFlightJob:
    """A unit of work that has been started and not yet reported complete."""

    id: str
    worker_name: str
    started_at: datetime
    # Age in seconds after which shutdown stops waiting for this job.
    timeout_seconds: float


ShutdownCallback = Callable[[], Awaitable[None]]
DrainCallback = Callable[[], Awaitable[Dict[str, int]]]


class GracefulShutdown:
    """Coordinates an orderly shutdown: wait for jobs, drain buffers, run cleanup."""

    def __init__(self, timeout_seconds: float = 30.0):
        """
        Args:
            timeout_seconds: Upper bound on how long shutdown waits for
                in-flight jobs before moving on to draining.
        """
        self._is_shutting_down = False
        self._shutdown_task: Optional[asyncio.Task] = None
        self._callbacks: List[ShutdownCallback] = []
        self._drain_callbacks: List[DrainCallback] = []
        self._in_flight: Dict[str, InFlightJob] = {}
        self._timeout = timeout_seconds
        # Strong references to tasks spawned from signal handlers, so they
        # cannot be garbage-collected before they finish.
        self._signal_tasks: Set[asyncio.Task] = set()

    def register_signals(self) -> None:
        """Install SIGTERM/SIGINT handlers that start the shutdown sequence.

        Uses ``loop.add_signal_handler``, which asyncio only provides on Unix.
        """
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running yet; keep the original behaviour of
            # registering on the current event loop.
            loop = asyncio.get_event_loop()
        for sig in (signal.SIGTERM, signal.SIGINT):
            loop.add_signal_handler(sig, self._handle_signal, loop, sig)

    def _handle_signal(self, loop: asyncio.AbstractEventLoop, sig: signal.Signals) -> None:
        """Schedule ``shutdown`` on ``loop`` and keep the task referenced until done."""
        task = loop.create_task(self.shutdown(f"Signal: {sig.name}"))
        self._signal_tasks.add(task)
        task.add_done_callback(self._signal_tasks.discard)

    def on_shutdown(self, callback: ShutdownCallback) -> None:
        """Register a cleanup callback; callbacks run in registration order."""
        self._callbacks.append(callback)

    def on_drain(self, callback: DrainCallback) -> None:
        """Register a buffer-drain callback; drains run before cleanup callbacks."""
        self._drain_callbacks.append(callback)

    def track_job(self, job_id: str, worker_name: str, timeout_seconds: float = 60.0) -> None:
        """Start tracking a job so shutdown waits for it.

        Ignored once shutdown has begun, so late jobs cannot extend the wait.
        """
        if self._is_shutting_down:
            return
        self._in_flight[job_id] = InFlightJob(
            id=job_id,
            worker_name=worker_name,
            started_at=datetime.now(),
            timeout_seconds=timeout_seconds,
        )

    def complete_job(self, job_id: str) -> None:
        """Mark a tracked job as finished; unknown ids are ignored."""
        self._in_flight.pop(job_id, None)

    @property
    def is_shutting_down(self) -> bool:
        """True once ``shutdown`` has been called."""
        return self._is_shutting_down

    async def shutdown(self, reason: str) -> None:
        """Run the shutdown sequence once; concurrent callers share the same run.

        Args:
            reason: Human-readable trigger, used for logging only.
        """
        if self._shutdown_task is None:
            self._is_shutting_down = True
            print(f"[Shutdown] Starting: {reason}")
            self._shutdown_task = asyncio.create_task(self._perform_shutdown())

        # shield: cancelling one waiting caller must not cancel the shared
        # shutdown sequence that every other caller is waiting on.
        await asyncio.shield(self._shutdown_task)

    async def _perform_shutdown(self) -> None:
        """Wait for in-flight jobs, drain buffers, then run cleanup callbacks."""
        # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
        start_time = time.monotonic()

        # Wait for in-flight jobs
        print(f"[Shutdown] Waiting for {len(self._in_flight)} jobs...")

        while self._in_flight:
            if time.monotonic() - start_time > self._timeout:
                print(f"[Shutdown] Timeout! {len(self._in_flight)} jobs still running")
                break

            # Stop tracking jobs that outlived their own timeout. This only
            # removes the bookkeeping entry; the work itself is not cancelled.
            now = datetime.now()
            stuck = [
                job_id for job_id, job in self._in_flight.items()
                if (now - job.started_at).total_seconds() > job.timeout_seconds
            ]
            for job_id in stuck:
                print(f"[Shutdown] Force-completing stuck job: {job_id}")
                self._in_flight.pop(job_id, None)

            await asyncio.sleep(0.1)

        # Drain buffers
        for drain in self._drain_callbacks:
            try:
                result = await asyncio.wait_for(drain(), timeout=10.0)
                print(f"[Shutdown] Drained: {result}")
            except asyncio.TimeoutError:
                # str(TimeoutError()) is empty, so report the timeout explicitly.
                print("[Shutdown] Drain error: timed out after 10.0s")
            except Exception as e:
                print(f"[Shutdown] Drain error: {e}")

        # Run cleanup callbacks
        for callback in self._callbacks:
            try:
                await asyncio.wait_for(callback(), timeout=5.0)
            except asyncio.TimeoutError:
                print("[Shutdown] Callback error: timed out after 5.0s")
            except Exception as e:
                print(f"[Shutdown] Callback error: {e}")

        elapsed = time.monotonic() - start_time
        print(f"[Shutdown] Complete in {elapsed:.1f}s")


# Singleton
_instance: Optional[GracefulShutdown] = None


def get_shutdown_handler() -> GracefulShutdown:
    """Return the process-wide shutdown handler, creating it on first use."""
    global _instance
    if _instance is None:
        _instance = GracefulShutdown()
    return _instance

Usage Examples

Basic Setup

// Obtain the shared handler and install the SIGTERM/SIGINT listeners.
const shutdown = getShutdownHandler();
shutdown.registerSignals();

// Register cleanup: runs after all buffers have been drained.
shutdown.onShutdown(async () => {
  await database.close();
  await redis.quit();
});

// Register buffer drain: runs before any cleanup callback.
shutdown.onDrain(async () => {
  return backpressureBuffer.flush();
});

Job Tracking

/**
 * Processes one job while keeping the shutdown handler informed, so a
 * shutdown in progress waits for the job instead of cutting it off.
 */
async function processJob(jobId: string): Promise<void> {
  const shutdown = getShutdownHandler();

  // Don't start new work during shutdown
  if (shutdown.isShutdownInProgress()) {
    return;
  }

  shutdown.trackJob(jobId, 'my-worker', 30000);

  try {
    await doWork(jobId);
  } finally {
    // Always untrack, even when doWork throws, so shutdown is not left
    // waiting on a job that has already ended.
    shutdown.completeJob(jobId);
  }
}

With Express/Fastify

const shutdown = getShutdownHandler();
shutdown.registerSignals();

// Stop accepting new requests
shutdown.onShutdown(async () => {
  await new Promise<void>((resolve, reject) => {
    // Surface a close error (e.g. the server was not listening) instead of
    // dropping it; the shutdown handler logs rejected callbacks.
    server.close((err?: Error) => (err ? reject(err) : resolve()));
  });
});

// Close database connections
shutdown.onShutdown(async () => {
  await prisma.$disconnect();
});

Best Practices

  • Register signals early - First thing in app startup

  • Track all in-flight work - With appropriate timeouts

  • Drain before cleanup - Buffers first, connections last

  • Set reasonable timeouts - Don't hang forever

  • Check before new work - Don't start during shutdown

Common Mistakes

  • Not registering signal handlers

  • Starting new work during shutdown

  • No timeout on cleanup callbacks

  • Forgetting to track in-flight jobs

  • Closing connections before draining buffers

Related Skills

  • Background Jobs

  • Backpressure

  • Health Checks

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

General

oauth-social-login

No summary provided by upstream source.

Repository Source · Needs Review
General

sse-streaming

No summary provided by upstream source.

Repository Source · Needs Review
General

multi-tenancy

No summary provided by upstream source.

Repository Source · Needs Review
General

deduplication

No summary provided by upstream source.

Repository Source · Needs Review