Database Development

Schema design, optimization, and management best practices.

Schema Design

Normalization

-- 1NF: Atomic values, no repeating groups -- BAD CREATE TABLE orders ( id INT, products VARCHAR(255) -- "shirt,pants,shoes" - NOT atomic );

-- GOOD CREATE TABLE orders (id INT PRIMARY KEY); CREATE TABLE order_items ( order_id INT REFERENCES orders(id), product_id INT REFERENCES products(id), quantity INT );

-- 2NF: No partial dependencies (all non-key columns depend on entire PK) -- 3NF: No transitive dependencies (non-key columns don't depend on other non-key columns)

Data Types

-- Use appropriate types CREATE TABLE users ( id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, -- BIGINT for large tables uuid CHAR(36) NOT NULL UNIQUE, -- Fixed-length UUID email VARCHAR(255) NOT NULL, -- Variable length status ENUM('active', 'inactive', 'banned'), -- Constrained values balance DECIMAL(10,2) NOT NULL DEFAULT 0, -- Exact precision for money metadata JSON, -- Flexible schema created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP );

-- PostgreSQL specific CREATE TABLE events ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, data JSONB NOT NULL, -- Binary JSON, indexable tags TEXT[] NOT NULL DEFAULT '{}', -- Array type tsv TSVECTOR, -- Full-text search created_at TIMESTAMPTZ DEFAULT NOW() -- Timezone-aware );

Relationships

-- One-to-Many CREATE TABLE posts ( id SERIAL PRIMARY KEY, user_id INT NOT NULL REFERENCES users(id) ON DELETE CASCADE, title VARCHAR(255) NOT NULL );

-- Many-to-Many with pivot table CREATE TABLE post_tags ( post_id INT REFERENCES posts(id) ON DELETE CASCADE, tag_id INT REFERENCES tags(id) ON DELETE CASCADE, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (post_id, tag_id) );

-- One-to-One CREATE TABLE user_profiles ( user_id INT PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE, bio TEXT, avatar_url VARCHAR(255) );

Indexing

Index Types

-- B-Tree (default, most common) CREATE INDEX idx_users_email ON users(email);

-- Composite index (order matters!) CREATE INDEX idx_orders_user_status ON orders(user_id, status); -- Good for: WHERE user_id = ? AND status = ? -- Good for: WHERE user_id = ? -- NOT good for: WHERE status = ? (leftmost prefix rule)

-- Unique index CREATE UNIQUE INDEX idx_users_email_unique ON users(email);

-- Partial index (PostgreSQL) CREATE INDEX idx_active_users ON users(email) WHERE status = 'active';

-- Expression index CREATE INDEX idx_users_email_lower ON users(LOWER(email));

-- Full-text index (MySQL) CREATE FULLTEXT INDEX idx_posts_content ON posts(title, content);

-- GIN index for JSONB (PostgreSQL) CREATE INDEX idx_events_data ON events USING GIN(data);

Index Strategy

-- Index columns used in: -- 1. WHERE clauses -- 2. JOIN conditions -- 3. ORDER BY (if used frequently) -- 4. Foreign keys

-- Check existing indexes SHOW INDEX FROM orders; -- MySQL \d orders -- PostgreSQL

-- Analyze query execution EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123;

Query Optimization

EXPLAIN Analysis

-- MySQL EXPLAIN SELECT * FROM orders WHERE user_id = 123 AND created_at > '2024-01-01';

-- Look for: -- type: "ref" or "range" (good), "ALL" (table scan, bad) -- key: Which index is used (NULL = no index) -- rows: Estimated rows examined

-- PostgreSQL EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT * FROM orders WHERE user_id = 123;

Common Optimizations

-- BAD: SELECT * SELECT * FROM users WHERE id = 1;

-- GOOD: Select only needed columns SELECT id, name, email FROM users WHERE id = 1;

-- BAD: OR can prevent index usage SELECT * FROM users WHERE email = 'a@b.com' OR name = 'John';

-- GOOD: Use UNION for OR conditions SELECT * FROM users WHERE email = 'a@b.com' UNION ALL SELECT * FROM users WHERE name = 'John' AND email != 'a@b.com';

-- BAD: Functions on indexed columns SELECT * FROM users WHERE YEAR(created_at) = 2024;

-- GOOD: Use range SELECT * FROM users WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01';

-- BAD: LIKE with leading wildcard SELECT * FROM products WHERE name LIKE '%shirt%';

-- GOOD: Full-text search SELECT * FROM products WHERE MATCH(name) AGAINST('shirt' IN BOOLEAN MODE);

N+1 Problem

-- BAD: N+1 queries -- Query 1: SELECT * FROM posts LIMIT 10 -- Query 2-11: SELECT * FROM users WHERE id = ? (for each post)

-- GOOD: JOIN SELECT p.*, u.name as author_name FROM posts p JOIN users u ON p.user_id = u.id LIMIT 10;

-- GOOD: Subquery with IN SELECT * FROM users WHERE id IN (SELECT DISTINCT user_id FROM posts WHERE ...);

Migrations

Migration Best Practices

-- Always wrap in transactions BEGIN;

-- Add column (non-locking in PostgreSQL) ALTER TABLE users ADD COLUMN phone VARCHAR(20);

-- Add index concurrently (PostgreSQL, non-locking) CREATE INDEX CONCURRENTLY idx_users_phone ON users(phone);

-- Rename column safely ALTER TABLE users RENAME COLUMN phone TO phone_number;

COMMIT;

-- Rollback script BEGIN; ALTER TABLE users DROP COLUMN phone_number; DROP INDEX idx_users_phone; COMMIT;

Safe Migration Patterns

-- Adding NOT NULL column with default -- Step 1: Add nullable column ALTER TABLE users ADD COLUMN status VARCHAR(20);

-- Step 2: Backfill data UPDATE users SET status = 'active' WHERE status IS NULL;

-- Step 3: Add NOT NULL constraint ALTER TABLE users ALTER COLUMN status SET NOT NULL; ALTER TABLE users ALTER COLUMN status SET DEFAULT 'active';

-- Renaming tables (zero downtime) -- Step 1: Create new table CREATE TABLE accounts (LIKE users INCLUDING ALL);

-- Step 2: Copy data INSERT INTO accounts SELECT * FROM users;

-- Step 3: Create triggers for sync -- Step 4: Switch application -- Step 5: Drop old table

Performance

Connection Pooling

// Node.js with pg-pool const { Pool } = require('pg');

const pool = new Pool({ host: 'localhost', database: 'myapp', max: 20, // Max connections idleTimeoutMillis: 30000, // Close idle connections connectionTimeoutMillis: 2000 });

// Always use pool, not direct connections const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);

Pagination

-- BAD: OFFSET for large datasets SELECT * FROM posts ORDER BY created_at DESC LIMIT 20 OFFSET 10000; -- Gets slower as offset increases

-- GOOD: Cursor-based pagination SELECT * FROM posts WHERE created_at < '2024-01-15 10:30:00' ORDER BY created_at DESC LIMIT 20;

-- GOOD: Keyset pagination with ID SELECT * FROM posts WHERE (created_at, id) < ('2024-01-15 10:30:00', 12345) ORDER BY created_at DESC, id DESC LIMIT 20;

Batch Operations

-- BAD: Many individual inserts INSERT INTO logs (message) VALUES ('log1'); INSERT INTO logs (message) VALUES ('log2'); -- ... 1000 more

-- GOOD: Batch insert INSERT INTO logs (message) VALUES ('log1'), ('log2'), ('log3'); -- Up to ~1000 at a time

-- GOOD: COPY for bulk loading (PostgreSQL) COPY logs (message) FROM '/path/to/file.csv' WITH CSV;

Transactions

ACID Properties

-- Atomicity: All or nothing BEGIN; UPDATE accounts SET balance = balance - 100 WHERE id = 1; UPDATE accounts SET balance = balance + 100 WHERE id = 2; -- If any fails, ROLLBACK COMMIT;

-- Isolation levels SET TRANSACTION ISOLATION LEVEL READ COMMITTED; -- Default SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; -- Strictest

-- Deadlock prevention: Always lock in same order BEGIN; SELECT * FROM accounts WHERE id = 1 FOR UPDATE; -- Lock row SELECT * FROM accounts WHERE id = 2 FOR UPDATE; -- Do work COMMIT;

NoSQL Patterns

Document Database (MongoDB)

// Schema design: Embed vs Reference // Embed: Data accessed together, 1:few relationships { _id: ObjectId("..."), title: "Blog Post", author: { // Embedded name: "John", email: "john@example.com" }, comments: [ // Embedded array { text: "Great!", user: "Jane" } ] }

// Reference: Large documents, many relationships { _id: ObjectId("..."), title: "Blog Post", author_id: ObjectId("...") // Reference to users collection }

// Indexes db.posts.createIndex({ "author_id": 1 }); db.posts.createIndex({ "title": "text", "content": "text" }); // Text search

Key-Value (Redis)

Caching pattern

SET user:123 '{"name":"John"}' EX 3600 # Expires in 1 hour GET user:123

Counter

INCR page:views:homepage GET page:views:homepage

Rate limiting

INCR rate:ip:192.168.1.1 EXPIRE rate:ip:192.168.1.1 60 # Reset every minute

Backup & Recovery

MySQL

mysqldump -u root -p database > backup.sql mysql -u root -p database < backup.sql

PostgreSQL

pg_dump -Fc database > backup.dump pg_restore -d database backup.dump

Point-in-time recovery (PostgreSQL)

Requires WAL archiving configured

pg_basebackup -D /backup/base -Fp -Xs -P

Monitoring Queries

-- MySQL slow query log SET GLOBAL slow_query_log = 'ON'; SET GLOBAL long_query_time = 1; -- Log queries > 1 second

-- PostgreSQL: Currently running queries SELECT pid, now() - pg_stat_activity.query_start AS duration, query FROM pg_stat_activity WHERE state = 'active' ORDER BY duration DESC;

-- Table sizes SELECT table_name, pg_size_pretty(pg_total_relation_size(quote_ident(table_name))) FROM information_schema.tables WHERE table_schema = 'public' ORDER BY pg_total_relation_size(quote_ident(table_name)) DESC;

database

Safety Notice

Copy this and send it to your AI assistant to learn

Caching pattern

Counter

Rate limiting

MySQL

PostgreSQL

Point-in-time recovery (PostgreSQL)

Requires WAL archiving configured

Source Transparency

Related Skills

php

laravel

javascript

landing-page