Database Development
Schema design, optimization, and management best practices.
Schema Design
Normalization
-- 1NF: Atomic values, no repeating groups -- BAD CREATE TABLE orders ( id INT, products VARCHAR(255) -- "shirt,pants,shoes" - NOT atomic );
-- GOOD CREATE TABLE orders (id INT PRIMARY KEY); CREATE TABLE order_items ( order_id INT REFERENCES orders(id), product_id INT REFERENCES products(id), quantity INT );
-- 2NF: No partial dependencies (all non-key columns depend on entire PK) -- 3NF: No transitive dependencies (non-key columns don't depend on other non-key columns)
Data Types
-- Use appropriate types CREATE TABLE users ( id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY, -- BIGINT for large tables uuid CHAR(36) NOT NULL UNIQUE, -- Fixed-length UUID email VARCHAR(255) NOT NULL, -- Variable length status ENUM('active', 'inactive', 'banned'), -- Constrained values balance DECIMAL(10,2) NOT NULL DEFAULT 0, -- Exact precision for money metadata JSON, -- Flexible schema created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP );
-- PostgreSQL specific CREATE TABLE events ( id UUID DEFAULT gen_random_uuid() PRIMARY KEY, data JSONB NOT NULL, -- Binary JSON, indexable tags TEXT[] NOT NULL DEFAULT '{}', -- Array type tsv TSVECTOR, -- Full-text search created_at TIMESTAMPTZ DEFAULT NOW() -- Timezone-aware );
Relationships
-- One-to-Many CREATE TABLE posts ( id SERIAL PRIMARY KEY, user_id INT NOT NULL REFERENCES users(id) ON DELETE CASCADE, title VARCHAR(255) NOT NULL );
-- Many-to-Many with pivot table CREATE TABLE post_tags ( post_id INT REFERENCES posts(id) ON DELETE CASCADE, tag_id INT REFERENCES tags(id) ON DELETE CASCADE, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (post_id, tag_id) );
-- One-to-One CREATE TABLE user_profiles ( user_id INT PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE, bio TEXT, avatar_url VARCHAR(255) );
Indexing
Index Types
-- B-Tree (default, most common) CREATE INDEX idx_users_email ON users(email);
-- Composite index (order matters!) CREATE INDEX idx_orders_user_status ON orders(user_id, status); -- Good for: WHERE user_id = ? AND status = ? -- Good for: WHERE user_id = ? -- NOT good for: WHERE status = ? (leftmost prefix rule)
-- Unique index CREATE UNIQUE INDEX idx_users_email_unique ON users(email);
-- Partial index (PostgreSQL) CREATE INDEX idx_active_users ON users(email) WHERE status = 'active';
-- Expression index CREATE INDEX idx_users_email_lower ON users(LOWER(email));
-- Full-text index (MySQL) CREATE FULLTEXT INDEX idx_posts_content ON posts(title, content);
-- GIN index for JSONB (PostgreSQL) CREATE INDEX idx_events_data ON events USING GIN(data);
Index Strategy
-- Index columns used in: -- 1. WHERE clauses -- 2. JOIN conditions -- 3. ORDER BY (if used frequently) -- 4. Foreign keys
-- Check existing indexes SHOW INDEX FROM orders; -- MySQL \d orders -- PostgreSQL
-- Analyze query execution EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123;
Query Optimization
EXPLAIN Analysis
-- MySQL EXPLAIN SELECT * FROM orders WHERE user_id = 123 AND created_at > '2024-01-01';
-- Look for: -- type: "ref" or "range" (good), "ALL" (table scan, bad) -- key: Which index is used (NULL = no index) -- rows: Estimated rows examined
-- PostgreSQL EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT) SELECT * FROM orders WHERE user_id = 123;
Common Optimizations
-- BAD: SELECT * SELECT * FROM users WHERE id = 1;
-- GOOD: Select only needed columns SELECT id, name, email FROM users WHERE id = 1;
-- BAD: OR can prevent index usage SELECT * FROM users WHERE email = 'a@b.com' OR name = 'John';
-- GOOD: Use UNION for OR conditions SELECT * FROM users WHERE email = 'a@b.com' UNION ALL SELECT * FROM users WHERE name = 'John' AND email != 'a@b.com';
-- BAD: Functions on indexed columns SELECT * FROM users WHERE YEAR(created_at) = 2024;
-- GOOD: Use range SELECT * FROM users WHERE created_at >= '2024-01-01' AND created_at < '2025-01-01';
-- BAD: LIKE with leading wildcard SELECT * FROM products WHERE name LIKE '%shirt%';
-- GOOD: Full-text search SELECT * FROM products WHERE MATCH(name) AGAINST('shirt' IN BOOLEAN MODE);
N+1 Problem
-- BAD: N+1 queries -- Query 1: SELECT * FROM posts LIMIT 10 -- Query 2-11: SELECT * FROM users WHERE id = ? (for each post)
-- GOOD: JOIN SELECT p.*, u.name as author_name FROM posts p JOIN users u ON p.user_id = u.id LIMIT 10;
-- GOOD: Subquery with IN SELECT * FROM users WHERE id IN (SELECT DISTINCT user_id FROM posts WHERE ...);
Migrations
Migration Best Practices
-- Always wrap in transactions BEGIN;
-- Add column (non-locking in PostgreSQL) ALTER TABLE users ADD COLUMN phone VARCHAR(20);
-- Add index concurrently (PostgreSQL, non-locking) CREATE INDEX CONCURRENTLY idx_users_phone ON users(phone);
-- Rename column safely ALTER TABLE users RENAME COLUMN phone TO phone_number;
COMMIT;
-- Rollback script BEGIN; ALTER TABLE users DROP COLUMN phone_number; DROP INDEX idx_users_phone; COMMIT;
Safe Migration Patterns
-- Adding NOT NULL column with default -- Step 1: Add nullable column ALTER TABLE users ADD COLUMN status VARCHAR(20);
-- Step 2: Backfill data UPDATE users SET status = 'active' WHERE status IS NULL;
-- Step 3: Add NOT NULL constraint ALTER TABLE users ALTER COLUMN status SET NOT NULL; ALTER TABLE users ALTER COLUMN status SET DEFAULT 'active';
-- Renaming tables (zero downtime) -- Step 1: Create new table CREATE TABLE accounts (LIKE users INCLUDING ALL);
-- Step 2: Copy data INSERT INTO accounts SELECT * FROM users;
-- Step 3: Create triggers for sync -- Step 4: Switch application -- Step 5: Drop old table
Performance
Connection Pooling
// Node.js with pg-pool const { Pool } = require('pg');
const pool = new Pool({ host: 'localhost', database: 'myapp', max: 20, // Max connections idleTimeoutMillis: 30000, // Close idle connections connectionTimeoutMillis: 2000 });
// Always use pool, not direct connections const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);
Pagination
-- BAD: OFFSET for large datasets SELECT * FROM posts ORDER BY created_at DESC LIMIT 20 OFFSET 10000; -- Gets slower as offset increases
-- GOOD: Cursor-based pagination SELECT * FROM posts WHERE created_at < '2024-01-15 10:30:00' ORDER BY created_at DESC LIMIT 20;
-- GOOD: Keyset pagination with ID SELECT * FROM posts WHERE (created_at, id) < ('2024-01-15 10:30:00', 12345) ORDER BY created_at DESC, id DESC LIMIT 20;
Batch Operations
-- BAD: Many individual inserts INSERT INTO logs (message) VALUES ('log1'); INSERT INTO logs (message) VALUES ('log2'); -- ... 1000 more
-- GOOD: Batch insert INSERT INTO logs (message) VALUES ('log1'), ('log2'), ('log3'); -- Up to ~1000 at a time
-- GOOD: COPY for bulk loading (PostgreSQL) COPY logs (message) FROM '/path/to/file.csv' WITH CSV;
Transactions
ACID Properties
-- Atomicity: All or nothing BEGIN; UPDATE accounts SET balance = balance - 100 WHERE id = 1; UPDATE accounts SET balance = balance + 100 WHERE id = 2; -- If any fails, ROLLBACK COMMIT;
-- Isolation levels SET TRANSACTION ISOLATION LEVEL READ COMMITTED; -- Default SET TRANSACTION ISOLATION LEVEL SERIALIZABLE; -- Strictest
-- Deadlock prevention: Always lock in same order BEGIN; SELECT * FROM accounts WHERE id = 1 FOR UPDATE; -- Lock row SELECT * FROM accounts WHERE id = 2 FOR UPDATE; -- Do work COMMIT;
NoSQL Patterns
Document Database (MongoDB)
// Schema design: Embed vs Reference // Embed: Data accessed together, 1:few relationships { _id: ObjectId("..."), title: "Blog Post", author: { // Embedded name: "John", email: "john@example.com" }, comments: [ // Embedded array { text: "Great!", user: "Jane" } ] }
// Reference: Large documents, many relationships { _id: ObjectId("..."), title: "Blog Post", author_id: ObjectId("...") // Reference to users collection }
// Indexes db.posts.createIndex({ "author_id": 1 }); db.posts.createIndex({ "title": "text", "content": "text" }); // Text search
Key-Value (Redis)
Caching pattern
SET user:123 '{"name":"John"}' EX 3600 # Expires in 1 hour GET user:123
Counter
INCR page:views:homepage GET page:views:homepage
Rate limiting
INCR rate:ip:192.168.1.1 EXPIRE rate:ip:192.168.1.1 60 # Reset every minute
Backup & Recovery
MySQL
mysqldump -u root -p database > backup.sql mysql -u root -p database < backup.sql
PostgreSQL
pg_dump -Fc database > backup.dump pg_restore -d database backup.dump
Point-in-time recovery (PostgreSQL)
Requires WAL archiving configured
pg_basebackup -D /backup/base -Fp -Xs -P
Monitoring Queries
-- MySQL slow query log SET GLOBAL slow_query_log = 'ON'; SET GLOBAL long_query_time = 1; -- Log queries > 1 second
-- PostgreSQL: Currently running queries SELECT pid, now() - pg_stat_activity.query_start AS duration, query FROM pg_stat_activity WHERE state = 'active' ORDER BY duration DESC;
-- Table sizes SELECT table_name, pg_size_pretty(pg_total_relation_size(quote_ident(table_name))) FROM information_schema.tables WHERE table_schema = 'public' ORDER BY pg_total_relation_size(quote_ident(table_name)) DESC;