PostgreSQL
Enterprise-grade PostgreSQL database design and optimization following industry best practices. This skill covers schema design, query optimization, indexing strategies, and production-ready patterns.
Purpose
Build performant, scalable database systems:
- Design efficient schemas
- Write optimized queries
- Implement proper indexing
- Handle transactions correctly
- Optimize for performance
- Ensure data integrity
Features
- Schema Design
-- Users: one row per account; role is constrained to a fixed set
-- and soft state (is_active, email_verified_at) lives on the row.
CREATE TABLE users (
    id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    email             VARCHAR(255) UNIQUE NOT NULL,
    password_hash     VARCHAR(255) NOT NULL,
    name              VARCHAR(100) NOT NULL,
    role              VARCHAR(20) DEFAULT 'user'
                      CHECK (role IN ('user', 'admin', 'moderator')),
    is_active         BOOLEAN DEFAULT TRUE,
    email_verified_at TIMESTAMPTZ,
    created_at        TIMESTAMPTZ DEFAULT NOW(),
    updated_at        TIMESTAMPTZ DEFAULT NOW()
);
-- Products: seller rows cascade-delete their products; a deleted
-- category detaches (SET NULL) rather than removing the product.
CREATE TABLE products (
    id             UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name           VARCHAR(255) NOT NULL,
    description    TEXT,
    price          DECIMAL(10, 2) NOT NULL CHECK (price >= 0),
    stock_quantity INTEGER DEFAULT 0 CHECK (stock_quantity >= 0),
    category_id    UUID REFERENCES categories(id) ON DELETE SET NULL,
    seller_id      UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    is_published   BOOLEAN DEFAULT FALSE,
    published_at   TIMESTAMPTZ,
    created_at     TIMESTAMPTZ DEFAULT NOW(),
    updated_at     TIMESTAMPTZ DEFAULT NOW()
);
-- Orders: status is a constrained state machine; shipping address is
-- a JSONB snapshot so later user edits don't rewrite order history.
CREATE TABLE orders (
    id               UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id          UUID NOT NULL REFERENCES users(id),
    status           VARCHAR(20) DEFAULT 'pending'
                     CHECK (status IN (
                         'pending', 'confirmed', 'processing',
                         'shipped', 'delivered', 'cancelled'
                     )),
    -- Non-negative check added for consistency with products.price.
    total_amount     DECIMAL(12, 2) NOT NULL CHECK (total_amount >= 0),
    shipping_address JSONB NOT NULL,
    notes            TEXT,
    created_at       TIMESTAMPTZ DEFAULT NOW(),
    updated_at       TIMESTAMPTZ DEFAULT NOW()
);
-- Order items: junction between orders and products. unit_price is a
-- point-in-time snapshot of the product price at purchase.
CREATE TABLE order_items (
    id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    order_id   UUID NOT NULL REFERENCES orders(id) ON DELETE CASCADE,
    product_id UUID NOT NULL REFERENCES products(id),
    quantity   INTEGER NOT NULL CHECK (quantity > 0),
    -- Non-negative check added for consistency with products.price.
    unit_price DECIMAL(10, 2) NOT NULL CHECK (unit_price >= 0),
    UNIQUE (order_id, product_id)
);
-- Shared trigger function: stamp updated_at on every row UPDATE.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Attach the same function to each table carrying an updated_at column.
CREATE TRIGGER users_updated_at
    BEFORE UPDATE ON users
    FOR EACH ROW EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER products_updated_at
    BEFORE UPDATE ON products
    FOR EACH ROW EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER orders_updated_at
    BEFORE UPDATE ON orders
    FOR EACH ROW EXECUTE FUNCTION update_updated_at();
- Indexing Strategies
-- Primary-key indexes are created automatically.
-- B-tree indexes for equality and range lookups on foreign keys / status.
CREATE INDEX idx_products_category ON products(category_id);
CREATE INDEX idx_products_seller ON products(seller_id);
CREATE INDEX idx_orders_user ON orders(user_id);
CREATE INDEX idx_orders_status ON orders(status);

-- Composite indexes matching common query patterns (leading column first).
CREATE INDEX idx_products_category_price ON products(category_id, price);
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
CREATE INDEX idx_orders_created_status ON orders(created_at DESC, status);

-- Partial indexes: smaller and cheaper when queries always filter the same way.
CREATE INDEX idx_products_published ON products(category_id, created_at)
    WHERE is_published = TRUE;

CREATE INDEX idx_orders_pending ON orders(user_id, created_at)
    WHERE status = 'pending';

-- Full-text search: GIN over the same tsvector expression used in queries.
CREATE INDEX idx_products_search ON products
    USING GIN (to_tsvector('english', name || ' ' || COALESCE(description, '')));

-- JSONB indexes.
-- FIX: a GIN index on the extracted value (shipping_address->'city') does not
-- serve the common equality query shipping_address->>'city' = '...'. Use a
-- B-tree expression index for scalar equality, and a whole-document GIN
-- (jsonb_path_ops) for @> containment queries.
CREATE INDEX idx_orders_shipping_city ON orders ((shipping_address->>'city'));
CREATE INDEX idx_orders_shipping_gin ON orders
    USING GIN (shipping_address jsonb_path_ops);

-- Expression index: supports case-insensitive lookups via LOWER(email).
CREATE INDEX idx_users_email_lower ON users(LOWER(email));

-- Covering index: INCLUDE payload columns to enable index-only scans.
CREATE INDEX idx_products_list ON products(category_id, is_published)
    INCLUDE (name, price);
- Query Patterns
-- Keyset (cursor) pagination — more efficient than OFFSET for deep pages.
-- FIX: paging on created_at alone skips or repeats rows when several rows
-- share the same timestamp; include the primary key as a tie-breaker and
-- order by both columns.
SELECT id, name, price, created_at
FROM products
WHERE (created_at, id) < ('2024-01-01', 'last-seen-id')  -- cursor from previous page
  AND is_published = TRUE
ORDER BY created_at DESC, id DESC
LIMIT 20;

-- OFFSET pagination when a cursor is not practical (cost grows with OFFSET).
SELECT id, name, price
FROM products
WHERE is_published = TRUE
ORDER BY created_at DESC
LIMIT 20 OFFSET 40;
-- Full-text search: the WHERE expression matches idx_products_search, and
-- ts_rank orders hits by relevance.
SELECT
    id,
    name,
    description,
    ts_rank(
        to_tsvector('english', name || ' ' || COALESCE(description, '')),
        plainto_tsquery('english', 'laptop gaming')
    ) AS rank
FROM products
WHERE to_tsvector('english', name || ' ' || COALESCE(description, ''))
      @@ plainto_tsquery('english', 'laptop gaming')
ORDER BY rank DESC
LIMIT 20;

-- Daily revenue rollup for delivered orders over the last 30 days.
SELECT
    DATE_TRUNC('day', created_at) AS date,
    COUNT(*)                      AS order_count,
    SUM(total_amount)             AS revenue,
    AVG(total_amount)             AS avg_order_value
FROM orders
WHERE status = 'delivered'
  AND created_at >= NOW() - INTERVAL '30 days'
GROUP BY DATE_TRUNC('day', created_at)
ORDER BY date DESC;

-- Window functions: per-category rank, previous price, and category average
-- without self-joins.
SELECT
    id,
    name,
    price,
    category_id,
    RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS price_rank,
    LAG(price) OVER (PARTITION BY category_id ORDER BY price)  AS prev_price,
    AVG(price) OVER (PARTITION BY category_id)                 AS category_avg
FROM products
WHERE is_published = TRUE;

-- CTE pipeline: monthly revenue, then month-over-month growth via LAG.
WITH monthly_sales AS (
    -- Revenue per month for delivered orders.
    SELECT
        DATE_TRUNC('month', o.created_at)  AS month,
        SUM(oi.quantity * oi.unit_price)   AS revenue
    FROM orders o
    JOIN order_items oi ON o.id = oi.order_id
    WHERE o.status = 'delivered'
    GROUP BY DATE_TRUNC('month', o.created_at)
),
sales_with_growth AS (
    -- Compare each month with the previous one.
    SELECT
        month,
        revenue,
        LAG(revenue) OVER (ORDER BY month)           AS prev_revenue,
        revenue - LAG(revenue) OVER (ORDER BY month) AS growth
    FROM monthly_sales
)
SELECT *
FROM sales_with_growth
ORDER BY month DESC;
-- Recursive CTE: walk the category hierarchy from the roots down,
-- tracking depth and the id path for stable ordering.
WITH RECURSIVE category_tree AS (
    -- Base case: top-level categories (no parent).
    SELECT
        id,
        name,
        parent_id,
        0         AS depth,
        ARRAY[id] AS path
    FROM categories
    WHERE parent_id IS NULL

    UNION ALL

    -- Recursive case: children of rows already in the tree.
    SELECT
        c.id,
        c.name,
        c.parent_id,
        ct.depth + 1,
        ct.path || c.id
    FROM categories c
    JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT *
FROM category_tree
ORDER BY path;
- Transactions and Locking
-- Basic transaction: both balance updates and the audit row commit
-- together or not at all.
BEGIN;

UPDATE accounts SET balance = balance - 100 WHERE id = 'sender_id';
UPDATE accounts SET balance = balance + 100 WHERE id = 'receiver_id';

INSERT INTO transactions (from_id, to_id, amount)
VALUES ('sender_id', 'receiver_id', 100);

COMMIT;
-- Transaction with a savepoint: a failed step after the savepoint can be
-- rolled back without losing the stock update.
BEGIN;

UPDATE products SET stock_quantity = stock_quantity - 1 WHERE id = 'product_id';
SAVEPOINT after_stock_update;

-- FIX: "RETURNING id INTO order_id" is PL/pgSQL-only syntax and is a syntax
-- error in a plain SQL script. In client SQL, RETURNING sends the row back
-- to the client, which reads the generated id from the result set.
INSERT INTO orders (user_id, total_amount)
VALUES ('user_id', 99.99)
RETURNING id;

-- If something goes wrong, undo back to the savepoint (keeps the stock update):
ROLLBACK TO SAVEPOINT after_stock_update;

COMMIT;
-- Pessimistic locking: FOR UPDATE holds a row lock until COMMIT/ROLLBACK,
-- serializing concurrent writers on this product row.
BEGIN;

SELECT *
FROM products
WHERE id = 'product_id'
FOR UPDATE;  -- row stays locked for the rest of the transaction

UPDATE products
SET stock_quantity = stock_quantity - 1
WHERE id = 'product_id';

COMMIT;
-- Job-queue pattern: SKIP LOCKED lets concurrent workers each claim a
-- different pending job instead of blocking on the same row.
SELECT *
FROM jobs
WHERE status = 'pending'
ORDER BY created_at
LIMIT 1
FOR UPDATE SKIP LOCKED;

-- Advisory locks: application-level mutual exclusion keyed by a hashed name.
SELECT pg_advisory_lock(hashtext('unique_process_name'));
-- ... do exclusive work ...
SELECT pg_advisory_unlock(hashtext('unique_process_name'));
- Performance Optimization
-- Inspect the actual execution plan, with buffer usage, for a hot query.
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT
    p.*,
    c.name AS category_name
FROM products p
LEFT JOIN categories c ON p.category_id = c.id
WHERE p.is_published = TRUE
  AND p.price BETWEEN 10 AND 100
ORDER BY p.created_at DESC
LIMIT 20;

-- Refresh planner statistics for the table.
ANALYZE products;

-- Reclaim dead-tuple space and refresh statistics in one pass.
VACUUM (VERBOSE, ANALYZE) products;
-- Check index usage.
-- FIX: pg_stat_user_indexes exposes relname / indexrelname — there are no
-- "tablename" / "indexname" columns, so the original queries error out.
SELECT
    schemaname,
    relname      AS tablename,
    indexrelname AS indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes
WHERE relname = 'products'
ORDER BY idx_scan DESC;

-- Find indexes that have never been scanned (candidates for removal).
SELECT
    schemaname,
    relname      AS tablename,
    indexrelname AS indexname,
    idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0
  AND schemaname = 'public';

-- Table bloat check.
-- FIX: pg_stat_user_tables exposes relname (not "tablename"); relid feeds
-- pg_total_relation_size directly, avoiding string-built identifiers.
SELECT
    schemaname,
    relname AS tablename,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    n_dead_tup AS dead_tuples,
    n_live_tup AS live_tuples
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000
ORDER BY n_dead_tup DESC;

-- Connection monitoring: currently active queries and how long they've run.
SELECT
    datname,
    usename,
    application_name,
    client_addr,
    state,
    query_start,
    NOW() - query_start AS query_duration
FROM pg_stat_activity
WHERE state = 'active'
ORDER BY query_start;
- JSONB Operations
-- Event log with a free-form JSONB payload.
CREATE TABLE events (
    id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    type       VARCHAR(50) NOT NULL,
    payload    JSONB NOT NULL,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert JSONB data.
INSERT INTO events (type, payload)
VALUES ('user.created', '{"user_id": "123", "email": "test@example.com", "metadata": {"source": "web"}}');

-- Query by extracted text value (->> yields text).
SELECT * FROM events WHERE payload->>'user_id' = '123';

-- Query by containment (@> matches nested structure).
SELECT * FROM events WHERE payload @> '{"metadata": {"source": "web"}}';

-- JSONB operators side by side.
SELECT
    payload->'user_id'                      AS user_id,   -- extract as JSONB
    payload->>'email'                       AS email,     -- extract as text
    payload#>'{metadata,source}'            AS source,    -- path extraction
    payload ? 'email'                       AS has_email, -- key exists
    payload ?& ARRAY['user_id', 'email']    AS has_all    -- all keys exist
FROM events;

-- Merge keys into the payload (top-level concatenation).
UPDATE events
SET payload = payload || '{"processed": true}'
WHERE id = 'event_id';

-- Set a nested key via path.
UPDATE events
SET payload = jsonb_set(payload, '{metadata,updated_at}', '"2024-01-01"')
WHERE id = 'event_id';

-- Remove a top-level key.
UPDATE events
SET payload = payload - 'temporary_field'
WHERE id = 'event_id';
- Stored Procedures
-- Aggregate order statistics for a single user (delivered orders only).
-- COALESCE guards the SUM/AVG NULL result when the user has no orders.
CREATE OR REPLACE FUNCTION get_user_stats(p_user_id UUID)
RETURNS TABLE (
    total_orders    BIGINT,
    total_spent     DECIMAL,
    avg_order_value DECIMAL,
    last_order_date TIMESTAMPTZ
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        COUNT(*)::BIGINT,
        COALESCE(SUM(total_amount), 0),
        COALESCE(AVG(total_amount), 0),
        MAX(created_at)
    FROM orders
    WHERE user_id = p_user_id
      AND status = 'delivered';
END;
$$ LANGUAGE plpgsql;

-- Usage:
SELECT * FROM get_user_stats('user-uuid-here');
-- Create an order from a JSONB array of items:
--   p_items: [{"product_id": "...", "quantity": N, "price": X.XX}, ...]
-- Returns the new order id via the OUT parameter. Stock decrements rely on
-- the products.stock_quantity CHECK to reject oversells with an error.
CREATE OR REPLACE PROCEDURE create_order(
    p_user_id UUID,
    p_items   JSONB,
    OUT p_order_id UUID
)
LANGUAGE plpgsql
AS $$
DECLARE
    v_total DECIMAL;
BEGIN
    -- Set-based total instead of the row-by-row loop; COALESCE keeps the
    -- total at 0 (not NULL) for an empty items array.
    SELECT COALESCE(SUM((item->>'price')::DECIMAL * (item->>'quantity')::INTEGER), 0)
    INTO v_total
    FROM jsonb_array_elements(p_items) AS item;

    -- Create the order header.
    INSERT INTO orders (user_id, total_amount, status)
    VALUES (p_user_id, v_total, 'pending')
    RETURNING id INTO p_order_id;

    -- Create order line items from the JSONB array.
    INSERT INTO order_items (order_id, product_id, quantity, unit_price)
    SELECT
        p_order_id,
        (value->>'product_id')::UUID,
        (value->>'quantity')::INTEGER,
        (value->>'price')::DECIMAL
    FROM jsonb_array_elements(p_items);

    -- Decrement stock for every ordered product.
    UPDATE products p
    SET stock_quantity = stock_quantity - (i.value->>'quantity')::INTEGER
    FROM jsonb_array_elements(p_items) AS i
    WHERE p.id = (i.value->>'product_id')::UUID;
END;
$$;
Use Cases
E-commerce Analytics
-- Revenue by category over the last 30 days, delivered orders only.
SELECT
    p.category_id,
    c.name                            AS category_name,
    COUNT(DISTINCT o.id)              AS order_count,
    SUM(oi.quantity)                  AS units_sold,
    SUM(oi.quantity * oi.unit_price)  AS revenue
FROM products p
JOIN categories c   ON p.category_id = c.id
JOIN order_items oi ON p.id = oi.product_id
JOIN orders o       ON oi.order_id = o.id
WHERE o.status = 'delivered'
  AND o.created_at >= NOW() - INTERVAL '30 days'
GROUP BY p.category_id, c.name
ORDER BY revenue DESC;
User Activity Report
-- Per-user lifetime activity with a spend-based tier. The status filter sits
-- in the LEFT JOIN's ON clause so users with no delivered orders still appear.
WITH user_activity AS (
    SELECT
        u.id,
        u.email,
        COUNT(o.id)                       AS order_count,
        COALESCE(SUM(o.total_amount), 0)  AS total_spent,
        MAX(o.created_at)                 AS last_order_date
    FROM users u
    LEFT JOIN orders o
        ON u.id = o.user_id
       AND o.status = 'delivered'
    GROUP BY u.id, u.email
)
SELECT
    *,
    CASE
        WHEN total_spent >= 1000 THEN 'VIP'
        WHEN total_spent >= 500  THEN 'Regular'
        ELSE 'New'
    END AS customer_tier
FROM user_activity
ORDER BY total_spent DESC;
Best Practices
Do's
- Use UUIDs for primary keys
- Add indexes for common queries
- Use EXPLAIN ANALYZE to verify query plans
- Use transactions for data integrity
- Use connection pooling
- Run VACUUM and ANALYZE regularly
Don'ts
- Don't use SELECT * in production queries
- Don't ignore query plans
- Don't forget foreign keys
- Don't skip migrations
- Don't build raw SQL from user input without parameterization
References
- PostgreSQL Documentation
- Use The Index, Luke
- PostgreSQL Wiki