PostgreSQL
Enterprise-grade PostgreSQL database design and optimization following industry best practices. This skill covers schema design, query optimization, indexing strategies, and production-ready patterns.
Purpose
Build performant, scalable database systems:
- Design efficient schemas
- Write optimized queries
- Implement proper indexing
- Handle transactions correctly
- Optimize for performance
- Ensure data integrity
Features
- Schema Design
-- Users: one row per account; role is constrained to a fixed set
-- and soft state (is_active, email_verified_at) lives on the row.
CREATE TABLE users (
    id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    email             VARCHAR(255) UNIQUE NOT NULL,
    password_hash     VARCHAR(255) NOT NULL,
    name              VARCHAR(100) NOT NULL,
    role              VARCHAR(20) DEFAULT 'user'
                      CHECK (role IN ('user', 'admin', 'moderator')),
    is_active         BOOLEAN DEFAULT TRUE,
    email_verified_at TIMESTAMPTZ,
    created_at        TIMESTAMPTZ DEFAULT NOW(),
    updated_at        TIMESTAMPTZ DEFAULT NOW()
);
-- Products: seller rows cascade-delete their products; a deleted
-- category detaches (SET NULL) rather than removing the product.
CREATE TABLE products (
    id             UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name           VARCHAR(255) NOT NULL,
    description    TEXT,
    price          DECIMAL(10, 2) NOT NULL CHECK (price >= 0),
    stock_quantity INTEGER DEFAULT 0 CHECK (stock_quantity >= 0),
    category_id    UUID REFERENCES categories(id) ON DELETE SET NULL,
    seller_id      UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    is_published   BOOLEAN DEFAULT FALSE,
    published_at   TIMESTAMPTZ,
    created_at     TIMESTAMPTZ DEFAULT NOW(),
    updated_at     TIMESTAMPTZ DEFAULT NOW()
);
-- Orders: status is a constrained state machine; shipping address is
-- a JSONB snapshot so later user edits don't rewrite order history.
CREATE TABLE orders (
    id               UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id          UUID NOT NULL REFERENCES users(id),
    status           VARCHAR(20) DEFAULT 'pending'
                     CHECK (status IN (
                         'pending', 'confirmed', 'processing',
                         'shipped', 'delivered', 'cancelled'
                     )),
    -- Non-negative check added for consistency with products.price.
    total_amount     DECIMAL(12, 2) NOT NULL CHECK (total_amount >= 0),
    shipping_address JSONB NOT NULL,
    notes            TEXT,
    created_at       TIMESTAMPTZ DEFAULT NOW(),
    updated_at       TIMESTAMPTZ DEFAULT NOW()
);
-- Order items: junction between orders and products. unit_price is a
-- point-in-time snapshot of the product price at purchase.
CREATE TABLE order_items (
    id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    order_id   UUID NOT NULL REFERENCES orders(id) ON DELETE CASCADE,
    product_id UUID NOT NULL REFERENCES products(id),
    quantity   INTEGER NOT NULL CHECK (quantity > 0),
    -- Non-negative check added for consistency with products.price.
    unit_price DECIMAL(10, 2) NOT NULL CHECK (unit_price >= 0),
    UNIQUE (order_id, product_id)
);
-- Shared trigger function: stamp updated_at on every row UPDATE.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Attach the same function to each table carrying an updated_at column.
CREATE TRIGGER users_updated_at
    BEFORE UPDATE ON users
    FOR EACH ROW EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER products_updated_at
    BEFORE UPDATE ON products
    FOR EACH ROW EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER orders_updated_at
    BEFORE UPDATE ON orders
    FOR EACH ROW EXECUTE FUNCTION update_updated_at();
- Indexing Strategies
-- Primary-key indexes are created automatically.
-- B-tree indexes for equality and range lookups on foreign keys / status.
CREATE INDEX idx_products_category ON products(category_id);
CREATE INDEX idx_products_seller ON products(seller_id);
CREATE INDEX idx_orders_user ON orders(user_id);
CREATE INDEX idx_orders_status ON orders(status);

-- Composite indexes matching common query patterns (leading column first).
CREATE INDEX idx_products_category_price ON products(category_id, price);
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
CREATE INDEX idx_orders_created_status ON orders(created_at DESC, status);

-- Partial indexes: smaller and cheaper when queries always filter the same way.
CREATE INDEX idx_products_published ON products(category_id, created_at)
    WHERE is_published = TRUE;

CREATE INDEX idx_orders_pending ON orders(user_id, created_at)
    WHERE status = 'pending';

-- Full-text search: GIN over the same tsvector expression used in queries.
CREATE INDEX idx_products_search ON products
    USING GIN (to_tsvector('english', name || ' ' || COALESCE(description, '')));

-- JSONB indexes.
-- FIX: a GIN index on the extracted value (shipping_address->'city') does not
-- serve the common equality query shipping_address->>'city' = '...'. Use a
-- B-tree expression index for scalar equality, and a whole-document GIN
-- (jsonb_path_ops) for @> containment queries.
CREATE INDEX idx_orders_shipping_city ON orders ((shipping_address->>'city'));
CREATE INDEX idx_orders_shipping_gin ON orders
    USING GIN (shipping_address jsonb_path_ops);

-- Expression index: supports case-insensitive lookups via LOWER(email).
CREATE INDEX idx_users_email_lower ON users(LOWER(email));

-- Covering index: INCLUDE payload columns to enable index-only scans.
CREATE INDEX idx_products_list ON products(category_id, is_published)
    INCLUDE (name, price);
- Query Patterns
-- Keyset (cursor) pagination — more efficient than OFFSET for deep pages.
-- FIX: paging on created_at alone skips or repeats rows when several rows
-- share the same timestamp; include the primary key as a tie-breaker and
-- order by both columns.
SELECT id, name, price, created_at
FROM products
WHERE (created_at, id) < ('2024-01-01', 'last-seen-id')  -- cursor from previous page
  AND is_published = TRUE
ORDER BY created_at DESC, id DESC
LIMIT 20;

-- OFFSET pagination when a cursor is not practical (cost grows with OFFSET).
SELECT id, name, price
FROM products
WHERE is_published = TRUE
ORDER BY created_at DESC
LIMIT 20 OFFSET 40;
-- Full-text search: the WHERE expression matches idx_products_search, and
-- ts_rank orders hits by relevance.
SELECT
    id,
    name,
    description,
    ts_rank(
        to_tsvector('english', name || ' ' || COALESCE(description, '')),
        plainto_tsquery('english', 'laptop gaming')
    ) AS rank
FROM products
WHERE to_tsvector('english', name || ' ' || COALESCE(description, ''))
      @@ plainto_tsquery('english', 'laptop gaming')
ORDER BY rank DESC
LIMIT 20;

-- Daily revenue rollup for delivered orders over the last 30 days.
SELECT
    DATE_TRUNC('day', created_at) AS date,
    COUNT(*)                      AS order_count,
    SUM(total_amount)             AS revenue,
    AVG(total_amount)             AS avg_order_value
FROM orders
WHERE status = 'delivered'
  AND created_at >= NOW() - INTERVAL '30 days'
GROUP BY DATE_TRUNC('day', created_at)
ORDER BY date DESC;

-- Window functions: per-category rank, previous price, and category average
-- without self-joins.
SELECT
    id,
    name,
    price,
    category_id,
    RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS price_rank,
    LAG(price) OVER (PARTITION BY category_id ORDER BY price)  AS prev_price,
    AVG(price) OVER (PARTITION BY category_id)                 AS category_avg
FROM products
WHERE is_published = TRUE;

-- CTE pipeline: monthly revenue, then month-over-month growth via LAG.
WITH monthly_sales AS (
    -- Revenue per month for delivered orders.
    SELECT
        DATE_TRUNC('month', o.created_at)  AS month,
        SUM(oi.quantity * oi.unit_price)   AS revenue
    FROM orders o
    JOIN order_items oi ON o.id = oi.order_id
    WHERE o.status = 'delivered'
    GROUP BY DATE_TRUNC('month', o.created_at)
),
sales_with_growth AS (
    -- Compare each month with the previous one.
    SELECT
        month,
        revenue,
        LAG(revenue) OVER (ORDER BY month)           AS prev_revenue,
        revenue - LAG(revenue) OVER (ORDER BY month) AS growth
    FROM monthly_sales
)
SELECT *
FROM sales_with_growth
ORDER BY month DESC;
-- Recursive CTE: walk the category hierarchy from the roots down,
-- tracking depth and the id path for stable ordering.
WITH RECURSIVE category_tree AS (
    -- Base case: top-level categories (no parent).
    SELECT
        id,
        name,
        parent_id,
        0         AS depth,
        ARRAY[id] AS path
    FROM categories
    WHERE parent_id IS NULL

    UNION ALL

    -- Recursive case: children of rows already in the tree.
    SELECT
        c.id,
        c.name,
        c.parent_id,
        ct.depth + 1,
        ct.path || c.id
    FROM categories c
    JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT *
FROM category_tree
ORDER BY path;
- Transactions and Locking
-- Basic transaction: both balance updates and the audit row commit
-- together or not at all.
BEGIN;

UPDATE accounts SET balance = balance - 100 WHERE id = 'sender_id';
UPDATE accounts SET balance = balance + 100 WHERE id = 'receiver_id';

INSERT INTO transactions (from_id, to_id, amount)
VALUES ('sender_id', 'receiver_id', 100);

COMMIT;
-- Transaction with a savepoint: a failed step after the savepoint can be
-- rolled back without losing the stock update.
BEGIN;

UPDATE products SET stock_quantity = stock_quantity - 1 WHERE id = 'product_id';
SAVEPOINT after_stock_update;

-- FIX: "RETURNING id INTO order_id" is PL/pgSQL-only syntax and is a syntax
-- error in a plain SQL script. In client SQL, RETURNING sends the row back
-- to the client, which reads the generated id from the result set.
INSERT INTO orders (user_id, total_amount)
VALUES ('user_id', 99.99)
RETURNING id;

-- If something goes wrong, undo back to the savepoint (keeps the stock update):
ROLLBACK TO SAVEPOINT after_stock_update;

COMMIT;
-- Pessimistic locking: FOR UPDATE holds a row lock until COMMIT/ROLLBACK,
-- serializing concurrent writers on this product row.
BEGIN;

SELECT *
FROM products
WHERE id = 'product_id'
FOR UPDATE;  -- row stays locked for the rest of the transaction

UPDATE products
SET stock_quantity = stock_quantity - 1
WHERE id = 'product_id';

COMMIT;
-- Job-queue pattern: SKIP LOCKED lets concurrent workers each claim a
-- different pending job instead of blocking on the same row.
SELECT *
FROM jobs
WHERE status = 'pending'
ORDER BY created_at
LIMIT 1
FOR UPDATE SKIP LOCKED;

-- Advisory locks: application-level mutual exclusion keyed by a hashed name.
SELECT pg_advisory_lock(hashtext('unique_process_name'));
-- ... do exclusive work ...
SELECT pg_advisory_unlock(hashtext('unique_process_name'));
- Performance Optimization
-- Inspect the actual execution plan, with buffer usage, for a hot query.
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT
    p.*,
    c.name AS category_name
FROM products p
LEFT JOIN categories c ON p.category_id = c.id
WHERE p.is_published = TRUE
  AND p.price BETWEEN 10 AND 100
ORDER BY p.created_at DESC
LIMIT 20;

-- Refresh planner statistics for the table.
ANALYZE products;

-- Reclaim dead-tuple space and refresh statistics in one pass.
VACUUM (VERBOSE, ANALYZE) products;
-- Check index usage.
-- FIX: pg_stat_user_indexes exposes relname / indexrelname — there are no
-- "tablename" / "indexname" columns, so the original queries error out.
SELECT
    schemaname,
    relname      AS tablename,
    indexrelname AS indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes
WHERE relname = 'products'
ORDER BY idx_scan DESC;

-- Find indexes that have never been scanned (candidates for removal).
SELECT
    schemaname,
    relname      AS tablename,
    indexrelname AS indexname,
    idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0
  AND schemaname = 'public';

-- Table bloat check.
-- FIX: pg_stat_user_tables exposes relname (not "tablename"); relid feeds
-- pg_total_relation_size directly, avoiding string-built identifiers.
SELECT
    schemaname,
    relname AS tablename,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    n_dead_tup AS dead_tuples,
    n_live_tup AS live_tuples
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000
ORDER BY n_dead_tup DESC;

-- Connection monitoring: currently active queries and how long they've run.
SELECT
    datname,
    usename,
    application_name,
    client_addr,
    state,
    query_start,
    NOW() - query_start AS query_duration
FROM pg_stat_activity
WHERE state = 'active'
ORDER BY query_start;
- JSONB Operations
-- Event log with a free-form JSONB payload.
CREATE TABLE events (
    id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    type       VARCHAR(50) NOT NULL,
    payload    JSONB NOT NULL,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert JSONB data.
INSERT INTO events (type, payload)
VALUES ('user.created', '{"user_id": "123", "email": "test@example.com", "metadata": {"source": "web"}}');

-- Query by extracted text value (->> yields text).
SELECT * FROM events WHERE payload->>'user_id' = '123';

-- Query by containment (@> matches nested structure).
SELECT * FROM events WHERE payload @> '{"metadata": {"source": "web"}}';

-- JSONB operators side by side.
SELECT
    payload->'user_id'                      AS user_id,   -- extract as JSONB
    payload->>'email'                       AS email,     -- extract as text
    payload#>'{metadata,source}'            AS source,    -- path extraction
    payload ? 'email'                       AS has_email, -- key exists
    payload ?& ARRAY['user_id', 'email']    AS has_all    -- all keys exist
FROM events;

-- Merge keys into the payload (top-level concatenation).
UPDATE events
SET payload = payload || '{"processed": true}'
WHERE id = 'event_id';

-- Set a nested key via path.
UPDATE events
SET payload = jsonb_set(payload, '{metadata,updated_at}', '"2024-01-01"')
WHERE id = 'event_id';

-- Remove a top-level key.
UPDATE events
SET payload = payload - 'temporary_field'
WHERE id = 'event_id';
- Stored Procedures
-- Aggregate order statistics for a single user (delivered orders only).
-- COALESCE guards the SUM/AVG NULL result when the user has no orders.
CREATE OR REPLACE FUNCTION get_user_stats(p_user_id UUID)
RETURNS TABLE (
    total_orders    BIGINT,
    total_spent     DECIMAL,
    avg_order_value DECIMAL,
    last_order_date TIMESTAMPTZ
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        COUNT(*)::BIGINT,
        COALESCE(SUM(total_amount), 0),
        COALESCE(AVG(total_amount), 0),
        MAX(created_at)
    FROM orders
    WHERE user_id = p_user_id
      AND status = 'delivered';
END;
$$ LANGUAGE plpgsql;

-- Usage:
SELECT * FROM get_user_stats('user-uuid-here');
-- Create an order from a JSONB array of items:
--   p_items: [{"product_id": "...", "quantity": N, "price": X.XX}, ...]
-- Returns the new order id via the OUT parameter. Stock decrements rely on
-- the products.stock_quantity CHECK to reject oversells with an error.
CREATE OR REPLACE PROCEDURE create_order(
    p_user_id UUID,
    p_items   JSONB,
    OUT p_order_id UUID
)
LANGUAGE plpgsql
AS $$
DECLARE
    v_total DECIMAL;
BEGIN
    -- Set-based total instead of the row-by-row loop; COALESCE keeps the
    -- total at 0 (not NULL) for an empty items array.
    SELECT COALESCE(SUM((item->>'price')::DECIMAL * (item->>'quantity')::INTEGER), 0)
    INTO v_total
    FROM jsonb_array_elements(p_items) AS item;

    -- Create the order header.
    INSERT INTO orders (user_id, total_amount, status)
    VALUES (p_user_id, v_total, 'pending')
    RETURNING id INTO p_order_id;

    -- Create order line items from the JSONB array.
    INSERT INTO order_items (order_id, product_id, quantity, unit_price)
    SELECT
        p_order_id,
        (value->>'product_id')::UUID,
        (value->>'quantity')::INTEGER,
        (value->>'price')::DECIMAL
    FROM jsonb_array_elements(p_items);

    -- Decrement stock for every ordered product.
    UPDATE products p
    SET stock_quantity = stock_quantity - (i.value->>'quantity')::INTEGER
    FROM jsonb_array_elements(p_items) AS i
    WHERE p.id = (i.value->>'product_id')::UUID;
END;
$$;
Use Cases
E-commerce Analytics
-- Revenue by category over the last 30 days, delivered orders only.
SELECT
    p.category_id,
    c.name                            AS category_name,
    COUNT(DISTINCT o.id)              AS order_count,
    SUM(oi.quantity)                  AS units_sold,
    SUM(oi.quantity * oi.unit_price)  AS revenue
FROM products p
JOIN categories c   ON p.category_id = c.id
JOIN order_items oi ON p.id = oi.product_id
JOIN orders o       ON oi.order_id = o.id
WHERE o.status = 'delivered'
  AND o.created_at >= NOW() - INTERVAL '30 days'
GROUP BY p.category_id, c.name
ORDER BY revenue DESC;
User Activity Report
-- Per-user lifetime activity with a spend-based tier. The status filter sits
-- in the LEFT JOIN's ON clause so users with no delivered orders still appear.
WITH user_activity AS (
    SELECT
        u.id,
        u.email,
        COUNT(o.id)                       AS order_count,
        COALESCE(SUM(o.total_amount), 0)  AS total_spent,
        MAX(o.created_at)                 AS last_order_date
    FROM users u
    LEFT JOIN orders o
        ON u.id = o.user_id
       AND o.status = 'delivered'
    GROUP BY u.id, u.email
)
SELECT
    *,
    CASE
        WHEN total_spent >= 1000 THEN 'VIP'
        WHEN total_spent >= 500  THEN 'Regular'
        ELSE 'New'
    END AS customer_tier
FROM user_activity
ORDER BY total_spent DESC;
Best Practices
Do's
- Use UUIDs for primary keys
- Add indexes for common queries
- Use EXPLAIN ANALYZE to verify query plans
- Use transactions for data integrity
- Use connection pooling
- Run VACUUM and ANALYZE regularly
Don'ts
- Don't use SELECT * in production queries
- Don't ignore query plans
- Don't forget foreign keys
- Don't skip migrations
- Don't build raw SQL from user input without parameterization
References
- PostgreSQL Documentation
- Use The Index, Luke
- PostgreSQL Wiki