data-warehouse

Data Warehouse Design

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy this and send it to your AI assistant to learn

Install skill "data-warehouse" with this command: npx skills add pluginagentmarketplace/custom-plugin-sql/pluginagentmarketplace-custom-plugin-sql-data-warehouse

Data Warehouse Design

Star Schema Basics

Fact Table Design

-- Star schema with sales fact table CREATE TABLE fact_sales ( sales_id BIGINT PRIMARY KEY, date_id INT NOT NULL, customer_id INT NOT NULL, product_id INT NOT NULL, store_id INT NOT NULL, quantity INT NOT NULL, unit_price DECIMAL(10, 2), sale_amount DECIMAL(12, 2), discount_amount DECIMAL(12, 2), net_sales DECIMAL(12, 2), tax_amount DECIMAL(12, 2), total_sale DECIMAL(12, 2),

-- Foreign keys FOREIGN KEY (date_id) REFERENCES dim_date(date_id), FOREIGN KEY (customer_id) REFERENCES dim_customer(customer_id), FOREIGN KEY (product_id) REFERENCES dim_product(product_id), FOREIGN KEY (store_id) REFERENCES dim_store(store_id) );

-- Create indexes on foreign keys for query performance CREATE INDEX idx_fact_sales_date ON fact_sales(date_id); CREATE INDEX idx_fact_sales_customer ON fact_sales(customer_id); CREATE INDEX idx_fact_sales_product ON fact_sales(product_id); CREATE INDEX idx_fact_sales_store ON fact_sales(store_id);

Dimension Table Design

-- Date dimension (conformed dimension - used across multiple facts) CREATE TABLE dim_date ( date_id INT PRIMARY KEY, full_date DATE UNIQUE, day_of_week INT, day_of_week_name VARCHAR(10), day_of_month INT, week_of_year INT, month_number INT, month_name VARCHAR(12), quarter INT, year INT, fiscal_quarter INT, fiscal_year INT, is_holiday BOOLEAN, is_weekend BOOLEAN, is_weekday BOOLEAN );

-- Customer dimension CREATE TABLE dim_customer ( customer_id INT PRIMARY KEY, customer_code VARCHAR(20), first_name VARCHAR(50), last_name VARCHAR(50), email VARCHAR(100), phone VARCHAR(20), gender VARCHAR(10), birth_date DATE, -- Address hierarchy street_address VARCHAR(100), city VARCHAR(50), state_province VARCHAR(50), postal_code VARCHAR(10), country VARCHAR(50), region VARCHAR(50), -- Customer segment customer_segment VARCHAR(50), customer_lifetime_value DECIMAL(12, 2), -- Slowly changing dimension columns effective_date DATE, end_date DATE, is_current BOOLEAN, -- Audit columns created_date DATE, updated_date DATE );

-- Product dimension CREATE TABLE dim_product ( product_id INT PRIMARY KEY, product_code VARCHAR(50), product_name VARCHAR(200), product_category VARCHAR(50), product_subcategory VARCHAR(50), product_line VARCHAR(50), supplier_id INT, brand VARCHAR(50), model VARCHAR(100), color VARCHAR(30), size VARCHAR(10), unit_cost DECIMAL(10, 2), list_price DECIMAL(10, 2), cost_to_list_ratio DECIMAL(5, 4), product_status VARCHAR(20), effective_date DATE, end_date DATE, is_current BOOLEAN );

Slowly Changing Dimensions (SCD)

Type 1: Overwrite

-- Simply update the existing record UPDATE dim_customer SET email = 'new_email@example.com', phone = '555-9999', updated_date = CURRENT_DATE WHERE customer_id = 1;

Type 2: Add New Row

-- Close old row UPDATE dim_customer SET is_current = FALSE, end_date = CURRENT_DATE - INTERVAL '1 day' WHERE customer_id = 1 AND is_current = TRUE;

-- Insert new row INSERT INTO dim_customer VALUES ( customer_id, customer_code, new_values..., CURRENT_DATE, -- effective_date NULL, -- end_date TRUE, -- is_current CURRENT_DATE -- created_date );

Type 3: Add New Column

-- Add previous value columns ALTER TABLE dim_customer ADD COLUMN previous_city VARCHAR(50); ALTER TABLE dim_customer ADD COLUMN previous_city_start_date DATE;

-- Update previous columns when changing current UPDATE dim_customer SET previous_city = city, previous_city_start_date = CURRENT_DATE, city = 'New York' WHERE customer_id = 1;

Conformed Dimensions

-- Single dim_date used across all fact tables SELECT f.sales_id, f.quantity * f.unit_price as revenue, d.month_name, d.year FROM fact_sales f JOIN dim_date d ON f.date_id = d.date_id;

-- Reuse dim_customer in multiple facts SELECT fs.sales_id, fc.call_id, c.customer_segment FROM fact_sales fs JOIN dim_customer c ON fs.customer_id = c.customer_id LEFT JOIN fact_customer_calls fc ON c.customer_id = fc.customer_id;

Aggregate Tables (Materialized Views)

-- Pre-calculate common aggregations for performance CREATE MATERIALIZED VIEW sales_summary_daily AS SELECT d.full_date, d.month_name, d.year, p.product_category, c.customer_segment, COUNT(DISTINCT fs.sales_id) as transaction_count, SUM(fs.quantity) as total_quantity, ROUND(SUM(fs.net_sales), 2) as total_sales, ROUND(AVG(fs.net_sales), 2) as avg_sale, COUNT(DISTINCT fs.customer_id) as unique_customers FROM fact_sales fs JOIN dim_date d ON fs.date_id = d.date_id JOIN dim_product p ON fs.product_id = p.product_id JOIN dim_customer c ON fs.customer_id = c.customer_id GROUP BY d.full_date, d.month_name, d.year, p.product_category, c.customer_segment;

-- Refresh materialized view REFRESH MATERIALIZED VIEW sales_summary_daily;

-- Query aggregate table instead of fact table SELECT month_name, product_category, SUM(total_sales) as monthly_sales FROM sales_summary_daily WHERE year = 2024 GROUP BY month_name, product_category;

Bridge Tables (Many-to-Many)

-- For many-to-many relationships (e.g., product to categories) CREATE TABLE bridge_product_category ( product_id INT, category_id INT, PRIMARY KEY (product_id, category_id), FOREIGN KEY (product_id) REFERENCES dim_product(product_id), FOREIGN KEY (category_id) REFERENCES dim_category(category_id) );

-- Query with bridge table SELECT p.product_name, STRING_AGG(DISTINCT c.category_name, ', ') as categories, COUNT(DISTINCT bc.category_id) as category_count FROM dim_product p LEFT JOIN bridge_product_category bc ON p.product_id = bc.product_id LEFT JOIN dim_category c ON bc.category_id = c.category_id GROUP BY p.product_id, p.product_name;

Data Quality Metrics

-- Monitor fact table metrics SELECT COUNT(*) as total_records, COUNT(DISTINCT customer_id) as unique_customers, COUNT(DISTINCT product_id) as unique_products, MIN(sale_amount) as min_sale, MAX(sale_amount) as max_sale, ROUND(AVG(sale_amount), 2) as avg_sale, COUNT(CASE WHEN sale_amount < 0 THEN 1 END) as negative_sales, COUNT(CASE WHEN sale_amount IS NULL THEN 1 END) as null_sales, MAX(load_timestamp) as last_load_time FROM fact_sales;

-- Dimension quality checks SELECT 'dim_customer' as dimension, COUNT(*) as total_records, COUNT(DISTINCT customer_id) as distinct_ids, COUNT(CASE WHEN first_name IS NULL THEN 1 END) as null_first_names, COUNT(CASE WHEN is_current = TRUE THEN 1 END) as current_records FROM dim_customer;

Next Steps

Learn ETL/ELT pipeline design and data transformation patterns in the etl-pipelines skill.

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

Automation

postgresql

No summary provided by upstream source.

Repository SourceNeeds Review