PostgreSQL 特定语法

# PostgreSQL 特定语法

# 概述

PostgreSQL 是一个功能强大的开源对象关系型数据库系统,以其高度的标准兼容性、可扩展性和丰富的数据类型而闻名。本章节详细介绍PostgreSQL的特定语法、高级功能和最佳实践。

# PostgreSQL 数据类型

# 1. 基本数据类型

-- 创建包含各种数据类型的表
CREATE TABLE data_types_demo (
    -- 数值类型
    small_int SMALLINT,                    -- 2字节整数
    int_val INTEGER,                       -- 4字节整数
    big_int BIGINT,                        -- 8字节整数
    decimal_val DECIMAL(10,2),             -- 精确小数
    numeric_val NUMERIC(12,4),             -- 精确小数
    real_val REAL,                         -- 4字节浮点
    double_val DOUBLE PRECISION,           -- 8字节浮点
    
    -- 序列类型
    small_serial SMALLSERIAL,              -- 自增2字节整数
    serial_val SERIAL,                     -- 自增4字节整数
    big_serial BIGSERIAL,                  -- 自增8字节整数
    
    -- 字符类型
    char_val CHAR(10),                     -- 定长字符
    varchar_val VARCHAR(255),              -- 变长字符
    text_val TEXT,                         -- 无限长文本
    
    -- 布尔类型
    bool_val BOOLEAN,                      -- 布尔值
    
    -- 日期时间类型
    date_val DATE,                         -- 日期
    time_val TIME,                         -- 时间
    timestamp_val TIMESTAMP,              -- 时间戳
    timestamptz_val TIMESTAMPTZ,          -- 带时区时间戳
    interval_val INTERVAL,                 -- 时间间隔
    
    -- 二进制类型
    bytea_val BYTEA                        -- 二进制数据
);

-- 插入示例数据
INSERT INTO data_types_demo (
    small_int, int_val, big_int, decimal_val, numeric_val, real_val, double_val,
    char_val, varchar_val, text_val, bool_val,
    date_val, time_val, timestamp_val, timestamptz_val, interval_val,
    bytea_val
) VALUES (
    32767, 2147483647, 9223372036854775807, 12345.67, 123456.7890, 123.45, 123456.789012,
    'CHAR_VAL', 'VARCHAR示例', '这是一个很长的文本字段,可以存储大量内容', true,
    '2023-12-25', '14:30:00', '2023-12-25 14:30:00', '2023-12-25 14:30:00+08', '1 year 2 months 3 days',
    '\x48656c6c6f20576f726c64'::bytea
);

# 2. 数组类型

-- 创建包含数组的表
CREATE TABLE array_demo (
    id SERIAL PRIMARY KEY,
    tags TEXT[],                           -- 文本数组
    scores INTEGER[],                      -- 整数数组
    matrix INTEGER[][],                    -- 二维数组
    schedule TIME[]                        -- 时间数组
);

-- 插入数组数据
INSERT INTO array_demo (tags, scores, matrix, schedule) VALUES
('{"技术", "数据库", "PostgreSQL"}', '{95, 87, 92}', '{{1,2,3},{4,5,6}}', '{"09:00", "14:00", "18:00"}'),
(ARRAY['编程', '开发', 'SQL'], ARRAY[88, 91, 85], ARRAY[[7,8,9],[10,11,12]], ARRAY['10:30'::TIME, '15:30'::TIME]);

-- 数组查询操作
SELECT 
    id,
    tags,
    tags[1] as first_tag,                  -- 访问第一个元素
    tags[2:3] as middle_tags,              -- 切片操作
    array_length(tags, 1) as tag_count,   -- 数组长度
    95 = ANY(scores) as has_95,            -- 检查是否包含某值
    scores @> ARRAY[95] as contains_95,    -- 包含操作符
    tags && ARRAY['技术'] as has_tech      -- 重叠操作符
FROM array_demo;

-- 数组聚合
SELECT 
    array_agg(id) as all_ids,              -- 聚合为数组
    array_agg(tags) as all_tags,
    string_agg(tags[1], ', ') as first_tags -- 字符串聚合
FROM array_demo;

-- 展开数组
SELECT 
    id,
    unnest(tags) as tag,                   -- 展开数组为行
    unnest(scores) as score
FROM array_demo;

# 3. JSON 和 JSONB 类型

-- 创建JSON表
CREATE TABLE json_demo (
    id SERIAL PRIMARY KEY,
    data JSON,                             -- JSON类型
    metadata JSONB                         -- JSONB类型(二进制JSON)
);

-- 插入JSON数据
INSERT INTO json_demo (data, metadata) VALUES
('{"name": "张三", "age": 30, "skills": ["Java", "Python", "SQL"]}',
 '{"department": "技术部", "level": "高级", "projects": [{"name": "项目A", "status": "进行中"}, {"name": "项目B", "status": "已完成"}]}'),
('{"name": "李四", "age": 28, "skills": ["JavaScript", "React", "Node.js"]}',
 '{"department": "产品部", "level": "中级", "projects": [{"name": "项目C", "status": "计划中"}]}');

-- JSON查询操作
SELECT 
    id,
    data->>'name' as name,                 -- 提取文本值
    data->'age' as age_json,               -- 提取JSON值
    (data->>'age')::INTEGER as age_int,    -- 转换为整数
    data->'skills'->0 as first_skill,      -- 数组第一个元素
    data->'skills'->>1 as second_skill,    -- 数组第二个元素(文本)
    
    -- JSONB操作
    metadata->>'department' as department,
    metadata->'projects'->0->>'name' as first_project,
    jsonb_array_length(metadata->'projects') as project_count
FROM json_demo;

-- JSON路径查询
SELECT 
    id,
    jsonb_path_query(metadata, '$.projects[*].name') as project_names,
    jsonb_path_query_array(metadata, '$.projects[*].status') as project_statuses
FROM json_demo;

-- JSON聚合
SELECT 
    jsonb_agg(data) as all_data,           -- 聚合为JSONB数组
    jsonb_object_agg(id, data->>'name') as id_name_map  -- 聚合为JSONB对象
FROM json_demo;

-- JSON更新
UPDATE json_demo 
SET metadata = metadata || '{"updated_at": "2023-12-25"}'
WHERE id = 1;

UPDATE json_demo 
SET metadata = jsonb_set(metadata, '{level}', '"专家"')
WHERE data->>'name' = '张三';

-- JSON索引
CREATE INDEX idx_json_name ON json_demo USING GIN ((data->>'name'));
CREATE INDEX idx_jsonb_department ON json_demo USING GIN ((metadata->>'department'));
CREATE INDEX idx_jsonb_full ON json_demo USING GIN (metadata);

# 4. 几何类型

-- 创建几何类型表
CREATE TABLE geometry_demo (
    id SERIAL PRIMARY KEY,
    location POINT,                        -- 点
    area_box BOX,                          -- 矩形
    boundary POLYGON,                      -- 多边形
    route PATH,                            -- 路径
    radius_area CIRCLE                     -- 圆形
);

-- 插入几何数据
INSERT INTO geometry_demo (location, area_box, boundary, route, radius_area) VALUES
(POINT(116.4074, 39.9042),               -- 北京坐标
 BOX(POINT(0,0), POINT(10,10)),          -- 矩形
 POLYGON('((0,0),(0,10),(10,10),(10,0))'), -- 正方形
 PATH('((0,0),(5,5),(10,0))'),            -- 路径
 CIRCLE(POINT(5,5), 3)),                  -- 圆心(5,5)半径3的圆

(POINT(121.4737, 31.2304),               -- 上海坐标
 BOX(POINT(5,5), POINT(15,15)),
 POLYGON('((5,5),(5,15),(15,15),(15,5))'),
 PATH('((5,5),(10,10),(15,5))'),
 CIRCLE(POINT(10,10), 4));

-- 几何查询
SELECT 
    id,
    location,
    location[0] as longitude,              -- X坐标
    location[1] as latitude,               -- Y坐标
    area(area_box) as box_area,            -- 矩形面积
    area(boundary) as polygon_area,        -- 多边形面积
    length(route) as route_length,         -- 路径长度
    area(radius_area) as circle_area       -- 圆形面积
FROM geometry_demo;

-- 几何运算
SELECT 
    id,
    location <-> POINT(116.4074, 39.9042) as distance_to_beijing,  -- 距离
    area_box && BOX(POINT(3,3), POINT(7,7)) as box_overlap,        -- 重叠检测
    boundary @> POINT(5,5) as contains_point,                      -- 包含检测
    radius_area ## route as closest_point                          -- 最近点
FROM geometry_demo;

# 5. 网络地址类型

-- 创建网络地址表
CREATE TABLE network_demo (
    id SERIAL PRIMARY KEY,
    ip_address INET,                       -- IP地址
    network_cidr CIDR,                     -- 网络地址
    mac_address MACADDR                    -- MAC地址
);

-- 插入网络数据
INSERT INTO network_demo (ip_address, network_cidr, mac_address) VALUES
('192.168.1.100', '192.168.1.0/24', '08:00:2b:01:02:03'),
('10.0.0.50', '10.0.0.0/8', '08:00:2b:01:02:04'),
('172.16.0.10', '172.16.0.0/16', '08:00:2b:01:02:05');

-- 网络地址查询
SELECT 
    id,
    ip_address,
    host(ip_address) as host_part,         -- 主机部分
    network(ip_address) as network_part,   -- 网络部分
    broadcast(network_cidr) as broadcast_addr, -- 广播地址
    netmask(network_cidr) as subnet_mask,  -- 子网掩码
    family(ip_address) as ip_family        -- IP版本
FROM network_demo;

-- 网络包含查询
SELECT 
    id,
    ip_address,
    ip_address << '192.168.0.0/16' as in_private_network,
    network_cidr >> '192.168.1.100' as contains_ip
FROM network_demo;

# PostgreSQL 高级功能

# 1. 窗口函数

-- 创建销售数据表
CREATE TABLE sales (
    id SERIAL PRIMARY KEY,
    salesperson VARCHAR(50),
    department VARCHAR(50),
    sale_amount DECIMAL(10,2),
    sale_date DATE
);

-- 插入测试数据
INSERT INTO sales (salesperson, department, sale_amount, sale_date) VALUES
('张三', '销售部', 10000, '2023-01-15'),
('李四', '销售部', 15000, '2023-01-20'),
('王五', '市场部', 12000, '2023-02-10'),
('张三', '销售部', 8000, '2023-02-15'),
('李四', '销售部', 20000, '2023-03-05'),
('王五', '市场部', 18000, '2023-03-10');

-- 窗口函数示例
SELECT 
    salesperson,
    department,
    sale_amount,
    sale_date,
    
    -- 排名函数
    ROW_NUMBER() OVER (ORDER BY sale_amount DESC) as row_num,
    RANK() OVER (ORDER BY sale_amount DESC) as rank_num,
    DENSE_RANK() OVER (ORDER BY sale_amount DESC) as dense_rank,
    PERCENT_RANK() OVER (ORDER BY sale_amount) as percent_rank,
    
    -- 分区排名
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY sale_amount DESC) as dept_rank,
    
    -- 聚合窗口函数
    SUM(sale_amount) OVER (PARTITION BY department) as dept_total,
    AVG(sale_amount) OVER (PARTITION BY salesperson) as person_avg,
    COUNT(*) OVER (PARTITION BY department) as dept_count,
    
    -- 移动窗口
    SUM(sale_amount) OVER (
        ORDER BY sale_date 
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) as moving_sum,
    
    -- 累计计算
    SUM(sale_amount) OVER (
        ORDER BY sale_date 
        ROWS UNBOUNDED PRECEDING
    ) as cumulative_sum,
    
    -- 前后值比较
    LAG(sale_amount, 1) OVER (ORDER BY sale_date) as prev_amount,
    LEAD(sale_amount, 1) OVER (ORDER BY sale_date) as next_amount,
    
    -- 首尾值
    FIRST_VALUE(sale_amount) OVER (
        PARTITION BY department ORDER BY sale_date
    ) as first_sale,
    LAST_VALUE(sale_amount) OVER (
        PARTITION BY department ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) as last_sale,
    
    -- 分位数
    NTILE(4) OVER (ORDER BY sale_amount) as quartile
    
FROM sales
ORDER BY sale_date;

# 2. 公共表表达式 (CTE)

-- 简单CTE
WITH department_stats AS (
    SELECT 
        department,
        COUNT(*) as sale_count,
        SUM(sale_amount) as total_sales,
        AVG(sale_amount) as avg_sale
    FROM sales
    GROUP BY department
),
top_departments AS (
    SELECT department, total_sales
    FROM department_stats
    WHERE total_sales > 30000
)
SELECT 
    ds.department,
    ds.sale_count,
    ds.total_sales,
    ds.avg_sale,
    CASE WHEN td.department IS NOT NULL THEN '顶级部门' ELSE '普通部门' END as dept_level
FROM department_stats ds
LEFT JOIN top_departments td ON ds.department = td.department
ORDER BY ds.total_sales DESC;

-- 递归CTE - 组织结构
CREATE TABLE employees (
    id INTEGER PRIMARY KEY,
    name VARCHAR(50),
    manager_id INTEGER REFERENCES employees(id)
);

INSERT INTO employees VALUES
(1, '总经理', NULL),
(2, '销售总监', 1),
(3, '技术总监', 1),
(4, '销售经理', 2),
(5, '开发经理', 3),
(6, '销售代表', 4),
(7, '开发工程师', 5);

-- 递归查询组织层级
WITH RECURSIVE employee_hierarchy AS (
    -- 基础情况:顶级管理者
    SELECT 
        id, 
        name, 
        manager_id, 
        0 as level,
        ARRAY[name] as path,
        name as path_string
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- 递归情况:下级员工
    SELECT 
        e.id, 
        e.name, 
        e.manager_id, 
        eh.level + 1,
        eh.path || e.name,
        eh.path_string || ' -> ' || e.name
    FROM employees e
    INNER JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT 
    REPEAT('  ', level) || name as hierarchy,
    level,
    path_string,
    array_length(path, 1) as depth
FROM employee_hierarchy
ORDER BY path;

-- 递归CTE - 数字序列
WITH RECURSIVE number_series AS (
    SELECT 1 as n
    UNION ALL
    SELECT n + 1
    FROM number_series
    WHERE n < 10
)
SELECT n, n * n as square, n * n * n as cube
FROM number_series;

# 3. 存储过程和函数

-- 创建简单函数
CREATE OR REPLACE FUNCTION calculate_age(birth_date DATE)
RETURNS INTEGER AS $$
BEGIN
    RETURN EXTRACT(YEAR FROM AGE(birth_date));
END;
$$ LANGUAGE plpgsql;

-- 使用函数
SELECT calculate_age('1990-05-15') as age;

-- 创建返回表的函数
CREATE OR REPLACE FUNCTION get_sales_summary(dept_name VARCHAR)
RETURNS TABLE(
    salesperson VARCHAR,
    total_amount DECIMAL,
    sale_count BIGINT,
    avg_amount DECIMAL
) AS $$
BEGIN
    RETURN QUERY
    SELECT 
        s.salesperson,
        SUM(s.sale_amount)::DECIMAL as total_amount,
        COUNT(*) as sale_count,
        AVG(s.sale_amount)::DECIMAL as avg_amount
    FROM sales s
    WHERE s.department = dept_name
    GROUP BY s.salesperson
    ORDER BY total_amount DESC;
END;
$$ LANGUAGE plpgsql;

-- 调用表函数
SELECT * FROM get_sales_summary('销售部');

-- 创建存储过程
CREATE OR REPLACE PROCEDURE update_sales_bonus(
    IN dept_name VARCHAR,
    IN bonus_rate DECIMAL
)
LANGUAGE plpgsql
AS $$
DECLARE
    rec RECORD;
    total_updated INTEGER := 0;
BEGIN
    -- 添加bonus列(如果不存在)
    IF NOT EXISTS (
        SELECT 1 FROM information_schema.columns 
        WHERE table_name = 'sales' AND column_name = 'bonus'
    ) THEN
        ALTER TABLE sales ADD COLUMN bonus DECIMAL(10,2) DEFAULT 0;
    END IF;
    
    -- 更新奖金
    FOR rec IN 
        SELECT id, sale_amount 
        FROM sales 
        WHERE department = dept_name
    LOOP
        UPDATE sales 
        SET bonus = sale_amount * bonus_rate 
        WHERE id = rec.id;
        
        total_updated := total_updated + 1;
    END LOOP;
    
    RAISE NOTICE '更新了 % 条记录的奖金', total_updated;
END;
$$;

-- 调用存储过程
CALL update_sales_bonus('销售部', 0.1);

-- 创建触发器函数
CREATE OR REPLACE FUNCTION sales_audit_trigger()
RETURNS TRIGGER AS $$
BEGIN
    IF TG_OP = 'INSERT' THEN
        INSERT INTO sales_audit (operation, sale_id, new_data, changed_at)
        VALUES ('INSERT', NEW.id, row_to_json(NEW), NOW());
        RETURN NEW;
    ELSIF TG_OP = 'UPDATE' THEN
        INSERT INTO sales_audit (operation, sale_id, old_data, new_data, changed_at)
        VALUES ('UPDATE', NEW.id, row_to_json(OLD), row_to_json(NEW), NOW());
        RETURN NEW;
    ELSIF TG_OP = 'DELETE' THEN
        INSERT INTO sales_audit (operation, sale_id, old_data, changed_at)
        VALUES ('DELETE', OLD.id, row_to_json(OLD), NOW());
        RETURN OLD;
    END IF;
END;
$$ LANGUAGE plpgsql;

-- 创建审计表
CREATE TABLE sales_audit (
    audit_id SERIAL PRIMARY KEY,
    operation VARCHAR(10),
    sale_id INTEGER,
    old_data JSONB,
    new_data JSONB,
    changed_at TIMESTAMP DEFAULT NOW()
);

-- 创建触发器
CREATE TRIGGER sales_audit_trigger
    AFTER INSERT OR UPDATE OR DELETE ON sales
    FOR EACH ROW EXECUTE FUNCTION sales_audit_trigger();

# 4. 视图和物化视图

-- 创建普通视图
CREATE VIEW sales_summary AS
SELECT 
    department,
    salesperson,
    COUNT(*) as sale_count,
    SUM(sale_amount) as total_amount,
    AVG(sale_amount) as avg_amount,
    MAX(sale_amount) as max_amount,
    MIN(sale_amount) as min_amount
FROM sales
GROUP BY department, salesperson;

-- 查询视图
SELECT * FROM sales_summary WHERE department = '销售部';

-- 创建物化视图
CREATE MATERIALIZED VIEW monthly_sales_summary AS
SELECT 
    EXTRACT(YEAR FROM sale_date) as year,
    EXTRACT(MONTH FROM sale_date) as month,
    department,
    COUNT(*) as sale_count,
    SUM(sale_amount) as total_amount,
    AVG(sale_amount) as avg_amount
FROM sales
GROUP BY 
    EXTRACT(YEAR FROM sale_date),
    EXTRACT(MONTH FROM sale_date),
    department
ORDER BY year, month, department;

-- 创建物化视图索引
CREATE INDEX idx_monthly_sales_year_month 
ON monthly_sales_summary (year, month);

-- 刷新物化视图
REFRESH MATERIALIZED VIEW monthly_sales_summary;

-- 并发刷新(不阻塞查询)
REFRESH MATERIALIZED VIEW CONCURRENTLY monthly_sales_summary;

-- 查询物化视图
SELECT * FROM monthly_sales_summary 
WHERE year = 2023 AND month = 3;

# 5. 分区表

-- 创建按日期分区的表
CREATE TABLE orders (
    id BIGSERIAL,
    customer_id INTEGER,
    order_date DATE NOT NULL,
    amount DECIMAL(10,2),
    status VARCHAR(20)
) PARTITION BY RANGE (order_date);

-- 创建分区
CREATE TABLE orders_2023_q1 PARTITION OF orders
    FOR VALUES FROM ('2023-01-01') TO ('2023-04-01');

CREATE TABLE orders_2023_q2 PARTITION OF orders
    FOR VALUES FROM ('2023-04-01') TO ('2023-07-01');

CREATE TABLE orders_2023_q3 PARTITION OF orders
    FOR VALUES FROM ('2023-07-01') TO ('2023-10-01');

CREATE TABLE orders_2023_q4 PARTITION OF orders
    FOR VALUES FROM ('2023-10-01') TO ('2024-01-01');

-- 创建默认分区
CREATE TABLE orders_default PARTITION OF orders DEFAULT;

-- 在分区上创建索引
CREATE INDEX idx_orders_2023_q1_customer ON orders_2023_q1 (customer_id);
CREATE INDEX idx_orders_2023_q2_customer ON orders_2023_q2 (customer_id);

-- 插入数据(自动路由到正确分区)
INSERT INTO orders (customer_id, order_date, amount, status) VALUES
(1001, '2023-02-15', 299.99, 'completed'),
(1002, '2023-05-20', 599.99, 'pending'),
(1003, '2023-08-10', 199.99, 'completed'),
(1004, '2023-11-25', 899.99, 'shipped');

-- 查询会自动使用分区裁剪
EXPLAIN (BUFFERS, ANALYZE)
SELECT * FROM orders 
WHERE order_date BETWEEN '2023-05-01' AND '2023-05-31';

-- 按哈希分区
CREATE TABLE user_sessions (
    session_id UUID PRIMARY KEY,
    user_id INTEGER NOT NULL,
    created_at TIMESTAMP DEFAULT NOW(),
    data JSONB
) PARTITION BY HASH (user_id);

-- 创建哈希分区
CREATE TABLE user_sessions_0 PARTITION OF user_sessions
    FOR VALUES WITH (MODULUS 4, REMAINDER 0);

CREATE TABLE user_sessions_1 PARTITION OF user_sessions
    FOR VALUES WITH (MODULUS 4, REMAINDER 1);

CREATE TABLE user_sessions_2 PARTITION OF user_sessions
    FOR VALUES WITH (MODULUS 4, REMAINDER 2);

CREATE TABLE user_sessions_3 PARTITION OF user_sessions
    FOR VALUES WITH (MODULUS 4, REMAINDER 3);

-- 按列表分区
CREATE TABLE sales_by_region (
    id SERIAL,
    region VARCHAR(20) NOT NULL,
    sale_amount DECIMAL(10,2),
    sale_date DATE
) PARTITION BY LIST (region);

CREATE TABLE sales_north PARTITION OF sales_by_region
    FOR VALUES IN ('北京', '天津', '河北');

CREATE TABLE sales_south PARTITION OF sales_by_region
    FOR VALUES IN ('广东', '广西', '海南');

CREATE TABLE sales_east PARTITION OF sales_by_region
    FOR VALUES IN ('上海', '江苏', '浙江');

CREATE TABLE sales_west PARTITION OF sales_by_region
    FOR VALUES IN ('四川', '重庆', '云南');

# 索引和性能优化

# 1. 高级索引类型

-- B-tree索引(默认)
CREATE INDEX idx_sales_amount ON sales (sale_amount);

-- 复合索引
CREATE INDEX idx_sales_dept_date ON sales (department, sale_date);

-- 部分索引
CREATE INDEX idx_sales_high_amount ON sales (sale_amount) 
WHERE sale_amount > 10000;

-- 表达式索引
CREATE INDEX idx_sales_upper_salesperson ON sales (UPPER(salesperson));

-- GIN索引(用于数组、JSON、全文搜索)
CREATE INDEX idx_json_gin ON json_demo USING GIN (metadata);
CREATE INDEX idx_array_gin ON array_demo USING GIN (tags);

-- GiST索引(用于几何类型、范围类型)
CREATE INDEX idx_geometry_gist ON geometry_demo USING GiST (location);

-- Hash索引(仅等值查询)
CREATE INDEX idx_sales_hash ON sales USING HASH (id);

-- BRIN索引(用于大表的范围查询)
CREATE INDEX idx_sales_brin ON sales USING BRIN (sale_date);

-- 查看索引使用情况
SELECT 
    schemaname,
    tablename,
    indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;

-- 查看未使用的索引
SELECT 
    schemaname,
    tablename,
    indexname,
    idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0;

# 2. 查询优化

-- 使用EXPLAIN分析查询计划
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT 
    s.salesperson,
    s.department,
    SUM(s.sale_amount) as total
FROM sales s
WHERE s.sale_date >= '2023-01-01'
GROUP BY s.salesperson, s.department
HAVING SUM(s.sale_amount) > 20000
ORDER BY total DESC;

-- 查看查询统计信息
SELECT 
    query,
    calls,
    total_time,
    mean_time,
    rows
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 10;

-- 分析表统计信息
ANALYZE sales;

-- 查看表统计信息
SELECT 
    schemaname,
    tablename,
    n_tup_ins,
    n_tup_upd,
    n_tup_del,
    n_live_tup,
    n_dead_tup,
    last_vacuum,
    last_autovacuum,
    last_analyze,
    last_autoanalyze
FROM pg_stat_user_tables;

# 3. 配置优化

-- 查看重要配置参数
SHOW shared_buffers;
SHOW work_mem;
SHOW maintenance_work_mem;
SHOW effective_cache_size;
SHOW random_page_cost;
SHOW seq_page_cost;

-- 查看连接信息
SELECT 
    datname,
    usename,
    client_addr,
    state,
    query_start,
    query
FROM pg_stat_activity
WHERE state = 'active';

-- 查看锁信息
SELECT 
    l.locktype,
    l.database,
    l.relation,
    l.page,
    l.tuple,
    l.virtualxid,
    l.transactionid,
    l.mode,
    l.granted,
    a.query
FROM pg_locks l
JOIN pg_stat_activity a ON l.pid = a.pid
WHERE NOT l.granted;

# 备份与恢复

# 1. 逻辑备份

# 完整数据库备份
pg_dump -h localhost -U postgres -d mydb > mydb_backup.sql

# 压缩备份
pg_dump -h localhost -U postgres -d mydb | gzip > mydb_backup.sql.gz

# 自定义格式备份(推荐)
pg_dump -h localhost -U postgres -Fc -d mydb > mydb_backup.dump

# 只备份结构
pg_dump -h localhost -U postgres -s -d mydb > mydb_schema.sql

# 只备份数据
pg_dump -h localhost -U postgres -a -d mydb > mydb_data.sql

# 备份特定表
pg_dump -h localhost -U postgres -t sales -t employees -d mydb > tables_backup.sql

# 恢复数据库
psql -h localhost -U postgres -d mydb < mydb_backup.sql

# 恢复自定义格式
pg_restore -h localhost -U postgres -d mydb mydb_backup.dump

# 并行恢复
pg_restore -h localhost -U postgres -d mydb -j 4 mydb_backup.dump

# 2. 物理备份

# 基础备份
pg_basebackup -h localhost -U postgres -D /backup/base -Ft -z -P

# 连续归档备份
# 在postgresql.conf中配置:
# wal_level = replica
# archive_mode = on
# archive_command = 'cp %p /backup/wal/%f'

# 时间点恢复
# 1. 停止PostgreSQL
# 2. 清空数据目录
# 3. 恢复基础备份
# 4. 创建recovery.conf
# 5. 启动PostgreSQL

# 3. 流复制

-- 主服务器配置
-- postgresql.conf:
-- wal_level = replica
-- max_wal_senders = 3
-- wal_keep_segments = 64

-- pg_hba.conf:
-- host replication replicator 192.168.1.0/24 md5

-- 创建复制用户
CREATE USER replicator REPLICATION LOGIN PASSWORD 'password';

-- 从服务器设置
-- pg_basebackup -h master_host -D /var/lib/postgresql/data -U replicator -P -W

-- recovery.conf:
-- standby_mode = 'on'
-- primary_conninfo = 'host=master_host port=5432 user=replicator password=password'
-- trigger_file = '/tmp/postgresql.trigger'

-- 查看复制状态
SELECT 
    client_addr,
    state,
    sent_lsn,
    write_lsn,
    flush_lsn,
    replay_lsn,
    sync_state
FROM pg_stat_replication;

# 最佳实践

# 1. 数据库设计

  • 合理选择数据类型(使用SERIAL而不是手动管理序列)
  • 适当使用约束(CHECK、UNIQUE、FOREIGN KEY)
  • 合理设计索引(避免过多索引,注意复合索引列顺序)
  • 使用分区表处理大数据量

# 2. 查询优化

  • 使用EXPLAIN分析查询计划
  • 避免SELECT *,只查询需要的列
  • 合理使用窗口函数替代子查询
  • 使用CTE提高查询可读性

# 3. 性能监控

  • 启用pg_stat_statements扩展
  • 定期运行VACUUM和ANALYZE
  • 监控连接数和锁等待
  • 设置合理的配置参数

# 4. 安全配置

  • 使用强密码和SSL连接
  • 配置适当的pg_hba.conf
  • 定期更新PostgreSQL版本
  • 实施行级安全策略(RLS)

# 5. 备份策略

  • 定期进行逻辑和物理备份
  • 测试备份恢复流程
  • 配置连续归档和时间点恢复
  • 考虑使用流复制实现高可用

通过掌握这些PostgreSQL特定的功能和最佳实践,可以充分利用PostgreSQL的强大特性,构建高性能、可靠的数据库应用。