SQL性能调优

# SQL性能调优

系统性的SQL性能调优方法论,从问题识别到解决方案实施的完整指南

# 概述

SQL性能调优是一个系统性的过程,涉及查询优化、索引设计、数据库配置、硬件资源等多个方面。本文档提供了完整的性能调优方法论和实战技巧。

# 性能调优方法论

# 1. 性能调优流程

1. 问题识别 → 2. 性能分析 → 3. 瓶颈定位 → 4. 优化方案 → 5. 效果验证 → 6. 持续监控

# 2. 性能指标体系

# 响应时间指标

  • 查询响应时间:单个查询的执行时间
  • 事务响应时间:完整事务的执行时间
  • 平均响应时间:一段时间内的平均响应时间
  • 95%分位响应时间:95%的请求在此时间内完成

# 吞吐量指标

  • QPS:每秒查询数(Queries Per Second)
  • TPS:每秒事务数(Transactions Per Second)
  • 并发用户数:同时活跃的用户数量

# 资源利用率指标

  • CPU利用率:处理器使用率
  • 内存利用率:内存使用率
  • 磁盘I/O:磁盘读写操作
  • 网络I/O:网络传输量

# 问题识别与诊断

# 1. 慢查询识别

# MySQL慢查询日志

-- 启用慢查询日志
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 2;  -- 超过2秒的查询
SET GLOBAL log_queries_not_using_indexes = 'ON';

-- 查看慢查询配置
SHOW VARIABLES LIKE 'slow_query%';
SHOW VARIABLES LIKE 'long_query_time';

-- 分析慢查询日志
-- 使用mysqldumpslow工具
-- mysqldumpslow -s t -t 10 /var/log/mysql/slow.log

# Performance Schema监控

-- 启用Performance Schema
SET GLOBAL performance_schema = ON;

-- 查看最耗时的查询
SELECT 
    DIGEST_TEXT as query,
    COUNT_STAR as exec_count,
    AVG_TIMER_WAIT/1000000000 as avg_time_sec,
    SUM_TIMER_WAIT/1000000000 as total_time_sec,
    SUM_ROWS_EXAMINED as total_rows_examined,
    SUM_ROWS_SENT as total_rows_sent
FROM performance_schema.events_statements_summary_by_digest
WHERE AVG_TIMER_WAIT > 1000000000  -- 平均执行时间超过1秒
ORDER BY AVG_TIMER_WAIT DESC
LIMIT 10;

-- 查看当前正在执行的查询
SELECT 
    processlist_id,
    processlist_user,
    processlist_host,
    processlist_db,
    processlist_command,
    processlist_time,
    processlist_state,
    processlist_info
FROM performance_schema.processlist
WHERE processlist_command != 'Sleep'
ORDER BY processlist_time DESC;

# PostgreSQL查询监控

-- 启用pg_stat_statements扩展
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- 查看最耗时的查询
SELECT 
    query,
    calls,
    total_time,
    mean_time,
    stddev_time,
    rows,
    100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
ORDER BY mean_time DESC
LIMIT 10;

-- 查看当前活动查询
SELECT 
    pid,
    now() - pg_stat_activity.query_start AS duration,
    query,
    state
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes'
ORDER BY duration DESC;

-- 查看锁等待
SELECT 
    blocked_locks.pid AS blocked_pid,
    blocked_activity.usename AS blocked_user,
    blocking_locks.pid AS blocking_pid,
    blocking_activity.usename AS blocking_user,
    blocked_activity.query AS blocked_statement,
    blocking_activity.query AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;

# 2. 系统资源监控

# 数据库级别监控

-- MySQL:查看数据库状态
SHOW GLOBAL STATUS LIKE 'Threads_connected';
SHOW GLOBAL STATUS LIKE 'Threads_running';
SHOW GLOBAL STATUS LIKE 'Questions';
SHOW GLOBAL STATUS LIKE 'Uptime';
SHOW GLOBAL STATUS LIKE 'Slow_queries';

-- 查看InnoDB状态
SHOW ENGINE INNODB STATUS;

-- 查看缓冲池命中率
SELECT 
    (1 - (Innodb_buffer_pool_reads / Innodb_buffer_pool_read_requests)) * 100 AS buffer_pool_hit_rate
FROM 
    (SELECT VARIABLE_VALUE AS Innodb_buffer_pool_reads FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads') AS reads,
    (SELECT VARIABLE_VALUE AS Innodb_buffer_pool_read_requests FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Innodb_buffer_pool_read_requests') AS requests;

-- PostgreSQL:查看数据库统计
SELECT 
    datname,
    numbackends,
    xact_commit,
    xact_rollback,
    blks_read,
    blks_hit,
    tup_returned,
    tup_fetched,
    tup_inserted,
    tup_updated,
    tup_deleted
FROM pg_stat_database
WHERE datname = current_database();

-- 查看缓存命中率
SELECT 
    sum(heap_blks_read) as heap_read,
    sum(heap_blks_hit) as heap_hit,
    sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
FROM pg_statio_user_tables;

# 操作系统级别监控

# CPU使用率
top -p $(pgrep mysqld)
htop

# 内存使用情况
free -h
ps aux | grep mysql

# 磁盘I/O
iostat -x 1
iotop

# 网络I/O
netstat -i
iftop

# 磁盘空间
df -h
du -sh /var/lib/mysql/*

# 查询优化技术

# 1. SQL语句优化

# WHERE子句优化

-- 避免在WHERE子句中使用函数
-- 不好的写法
SELECT * FROM orders WHERE YEAR(order_date) = 2024;

-- 好的写法
SELECT * FROM orders WHERE order_date >= '2024-01-01' AND order_date < '2025-01-01';

-- 避免隐式类型转换
-- 不好的写法(如果user_id是字符串类型)
SELECT * FROM orders WHERE user_id = 123;

-- 好的写法
SELECT * FROM orders WHERE user_id = '123';

-- 使用EXISTS代替IN(某些情况下)
-- 原查询
SELECT * FROM users WHERE id IN (SELECT user_id FROM orders WHERE total_amount > 1000);

-- 优化后
SELECT * FROM users u WHERE EXISTS (
    SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.total_amount > 1000
);

# JOIN优化

-- 确保JOIN条件使用索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_users_id ON users(id);  -- 通常主键自动有索引

SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- 小表驱动大表
-- 如果users表很小,orders表很大,这样写更好
SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing';  -- 先过滤小表

-- 避免笛卡尔积
-- 确保所有表都有适当的连接条件
SELECT u.name, o.order_date, p.product_name
FROM users u, orders o, products p
WHERE u.id = o.user_id  -- 必须有连接条件
  AND o.product_id = p.id;  -- 必须有连接条件

# 子查询优化

-- 将相关子查询转换为JOIN
-- 原查询(相关子查询)
SELECT u.name, u.email,
    (SELECT COUNT(*) FROM orders o WHERE o.user_id = u.id) as order_count
FROM users u;

-- 优化后(LEFT JOIN)
SELECT u.name, u.email, COALESCE(o.order_count, 0) as order_count
FROM users u
LEFT JOIN (
    SELECT user_id, COUNT(*) as order_count
    FROM orders
    GROUP BY user_id
) o ON u.id = o.user_id;

-- 使用窗口函数替代子查询
-- 原查询
SELECT 
    product_name,
    price,
    (SELECT AVG(price) FROM products p2 WHERE p2.category = p1.category) as avg_category_price
FROM products p1;

-- 优化后
SELECT 
    product_name,
    price,
    AVG(price) OVER (PARTITION BY category) as avg_category_price
FROM products;

# 2. 索引优化策略

# 复合索引设计

-- 最左前缀原则
CREATE INDEX idx_orders_user_date_status ON orders(user_id, order_date, status);

-- 以下查询可以使用该索引
SELECT * FROM orders WHERE user_id = 123;
SELECT * FROM orders WHERE user_id = 123 AND order_date >= '2024-01-01';
SELECT * FROM orders WHERE user_id = 123 AND order_date >= '2024-01-01' AND status = 'completed';

-- 以下查询无法有效使用该索引
SELECT * FROM orders WHERE order_date >= '2024-01-01';  -- 跳过了user_id
SELECT * FROM orders WHERE status = 'completed';  -- 跳过了user_id和order_date

-- 索引列顺序优化
-- 选择性高的列放在前面
SELECT 
    COUNT(DISTINCT user_id) / COUNT(*) as user_id_selectivity,
    COUNT(DISTINCT status) / COUNT(*) as status_selectivity,
    COUNT(DISTINCT order_date) / COUNT(*) as date_selectivity
FROM orders;

-- 根据选择性调整索引顺序
-- 如果user_id选择性最高,则:
CREATE INDEX idx_orders_optimized ON orders(user_id, order_date, status);

# 覆盖索引设计

-- 创建覆盖索引,避免回表查询
CREATE INDEX idx_orders_covering ON orders(user_id, order_date, total_amount, status);

-- 以下查询可以完全通过索引满足
SELECT order_date, total_amount, status
FROM orders
WHERE user_id = 123
ORDER BY order_date;

-- 验证是否使用覆盖索引
EXPLAIN SELECT order_date, total_amount, status
FROM orders
WHERE user_id = 123
ORDER BY order_date;
-- 应该显示"Using index"

# 部分索引(PostgreSQL)

-- 只为活跃订单创建索引
CREATE INDEX idx_orders_active ON orders(user_id, order_date)
WHERE status IN ('pending', 'processing');

-- 只为最近的数据创建索引
CREATE INDEX idx_orders_recent ON orders(user_id, total_amount)
WHERE order_date >= '2024-01-01';

# 3. 分页查询优化

# 传统分页问题

-- 深度分页性能差
SELECT * FROM orders ORDER BY id LIMIT 100000, 20;
-- 需要扫描前100000条记录

# 游标分页优化

-- 使用游标分页
-- 第一页
SELECT * FROM orders WHERE id > 0 ORDER BY id LIMIT 20;

-- 后续页(假设上一页最后一条记录的id是120)
SELECT * FROM orders WHERE id > 120 ORDER BY id LIMIT 20;

-- 复合排序的游标分页
SELECT * FROM orders 
WHERE (order_date, id) > ('2024-01-15', 1000)
ORDER BY order_date, id 
LIMIT 20;

# 计数优化

-- 避免精确计数(大表)
-- 不好的写法
SELECT COUNT(*) FROM orders WHERE status = 'completed';

-- 优化方案1:估算计数
SELECT table_rows 
FROM information_schema.tables 
WHERE table_name = 'orders' AND table_schema = 'your_database';

-- 优化方案2:缓存计数
-- 创建计数表
CREATE TABLE order_counts (
    status VARCHAR(20) PRIMARY KEY,
    count_value INT,
    last_updated TIMESTAMP
);

-- 使用触发器或定时任务更新计数

# 数据库配置优化

# 1. MySQL配置优化

# InnoDB配置

# my.cnf配置示例
[mysqld]
# 缓冲池大小(建议为物理内存的70-80%)
innodb_buffer_pool_size = 8G

# 缓冲池实例数(大内存时建议设置)
innodb_buffer_pool_instances = 8

# 日志文件大小
innodb_log_file_size = 1G
innodb_log_files_in_group = 2

# 刷新策略
innodb_flush_log_at_trx_commit = 1  # 安全性最高
# innodb_flush_log_at_trx_commit = 2  # 性能更好,但有数据丢失风险

# 并发线程数
innodb_thread_concurrency = 0  # 0表示不限制

# 读写I/O线程
innodb_read_io_threads = 8
innodb_write_io_threads = 8

# 文件格式
innodb_file_format = Barracuda
innodb_file_per_table = 1

# 查询缓存(MySQL 5.7及以下)
query_cache_type = 1
query_cache_size = 256M

# 连接相关
max_connections = 1000
max_connect_errors = 100000

# 临时表
tmp_table_size = 256M
max_heap_table_size = 256M

# 排序缓冲区
sort_buffer_size = 2M
read_buffer_size = 2M
read_rnd_buffer_size = 8M

# 慢查询日志
slow_query_log = 1
long_query_time = 2
log_queries_not_using_indexes = 1

# 查看和调整配置

-- 查看当前配置
SHOW VARIABLES LIKE 'innodb_buffer_pool_size';
SHOW VARIABLES LIKE 'max_connections';

-- 动态调整(重启后失效)
SET GLOBAL innodb_buffer_pool_size = 8589934592;  -- 8GB
SET GLOBAL max_connections = 1000;

-- 查看缓冲池使用情况
SHOW ENGINE INNODB STATUS;

-- 查看连接数
SHOW GLOBAL STATUS LIKE 'Threads_connected';
SHOW GLOBAL STATUS LIKE 'Max_used_connections';

# 2. PostgreSQL配置优化

# postgresql.conf配置

# 内存配置
shared_buffers = 2GB                    # 建议为物理内存的25%
effective_cache_size = 6GB               # 建议为物理内存的75%
work_mem = 256MB                         # 每个查询操作的内存
maintenance_work_mem = 1GB               # 维护操作的内存

# 检查点配置
checkpoint_completion_target = 0.9
wal_buffers = 64MB
default_statistics_target = 100

# 并发配置
max_connections = 200
max_worker_processes = 8
max_parallel_workers_per_gather = 4
max_parallel_workers = 8

# 日志配置
log_min_duration_statement = 1000        # 记录超过1秒的查询
log_checkpoints = on
log_connections = on
log_disconnections = on
log_lock_waits = on

# 自动清理
autovacuum = on
autovacuum_max_workers = 3
autovacuum_naptime = 1min

# 查看和调整配置

-- 查看当前配置
SHOW shared_buffers;
SHOW effective_cache_size;
SHOW work_mem;

-- 查看数据库活动
SELECT * FROM pg_stat_activity;

-- 查看数据库大小
SELECT 
    datname,
    pg_size_pretty(pg_database_size(datname)) as size
FROM pg_database
ORDER BY pg_database_size(datname) DESC;

-- 查看表大小
SELECT 
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
LIMIT 10;

# 硬件和架构优化

# 1. 硬件优化

# 存储优化

# SSD vs HDD性能对比
# SSD优势:
# - 随机I/O性能好
# - 延迟低
# - 适合OLTP工作负载

# 磁盘配置建议
# - 数据文件:使用SSD
# - 日志文件:使用高速SSD或分离到不同磁盘
# - 临时文件:使用高速存储

# RAID配置
# - RAID 10:性能和可靠性平衡
# - RAID 5:读性能好,写性能一般
# - RAID 0:性能最好,但无冗余

# 文件系统优化
# ext4挂载选项
mount -o noatime,data=writeback /dev/sdb1 /var/lib/mysql

# XFS挂载选项
mount -o noatime,largeio,inode64,swalloc /dev/sdb1 /var/lib/mysql

# 内存优化

# 内存分配建议
# - 数据库缓冲池:物理内存的70-80%
# - 操作系统:物理内存的20-30%
# - 其他应用:根据需要分配

# 查看内存使用
free -h
cat /proc/meminfo

# 调整内核参数
echo 'vm.swappiness = 1' >> /etc/sysctl.conf
echo 'vm.dirty_ratio = 15' >> /etc/sysctl.conf
echo 'vm.dirty_background_ratio = 5' >> /etc/sysctl.conf
sysctl -p

# 2. 架构优化

# 读写分离

-- 主库配置(写操作)
-- 配置二进制日志
log-bin = mysql-bin
server-id = 1
binlog-format = ROW

-- 从库配置(读操作)
server-id = 2
read-only = 1
relay-log = mysql-relay-bin

-- 应用层读写分离
-- 写操作连接主库
INSERT INTO users (name, email) VALUES ('John', 'john@example.com');
UPDATE users SET email = 'newemail@example.com' WHERE id = 1;
DELETE FROM users WHERE id = 1;

-- 读操作连接从库
SELECT * FROM users WHERE city = 'Beijing';
SELECT COUNT(*) FROM orders WHERE status = 'completed';

# 分库分表

-- 垂直分库:按业务模块分离
-- 用户库
CREATE DATABASE user_db;
USE user_db;
CREATE TABLE users (...);  -- 用户相关表

-- 订单库
CREATE DATABASE order_db;
USE order_db;
CREATE TABLE orders (...);  -- 订单相关表

-- 水平分表:按数据量分离
-- 按用户ID分表
CREATE TABLE orders_0 (...);
CREATE TABLE orders_1 (...);
CREATE TABLE orders_2 (...);
CREATE TABLE orders_3 (...);

-- 分表路由逻辑
-- table_index = user_id % 4
-- 用户ID为123的订单存储在orders_3表中(123 % 4 = 3)

-- 按时间分表
CREATE TABLE orders_202401 (...);
CREATE TABLE orders_202402 (...);
CREATE TABLE orders_202403 (...);

# 缓存策略

-- Redis缓存热点数据
-- 缓存用户信息
SET user:123 '{"name":"John","email":"john@example.com"}' EX 3600;

-- 缓存查询结果
SET query:popular_products '[{"id":1,"name":"Product1"},{"id":2,"name":"Product2"}]' EX 1800;

-- 缓存计数器
SET order_count:user:123 15 EX 3600;

-- 应用层缓存逻辑
-- 1. 先查缓存
-- 2. 缓存未命中时查数据库
-- 3. 将结果写入缓存
-- 4. 设置合适的过期时间

# 监控和维护

# 1. 性能监控

# 监控指标

-- MySQL监控脚本
SELECT 
    'QPS' as metric,
    VARIABLE_VALUE as value
FROM INFORMATION_SCHEMA.GLOBAL_STATUS 
WHERE VARIABLE_NAME = 'Questions'
UNION ALL
SELECT 
    'TPS' as metric,
    VARIABLE_VALUE as value
FROM INFORMATION_SCHEMA.GLOBAL_STATUS 
WHERE VARIABLE_NAME = 'Com_commit'
UNION ALL
SELECT 
    'Connections' as metric,
    VARIABLE_VALUE as value
FROM INFORMATION_SCHEMA.GLOBAL_STATUS 
WHERE VARIABLE_NAME = 'Threads_connected';

-- PostgreSQL监控脚本
SELECT 
    'active_connections' as metric,
    count(*) as value
FROM pg_stat_activity 
WHERE state = 'active'
UNION ALL
SELECT 
    'idle_connections' as metric,
    count(*) as value
FROM pg_stat_activity 
WHERE state = 'idle'
UNION ALL
SELECT 
    'total_connections' as metric,
    count(*) as value
FROM pg_stat_activity;

# 告警设置

# 监控脚本示例
#!/bin/bash

# 检查MySQL连接数
CONNECTIONS=$(mysql -e "SHOW GLOBAL STATUS LIKE 'Threads_connected';" | awk 'NR==2{print $2}')
MAX_CONNECTIONS=$(mysql -e "SHOW VARIABLES LIKE 'max_connections';" | awk 'NR==2{print $2}')
USAGE_PERCENT=$((CONNECTIONS * 100 / MAX_CONNECTIONS))

if [ $USAGE_PERCENT -gt 80 ]; then
    echo "WARNING: MySQL connections usage is ${USAGE_PERCENT}%"
    # 发送告警邮件或短信
fi

# 检查慢查询数量
SLOW_QUERIES=$(mysql -e "SHOW GLOBAL STATUS LIKE 'Slow_queries';" | awk 'NR==2{print $2}')
echo "Current slow queries: $SLOW_QUERIES"

# 检查磁盘空间
DISK_USAGE=$(df -h /var/lib/mysql | awk 'NR==2{print $5}' | sed 's/%//')
if [ $DISK_USAGE -gt 85 ]; then
    echo "WARNING: Disk usage is ${DISK_USAGE}%"
fi

# 2. 定期维护

# 索引维护

-- MySQL:分析表和优化表
ANALYZE TABLE users;
OPTIMIZE TABLE users;

-- 检查索引使用情况
SELECT 
    t.TABLE_SCHEMA,
    t.TABLE_NAME,
    s.INDEX_NAME,
    s.COLUMN_NAME,
    s.SEQ_IN_INDEX,
    s.CARDINALITY
FROM INFORMATION_SCHEMA.TABLES t
INNER JOIN INFORMATION_SCHEMA.STATISTICS s ON t.TABLE_NAME = s.TABLE_NAME
WHERE t.TABLE_SCHEMA = 'your_database'
ORDER BY t.TABLE_NAME, s.INDEX_NAME, s.SEQ_IN_INDEX;

-- PostgreSQL:更新统计信息
ANALYZE users;
ANALYZE;  -- 分析所有表

-- 重建索引
REINDEX INDEX idx_users_email;
REINDEX TABLE users;

-- 检查索引膨胀
SELECT 
    schemaname,
    tablename,
    indexname,
    pg_size_pretty(pg_relation_size(indexrelid)) as index_size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;

# 数据清理

-- 清理历史数据
-- 分批删除,避免长时间锁表
DELETE FROM log_table 
WHERE created_at < DATE_SUB(NOW(), INTERVAL 90 DAY)
LIMIT 10000;

-- 归档历史数据
INSERT INTO orders_archive 
SELECT * FROM orders 
WHERE order_date < '2023-01-01';

DELETE FROM orders 
WHERE order_date < '2023-01-01'
LIMIT 10000;

-- PostgreSQL:清理死元组
VACUUM ANALYZE users;
VACUUM FULL users;  -- 完全清理,会锁表

-- 自动清理配置
ALTER TABLE users SET (autovacuum_vacuum_scale_factor = 0.1);
ALTER TABLE users SET (autovacuum_analyze_scale_factor = 0.05);

# 最佳实践总结

# 1. 性能调优原则

  1. 测量优先:先测量,再优化
  2. 找到瓶颈:识别真正的性能瓶颈
  3. 逐步优化:一次优化一个问题
  4. 验证效果:每次优化后都要验证效果
  5. 持续监控:建立持续的性能监控体系

# 2. 优化优先级

  1. SQL语句优化:成本最低,效果最明显
  2. 索引优化:针对性强,效果显著
  3. 数据库配置:全局影响,需要谨慎
  4. 硬件升级:成本较高,但效果立竿见影
  5. 架构调整:成本最高,但可以解决根本问题

# 3. 常见误区

  • 过度索引:索引不是越多越好
  • 忽视统计信息:过期的统计信息会导致错误的执行计划
  • 盲目调参:不理解参数含义就随意调整
  • 只关注单个查询:忽视整体系统性能
  • 缺乏监控:没有建立完善的监控体系

通过系统性的性能调优方法论和实践,可以显著提升数据库系统的性能和稳定性。