SQL性能调优
# SQL性能调优
系统性的SQL性能调优方法论,从问题识别到解决方案实施的完整指南
# 概述
SQL性能调优是一个系统性的过程,涉及查询优化、索引设计、数据库配置、硬件资源等多个方面。本文档提供了完整的性能调优方法论和实战技巧。
# 性能调优方法论
# 1. 性能调优流程
1. 问题识别 → 2. 性能分析 → 3. 瓶颈定位 → 4. 优化方案 → 5. 效果验证 → 6. 持续监控
# 2. 性能指标体系
# 响应时间指标
- 查询响应时间:单个查询的执行时间
- 事务响应时间:完整事务的执行时间
- 平均响应时间:一段时间内的平均响应时间
- 95%分位响应时间:95%的请求在此时间内完成
# 吞吐量指标
- QPS:每秒查询数(Queries Per Second)
- TPS:每秒事务数(Transactions Per Second)
- 并发用户数:同时活跃的用户数量
# 资源利用率指标
- CPU利用率:处理器使用率
- 内存利用率:内存使用率
- 磁盘I/O:磁盘读写操作
- 网络I/O:网络传输量
# 问题识别与诊断
# 1. 慢查询识别
# MySQL慢查询日志
-- 启用慢查询日志
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 2; -- 超过2秒的查询
SET GLOBAL log_queries_not_using_indexes = 'ON';
-- 查看慢查询配置
SHOW VARIABLES LIKE 'slow_query%';
SHOW VARIABLES LIKE 'long_query_time';
-- 分析慢查询日志
-- 使用mysqldumpslow工具
-- mysqldumpslow -s t -t 10 /var/log/mysql/slow.log
# Performance Schema监控
-- 启用Performance Schema
SET GLOBAL performance_schema = ON;
-- 查看最耗时的查询
SELECT
DIGEST_TEXT as query,
COUNT_STAR as exec_count,
AVG_TIMER_WAIT/1000000000 as avg_time_sec,
SUM_TIMER_WAIT/1000000000 as total_time_sec,
SUM_ROWS_EXAMINED as total_rows_examined,
SUM_ROWS_SENT as total_rows_sent
FROM performance_schema.events_statements_summary_by_digest
WHERE AVG_TIMER_WAIT > 1000000000 -- 平均执行时间超过1秒
ORDER BY AVG_TIMER_WAIT DESC
LIMIT 10;
-- 查看当前正在执行的查询
SELECT
processlist_id,
processlist_user,
processlist_host,
processlist_db,
processlist_command,
processlist_time,
processlist_state,
processlist_info
FROM performance_schema.processlist
WHERE processlist_command != 'Sleep'
ORDER BY processlist_time DESC;
# PostgreSQL查询监控
-- 启用pg_stat_statements扩展
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
-- 查看最耗时的查询
SELECT
query,
calls,
total_time,
mean_time,
stddev_time,
rows,
100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
FROM pg_stat_statements
ORDER BY mean_time DESC
LIMIT 10;
-- 查看当前活动查询
SELECT
pid,
now() - pg_stat_activity.query_start AS duration,
query,
state
FROM pg_stat_activity
WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes'
ORDER BY duration DESC;
-- 查看锁等待
SELECT
blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;
# 2. 系统资源监控
# 数据库级别监控
-- MySQL:查看数据库状态
SHOW GLOBAL STATUS LIKE 'Threads_connected';
SHOW GLOBAL STATUS LIKE 'Threads_running';
SHOW GLOBAL STATUS LIKE 'Questions';
SHOW GLOBAL STATUS LIKE 'Uptime';
SHOW GLOBAL STATUS LIKE 'Slow_queries';
-- 查看InnoDB状态
SHOW ENGINE INNODB STATUS;
-- 查看缓冲池命中率
SELECT
(1 - (Innodb_buffer_pool_reads / Innodb_buffer_pool_read_requests)) * 100 AS buffer_pool_hit_rate
FROM
(SELECT VARIABLE_VALUE AS Innodb_buffer_pool_reads FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Innodb_buffer_pool_reads') AS reads,
(SELECT VARIABLE_VALUE AS Innodb_buffer_pool_read_requests FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'Innodb_buffer_pool_read_requests') AS requests;
-- PostgreSQL:查看数据库统计
SELECT
datname,
numbackends,
xact_commit,
xact_rollback,
blks_read,
blks_hit,
tup_returned,
tup_fetched,
tup_inserted,
tup_updated,
tup_deleted
FROM pg_stat_database
WHERE datname = current_database();
-- 查看缓存命中率
SELECT
sum(heap_blks_read) as heap_read,
sum(heap_blks_hit) as heap_hit,
sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
FROM pg_statio_user_tables;
# 操作系统级别监控
# CPU使用率
top -p $(pgrep mysqld)
htop
# 内存使用情况
free -h
ps aux | grep mysql
# 磁盘I/O
iostat -x 1
iotop
# 网络I/O
netstat -i
iftop
# 磁盘空间
df -h
du -sh /var/lib/mysql/*
# 查询优化技术
# 1. SQL语句优化
# WHERE子句优化
-- 避免在WHERE子句中使用函数
-- 不好的写法
SELECT * FROM orders WHERE YEAR(order_date) = 2024;
-- 好的写法
SELECT * FROM orders WHERE order_date >= '2024-01-01' AND order_date < '2025-01-01';
-- 避免隐式类型转换
-- 不好的写法(如果user_id是字符串类型)
SELECT * FROM orders WHERE user_id = 123;
-- 好的写法
SELECT * FROM orders WHERE user_id = '123';
-- 使用EXISTS代替IN(某些情况下)
-- 原查询
SELECT * FROM users WHERE id IN (SELECT user_id FROM orders WHERE total_amount > 1000);
-- 优化后
SELECT * FROM users u WHERE EXISTS (
SELECT 1 FROM orders o WHERE o.user_id = u.id AND o.total_amount > 1000
);
# JOIN优化
-- 确保JOIN条件使用索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_users_id ON users(id); -- 通常主键自动有索引
SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- 小表驱动大表
-- 如果users表很小,orders表很大,这样写更好
SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing'; -- 先过滤小表
-- 避免笛卡尔积
-- 确保所有表都有适当的连接条件
SELECT u.name, o.order_date, p.product_name
FROM users u, orders o, products p
WHERE u.id = o.user_id -- 必须有连接条件
AND o.product_id = p.id; -- 必须有连接条件
# 子查询优化
-- 将相关子查询转换为JOIN
-- 原查询(相关子查询)
SELECT u.name, u.email,
(SELECT COUNT(*) FROM orders o WHERE o.user_id = u.id) as order_count
FROM users u;
-- 优化后(LEFT JOIN)
SELECT u.name, u.email, COALESCE(o.order_count, 0) as order_count
FROM users u
LEFT JOIN (
SELECT user_id, COUNT(*) as order_count
FROM orders
GROUP BY user_id
) o ON u.id = o.user_id;
-- 使用窗口函数替代子查询
-- 原查询
SELECT
product_name,
price,
(SELECT AVG(price) FROM products p2 WHERE p2.category = p1.category) as avg_category_price
FROM products p1;
-- 优化后
SELECT
product_name,
price,
AVG(price) OVER (PARTITION BY category) as avg_category_price
FROM products;
# 2. 索引优化策略
# 复合索引设计
-- 最左前缀原则
CREATE INDEX idx_orders_user_date_status ON orders(user_id, order_date, status);
-- 以下查询可以使用该索引
SELECT * FROM orders WHERE user_id = 123;
SELECT * FROM orders WHERE user_id = 123 AND order_date >= '2024-01-01';
SELECT * FROM orders WHERE user_id = 123 AND order_date >= '2024-01-01' AND status = 'completed';
-- 以下查询无法有效使用该索引
SELECT * FROM orders WHERE order_date >= '2024-01-01'; -- 跳过了user_id
SELECT * FROM orders WHERE status = 'completed'; -- 跳过了user_id和order_date
-- 索引列顺序优化
-- 选择性高的列放在前面
SELECT
COUNT(DISTINCT user_id) / COUNT(*) as user_id_selectivity,
COUNT(DISTINCT status) / COUNT(*) as status_selectivity,
COUNT(DISTINCT order_date) / COUNT(*) as date_selectivity
FROM orders;
-- 根据选择性调整索引顺序
-- 如果user_id选择性最高,则:
CREATE INDEX idx_orders_optimized ON orders(user_id, order_date, status);
# 覆盖索引设计
-- 创建覆盖索引,避免回表查询
CREATE INDEX idx_orders_covering ON orders(user_id, order_date, total_amount, status);
-- 以下查询可以完全通过索引满足
SELECT order_date, total_amount, status
FROM orders
WHERE user_id = 123
ORDER BY order_date;
-- 验证是否使用覆盖索引
EXPLAIN SELECT order_date, total_amount, status
FROM orders
WHERE user_id = 123
ORDER BY order_date;
-- 应该显示"Using index"
# 部分索引(PostgreSQL)
-- 只为活跃订单创建索引
CREATE INDEX idx_orders_active ON orders(user_id, order_date)
WHERE status IN ('pending', 'processing');
-- 只为最近的数据创建索引
CREATE INDEX idx_orders_recent ON orders(user_id, total_amount)
WHERE order_date >= '2024-01-01';
# 3. 分页查询优化
# 传统分页问题
-- 深度分页性能差
SELECT * FROM orders ORDER BY id LIMIT 100000, 20;
-- 需要扫描前100000条记录
# 游标分页优化
-- 使用游标分页
-- 第一页
SELECT * FROM orders WHERE id > 0 ORDER BY id LIMIT 20;
-- 后续页(假设上一页最后一条记录的id是120)
SELECT * FROM orders WHERE id > 120 ORDER BY id LIMIT 20;
-- 复合排序的游标分页
SELECT * FROM orders
WHERE (order_date, id) > ('2024-01-15', 1000)
ORDER BY order_date, id
LIMIT 20;
# 计数优化
-- 避免精确计数(大表)
-- 不好的写法
SELECT COUNT(*) FROM orders WHERE status = 'completed';
-- 优化方案1:估算计数
SELECT table_rows
FROM information_schema.tables
WHERE table_name = 'orders' AND table_schema = 'your_database';
-- 优化方案2:缓存计数
-- 创建计数表
CREATE TABLE order_counts (
status VARCHAR(20) PRIMARY KEY,
count_value INT,
last_updated TIMESTAMP
);
-- 使用触发器或定时任务更新计数
# 数据库配置优化
# 1. MySQL配置优化
# InnoDB配置
# my.cnf配置示例
[mysqld]
# 缓冲池大小(建议为物理内存的70-80%)
innodb_buffer_pool_size = 8G
# 缓冲池实例数(大内存时建议设置)
innodb_buffer_pool_instances = 8
# 日志文件大小
innodb_log_file_size = 1G
innodb_log_files_in_group = 2
# 刷新策略
innodb_flush_log_at_trx_commit = 1 # 安全性最高
# innodb_flush_log_at_trx_commit = 2 # 性能更好,但有数据丢失风险
# 并发线程数
innodb_thread_concurrency = 0 # 0表示不限制
# 读写I/O线程
innodb_read_io_threads = 8
innodb_write_io_threads = 8
# 文件格式
innodb_file_format = Barracuda
innodb_file_per_table = 1
# 查询缓存(MySQL 5.7及以下)
query_cache_type = 1
query_cache_size = 256M
# 连接相关
max_connections = 1000
max_connect_errors = 100000
# 临时表
tmp_table_size = 256M
max_heap_table_size = 256M
# 排序缓冲区
sort_buffer_size = 2M
read_buffer_size = 2M
read_rnd_buffer_size = 8M
# 慢查询日志
slow_query_log = 1
long_query_time = 2
log_queries_not_using_indexes = 1
# 查看和调整配置
-- 查看当前配置
SHOW VARIABLES LIKE 'innodb_buffer_pool_size';
SHOW VARIABLES LIKE 'max_connections';
-- 动态调整(重启后失效)
SET GLOBAL innodb_buffer_pool_size = 8589934592; -- 8GB
SET GLOBAL max_connections = 1000;
-- 查看缓冲池使用情况
SHOW ENGINE INNODB STATUS;
-- 查看连接数
SHOW GLOBAL STATUS LIKE 'Threads_connected';
SHOW GLOBAL STATUS LIKE 'Max_used_connections';
# 2. PostgreSQL配置优化
# postgresql.conf配置
# 内存配置
shared_buffers = 2GB # 建议为物理内存的25%
effective_cache_size = 6GB # 建议为物理内存的75%
work_mem = 256MB # 每个查询操作的内存
maintenance_work_mem = 1GB # 维护操作的内存
# 检查点配置
checkpoint_completion_target = 0.9
wal_buffers = 64MB
default_statistics_target = 100
# 并发配置
max_connections = 200
max_worker_processes = 8
max_parallel_workers_per_gather = 4
max_parallel_workers = 8
# 日志配置
log_min_duration_statement = 1000 # 记录超过1秒的查询
log_checkpoints = on
log_connections = on
log_disconnections = on
log_lock_waits = on
# 自动清理
autovacuum = on
autovacuum_max_workers = 3
autovacuum_naptime = 1min
# 查看和调整配置
-- 查看当前配置
SHOW shared_buffers;
SHOW effective_cache_size;
SHOW work_mem;
-- 查看数据库活动
SELECT * FROM pg_stat_activity;
-- 查看数据库大小
SELECT
datname,
pg_size_pretty(pg_database_size(datname)) as size
FROM pg_database
ORDER BY pg_database_size(datname) DESC;
-- 查看表大小
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
LIMIT 10;
# 硬件和架构优化
# 1. 硬件优化
# 存储优化
# SSD vs HDD性能对比
# SSD优势:
# - 随机I/O性能好
# - 延迟低
# - 适合OLTP工作负载
# 磁盘配置建议
# - 数据文件:使用SSD
# - 日志文件:使用高速SSD或分离到不同磁盘
# - 临时文件:使用高速存储
# RAID配置
# - RAID 10:性能和可靠性平衡
# - RAID 5:读性能好,写性能一般
# - RAID 0:性能最好,但无冗余
# 文件系统优化
# ext4挂载选项
mount -o noatime,data=writeback /dev/sdb1 /var/lib/mysql
# XFS挂载选项
mount -o noatime,largeio,inode64,swalloc /dev/sdb1 /var/lib/mysql
# 内存优化
# 内存分配建议
# - 数据库缓冲池:物理内存的70-80%
# - 操作系统:物理内存的20-30%
# - 其他应用:根据需要分配
# 查看内存使用
free -h
cat /proc/meminfo
# 调整内核参数
echo 'vm.swappiness = 1' >> /etc/sysctl.conf
echo 'vm.dirty_ratio = 15' >> /etc/sysctl.conf
echo 'vm.dirty_background_ratio = 5' >> /etc/sysctl.conf
sysctl -p
# 2. 架构优化
# 读写分离
-- 主库配置(写操作)
-- 配置二进制日志
log-bin = mysql-bin
server-id = 1
binlog-format = ROW
-- 从库配置(读操作)
server-id = 2
read-only = 1
relay-log = mysql-relay-bin
-- 应用层读写分离
-- 写操作连接主库
INSERT INTO users (name, email) VALUES ('John', 'john@example.com');
UPDATE users SET email = 'newemail@example.com' WHERE id = 1;
DELETE FROM users WHERE id = 1;
-- 读操作连接从库
SELECT * FROM users WHERE city = 'Beijing';
SELECT COUNT(*) FROM orders WHERE status = 'completed';
# 分库分表
-- 垂直分库:按业务模块分离
-- 用户库
CREATE DATABASE user_db;
USE user_db;
CREATE TABLE users (...); -- 用户相关表
-- 订单库
CREATE DATABASE order_db;
USE order_db;
CREATE TABLE orders (...); -- 订单相关表
-- 水平分表:按数据量分离
-- 按用户ID分表
CREATE TABLE orders_0 (...);
CREATE TABLE orders_1 (...);
CREATE TABLE orders_2 (...);
CREATE TABLE orders_3 (...);
-- 分表路由逻辑
-- table_index = user_id % 4
-- 用户ID为123的订单存储在orders_3表中(123 % 4 = 3)
-- 按时间分表
CREATE TABLE orders_202401 (...);
CREATE TABLE orders_202402 (...);
CREATE TABLE orders_202403 (...);
# 缓存策略
-- Redis缓存热点数据
-- 缓存用户信息
SET user:123 '{"name":"John","email":"john@example.com"}' EX 3600;
-- 缓存查询结果
SET query:popular_products '[{"id":1,"name":"Product1"},{"id":2,"name":"Product2"}]' EX 1800;
-- 缓存计数器
SET order_count:user:123 15 EX 3600;
-- 应用层缓存逻辑
-- 1. 先查缓存
-- 2. 缓存未命中时查数据库
-- 3. 将结果写入缓存
-- 4. 设置合适的过期时间
# 监控和维护
# 1. 性能监控
# 监控指标
-- MySQL监控脚本
SELECT
'QPS' as metric,
VARIABLE_VALUE as value
FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Questions'
UNION ALL
SELECT
'TPS' as metric,
VARIABLE_VALUE as value
FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Com_commit'
UNION ALL
SELECT
'Connections' as metric,
VARIABLE_VALUE as value
FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Threads_connected';
-- PostgreSQL监控脚本
SELECT
'active_connections' as metric,
count(*) as value
FROM pg_stat_activity
WHERE state = 'active'
UNION ALL
SELECT
'idle_connections' as metric,
count(*) as value
FROM pg_stat_activity
WHERE state = 'idle'
UNION ALL
SELECT
'total_connections' as metric,
count(*) as value
FROM pg_stat_activity;
# 告警设置
# 监控脚本示例
#!/bin/bash
# 检查MySQL连接数
CONNECTIONS=$(mysql -e "SHOW GLOBAL STATUS LIKE 'Threads_connected';" | awk 'NR==2{print $2}')
MAX_CONNECTIONS=$(mysql -e "SHOW VARIABLES LIKE 'max_connections';" | awk 'NR==2{print $2}')
USAGE_PERCENT=$((CONNECTIONS * 100 / MAX_CONNECTIONS))
if [ $USAGE_PERCENT -gt 80 ]; then
echo "WARNING: MySQL connections usage is ${USAGE_PERCENT}%"
# 发送告警邮件或短信
fi
# 检查慢查询数量
SLOW_QUERIES=$(mysql -e "SHOW GLOBAL STATUS LIKE 'Slow_queries';" | awk 'NR==2{print $2}')
echo "Current slow queries: $SLOW_QUERIES"
# 检查磁盘空间
DISK_USAGE=$(df -h /var/lib/mysql | awk 'NR==2{print $5}' | sed 's/%//')
if [ $DISK_USAGE -gt 85 ]; then
echo "WARNING: Disk usage is ${DISK_USAGE}%"
fi
# 2. 定期维护
# 索引维护
-- MySQL:分析表和优化表
ANALYZE TABLE users;
OPTIMIZE TABLE users;
-- 检查索引使用情况
SELECT
t.TABLE_SCHEMA,
t.TABLE_NAME,
s.INDEX_NAME,
s.COLUMN_NAME,
s.SEQ_IN_INDEX,
s.CARDINALITY
FROM INFORMATION_SCHEMA.TABLES t
INNER JOIN INFORMATION_SCHEMA.STATISTICS s ON t.TABLE_NAME = s.TABLE_NAME
WHERE t.TABLE_SCHEMA = 'your_database'
ORDER BY t.TABLE_NAME, s.INDEX_NAME, s.SEQ_IN_INDEX;
-- PostgreSQL:更新统计信息
ANALYZE users;
ANALYZE; -- 分析所有表
-- 重建索引
REINDEX INDEX idx_users_email;
REINDEX TABLE users;
-- 检查索引膨胀
SELECT
schemaname,
tablename,
indexname,
pg_size_pretty(pg_relation_size(indexrelid)) as index_size
FROM pg_stat_user_indexes
ORDER BY pg_relation_size(indexrelid) DESC;
# 数据清理
-- 清理历史数据
-- 分批删除,避免长时间锁表
DELETE FROM log_table
WHERE created_at < DATE_SUB(NOW(), INTERVAL 90 DAY)
LIMIT 10000;
-- 归档历史数据
INSERT INTO orders_archive
SELECT * FROM orders
WHERE order_date < '2023-01-01';
DELETE FROM orders
WHERE order_date < '2023-01-01'
LIMIT 10000;
-- PostgreSQL:清理死元组
VACUUM ANALYZE users;
VACUUM FULL users; -- 完全清理,会锁表
-- 自动清理配置
ALTER TABLE users SET (autovacuum_vacuum_scale_factor = 0.1);
ALTER TABLE users SET (autovacuum_analyze_scale_factor = 0.05);
# 最佳实践总结
# 1. 性能调优原则
- 测量优先:先测量,再优化
- 找到瓶颈:识别真正的性能瓶颈
- 逐步优化:一次优化一个问题
- 验证效果:每次优化后都要验证效果
- 持续监控:建立持续的性能监控体系
# 2. 优化优先级
- SQL语句优化:成本最低,效果最明显
- 索引优化:针对性强,效果显著
- 数据库配置:全局影响,需要谨慎
- 硬件升级:成本较高,但效果立竿见影
- 架构调整:成本最高,但可以解决根本问题
# 3. 常见误区
- 过度索引:索引不是越多越好
- 忽视统计信息:过期的统计信息会导致错误的执行计划
- 盲目调参:不理解参数含义就随意调整
- 只关注单个查询:忽视整体系统性能
- 缺乏监控:没有建立完善的监控体系
通过系统性的性能调优方法论和实践,可以显著提升数据库系统的性能和稳定性。