SQL执行计划分析
# SQL执行计划分析
执行计划是数据库优化器选择的查询执行策略,分析执行计划是SQL性能优化的核心技能
# 概述
执行计划(Execution Plan)是数据库管理系统为执行SQL查询而制定的详细步骤。通过分析执行计划,我们可以了解查询的执行过程,识别性能瓶颈,并制定相应的优化策略。
# 执行计划基础
# 1. 什么是执行计划
执行计划包含以下信息:
- 操作类型:扫描、连接、排序等
- 数据访问方式:全表扫描、索引扫描等
- 资源消耗:CPU、I/O、内存使用
- 执行顺序:操作的先后顺序
- 数据流向:数据在各操作间的传递
# 2. 执行计划的生成过程
1. SQL解析 → 2. 语法检查 → 3. 语义分析 → 4. 查询优化 → 5. 执行计划生成
# 不同数据库的执行计划
# MySQL执行计划
# 1. EXPLAIN基本用法
-- 基本EXPLAIN
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';
-- JSON格式(更详细)
EXPLAIN FORMAT=JSON SELECT * FROM users WHERE city = 'Beijing';
-- 传统格式
EXPLAIN FORMAT=TRADITIONAL SELECT * FROM users WHERE city = 'Beijing';
-- 树形格式(MySQL 8.0.16+)
EXPLAIN FORMAT=TREE SELECT * FROM users WHERE city = 'Beijing';
# 2. EXPLAIN输出字段解析
-- 示例查询
EXPLAIN SELECT u.name, o.order_date, o.total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing' AND o.order_date >= '2024-01-01'
ORDER BY o.total_amount DESC
LIMIT 10;
字段说明:
字段 | 说明 | 重要性 |
---|---|---|
id | 查询序列号 | 高 |
select_type | 查询类型 | 高 |
table | 表名 | 中 |
partitions | 分区信息 | 中 |
type | 连接类型 | 高 |
possible_keys | 可能使用的索引 | 中 |
key | 实际使用的索引 | 高 |
key_len | 索引长度 | 中 |
ref | 索引引用 | 中 |
rows | 扫描行数 | 高 |
filtered | 过滤百分比 | 中 |
Extra | 额外信息 | 高 |
# 3. 关键字段详解
select_type类型:
-- SIMPLE:简单查询
EXPLAIN SELECT * FROM users WHERE id = 1;
-- PRIMARY:主查询
-- SUBQUERY:子查询
EXPLAIN SELECT * FROM users WHERE id IN (
SELECT user_id FROM orders WHERE total_amount > 1000
);
-- DERIVED:派生表
EXPLAIN SELECT * FROM (
SELECT user_id, COUNT(*) as order_count
FROM orders GROUP BY user_id
) t WHERE order_count > 5;
-- UNION:联合查询
EXPLAIN SELECT name FROM users WHERE city = 'Beijing'
UNION
SELECT name FROM users WHERE city = 'Shanghai';
type连接类型(性能从好到差):
-- system:系统表,只有一行
-- const:常量查询,主键或唯一索引
EXPLAIN SELECT * FROM users WHERE id = 1;
-- eq_ref:唯一索引扫描
EXPLAIN SELECT u.*, o.* FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- ref:非唯一索引扫描
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';
-- range:范围扫描
EXPLAIN SELECT * FROM orders WHERE order_date BETWEEN '2024-01-01' AND '2024-12-31';
-- index:索引全扫描
EXPLAIN SELECT id FROM users ORDER BY id;
-- ALL:全表扫描(最差)
EXPLAIN SELECT * FROM users WHERE UPPER(name) = 'JOHN';
Extra信息解读:
-- Using index:覆盖索引
CREATE INDEX idx_users_city_name ON users(city, name);
EXPLAIN SELECT name FROM users WHERE city = 'Beijing';
-- Using where:WHERE条件过滤
EXPLAIN SELECT * FROM users WHERE age > 25;
-- Using filesort:文件排序(需要优化)
EXPLAIN SELECT * FROM users ORDER BY age;
-- Using temporary:使用临时表(需要优化)
EXPLAIN SELECT city, COUNT(*) FROM users GROUP BY city;
-- Using index condition:索引条件下推
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' AND age > 25;
# 4. EXPLAIN ANALYZE(MySQL 8.0.18+)
-- 实际执行并分析
EXPLAIN ANALYZE SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing'
GROUP BY u.id, u.name
ORDER BY order_count DESC
LIMIT 10;
-- 输出包含实际执行时间和行数
# PostgreSQL执行计划
# 1. EXPLAIN基本用法
-- 基本EXPLAIN
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';
-- 详细分析(包含实际执行)
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT * FROM users WHERE city = 'Beijing';
-- JSON格式
EXPLAIN (FORMAT JSON, ANALYZE)
SELECT * FROM users WHERE city = 'Beijing';
# 2. 执行计划节点类型
-- Seq Scan:顺序扫描(全表扫描)
EXPLAIN SELECT * FROM users WHERE age > 25;
-- Index Scan:索引扫描
CREATE INDEX idx_users_email ON users(email);
EXPLAIN SELECT * FROM users WHERE email = 'john@example.com';
-- Index Only Scan:仅索引扫描(覆盖索引)
CREATE INDEX idx_users_city_name ON users(city, name);
EXPLAIN SELECT name FROM users WHERE city = 'Beijing';
-- Bitmap Index Scan:位图索引扫描
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' OR city = 'Shanghai';
-- Nested Loop:嵌套循环连接
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.id = 123;
-- Hash Join:哈希连接
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- Merge Join:归并连接
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
ORDER BY u.id;
# 3. 成本分析
-- 理解成本信息
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';
-- 输出示例:
-- Seq Scan on users (cost=0.00..25.00 rows=5 width=64)
-- Filter: (city = 'Beijing'::text)
-- cost=启动成本..总成本
-- rows=预估返回行数
-- width=平均行宽度(字节)
# SQL Server执行计划
# 1. 查看执行计划
-- 显示估计执行计划
SET SHOWPLAN_ALL ON;
SELECT * FROM users WHERE city = 'Beijing';
SET SHOWPLAN_ALL OFF;
-- 显示实际执行计划
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT * FROM users WHERE city = 'Beijing';
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;
-- 图形化执行计划(SSMS中)
-- Ctrl + M 开启图形化执行计划
SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing';
# 2. 动态管理视图
-- 查看缓存的执行计划
SELECT
cp.plan_handle,
cp.usecounts,
cp.size_in_bytes,
st.text
FROM sys.dm_exec_cached_plans cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) st
WHERE st.text LIKE '%users%';
-- 查看当前执行的查询
SELECT
session_id,
request_id,
start_time,
status,
command,
sql_handle,
statement_start_offset,
statement_end_offset
FROM sys.dm_exec_requests
WHERE session_id > 50;
# Oracle执行计划
# 1. 查看执行计划
-- 生成执行计划
EXPLAIN PLAN FOR
SELECT * FROM users WHERE city = 'Beijing';
-- 查看执行计划
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);
-- 查看实际执行计划
SELECT /*+ GATHER_PLAN_STATISTICS */ * FROM users WHERE city = 'Beijing';
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL, NULL, 'ALLSTATS LAST'));
-- 自动跟踪
SET AUTOTRACE ON;
SELECT * FROM users WHERE city = 'Beijing';
SET AUTOTRACE OFF;
# 2. 执行计划操作类型
-- TABLE ACCESS FULL:全表扫描
EXPLAIN PLAN FOR SELECT * FROM users WHERE age > 25;
-- INDEX RANGE SCAN:索引范围扫描
CREATE INDEX idx_users_city ON users(city);
EXPLAIN PLAN FOR SELECT * FROM users WHERE city = 'Beijing';
-- NESTED LOOPS:嵌套循环
EXPLAIN PLAN FOR
SELECT u.name, o.order_date
FROM users u, orders o
WHERE u.id = o.user_id AND u.id = 123;
-- HASH JOIN:哈希连接
EXPLAIN PLAN FOR
SELECT u.name, o.order_date
FROM users u, orders o
WHERE u.id = o.user_id;
# 执行计划分析技巧
# 1. 识别性能瓶颈
# 高成本操作
-- MySQL:关注rows列
EXPLAIN SELECT * FROM orders o
INNER JOIN order_items oi ON o.id = oi.order_id
WHERE o.order_date >= '2024-01-01';
-- 如果rows很大,考虑:
-- 1. 添加索引
CREATE INDEX idx_orders_date ON orders(order_date);
-- 2. 优化WHERE条件
-- 3. 使用LIMIT限制结果
# 全表扫描
-- 发现全表扫描
EXPLAIN SELECT * FROM users WHERE UPPER(name) = 'JOHN';
-- type = ALL,需要优化
-- 优化方案:
-- 1. 避免在WHERE中使用函数
EXPLAIN SELECT * FROM users WHERE name = 'John';
-- 2. 创建函数索引(支持的数据库)
CREATE INDEX idx_users_upper_name ON users(UPPER(name));
# 文件排序和临时表
-- 发现Using filesort
EXPLAIN SELECT * FROM users ORDER BY age;
-- 优化:创建索引
CREATE INDEX idx_users_age ON users(age);
EXPLAIN SELECT * FROM users ORDER BY age;
-- 发现Using temporary
EXPLAIN SELECT city, COUNT(*) FROM users GROUP BY city;
-- 优化:创建索引
CREATE INDEX idx_users_city ON users(city);
EXPLAIN SELECT city, COUNT(*) FROM users GROUP BY city;
# 2. 连接优化分析
# 连接类型选择
-- 分析不同连接算法的选择
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- Nested Loop:适合小表连接大表
-- Hash Join:适合大表连接
-- Merge Join:适合已排序的数据
# 连接顺序优化
-- 多表连接的顺序很重要
EXPLAIN SELECT u.name, o.order_date, p.product_name
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing';
-- 优化器会选择最优的连接顺序
-- 但可以通过提示强制指定(某些数据库)
# 3. 索引使用分析
# 索引选择性
-- 检查索引是否被使用
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' AND age > 25;
-- 如果没有使用复合索引,可能需要调整索引列顺序
CREATE INDEX idx_users_city_age ON users(city, age);
-- 或
CREATE INDEX idx_users_age_city ON users(age, city);
-- 重新分析
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' AND age > 25;
# 覆盖索引效果
-- 检查是否使用覆盖索引
CREATE INDEX idx_users_city_name_email ON users(city, name, email);
EXPLAIN SELECT name, email FROM users WHERE city = 'Beijing';
-- 应该显示Using index
# 实战案例分析
# 案例1:慢查询优化
# 问题查询
-- 原始查询(性能差)
SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE YEAR(u.created_at) = 2024
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;
-- 查看执行计划
EXPLAIN SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE YEAR(u.created_at) = 2024
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;
# 执行计划分析
问题发现:
1. WHERE YEAR(u.created_at) = 2024 导致全表扫描
2. ORDER BY order_count 导致Using filesort
3. 没有合适的索引支持连接操作
# 优化方案
-- 1. 修改WHERE条件,避免函数
SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01' AND u.created_at < '2025-01-01'
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;
-- 2. 创建支持索引
CREATE INDEX idx_users_created_at ON users(created_at);
CREATE INDEX idx_orders_user_id ON orders(user_id);
-- 3. 验证优化效果
EXPLAIN SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01' AND u.created_at < '2025-01-01'
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;
# 案例2:复杂连接优化
# 问题查询
-- 多表连接查询
SELECT
u.name,
o.order_date,
p.product_name,
oi.quantity,
oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing'
AND o.order_date >= '2024-01-01'
AND p.category = 'Electronics';
-- 分析执行计划
EXPLAIN SELECT
u.name,
o.order_date,
p.product_name,
oi.quantity,
oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing'
AND o.order_date >= '2024-01-01'
AND p.category = 'Electronics';
# 优化策略
-- 1. 为连接列创建索引
CREATE INDEX idx_users_city ON users(city);
CREATE INDEX idx_orders_user_date ON orders(user_id, order_date);
CREATE INDEX idx_order_items_order ON order_items(order_id);
CREATE INDEX idx_order_items_product ON order_items(product_id);
CREATE INDEX idx_products_category ON products(category);
-- 2. 考虑创建覆盖索引
CREATE INDEX idx_users_city_name ON users(city, name);
CREATE INDEX idx_products_category_name ON products(category, product_name);
-- 3. 重新分析执行计划
EXPLAIN SELECT
u.name,
o.order_date,
p.product_name,
oi.quantity,
oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing'
AND o.order_date >= '2024-01-01'
AND p.category = 'Electronics';
# 案例3:子查询优化
# 问题查询
-- 相关子查询(性能差)
SELECT u.name, u.email
FROM users u
WHERE u.id IN (
SELECT o.user_id
FROM orders o
WHERE o.total_amount > 1000
AND o.order_date >= '2024-01-01'
);
-- 分析执行计划
EXPLAIN SELECT u.name, u.email
FROM users u
WHERE u.id IN (
SELECT o.user_id
FROM orders o
WHERE o.total_amount > 1000
AND o.order_date >= '2024-01-01'
);
# 优化方案
-- 1. 转换为JOIN
SELECT DISTINCT u.name, u.email
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 1000
AND o.order_date >= '2024-01-01';
-- 2. 使用EXISTS(某些情况下更优)
SELECT u.name, u.email
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id
AND o.total_amount > 1000
AND o.order_date >= '2024-01-01'
);
-- 3. 比较执行计划
EXPLAIN SELECT DISTINCT u.name, u.email
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 1000
AND o.order_date >= '2024-01-01';
EXPLAIN SELECT u.name, u.email
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id
AND o.total_amount > 1000
AND o.order_date >= '2024-01-01'
);
# 执行计划监控和自动化
# 1. 性能监控脚本
-- MySQL:监控慢查询
SELECT
sql_text,
exec_count,
avg_timer_wait/1000000000 as avg_time_sec,
sum_timer_wait/1000000000 as total_time_sec
FROM performance_schema.events_statements_summary_by_digest
WHERE avg_timer_wait > 1000000000 -- 超过1秒的查询
ORDER BY avg_timer_wait DESC
LIMIT 10;
-- PostgreSQL:监控慢查询
SELECT
query,
calls,
total_time,
mean_time,
rows
FROM pg_stat_statements
WHERE mean_time > 1000 -- 平均执行时间超过1秒
ORDER BY mean_time DESC
LIMIT 10;
# 2. 执行计划缓存分析
-- SQL Server:分析计划缓存
SELECT
cp.usecounts,
cp.size_in_bytes,
cp.cacheobjtype,
st.text
FROM sys.dm_exec_cached_plans cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) st
WHERE cp.usecounts = 1 -- 只使用一次的计划(可能需要参数化)
ORDER BY cp.size_in_bytes DESC;
-- Oracle:分析共享池
SELECT
sql_text,
executions,
elapsed_time/executions/1000000 as avg_elapsed_sec,
buffer_gets/executions as avg_buffer_gets
FROM v$sql
WHERE executions > 0
AND elapsed_time/executions/1000000 > 1 -- 平均执行时间超过1秒
ORDER BY elapsed_time/executions DESC;
# 最佳实践
# 1. 执行计划分析流程
- 收集基线:记录正常情况下的执行计划
- 定期检查:定期分析慢查询的执行计划
- 对比分析:比较优化前后的执行计划差异
- 验证效果:在测试环境验证优化效果
- 监控变化:持续监控执行计划的变化
# 2. 常见优化策略
- 索引优化:根据执行计划创建合适的索引
- 查询重写:改写SQL语句以获得更好的执行计划
- 统计信息:保持统计信息的及时更新
- 提示使用:在必要时使用优化器提示
- 分区策略:对大表使用分区提高查询性能
# 3. 注意事项
- 环境差异:测试环境和生产环境的执行计划可能不同
- 数据变化:数据量变化会影响执行计划的选择
- 版本升级:数据库版本升级可能改变优化器行为
- 参数调整:数据库参数调整会影响执行计划
- 并发影响:高并发情况下的执行计划可能与单独执行不同
通过系统性的执行计划分析,可以深入理解查询的执行过程,识别性能瓶颈,制定有效的优化策略,从而显著提升数据库查询性能。