SQL执行计划分析

# SQL执行计划分析

执行计划是数据库优化器选择的查询执行策略,分析执行计划是SQL性能优化的核心技能

# 概述

执行计划(Execution Plan)是数据库管理系统为执行SQL查询而制定的详细步骤。通过分析执行计划,我们可以了解查询的执行过程,识别性能瓶颈,并制定相应的优化策略。

# 执行计划基础

# 1. 什么是执行计划

执行计划包含以下信息:

  • 操作类型:扫描、连接、排序等
  • 数据访问方式:全表扫描、索引扫描等
  • 资源消耗:CPU、I/O、内存使用
  • 执行顺序:操作的先后顺序
  • 数据流向:数据在各操作间的传递

# 2. 执行计划的生成过程

1. SQL解析 → 2. 语法检查 → 3. 语义分析 → 4. 查询优化 → 5. 执行计划生成

# 不同数据库的执行计划

# MySQL执行计划

# 1. EXPLAIN基本用法

-- 基本EXPLAIN
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';

-- JSON格式(更详细)
EXPLAIN FORMAT=JSON SELECT * FROM users WHERE city = 'Beijing';

-- 传统格式
EXPLAIN FORMAT=TRADITIONAL SELECT * FROM users WHERE city = 'Beijing';

-- 树形格式(MySQL 8.0.16+)
EXPLAIN FORMAT=TREE SELECT * FROM users WHERE city = 'Beijing';

# 2. EXPLAIN输出字段解析

-- 示例查询
EXPLAIN SELECT u.name, o.order_date, o.total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing' AND o.order_date >= '2024-01-01'
ORDER BY o.total_amount DESC
LIMIT 10;

字段说明:

字段 说明 重要性
id 查询序列号
select_type 查询类型
table 表名
partitions 分区信息
type 连接类型
possible_keys 可能使用的索引
key 实际使用的索引
key_len 索引长度
ref 索引引用
rows 扫描行数
filtered 过滤百分比
Extra 额外信息

# 3. 关键字段详解

select_type类型:

-- SIMPLE:简单查询
EXPLAIN SELECT * FROM users WHERE id = 1;

-- PRIMARY:主查询
-- SUBQUERY:子查询
EXPLAIN SELECT * FROM users WHERE id IN (
    SELECT user_id FROM orders WHERE total_amount > 1000
);

-- DERIVED:派生表
EXPLAIN SELECT * FROM (
    SELECT user_id, COUNT(*) as order_count 
    FROM orders GROUP BY user_id
) t WHERE order_count > 5;

-- UNION:联合查询
EXPLAIN SELECT name FROM users WHERE city = 'Beijing'
UNION
SELECT name FROM users WHERE city = 'Shanghai';

type连接类型(性能从好到差):

-- system:系统表,只有一行
-- const:常量查询,主键或唯一索引
EXPLAIN SELECT * FROM users WHERE id = 1;

-- eq_ref:唯一索引扫描
EXPLAIN SELECT u.*, o.* FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- ref:非唯一索引扫描
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';

-- range:范围扫描
EXPLAIN SELECT * FROM orders WHERE order_date BETWEEN '2024-01-01' AND '2024-12-31';

-- index:索引全扫描
EXPLAIN SELECT id FROM users ORDER BY id;

-- ALL:全表扫描(最差)
EXPLAIN SELECT * FROM users WHERE UPPER(name) = 'JOHN';

Extra信息解读:

-- Using index:覆盖索引
CREATE INDEX idx_users_city_name ON users(city, name);
EXPLAIN SELECT name FROM users WHERE city = 'Beijing';

-- Using where:WHERE条件过滤
EXPLAIN SELECT * FROM users WHERE age > 25;

-- Using filesort:文件排序(需要优化)
EXPLAIN SELECT * FROM users ORDER BY age;

-- Using temporary:使用临时表(需要优化)
EXPLAIN SELECT city, COUNT(*) FROM users GROUP BY city;

-- Using index condition:索引条件下推
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' AND age > 25;

# 4. EXPLAIN ANALYZE(MySQL 8.0.18+)

-- 实际执行并分析
EXPLAIN ANALYZE SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing'
GROUP BY u.id, u.name
ORDER BY order_count DESC
LIMIT 10;

-- 输出包含实际执行时间和行数

# PostgreSQL执行计划

# 1. EXPLAIN基本用法

-- 基本EXPLAIN
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';

-- 详细分析(包含实际执行)
EXPLAIN (ANALYZE, BUFFERS, VERBOSE) 
SELECT * FROM users WHERE city = 'Beijing';

-- JSON格式
EXPLAIN (FORMAT JSON, ANALYZE) 
SELECT * FROM users WHERE city = 'Beijing';

# 2. 执行计划节点类型

-- Seq Scan:顺序扫描(全表扫描)
EXPLAIN SELECT * FROM users WHERE age > 25;

-- Index Scan:索引扫描
CREATE INDEX idx_users_email ON users(email);
EXPLAIN SELECT * FROM users WHERE email = 'john@example.com';

-- Index Only Scan:仅索引扫描(覆盖索引)
CREATE INDEX idx_users_city_name ON users(city, name);
EXPLAIN SELECT name FROM users WHERE city = 'Beijing';

-- Bitmap Index Scan:位图索引扫描
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' OR city = 'Shanghai';

-- Nested Loop:嵌套循环连接
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.id = 123;

-- Hash Join:哈希连接
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- Merge Join:归并连接
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
ORDER BY u.id;

# 3. 成本分析

-- 理解成本信息
EXPLAIN SELECT * FROM users WHERE city = 'Beijing';

-- 输出示例:
-- Seq Scan on users  (cost=0.00..25.00 rows=5 width=64)
--   Filter: (city = 'Beijing'::text)

-- cost=启动成本..总成本
-- rows=预估返回行数
-- width=平均行宽度(字节)

# SQL Server执行计划

# 1. 查看执行计划

-- 显示估计执行计划
SET SHOWPLAN_ALL ON;
SELECT * FROM users WHERE city = 'Beijing';
SET SHOWPLAN_ALL OFF;

-- 显示实际执行计划
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT * FROM users WHERE city = 'Beijing';
SET STATISTICS IO OFF;
SET STATISTICS TIME OFF;

-- 图形化执行计划(SSMS中)
-- Ctrl + M 开启图形化执行计划
SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.city = 'Beijing';

# 2. 动态管理视图

-- 查看缓存的执行计划
SELECT 
    cp.plan_handle,
    cp.usecounts,
    cp.size_in_bytes,
    st.text
FROM sys.dm_exec_cached_plans cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) st
WHERE st.text LIKE '%users%';

-- 查看当前执行的查询
SELECT 
    session_id,
    request_id,
    start_time,
    status,
    command,
    sql_handle,
    statement_start_offset,
    statement_end_offset
FROM sys.dm_exec_requests
WHERE session_id > 50;

# Oracle执行计划

# 1. 查看执行计划

-- 生成执行计划
EXPLAIN PLAN FOR
SELECT * FROM users WHERE city = 'Beijing';

-- 查看执行计划
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

-- 查看实际执行计划
SELECT /*+ GATHER_PLAN_STATISTICS */ * FROM users WHERE city = 'Beijing';
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL, NULL, 'ALLSTATS LAST'));

-- 自动跟踪
SET AUTOTRACE ON;
SELECT * FROM users WHERE city = 'Beijing';
SET AUTOTRACE OFF;

# 2. 执行计划操作类型

-- TABLE ACCESS FULL:全表扫描
EXPLAIN PLAN FOR SELECT * FROM users WHERE age > 25;

-- INDEX RANGE SCAN:索引范围扫描
CREATE INDEX idx_users_city ON users(city);
EXPLAIN PLAN FOR SELECT * FROM users WHERE city = 'Beijing';

-- NESTED LOOPS:嵌套循环
EXPLAIN PLAN FOR 
SELECT u.name, o.order_date
FROM users u, orders o
WHERE u.id = o.user_id AND u.id = 123;

-- HASH JOIN:哈希连接
EXPLAIN PLAN FOR 
SELECT u.name, o.order_date
FROM users u, orders o
WHERE u.id = o.user_id;

# 执行计划分析技巧

# 1. 识别性能瓶颈

# 高成本操作

-- MySQL:关注rows列
EXPLAIN SELECT * FROM orders o
INNER JOIN order_items oi ON o.id = oi.order_id
WHERE o.order_date >= '2024-01-01';

-- 如果rows很大,考虑:
-- 1. 添加索引
CREATE INDEX idx_orders_date ON orders(order_date);
-- 2. 优化WHERE条件
-- 3. 使用LIMIT限制结果

# 全表扫描

-- 发现全表扫描
EXPLAIN SELECT * FROM users WHERE UPPER(name) = 'JOHN';
-- type = ALL,需要优化

-- 优化方案:
-- 1. 避免在WHERE中使用函数
EXPLAIN SELECT * FROM users WHERE name = 'John';
-- 2. 创建函数索引(支持的数据库)
CREATE INDEX idx_users_upper_name ON users(UPPER(name));

# 文件排序和临时表

-- 发现Using filesort
EXPLAIN SELECT * FROM users ORDER BY age;

-- 优化:创建索引
CREATE INDEX idx_users_age ON users(age);
EXPLAIN SELECT * FROM users ORDER BY age;

-- 发现Using temporary
EXPLAIN SELECT city, COUNT(*) FROM users GROUP BY city;

-- 优化:创建索引
CREATE INDEX idx_users_city ON users(city);
EXPLAIN SELECT city, COUNT(*) FROM users GROUP BY city;

# 2. 连接优化分析

# 连接类型选择

-- 分析不同连接算法的选择
EXPLAIN SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

-- Nested Loop:适合小表连接大表
-- Hash Join:适合大表连接
-- Merge Join:适合已排序的数据

# 连接顺序优化

-- 多表连接的顺序很重要
EXPLAIN SELECT u.name, o.order_date, p.product_name
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing';

-- 优化器会选择最优的连接顺序
-- 但可以通过提示强制指定(某些数据库)

# 3. 索引使用分析

# 索引选择性

-- 检查索引是否被使用
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' AND age > 25;

-- 如果没有使用复合索引,可能需要调整索引列顺序
CREATE INDEX idx_users_city_age ON users(city, age);
-- 或
CREATE INDEX idx_users_age_city ON users(age, city);

-- 重新分析
EXPLAIN SELECT * FROM users WHERE city = 'Beijing' AND age > 25;

# 覆盖索引效果

-- 检查是否使用覆盖索引
CREATE INDEX idx_users_city_name_email ON users(city, name, email);

EXPLAIN SELECT name, email FROM users WHERE city = 'Beijing';
-- 应该显示Using index

# 实战案例分析

# 案例1:慢查询优化

# 问题查询

-- 原始查询(性能差)
SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE YEAR(u.created_at) = 2024
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;

-- 查看执行计划
EXPLAIN SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE YEAR(u.created_at) = 2024
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;

# 执行计划分析

问题发现:
1. WHERE YEAR(u.created_at) = 2024 导致全表扫描
2. ORDER BY order_count 导致Using filesort
3. 没有合适的索引支持连接操作

# 优化方案

-- 1. 修改WHERE条件,避免函数
SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01' AND u.created_at < '2025-01-01'
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;

-- 2. 创建支持索引
CREATE INDEX idx_users_created_at ON users(created_at);
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- 3. 验证优化效果
EXPLAIN SELECT u.name, u.email, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01' AND u.created_at < '2025-01-01'
GROUP BY u.id, u.name, u.email
ORDER BY order_count DESC;

# 案例2:复杂连接优化

# 问题查询

-- 多表连接查询
SELECT 
    u.name,
    o.order_date,
    p.product_name,
    oi.quantity,
    oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing'
  AND o.order_date >= '2024-01-01'
  AND p.category = 'Electronics';

-- 分析执行计划
EXPLAIN SELECT 
    u.name,
    o.order_date,
    p.product_name,
    oi.quantity,
    oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing'
  AND o.order_date >= '2024-01-01'
  AND p.category = 'Electronics';

# 优化策略

-- 1. 为连接列创建索引
CREATE INDEX idx_users_city ON users(city);
CREATE INDEX idx_orders_user_date ON orders(user_id, order_date);
CREATE INDEX idx_order_items_order ON order_items(order_id);
CREATE INDEX idx_order_items_product ON order_items(product_id);
CREATE INDEX idx_products_category ON products(category);

-- 2. 考虑创建覆盖索引
CREATE INDEX idx_users_city_name ON users(city, name);
CREATE INDEX idx_products_category_name ON products(category, product_name);

-- 3. 重新分析执行计划
EXPLAIN SELECT 
    u.name,
    o.order_date,
    p.product_name,
    oi.quantity,
    oi.price
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_items oi ON o.id = oi.order_id
INNER JOIN products p ON oi.product_id = p.id
WHERE u.city = 'Beijing'
  AND o.order_date >= '2024-01-01'
  AND p.category = 'Electronics';

# 案例3:子查询优化

# 问题查询

-- 相关子查询(性能差)
SELECT u.name, u.email
FROM users u
WHERE u.id IN (
    SELECT o.user_id 
    FROM orders o 
    WHERE o.total_amount > 1000
      AND o.order_date >= '2024-01-01'
);

-- 分析执行计划
EXPLAIN SELECT u.name, u.email
FROM users u
WHERE u.id IN (
    SELECT o.user_id 
    FROM orders o 
    WHERE o.total_amount > 1000
      AND o.order_date >= '2024-01-01'
);

# 优化方案

-- 1. 转换为JOIN
SELECT DISTINCT u.name, u.email
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 1000
  AND o.order_date >= '2024-01-01';

-- 2. 使用EXISTS(某些情况下更优)
SELECT u.name, u.email
FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o 
    WHERE o.user_id = u.id
      AND o.total_amount > 1000
      AND o.order_date >= '2024-01-01'
);

-- 3. 比较执行计划
EXPLAIN SELECT DISTINCT u.name, u.email
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 1000
  AND o.order_date >= '2024-01-01';

EXPLAIN SELECT u.name, u.email
FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o 
    WHERE o.user_id = u.id
      AND o.total_amount > 1000
      AND o.order_date >= '2024-01-01'
);

# 执行计划监控和自动化

# 1. 性能监控脚本

-- MySQL:监控慢查询
SELECT 
    sql_text,
    exec_count,
    avg_timer_wait/1000000000 as avg_time_sec,
    sum_timer_wait/1000000000 as total_time_sec
FROM performance_schema.events_statements_summary_by_digest
WHERE avg_timer_wait > 1000000000  -- 超过1秒的查询
ORDER BY avg_timer_wait DESC
LIMIT 10;

-- PostgreSQL:监控慢查询
SELECT 
    query,
    calls,
    total_time,
    mean_time,
    rows
FROM pg_stat_statements
WHERE mean_time > 1000  -- 平均执行时间超过1秒
ORDER BY mean_time DESC
LIMIT 10;

# 2. 执行计划缓存分析

-- SQL Server:分析计划缓存
SELECT 
    cp.usecounts,
    cp.size_in_bytes,
    cp.cacheobjtype,
    st.text
FROM sys.dm_exec_cached_plans cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) st
WHERE cp.usecounts = 1  -- 只使用一次的计划(可能需要参数化)
ORDER BY cp.size_in_bytes DESC;

-- Oracle:分析共享池
SELECT 
    sql_text,
    executions,
    elapsed_time/executions/1000000 as avg_elapsed_sec,
    buffer_gets/executions as avg_buffer_gets
FROM v$sql
WHERE executions > 0
  AND elapsed_time/executions/1000000 > 1  -- 平均执行时间超过1秒
ORDER BY elapsed_time/executions DESC;

# 最佳实践

# 1. 执行计划分析流程

  1. 收集基线:记录正常情况下的执行计划
  2. 定期检查:定期分析慢查询的执行计划
  3. 对比分析:比较优化前后的执行计划差异
  4. 验证效果:在测试环境验证优化效果
  5. 监控变化:持续监控执行计划的变化

# 2. 常见优化策略

  • 索引优化:根据执行计划创建合适的索引
  • 查询重写:改写SQL语句以获得更好的执行计划
  • 统计信息:保持统计信息的及时更新
  • 提示使用:在必要时使用优化器提示
  • 分区策略:对大表使用分区提高查询性能

# 3. 注意事项

  • 环境差异:测试环境和生产环境的执行计划可能不同
  • 数据变化:数据量变化会影响执行计划的选择
  • 版本升级:数据库版本升级可能改变优化器行为
  • 参数调整:数据库参数调整会影响执行计划
  • 并发影响:高并发情况下的执行计划可能与单独执行不同

通过系统性的执行计划分析,可以深入理解查询的执行过程,识别性能瓶颈,制定有效的优化策略,从而显著提升数据库查询性能。