子查询
# 子查询
# 概述
子查询(Subquery)是嵌套在其他SQL语句中的查询语句。子查询可以出现在SELECT、FROM、WHERE、HAVING等子句中,为主查询提供数据或条件。
# 子查询分类
# 按返回结果分类
# 1. 标量子查询(返回单个值)
-- 查询工资高于平均工资的员工
SELECT name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
# 2. 行子查询(返回单行多列)
-- 查询与某个员工同部门同职位的其他员工
SELECT * FROM employees
WHERE (department_id, job_id) = (
SELECT department_id, job_id
FROM employees
WHERE employee_id = 100
);
# 3. 列子查询(返回多行单列)
-- 查询在销售部门工作的员工
SELECT * FROM employees
WHERE department_id IN (
SELECT department_id
FROM departments
WHERE department_name LIKE '%Sales%'
);
# 4. 表子查询(返回多行多列)
-- 查询每个部门工资最高的员工
SELECT * FROM employees e1
WHERE (department_id, salary) IN (
SELECT department_id, MAX(salary)
FROM employees
GROUP BY department_id
);
# 按执行方式分类
# 1. 相关子查询
-- 查询工资高于本部门平均工资的员工
SELECT e1.name, e1.salary, e1.department_id
FROM employees e1
WHERE e1.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department_id = e1.department_id
);
# 2. 非相关子查询
-- 查询工资高于公司平均工资的员工
SELECT name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
# 子查询位置
# 1. SELECT子句中的子查询
-- 查询员工信息及其部门名称
SELECT
employee_id,
name,
(
SELECT department_name
FROM departments d
WHERE d.department_id = e.department_id
) AS department_name
FROM employees e;
# 2. FROM子句中的子查询(派生表)
-- 查询各部门平均工资信息
SELECT
dept_avg.department_id,
dept_avg.avg_salary,
d.department_name
FROM (
SELECT department_id, AVG(salary) as avg_salary
FROM employees
GROUP BY department_id
) dept_avg
JOIN departments d ON dept_avg.department_id = d.department_id;
# 3. WHERE子句中的子查询
-- 使用EXISTS
SELECT * FROM departments d
WHERE EXISTS (
SELECT 1 FROM employees e
WHERE e.department_id = d.department_id
);
-- 使用NOT EXISTS
SELECT * FROM departments d
WHERE NOT EXISTS (
SELECT 1 FROM employees e
WHERE e.department_id = d.department_id
);
# 4. HAVING子句中的子查询
-- 查询平均工资高于公司总体平均工资的部门
SELECT department_id, AVG(salary) as avg_salary
FROM employees
GROUP BY department_id
HAVING AVG(salary) > (SELECT AVG(salary) FROM employees);
# 常用子查询操作符
# 1. IN / NOT IN
-- 查询在指定部门工作的员工
SELECT * FROM employees
WHERE department_id IN (
SELECT department_id FROM departments
WHERE location_id = 1700
);
-- 查询不在指定部门工作的员工
SELECT * FROM employees
WHERE department_id NOT IN (
SELECT department_id FROM departments
WHERE location_id = 1700
AND department_id IS NOT NULL -- 注意NULL值处理
);
# 2. ANY / SOME
-- 查询工资比任意一个销售员工高的员工
SELECT * FROM employees
WHERE salary > ANY (
SELECT salary FROM employees
WHERE job_id = 'SA_REP'
);
-- 等价于
SELECT * FROM employees
WHERE salary > (
SELECT MIN(salary) FROM employees
WHERE job_id = 'SA_REP'
);
# 3. ALL
-- 查询工资比所有销售员工都高的员工
SELECT * FROM employees
WHERE salary > ALL (
SELECT salary FROM employees
WHERE job_id = 'SA_REP'
);
-- 等价于
SELECT * FROM employees
WHERE salary > (
SELECT MAX(salary) FROM employees
WHERE job_id = 'SA_REP'
);
# 4. EXISTS / NOT EXISTS
-- 查询有员工的部门
SELECT * FROM departments d
WHERE EXISTS (
SELECT 1 FROM employees e
WHERE e.department_id = d.department_id
);
-- 查询没有员工的部门
SELECT * FROM departments d
WHERE NOT EXISTS (
SELECT 1 FROM employees e
WHERE e.department_id = d.department_id
);
# 实战案例
# 案例1:电商订单分析
-- 创建测试数据
CREATE TABLE orders (
order_id INT PRIMARY KEY,
customer_id INT,
order_date DATE,
total_amount DECIMAL(10,2)
);
CREATE TABLE order_items (
item_id INT PRIMARY KEY,
order_id INT,
product_id INT,
quantity INT,
price DECIMAL(10,2)
);
-- 查询购买金额超过平均订单金额的客户
SELECT DISTINCT customer_id
FROM orders
WHERE total_amount > (
SELECT AVG(total_amount) FROM orders
);
-- 查询购买了超过3种不同商品的订单
SELECT order_id, total_amount
FROM orders o
WHERE (
SELECT COUNT(DISTINCT product_id)
FROM order_items oi
WHERE oi.order_id = o.order_id
) > 3;
-- 查询每个客户的最大订单金额
SELECT
customer_id,
(
SELECT MAX(total_amount)
FROM orders o2
WHERE o2.customer_id = o1.customer_id
) as max_order_amount
FROM orders o1
GROUP BY customer_id;
# 案例2:学生成绩分析
-- 查询每门课程成绩高于该课程平均分的学生
SELECT student_id, course_id, score
FROM scores s1
WHERE s1.score > (
SELECT AVG(s2.score)
FROM scores s2
WHERE s2.course_id = s1.course_id
);
-- 查询至少有一门课程不及格的学生
SELECT DISTINCT student_id
FROM students s
WHERE EXISTS (
SELECT 1 FROM scores sc
WHERE sc.student_id = s.student_id
AND sc.score < 60
);
-- 查询所有课程都及格的学生
SELECT student_id
FROM students s
WHERE NOT EXISTS (
SELECT 1 FROM scores sc
WHERE sc.student_id = s.student_id
AND sc.score < 60
);
# 性能优化
# 1. 子查询 vs JOIN
-- 子查询方式(可能较慢)
SELECT * FROM employees
WHERE department_id IN (
SELECT department_id FROM departments
WHERE location_id = 1700
);
-- JOIN方式(通常更快)
SELECT DISTINCT e.*
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE d.location_id = 1700;
# 2. EXISTS vs IN
-- 当子查询结果集较大时,EXISTS通常更快
SELECT * FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
);
-- IN适用于子查询结果集较小的情况
SELECT * FROM employees
WHERE department_id IN (10, 20, 30);
# 3. 避免相关子查询
-- 相关子查询(较慢)
SELECT e1.*
FROM employees e1
WHERE e1.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department_id = e1.department_id
);
-- 使用窗口函数优化
SELECT *
FROM (
SELECT *,
AVG(salary) OVER (PARTITION BY department_id) as dept_avg_salary
FROM employees
) t
WHERE salary > dept_avg_salary;
# 常见错误
# 1. 子查询返回多行
-- 错误:子查询返回多行
SELECT * FROM employees
WHERE salary = (
SELECT salary FROM employees
WHERE department_id = 10
);
-- 正确:使用IN或ANY
SELECT * FROM employees
WHERE salary IN (
SELECT salary FROM employees
WHERE department_id = 10
);
# 2. NULL值处理
-- 可能出现意外结果
SELECT * FROM employees
WHERE department_id NOT IN (
SELECT department_id FROM departments
WHERE location_id = 1700
);
-- 正确处理NULL值
SELECT * FROM employees
WHERE department_id NOT IN (
SELECT department_id FROM departments
WHERE location_id = 1700
AND department_id IS NOT NULL
);
# 3. 相关子查询性能问题
-- 避免在大表上使用复杂的相关子查询
-- 考虑使用JOIN或窗口函数替代
# 最佳实践
# 1. 选择合适的子查询类型
- 简单条件过滤:使用WHERE子句子查询
- 复杂数据处理:考虑FROM子句子查询
- 存在性检查:使用EXISTS
# 2. 性能考虑
- 优先考虑JOIN而非子查询
- 使用EXISTS代替IN(当子查询结果集较大时)
- 避免在SELECT列表中使用相关子查询
# 3. 可读性
- 复杂子查询考虑拆分为多个步骤
- 使用有意义的别名
- 适当添加注释
# 4. 调试技巧
- 单独执行子查询验证结果
- 使用EXPLAIN分析执行计划
- 逐步构建复杂查询
# 总结
子查询是SQL中强大的功能,能够实现复杂的数据查询和分析。正确使用子查询可以简化查询逻辑,但也要注意性能影响。在实际应用中,需要根据具体场景选择最适合的查询方式。