子查询

# 子查询

# 概述

子查询(Subquery)是嵌套在其他SQL语句中的查询语句。子查询可以出现在SELECT、FROM、WHERE、HAVING等子句中,为主查询提供数据或条件。

# 子查询分类

# 按返回结果分类

# 1. 标量子查询(返回单个值)

-- 查询工资高于平均工资的员工
SELECT name, salary 
FROM employees 
WHERE salary > (SELECT AVG(salary) FROM employees);

# 2. 行子查询(返回单行多列)

-- 查询与某个员工同部门同职位的其他员工
SELECT * FROM employees 
WHERE (department_id, job_id) = (
    SELECT department_id, job_id 
    FROM employees 
    WHERE employee_id = 100
);

# 3. 列子查询(返回多行单列)

-- 查询在销售部门工作的员工
SELECT * FROM employees 
WHERE department_id IN (
    SELECT department_id 
    FROM departments 
    WHERE department_name LIKE '%Sales%'
);

# 4. 表子查询(返回多行多列)

-- 查询每个部门工资最高的员工
SELECT * FROM employees e1
WHERE (department_id, salary) IN (
    SELECT department_id, MAX(salary)
    FROM employees
    GROUP BY department_id
);

# 按执行方式分类

# 1. 相关子查询

-- 查询工资高于本部门平均工资的员工
SELECT e1.name, e1.salary, e1.department_id
FROM employees e1
WHERE e1.salary > (
    SELECT AVG(e2.salary)
    FROM employees e2
    WHERE e2.department_id = e1.department_id
);

# 2. 非相关子查询

-- 查询工资高于公司平均工资的员工
SELECT name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);

# 子查询位置

# 1. SELECT子句中的子查询

-- 查询员工信息及其部门名称
SELECT 
    employee_id,
    name,
    (
        SELECT department_name 
        FROM departments d 
        WHERE d.department_id = e.department_id
    ) AS department_name
FROM employees e;

# 2. FROM子句中的子查询(派生表)

-- 查询各部门平均工资信息
SELECT 
    dept_avg.department_id,
    dept_avg.avg_salary,
    d.department_name
FROM (
    SELECT department_id, AVG(salary) as avg_salary
    FROM employees
    GROUP BY department_id
) dept_avg
JOIN departments d ON dept_avg.department_id = d.department_id;

# 3. WHERE子句中的子查询

-- 使用EXISTS
SELECT * FROM departments d
WHERE EXISTS (
    SELECT 1 FROM employees e
    WHERE e.department_id = d.department_id
);

-- 使用NOT EXISTS
SELECT * FROM departments d
WHERE NOT EXISTS (
    SELECT 1 FROM employees e
    WHERE e.department_id = d.department_id
);

# 4. HAVING子句中的子查询

-- 查询平均工资高于公司总体平均工资的部门
SELECT department_id, AVG(salary) as avg_salary
FROM employees
GROUP BY department_id
HAVING AVG(salary) > (SELECT AVG(salary) FROM employees);

# 常用子查询操作符

# 1. IN / NOT IN

-- 查询在指定部门工作的员工
SELECT * FROM employees
WHERE department_id IN (
    SELECT department_id FROM departments
    WHERE location_id = 1700
);

-- 查询不在指定部门工作的员工
SELECT * FROM employees
WHERE department_id NOT IN (
    SELECT department_id FROM departments
    WHERE location_id = 1700
    AND department_id IS NOT NULL  -- 注意NULL值处理
);

# 2. ANY / SOME

-- 查询工资比任意一个销售员工高的员工
SELECT * FROM employees
WHERE salary > ANY (
    SELECT salary FROM employees
    WHERE job_id = 'SA_REP'
);

-- 等价于
SELECT * FROM employees
WHERE salary > (
    SELECT MIN(salary) FROM employees
    WHERE job_id = 'SA_REP'
);

# 3. ALL

-- 查询工资比所有销售员工都高的员工
SELECT * FROM employees
WHERE salary > ALL (
    SELECT salary FROM employees
    WHERE job_id = 'SA_REP'
);

-- 等价于
SELECT * FROM employees
WHERE salary > (
    SELECT MAX(salary) FROM employees
    WHERE job_id = 'SA_REP'
);

# 4. EXISTS / NOT EXISTS

-- 查询有员工的部门
SELECT * FROM departments d
WHERE EXISTS (
    SELECT 1 FROM employees e
    WHERE e.department_id = d.department_id
);

-- 查询没有员工的部门
SELECT * FROM departments d
WHERE NOT EXISTS (
    SELECT 1 FROM employees e
    WHERE e.department_id = d.department_id
);

# 实战案例

# 案例1:电商订单分析

-- 创建测试数据
CREATE TABLE orders (
    order_id INT PRIMARY KEY,
    customer_id INT,
    order_date DATE,
    total_amount DECIMAL(10,2)
);

CREATE TABLE order_items (
    item_id INT PRIMARY KEY,
    order_id INT,
    product_id INT,
    quantity INT,
    price DECIMAL(10,2)
);

-- 查询购买金额超过平均订单金额的客户
SELECT DISTINCT customer_id
FROM orders
WHERE total_amount > (
    SELECT AVG(total_amount) FROM orders
);

-- 查询购买了超过3种不同商品的订单
SELECT order_id, total_amount
FROM orders o
WHERE (
    SELECT COUNT(DISTINCT product_id)
    FROM order_items oi
    WHERE oi.order_id = o.order_id
) > 3;

-- 查询每个客户的最大订单金额
SELECT 
    customer_id,
    (
        SELECT MAX(total_amount)
        FROM orders o2
        WHERE o2.customer_id = o1.customer_id
    ) as max_order_amount
FROM orders o1
GROUP BY customer_id;

# 案例2:学生成绩分析

-- 查询每门课程成绩高于该课程平均分的学生
SELECT student_id, course_id, score
FROM scores s1
WHERE s1.score > (
    SELECT AVG(s2.score)
    FROM scores s2
    WHERE s2.course_id = s1.course_id
);

-- 查询至少有一门课程不及格的学生
SELECT DISTINCT student_id
FROM students s
WHERE EXISTS (
    SELECT 1 FROM scores sc
    WHERE sc.student_id = s.student_id
    AND sc.score < 60
);

-- 查询所有课程都及格的学生
SELECT student_id
FROM students s
WHERE NOT EXISTS (
    SELECT 1 FROM scores sc
    WHERE sc.student_id = s.student_id
    AND sc.score < 60
);

# 性能优化

# 1. 子查询 vs JOIN

-- 子查询方式(可能较慢)
SELECT * FROM employees
WHERE department_id IN (
    SELECT department_id FROM departments
    WHERE location_id = 1700
);

-- JOIN方式(通常更快)
SELECT DISTINCT e.*
FROM employees e
JOIN departments d ON e.department_id = d.department_id
WHERE d.location_id = 1700;

# 2. EXISTS vs IN

-- 当子查询结果集较大时,EXISTS通常更快
SELECT * FROM customers c
WHERE EXISTS (
    SELECT 1 FROM orders o
    WHERE o.customer_id = c.customer_id
);

-- IN适用于子查询结果集较小的情况
SELECT * FROM employees
WHERE department_id IN (10, 20, 30);

# 3. 避免相关子查询

-- 相关子查询(较慢)
SELECT e1.*
FROM employees e1
WHERE e1.salary > (
    SELECT AVG(e2.salary)
    FROM employees e2
    WHERE e2.department_id = e1.department_id
);

-- 使用窗口函数优化
SELECT *
FROM (
    SELECT *,
           AVG(salary) OVER (PARTITION BY department_id) as dept_avg_salary
    FROM employees
) t
WHERE salary > dept_avg_salary;

# 常见错误

# 1. 子查询返回多行

-- 错误:子查询返回多行
SELECT * FROM employees
WHERE salary = (
    SELECT salary FROM employees
    WHERE department_id = 10
);

-- 正确:使用IN或ANY
SELECT * FROM employees
WHERE salary IN (
    SELECT salary FROM employees
    WHERE department_id = 10
);

# 2. NULL值处理

-- 可能出现意外结果
SELECT * FROM employees
WHERE department_id NOT IN (
    SELECT department_id FROM departments
    WHERE location_id = 1700
);

-- 正确处理NULL值
SELECT * FROM employees
WHERE department_id NOT IN (
    SELECT department_id FROM departments
    WHERE location_id = 1700
    AND department_id IS NOT NULL
);

# 3. 相关子查询性能问题

-- 避免在大表上使用复杂的相关子查询
-- 考虑使用JOIN或窗口函数替代

# 最佳实践

# 1. 选择合适的子查询类型

  • 简单条件过滤:使用WHERE子句子查询
  • 复杂数据处理:考虑FROM子句子查询
  • 存在性检查:使用EXISTS

# 2. 性能考虑

  • 优先考虑JOIN而非子查询
  • 使用EXISTS代替IN(当子查询结果集较大时)
  • 避免在SELECT列表中使用相关子查询

# 3. 可读性

  • 复杂子查询考虑拆分为多个步骤
  • 使用有意义的别名
  • 适当添加注释

# 4. 调试技巧

  • 单独执行子查询验证结果
  • 使用EXPLAIN分析执行计划
  • 逐步构建复杂查询

# 总结

子查询是SQL中强大的功能,能够实现复杂的数据查询和分析。正确使用子查询可以简化查询逻辑,但也要注意性能影响。在实际应用中,需要根据具体场景选择最适合的查询方式。