JOIN表连接
哪吒 2024/1/1
# JOIN表连接
点击勘误issues (opens new window),哪吒感谢大家的阅读
# JOIN概述
JOIN是SQL中用于连接两个或多个表的操作,通过指定的连接条件将不同表中的相关数据组合在一起。JOIN是关系数据库查询的核心功能之一。
# 基本语法
SELECT 列名列表
FROM 表1
JOIN 表2 ON 连接条件
WHERE 过滤条件;
# 准备测试数据
为了更好地理解JOIN操作,我们先创建几个示例表:
-- 员工表
CREATE TABLE employees (
emp_id INT PRIMARY KEY,
name VARCHAR(100),
department_id INT,
salary DECIMAL(10,2),
hire_date DATE
);
-- 部门表
CREATE TABLE departments (
dept_id INT PRIMARY KEY,
dept_name VARCHAR(100),
location VARCHAR(100)
);
-- 项目表
CREATE TABLE projects (
project_id INT PRIMARY KEY,
project_name VARCHAR(100),
start_date DATE,
end_date DATE
);
-- 员工项目关联表
CREATE TABLE employee_projects (
emp_id INT,
project_id INT,
role VARCHAR(50),
PRIMARY KEY (emp_id, project_id)
);
-- 插入测试数据
INSERT INTO departments VALUES
(1, 'IT部', '北京'),
(2, 'HR部', '上海'),
(3, '财务部', '广州'),
(4, '市场部', '深圳');
INSERT INTO employees VALUES
(1, '张三', 1, 8000, '2020-01-15'),
(2, '李四', 1, 9000, '2019-03-20'),
(3, '王五', 2, 7000, '2021-06-10'),
(4, '赵六', 2, 7500, '2020-11-05'),
(5, '钱七', NULL, 6000, '2022-01-20'),
(6, '孙八', 3, 8500, '2018-09-15');
INSERT INTO projects VALUES
(1, '电商平台', '2023-01-01', '2023-12-31'),
(2, '移动应用', '2023-03-01', '2023-09-30'),
(3, '数据分析', '2023-06-01', '2024-02-29');
INSERT INTO employee_projects VALUES
(1, 1, '项目经理'),
(2, 1, '开发工程师'),
(1, 2, '技术顾问'),
(3, 3, '数据分析师');
# INNER JOIN(内连接)
INNER JOIN返回两个表中满足连接条件的记录。
# 基本语法
SELECT 列名列表
FROM 表1
INNER JOIN 表2 ON 表1.列名 = 表2.列名;
# 实例演示
-- 查询员工及其部门信息
SELECT e.name, e.salary, d.dept_name, d.location
FROM employees e
INNER JOIN departments d ON e.department_id = d.dept_id;
-- 结果:只返回有部门分配的员工
-- 张三, 8000, IT部, 北京
-- 李四, 9000, IT部, 北京
-- 王五, 7000, HR部, 上海
-- 赵六, 7500, HR部, 上海
-- 孙八, 8500, 财务部, 广州
# 多表INNER JOIN
-- 查询员工、部门和项目信息
SELECT e.name, d.dept_name, p.project_name, ep.role
FROM employees e
INNER JOIN departments d ON e.department_id = d.dept_id
INNER JOIN employee_projects ep ON e.emp_id = ep.emp_id
INNER JOIN projects p ON ep.project_id = p.project_id;
# 使用WHERE进一步过滤
-- 查询IT部门参与项目的员工
SELECT e.name, d.dept_name, p.project_name, ep.role
FROM employees e
INNER JOIN departments d ON e.department_id = d.dept_id
INNER JOIN employee_projects ep ON e.emp_id = ep.emp_id
INNER JOIN projects p ON ep.project_id = p.project_id
WHERE d.dept_name = 'IT部';
# LEFT JOIN(左外连接)
LEFT JOIN返回左表的所有记录,以及右表中满足连接条件的记录。如果右表中没有匹配的记录,则用NULL填充。
# 基本语法
SELECT 列名列表
FROM 表1
LEFT JOIN 表2 ON 表1.列名 = 表2.列名;
# 实例演示
-- 查询所有员工及其部门信息(包括没有部门分配的员工)
SELECT e.name, e.salary, d.dept_name, d.location
FROM employees e
LEFT JOIN departments d ON e.department_id = d.dept_id;
-- 结果:包含所有员工,没有部门的显示NULL
-- 张三, 8000, IT部, 北京
-- 李四, 9000, IT部, 北京
-- 王五, 7000, HR部, 上海
-- 赵六, 7500, HR部, 上海
-- 钱七, 6000, NULL, NULL
-- 孙八, 8500, 财务部, 广州
# 查找没有匹配的记录
-- 查询没有部门分配的员工
SELECT e.name, e.salary
FROM employees e
LEFT JOIN departments d ON e.department_id = d.dept_id
WHERE d.dept_id IS NULL;
-- 查询没有员工的部门
SELECT d.dept_name, d.location
FROM departments d
LEFT JOIN employees e ON d.dept_id = e.department_id
WHERE e.emp_id IS NULL;
# 统计查询
-- 查询每个部门的员工数量
SELECT d.dept_name, COUNT(e.emp_id) as employee_count
FROM departments d
LEFT JOIN employees e ON d.dept_id = e.department_id
GROUP BY d.dept_id, d.dept_name;
# RIGHT JOIN(右外连接)
RIGHT JOIN返回右表的所有记录,以及左表中满足连接条件的记录。
# 基本语法
SELECT 列名列表
FROM 表1
RIGHT JOIN 表2 ON 表1.列名 = 表2.列名;
# 实例演示
-- 查询所有部门及其员工信息(包括没有员工的部门)
SELECT e.name, e.salary, d.dept_name, d.location
FROM employees e
RIGHT JOIN departments d ON e.department_id = d.dept_id;
-- 等价于LEFT JOIN的写法
SELECT e.name, e.salary, d.dept_name, d.location
FROM departments d
LEFT JOIN employees e ON d.dept_id = e.department_id;
# FULL OUTER JOIN(全外连接)
FULL OUTER JOIN返回两个表中的所有记录,无论是否有匹配。MySQL不直接支持FULL OUTER JOIN,但可以通过UNION模拟。
# MySQL中的实现
-- 使用UNION模拟FULL OUTER JOIN
SELECT e.name, e.salary, d.dept_name, d.location
FROM employees e
LEFT JOIN departments d ON e.department_id = d.dept_id
UNION
SELECT e.name, e.salary, d.dept_name, d.location
FROM employees e
RIGHT JOIN departments d ON e.department_id = d.dept_id
WHERE e.emp_id IS NULL;
# 其他数据库的语法
-- PostgreSQL, SQL Server, Oracle
SELECT e.name, e.salary, d.dept_name, d.location
FROM employees e
FULL OUTER JOIN departments d ON e.department_id = d.dept_id;
# CROSS JOIN(交叉连接)
CROSS JOIN返回两个表的笛卡尔积,即第一个表的每一行与第二个表的每一行组合。
# 基本语法
SELECT 列名列表
FROM 表1
CROSS JOIN 表2;
-- 或者
SELECT 列名列表
FROM 表1, 表2;
# 实例演示
-- 生成员工和项目的所有组合
SELECT e.name, p.project_name
FROM employees e
CROSS JOIN projects p;
-- 注意:结果会有 6 * 3 = 18 行记录
# 实际应用场景
-- 生成日期序列和员工的组合(用于考勤统计)
SELECT e.name, dates.date_value
FROM employees e
CROSS JOIN (
SELECT DATE_ADD('2023-01-01', INTERVAL n DAY) as date_value
FROM (
SELECT 0 as n UNION SELECT 1 UNION SELECT 2 UNION SELECT 3
UNION SELECT 4 UNION SELECT 5 UNION SELECT 6
) numbers
WHERE DATE_ADD('2023-01-01', INTERVAL n DAY) <= '2023-01-07'
) dates;
# SELF JOIN(自连接)
自连接是表与自身的连接,通常用于处理层次结构数据。
# 准备层次结构数据
-- 添加管理者字段
ALTER TABLE employees ADD COLUMN manager_id INT;
-- 更新管理关系
UPDATE employees SET manager_id = 2 WHERE emp_id = 1;
UPDATE employees SET manager_id = 1 WHERE emp_id IN (3, 4);
UPDATE employees SET manager_id = 3 WHERE emp_id = 5;
# 自连接示例
-- 查询员工及其直接上级
SELECT
e.name as employee_name,
m.name as manager_name
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.emp_id;
-- 查询有下属的管理者
SELECT DISTINCT
m.name as manager_name,
COUNT(e.emp_id) as subordinate_count
FROM employees e
INNER JOIN employees m ON e.manager_id = m.emp_id
GROUP BY m.emp_id, m.name;
-- 查询同级同事(有相同管理者的员工)
SELECT
e1.name as employee1,
e2.name as employee2,
m.name as common_manager
FROM employees e1
INNER JOIN employees e2 ON e1.manager_id = e2.manager_id AND e1.emp_id < e2.emp_id
INNER JOIN employees m ON e1.manager_id = m.emp_id;
# 复杂JOIN查询
# 多表连接
-- 查询员工的完整信息:姓名、部门、项目、角色
SELECT
e.name as employee_name,
d.dept_name,
p.project_name,
ep.role,
e.salary
FROM employees e
LEFT JOIN departments d ON e.department_id = d.dept_id
LEFT JOIN employee_projects ep ON e.emp_id = ep.emp_id
LEFT JOIN projects p ON ep.project_id = p.project_id
ORDER BY e.name, p.project_name;
# 条件连接
-- 查询薪资高于部门平均薪资的员工
SELECT
e.name,
e.salary,
d.dept_name,
dept_avg.avg_salary
FROM employees e
INNER JOIN departments d ON e.department_id = d.dept_id
INNER JOIN (
SELECT
department_id,
AVG(salary) as avg_salary
FROM employees
WHERE department_id IS NOT NULL
GROUP BY department_id
) dept_avg ON e.department_id = dept_avg.department_id
WHERE e.salary > dept_avg.avg_salary;
# 使用子查询的JOIN
-- 查询参与项目最多的员工
SELECT
e.name,
d.dept_name,
project_count.count as project_count
FROM employees e
INNER JOIN departments d ON e.department_id = d.dept_id
INNER JOIN (
SELECT
emp_id,
COUNT(*) as count
FROM employee_projects
GROUP BY emp_id
HAVING COUNT(*) = (
SELECT MAX(project_count)
FROM (
SELECT COUNT(*) as project_count
FROM employee_projects
GROUP BY emp_id
) counts
)
) project_count ON e.emp_id = project_count.emp_id;
# JOIN性能优化
# 1. 索引优化
-- 在连接字段上创建索引
CREATE INDEX idx_emp_dept ON employees(department_id);
CREATE INDEX idx_emp_proj_emp ON employee_projects(emp_id);
CREATE INDEX idx_emp_proj_proj ON employee_projects(project_id);
-- 复合索引
CREATE INDEX idx_emp_dept_salary ON employees(department_id, salary);
# 2. 查询优化技巧
-- 优化前:大表在前
SELECT *
FROM large_table l
JOIN small_table s ON l.id = s.large_id;
-- 优化后:小表在前(某些数据库)
SELECT *
FROM small_table s
JOIN large_table l ON s.large_id = l.id;
-- 使用EXISTS替代IN子查询
-- 优化前
SELECT * FROM employees
WHERE department_id IN (
SELECT dept_id FROM departments WHERE location = '北京'
);
-- 优化后
SELECT * FROM employees e
WHERE EXISTS (
SELECT 1 FROM departments d
WHERE d.dept_id = e.department_id AND d.location = '北京'
);
# 3. 避免笛卡尔积
-- 错误:忘记连接条件
SELECT e.name, d.dept_name
FROM employees e, departments d; -- 产生笛卡尔积
-- 正确:添加连接条件
SELECT e.name, d.dept_name
FROM employees e, departments d
WHERE e.department_id = d.dept_id;
-- 更好:使用显式JOIN
SELECT e.name, d.dept_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.dept_id;
# 实战案例
# 案例1:员工薪资分析
-- 查询各部门薪资统计
SELECT
d.dept_name,
COUNT(e.emp_id) as employee_count,
AVG(e.salary) as avg_salary,
MIN(e.salary) as min_salary,
MAX(e.salary) as max_salary,
SUM(e.salary) as total_salary
FROM departments d
LEFT JOIN employees e ON d.dept_id = e.department_id
GROUP BY d.dept_id, d.dept_name
ORDER BY avg_salary DESC;
# 案例2:项目参与情况分析
-- 查询项目参与情况
SELECT
p.project_name,
COUNT(ep.emp_id) as participant_count,
GROUP_CONCAT(CONCAT(e.name, '(', ep.role, ')') SEPARATOR ', ') as participants,
DATEDIFF(p.end_date, p.start_date) as duration_days
FROM projects p
LEFT JOIN employee_projects ep ON p.project_id = ep.project_id
LEFT JOIN employees e ON ep.emp_id = e.emp_id
GROUP BY p.project_id, p.project_name, p.start_date, p.end_date
ORDER BY participant_count DESC;
# 案例3:组织架构查询
-- 查询组织架构(递归查询,MySQL 8.0+)
WITH RECURSIVE org_chart AS (
-- 查找顶级管理者
SELECT
emp_id,
name,
manager_id,
0 as level,
CAST(name AS CHAR(1000)) as path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归查找下级
SELECT
e.emp_id,
e.name,
e.manager_id,
oc.level + 1,
CONCAT(oc.path, ' -> ', e.name)
FROM employees e
INNER JOIN org_chart oc ON e.manager_id = oc.emp_id
)
SELECT
CONCAT(REPEAT(' ', level), name) as hierarchy,
level,
path
FROM org_chart
ORDER BY path;
# 常见错误
# 1. 连接条件错误
-- 错误:连接条件不正确
SELECT e.name, d.dept_name
FROM employees e
JOIN departments d ON e.emp_id = d.dept_id; -- 字段不匹配
-- 正确
SELECT e.name, d.dept_name
FROM employees e
JOIN departments d ON e.department_id = d.dept_id;
# 2. 忘记处理NULL值
-- 可能遗漏NULL值的情况
SELECT e.name, d.dept_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.dept_id;
-- 这会遗漏department_id为NULL的员工
-- 如果需要包含所有员工
SELECT e.name, COALESCE(d.dept_name, '未分配') as dept_name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.dept_id;
# 3. 重复数据问题
-- 可能产生重复数据
SELECT e.name, d.dept_name
FROM employees e
JOIN departments d ON e.department_id = d.dept_id
JOIN employee_projects ep ON e.emp_id = ep.emp_id;
-- 如果员工参与多个项目,会出现重复行
-- 解决方案:使用DISTINCT或适当的GROUP BY
SELECT DISTINCT e.name, d.dept_name
FROM employees e
JOIN departments d ON e.department_id = d.dept_id
JOIN employee_projects ep ON e.emp_id = ep.emp_id;
# 总结
JOIN是SQL中最重要的功能之一,掌握不同类型的JOIN对于编写复杂查询至关重要:
- INNER JOIN:返回两表匹配的记录
- LEFT JOIN:返回左表所有记录和右表匹配的记录
- RIGHT JOIN:返回右表所有记录和左表匹配的记录
- FULL OUTER JOIN:返回两表所有记录
- CROSS JOIN:返回两表的笛卡尔积
- SELF JOIN:表与自身连接
# 最佳实践
- 总是使用显式JOIN语法而不是隐式连接
- 在连接字段上创建适当的索引
- 注意NULL值的处理
- 避免不必要的笛卡尔积
- 合理使用WHERE条件过滤数据
- 考虑查询性能和执行计划