DQL数据查询语言
哪吒 2024/1/1
# DQL数据查询语言
DQL (Data Query Language) 数据查询语言,主要用于从数据库中检索数据
# 概述
DQL是SQL的核心部分,主要包含SELECT语句及其相关子句。虽然SELECT在技术上属于DML的一部分,但由于其重要性和复杂性,通常单独归类为DQL。
# 基本语法结构
SELECT [DISTINCT] 列名列表
FROM 表名列表
[WHERE 条件]
[GROUP BY 分组列]
[HAVING 分组条件]
[ORDER BY 排序列]
[LIMIT 限制条数];
# 核心组件
# 1. SELECT子句
# 基本查询
-- 查询所有列
SELECT * FROM users;
-- 查询指定列
SELECT id, name, email FROM users;
-- 使用别名
SELECT
id AS user_id,
name AS user_name,
email AS user_email
FROM users;
# 去重查询
-- 去除重复记录
SELECT DISTINCT city FROM users;
-- 多列去重
SELECT DISTINCT city, country FROM users;
# 计算字段
-- 数值计算
SELECT
product_name,
price,
quantity,
price * quantity AS total_amount
FROM order_items;
-- 字符串拼接
SELECT
CONCAT(first_name, ' ', last_name) AS full_name
FROM users;
# 2. FROM子句
# 单表查询
SELECT * FROM products;
# 多表查询
-- 交叉连接(笛卡尔积)
SELECT * FROM users, orders;
-- 内连接
SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
# 子查询作为数据源
SELECT *
FROM (
SELECT user_id, COUNT(*) as order_count
FROM orders
GROUP BY user_id
) AS user_orders
WHERE order_count > 5;
# 3. WHERE子句
# 基本条件
-- 等值条件
SELECT * FROM users WHERE age = 25;
-- 范围条件
SELECT * FROM products WHERE price BETWEEN 100 AND 500;
-- 模糊匹配
SELECT * FROM users WHERE name LIKE 'John%';
-- 空值检查
SELECT * FROM users WHERE phone IS NOT NULL;
# 复合条件
-- AND条件
SELECT * FROM users
WHERE age >= 18 AND city = 'Beijing';
-- OR条件
SELECT * FROM products
WHERE category = 'Electronics' OR category = 'Books';
-- IN条件
SELECT * FROM users
WHERE city IN ('Beijing', 'Shanghai', 'Guangzhou');
# 4. GROUP BY子句
-- 基本分组
SELECT city, COUNT(*) as user_count
FROM users
GROUP BY city;
-- 多列分组
SELECT city, gender, COUNT(*) as count
FROM users
GROUP BY city, gender;
-- 分组统计
SELECT
category,
COUNT(*) as product_count,
AVG(price) as avg_price,
MAX(price) as max_price,
MIN(price) as min_price
FROM products
GROUP BY category;
# 5. HAVING子句
-- 分组后筛选
SELECT city, COUNT(*) as user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;
-- 复合条件
SELECT
category,
AVG(price) as avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 200 AND COUNT(*) >= 10;
# 6. ORDER BY子句
-- 单列排序
SELECT * FROM users ORDER BY age DESC;
-- 多列排序
SELECT * FROM users
ORDER BY city ASC, age DESC;
-- 按表达式排序
SELECT name, price, quantity, price * quantity as total
FROM order_items
ORDER BY price * quantity DESC;
# 7. LIMIT子句
-- 限制返回行数
SELECT * FROM users LIMIT 10;
-- 分页查询
SELECT * FROM users
ORDER BY id
LIMIT 20 OFFSET 40; -- 跳过前40行,取20行
-- MySQL分页语法
SELECT * FROM users
ORDER BY id
LIMIT 40, 20; -- 从第40行开始,取20行
# 高级查询技巧
# 1. 子查询
# 标量子查询
SELECT name, age
FROM users
WHERE age > (SELECT AVG(age) FROM users);
# 列子查询
SELECT *
FROM products
WHERE category_id IN (
SELECT id FROM categories
WHERE name LIKE '%Electronics%'
);
# 行子查询
SELECT *
FROM products
WHERE (category_id, price) = (
SELECT category_id, MAX(price)
FROM products
GROUP BY category_id
LIMIT 1
);
# 2. 关联子查询
-- EXISTS子查询
SELECT *
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id
);
-- NOT EXISTS子查询
SELECT *
FROM users u
WHERE NOT EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id
);
# 3. 窗口函数
-- 排名函数
SELECT
name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
RANK() OVER (ORDER BY salary DESC) as rank_num,
DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank
FROM employees;
-- 分组排名
SELECT
department,
name,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank
FROM employees;
# 4. 公用表表达式(CTE)
-- 基本CTE
WITH high_value_orders AS (
SELECT user_id, order_id, total_amount
FROM orders
WHERE total_amount > 1000
)
SELECT u.name, h.order_id, h.total_amount
FROM users u
JOIN high_value_orders h ON u.id = h.user_id;
-- 递归CTE
WITH RECURSIVE employee_hierarchy AS (
-- 锚点查询
SELECT id, name, manager_id, 1 as level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归查询
SELECT e.id, e.name, e.manager_id, eh.level + 1
FROM employees e
JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT * FROM employee_hierarchy;
# 实战案例
# 案例1:电商订单分析
-- 查询每个用户的订单统计
SELECT
u.id,
u.name,
COUNT(o.id) as order_count,
COALESCE(SUM(o.total_amount), 0) as total_spent,
COALESCE(AVG(o.total_amount), 0) as avg_order_value,
MAX(o.order_date) as last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name
ORDER BY total_spent DESC;
# 案例2:产品销售排行
-- 查询最受欢迎的产品
SELECT
p.id,
p.name,
p.category,
SUM(oi.quantity) as total_sold,
SUM(oi.quantity * oi.price) as total_revenue,
COUNT(DISTINCT oi.order_id) as order_count
FROM products p
JOIN order_items oi ON p.id = oi.product_id
JOIN orders o ON oi.order_id = o.id
WHERE o.order_date >= DATE_SUB(NOW(), INTERVAL 30 DAY)
GROUP BY p.id, p.name, p.category
HAVING total_sold > 0
ORDER BY total_revenue DESC
LIMIT 20;
# 案例3:用户行为分析
-- 分析用户购买行为模式
WITH user_stats AS (
SELECT
user_id,
COUNT(*) as order_count,
SUM(total_amount) as total_spent,
DATEDIFF(MAX(order_date), MIN(order_date)) as customer_lifetime_days
FROM orders
GROUP BY user_id
),
user_segments AS (
SELECT
user_id,
order_count,
total_spent,
customer_lifetime_days,
CASE
WHEN total_spent >= 5000 THEN 'VIP'
WHEN total_spent >= 2000 THEN 'Premium'
WHEN total_spent >= 500 THEN 'Regular'
ELSE 'Basic'
END as customer_segment
FROM user_stats
)
SELECT
customer_segment,
COUNT(*) as customer_count,
AVG(total_spent) as avg_spent,
AVG(order_count) as avg_orders,
AVG(customer_lifetime_days) as avg_lifetime_days
FROM user_segments
GROUP BY customer_segment
ORDER BY avg_spent DESC;
# 性能优化
# 1. 索引优化
-- 为WHERE条件创建索引
CREATE INDEX idx_users_city_age ON users(city, age);
-- 为ORDER BY创建索引
CREATE INDEX idx_orders_date ON orders(order_date);
-- 为JOIN条件创建索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
# 2. 查询优化技巧
-- 避免SELECT *
-- 不好的写法
SELECT * FROM users WHERE city = 'Beijing';
-- 好的写法
SELECT id, name, email FROM users WHERE city = 'Beijing';
-- 使用LIMIT限制结果集
SELECT id, name FROM users ORDER BY created_at DESC LIMIT 100;
-- 避免在WHERE子句中使用函数
-- 不好的写法
SELECT * FROM orders WHERE YEAR(order_date) = 2024;
-- 好的写法
SELECT * FROM orders
WHERE order_date >= '2024-01-01'
AND order_date < '2025-01-01';
# 3. 子查询优化
-- 将相关子查询改为JOIN
-- 不好的写法
SELECT *
FROM users u
WHERE u.id IN (
SELECT user_id FROM orders WHERE total_amount > 1000
);
-- 好的写法
SELECT DISTINCT u.*
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 1000;
# 常见错误与解决方案
# 1. GROUP BY错误
-- 错误:SELECT列表中包含非聚合列
-- SELECT name, city, COUNT(*) FROM users GROUP BY city; -- 错误
-- 正确写法
SELECT city, COUNT(*) FROM users GROUP BY city;
# 2. NULL值处理
-- 注意NULL值的比较
SELECT * FROM users WHERE phone != 'unknown'; -- 不会匹配NULL值
SELECT * FROM users WHERE phone != 'unknown' OR phone IS NULL;
-- 使用COALESCE处理NULL
SELECT name, COALESCE(phone, 'N/A') as phone FROM users;
# 3. 字符串比较
-- 注意大小写敏感性
SELECT * FROM users WHERE LOWER(name) = 'john'; -- 不区分大小写
-- 使用COLLATE指定排序规则
SELECT * FROM users WHERE name = 'John' COLLATE utf8_general_ci;
# 最佳实践
# 1. 查询设计原则
- 明确需求:只查询需要的列和行
- 合理使用索引:为常用查询条件创建索引
- 避免复杂子查询:优先使用JOIN替代子查询
- 使用LIMIT:限制大结果集的返回
# 2. 代码规范
-- 使用一致的命名规范
-- 使用有意义的别名
-- 合理格式化SQL语句
SELECT
u.id AS user_id,
u.name AS user_name,
COUNT(o.id) AS order_count,
SUM(o.total_amount) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active'
GROUP BY u.id, u.name
HAVING COUNT(o.id) > 0
ORDER BY total_spent DESC
LIMIT 100;
# 3. 安全考虑
- 防止SQL注入:使用参数化查询
- 权限控制:只授予必要的查询权限
- 敏感数据保护:避免在日志中记录敏感信息
# 总结
DQL是SQL的核心,掌握好SELECT语句及其各种子句的使用是数据库开发的基础。通过合理的查询设计、索引优化和性能调优,可以构建高效的数据查询系统。在实际应用中,要根据具体的业务需求和数据特点,选择最适合的查询策略。