DQL数据查询语言

2024/1/1

# DQL数据查询语言

DQL (Data Query Language) 数据查询语言,主要用于从数据库中检索数据

# 概述

DQL是SQL的核心部分,主要包含SELECT语句及其相关子句。虽然SELECT在技术上属于DML的一部分,但由于其重要性和复杂性,通常单独归类为DQL。

# 基本语法结构

SELECT [DISTINCT] 列名列表
FROM 表名列表
[WHERE 条件]
[GROUP BY 分组列]
[HAVING 分组条件]
[ORDER BY 排序列]
[LIMIT 限制条数];

# 核心组件

# 1. SELECT子句

# 基本查询

-- 查询所有列
SELECT * FROM users;

-- 查询指定列
SELECT id, name, email FROM users;

-- 使用别名
SELECT 
    id AS user_id,
    name AS user_name,
    email AS user_email
FROM users;

# 去重查询

-- 去除重复记录
SELECT DISTINCT city FROM users;

-- 多列去重
SELECT DISTINCT city, country FROM users;

# 计算字段

-- 数值计算
SELECT 
    product_name,
    price,
    quantity,
    price * quantity AS total_amount
FROM order_items;

-- 字符串拼接
SELECT 
    CONCAT(first_name, ' ', last_name) AS full_name
FROM users;

# 2. FROM子句

# 单表查询

SELECT * FROM products;

# 多表查询

-- 交叉连接(笛卡尔积)
SELECT * FROM users, orders;

-- 内连接
SELECT u.name, o.order_date
FROM users u
INNER JOIN orders o ON u.id = o.user_id;

# 子查询作为数据源

SELECT *
FROM (
    SELECT user_id, COUNT(*) as order_count
    FROM orders
    GROUP BY user_id
) AS user_orders
WHERE order_count > 5;

# 3. WHERE子句

# 基本条件

-- 等值条件
SELECT * FROM users WHERE age = 25;

-- 范围条件
SELECT * FROM products WHERE price BETWEEN 100 AND 500;

-- 模糊匹配
SELECT * FROM users WHERE name LIKE 'John%';

-- 空值检查
SELECT * FROM users WHERE phone IS NOT NULL;

# 复合条件

-- AND条件
SELECT * FROM users 
WHERE age >= 18 AND city = 'Beijing';

-- OR条件
SELECT * FROM products 
WHERE category = 'Electronics' OR category = 'Books';

-- IN条件
SELECT * FROM users 
WHERE city IN ('Beijing', 'Shanghai', 'Guangzhou');

# 4. GROUP BY子句

-- 基本分组
SELECT city, COUNT(*) as user_count
FROM users
GROUP BY city;

-- 多列分组
SELECT city, gender, COUNT(*) as count
FROM users
GROUP BY city, gender;

-- 分组统计
SELECT 
    category,
    COUNT(*) as product_count,
    AVG(price) as avg_price,
    MAX(price) as max_price,
    MIN(price) as min_price
FROM products
GROUP BY category;

# 5. HAVING子句

-- 分组后筛选
SELECT city, COUNT(*) as user_count
FROM users
GROUP BY city
HAVING COUNT(*) > 100;

-- 复合条件
SELECT 
    category,
    AVG(price) as avg_price
FROM products
GROUP BY category
HAVING AVG(price) > 200 AND COUNT(*) >= 10;

# 6. ORDER BY子句

-- 单列排序
SELECT * FROM users ORDER BY age DESC;

-- 多列排序
SELECT * FROM users 
ORDER BY city ASC, age DESC;

-- 按表达式排序
SELECT name, price, quantity, price * quantity as total
FROM order_items
ORDER BY price * quantity DESC;

# 7. LIMIT子句

-- 限制返回行数
SELECT * FROM users LIMIT 10;

-- 分页查询
SELECT * FROM users 
ORDER BY id
LIMIT 20 OFFSET 40;  -- 跳过前40行,取20行

-- MySQL分页语法
SELECT * FROM users 
ORDER BY id
LIMIT 40, 20;  -- 从第40行开始,取20行

# 高级查询技巧

# 1. 子查询

# 标量子查询

SELECT name, age
FROM users
WHERE age > (SELECT AVG(age) FROM users);

# 列子查询

SELECT *
FROM products
WHERE category_id IN (
    SELECT id FROM categories 
    WHERE name LIKE '%Electronics%'
);

# 行子查询

SELECT *
FROM products
WHERE (category_id, price) = (
    SELECT category_id, MAX(price)
    FROM products
    GROUP BY category_id
    LIMIT 1
);

# 2. 关联子查询

-- EXISTS子查询
SELECT *
FROM users u
WHERE EXISTS (
    SELECT 1 FROM orders o 
    WHERE o.user_id = u.id
);

-- NOT EXISTS子查询
SELECT *
FROM users u
WHERE NOT EXISTS (
    SELECT 1 FROM orders o 
    WHERE o.user_id = u.id
);

# 3. 窗口函数

-- 排名函数
SELECT 
    name,
    salary,
    ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
    RANK() OVER (ORDER BY salary DESC) as rank_num,
    DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank
FROM employees;

-- 分组排名
SELECT 
    department,
    name,
    salary,
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank
FROM employees;

# 4. 公用表表达式(CTE)

-- 基本CTE
WITH high_value_orders AS (
    SELECT user_id, order_id, total_amount
    FROM orders
    WHERE total_amount > 1000
)
SELECT u.name, h.order_id, h.total_amount
FROM users u
JOIN high_value_orders h ON u.id = h.user_id;

-- 递归CTE
WITH RECURSIVE employee_hierarchy AS (
    -- 锚点查询
    SELECT id, name, manager_id, 1 as level
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- 递归查询
    SELECT e.id, e.name, e.manager_id, eh.level + 1
    FROM employees e
    JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT * FROM employee_hierarchy;

# 实战案例

# 案例1:电商订单分析

-- 查询每个用户的订单统计
SELECT 
    u.id,
    u.name,
    COUNT(o.id) as order_count,
    COALESCE(SUM(o.total_amount), 0) as total_spent,
    COALESCE(AVG(o.total_amount), 0) as avg_order_value,
    MAX(o.order_date) as last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name
ORDER BY total_spent DESC;

# 案例2:产品销售排行

-- 查询最受欢迎的产品
SELECT 
    p.id,
    p.name,
    p.category,
    SUM(oi.quantity) as total_sold,
    SUM(oi.quantity * oi.price) as total_revenue,
    COUNT(DISTINCT oi.order_id) as order_count
FROM products p
JOIN order_items oi ON p.id = oi.product_id
JOIN orders o ON oi.order_id = o.id
WHERE o.order_date >= DATE_SUB(NOW(), INTERVAL 30 DAY)
GROUP BY p.id, p.name, p.category
HAVING total_sold > 0
ORDER BY total_revenue DESC
LIMIT 20;

# 案例3:用户行为分析

-- 分析用户购买行为模式
WITH user_stats AS (
    SELECT 
        user_id,
        COUNT(*) as order_count,
        SUM(total_amount) as total_spent,
        DATEDIFF(MAX(order_date), MIN(order_date)) as customer_lifetime_days
    FROM orders
    GROUP BY user_id
),
user_segments AS (
    SELECT 
        user_id,
        order_count,
        total_spent,
        customer_lifetime_days,
        CASE 
            WHEN total_spent >= 5000 THEN 'VIP'
            WHEN total_spent >= 2000 THEN 'Premium'
            WHEN total_spent >= 500 THEN 'Regular'
            ELSE 'Basic'
        END as customer_segment
    FROM user_stats
)
SELECT 
    customer_segment,
    COUNT(*) as customer_count,
    AVG(total_spent) as avg_spent,
    AVG(order_count) as avg_orders,
    AVG(customer_lifetime_days) as avg_lifetime_days
FROM user_segments
GROUP BY customer_segment
ORDER BY avg_spent DESC;

# 性能优化

# 1. 索引优化

-- 为WHERE条件创建索引
CREATE INDEX idx_users_city_age ON users(city, age);

-- 为ORDER BY创建索引
CREATE INDEX idx_orders_date ON orders(order_date);

-- 为JOIN条件创建索引
CREATE INDEX idx_orders_user_id ON orders(user_id);

# 2. 查询优化技巧

-- 避免SELECT *
-- 不好的写法
SELECT * FROM users WHERE city = 'Beijing';

-- 好的写法
SELECT id, name, email FROM users WHERE city = 'Beijing';

-- 使用LIMIT限制结果集
SELECT id, name FROM users ORDER BY created_at DESC LIMIT 100;

-- 避免在WHERE子句中使用函数
-- 不好的写法
SELECT * FROM orders WHERE YEAR(order_date) = 2024;

-- 好的写法
SELECT * FROM orders 
WHERE order_date >= '2024-01-01' 
AND order_date < '2025-01-01';

# 3. 子查询优化

-- 将相关子查询改为JOIN
-- 不好的写法
SELECT *
FROM users u
WHERE u.id IN (
    SELECT user_id FROM orders WHERE total_amount > 1000
);

-- 好的写法
SELECT DISTINCT u.*
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 1000;

# 常见错误与解决方案

# 1. GROUP BY错误

-- 错误:SELECT列表中包含非聚合列
-- SELECT name, city, COUNT(*) FROM users GROUP BY city;  -- 错误

-- 正确写法
SELECT city, COUNT(*) FROM users GROUP BY city;

# 2. NULL值处理

-- 注意NULL值的比较
SELECT * FROM users WHERE phone != 'unknown';  -- 不会匹配NULL值
SELECT * FROM users WHERE phone != 'unknown' OR phone IS NULL;

-- 使用COALESCE处理NULL
SELECT name, COALESCE(phone, 'N/A') as phone FROM users;

# 3. 字符串比较

-- 注意大小写敏感性
SELECT * FROM users WHERE LOWER(name) = 'john';  -- 不区分大小写

-- 使用COLLATE指定排序规则
SELECT * FROM users WHERE name = 'John' COLLATE utf8_general_ci;

# 最佳实践

# 1. 查询设计原则

  • 明确需求:只查询需要的列和行
  • 合理使用索引:为常用查询条件创建索引
  • 避免复杂子查询:优先使用JOIN替代子查询
  • 使用LIMIT:限制大结果集的返回

# 2. 代码规范

-- 使用一致的命名规范
-- 使用有意义的别名
-- 合理格式化SQL语句
SELECT 
    u.id AS user_id,
    u.name AS user_name,
    COUNT(o.id) AS order_count,
    SUM(o.total_amount) AS total_spent
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.status = 'active'
GROUP BY u.id, u.name
HAVING COUNT(o.id) > 0
ORDER BY total_spent DESC
LIMIT 100;

# 3. 安全考虑

  • 防止SQL注入:使用参数化查询
  • 权限控制:只授予必要的查询权限
  • 敏感数据保护:避免在日志中记录敏感信息

# 总结

DQL是SQL的核心,掌握好SELECT语句及其各种子句的使用是数据库开发的基础。通过合理的查询设计、索引优化和性能调优,可以构建高效的数据查询系统。在实际应用中,要根据具体的业务需求和数据特点,选择最适合的查询策略。