SQL练习题

练习一

准备数据

建表语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
CREATE TABLE students
(sno VARCHAR(3) NOT NULL,
sname VARCHAR(4) NOT NULL,
ssex VARCHAR(2) NOT NULL,
sbirthday DATETIME,
class VARCHAR(5));

CREATE TABLE courses
(cno VARCHAR(5) NOT NULL,
cname VARCHAR(10) NOT NULL,
tno VARCHAR(10) NOT NULL);

CREATE TABLE scores
(sno VARCHAR(3) NOT NULL,
cno VARCHAR(5) NOT NULL,
degree NUMERIC(10, 1) NOT NULL);

CREATE TABLE teachers
(tno VARCHAR(3) NOT NULL,
tname VARCHAR(4) NOT NULL, tsex VARCHAR(2) NOT NULL,
tbirthday DATETIME NOT NULL, prof VARCHAR(6),
depart VARCHAR(10) NOT NULL);

插入数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (108 ,'曾华' ,'男' ,'1977-09-01',95033);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (105 ,'匡明' ,'男' ,'1975-10-02',95031);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (107 ,'王丽' ,'女' ,'1976-01-23',95033);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, classS) VALUES (101 ,'李军' ,'男' ,'1976-02-20',95033);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (109 ,'王芳' ,'女' ,'1975-02-10',95031);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (103 ,'陆君' ,'男' ,'1974-06-03',95031);

INSERT INTO COURSES(cno, cname, tno) VALUES ('3-105' ,'计算机导论',825);
INSERT INTO COURSES(cno, cname, tno) VALUES ('3-245' ,'操作系统' ,804);
INSERT INTO COURSES(cno, cname, tno) VALUES ('6-166' ,'数据电路' ,856);
INSERT INTO COURSES(cno, cname, tno) VALUES ('9-888' ,'高等数学' ,100);

INSERT INTO SCORES(sno, cno, degree) VALUES (103,'3-245',86);
INSERT INTO SCORES(sno, cno, degree) VALUES (105,'3-245',75);
INSERT INTO SCORES(sno, cno, degree) VALUES (109,'3-245',68);
INSERT INTO SCORES(sno, cno, degree) VALUES (103,'3-105',92);
INSERT INTO SCORES(sno, cno, degree) VALUES (105,'3-105',88);
INSERT INTO SCORES(sno, cno, degree) VALUES (109,'3-105',76);
INSERT INTO SCORES(sno, cno, degree) VALUES (101,'3-105',64);
INSERT INTO SCORES(sno, cno, degree) VALUES (107,'3-105',91);
INSERT INTO SCORES(sno, cno, degree) VALUES (108,'3-105',78);
INSERT INTO SCORES(sno, cno, degree) VALUES (101,'6-166',85);
INSERT INTO SCORES(sno, cno, degree) VALUES (107,'6-166',79);
INSERT INTO SCORES(sno, cno, degree) VALUES (108,'6-166',81);

INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (804,'李诚','男','1958-12-02','副教授','计算机系');
INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (856,'张旭','男','1969-03-12','讲师','电子工程系');
INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (825,'王萍','女','1972-05-05','助教','计算机系');
INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (831,'刘冰','女','1977-08-14','助教','电子工程系');

练习题

  1. 查询student表中的所有记录的sname、ssex和class列。

    解答:

    1
    2
    SELECT sname, ssex, class
    FROM students;
  2. 查询教师所有的单位,即不重复的depart列。

    解答:

    1
    2
    SELECT DISTINCT depart
    FROM teachers;
  3. 查询students表的所有记录。

    解答:

    1
    2
    SELECT *
    FROM students;
  4. 查询scores表中成绩在60到80之间的所有记录。

    解答:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    # 方法一
    SELECT *
    FROM scores
    WHERE degree >= 60 AND degree <= 80;

    # 方法二
    SELECT *
    FROM scores
    WHERE degree BETWEEN 60 AND 80;
  5. ★查询scores表中成绩为85,86或88的记录

    解答:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    # 方法一
    SELECT *
    FROM scores
    WHERE degree IN (85, 86, 88);

    # 方法二
    SELECT *
    FROM scores
    WHERE degree = 85 OR degree = 86 OR degree = 88;
  6. 查询students表中“95031”班性别为“女”的同学记录。

    解答:

    1
    2
    3
    SELECT *
    FROM students
    WHERE class = '95031' OR ssex = '女';
  7. class降序查询students表的所有记录。

    解答:

    1
    2
    3
    SELECT *
    FROM students
    ORDER BY class DESC;
  8. ★以cno升序degree降序查询scores表的所有记录。

    解答:

    1
    2
    3
    SELECT *
    FROM scores
    ORDER BY con, degree DESC;
  9. 查询student表中“95031”班的学生人数

    解答:

    1
    2
    3
    SELECT COUNT(*) AS stu_num
    FROM students
    WHERE class = '95031';
  10. ★★★查询scores表中的最高分的学生学号和课程号。

    解答:子查询

    1
    2
    3
    4
    SELECT sno, cno
    FROM scores
    WHERE degree = (SELECT MAX(degree)
    FROM scores);
  11. ★查询scores表中‘3-105’号课程的平均分

    解答:

    1
    2
    3
    SELECT AVG(degree)
    FROM scores
    WHERE cno = '3-105';
  12. ★★★查询scores表中至少有5名学生选修的并以3开头的课程平均分数

    解答:分组HAVING子句用于过滤分组)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    # 方法一
    SELECT cno, AVG(degree) AS avg_degree
    FROM scores
    WHERE cno LIKE '3%'
    GROUP BY cno
    HAVING COUNT(*) >= 5;

    # 方法二
    SELECT cno, AVG(degree) AS avg_degree
    FROM scores
    GROUP BY cno
    HAVING COUNT(*) >= 5 AND cno LIKE '3%';
  13. ★★★查询scores表中最低分大于70最高分小于90的sno列。

    解答:

    1
    2
    3
    4
    SELECT sno
    FROM scores
    GROUP BY sno
    HAVING MIN(degree) > 70 AND MAX(degree) < 90;
  14. ★★查询所有学生的sname、cno和degree列。

    解答:

    • 外部联结(OUTER JOIN, OUTER可省略)
    • 内部联结(INNER JOIN)
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    # 方法一:外部联结, LEFT JOIN 左边的表 所有记录都会列出
    SELECT stu.sname, sco.cno, sco.degree
    FROM students AS stu LEFT JOIN scores AS sco
    ON stu.sno = sco.sno
    ORDER BY sname;

    # 方法二:内部联结, INNER JOIN 只返回两个表中联结字段相等的行
    SELECT stu.sname, sco.cno, sco.degree
    FROM students AS stu INNER JOIN scores AS sco
    ON stu.sno = sco.sno
    ORDER BY sname;
  15. ★查询所有学生的sno、cname和degree列。

    解答:外部联结

    1
    2
    3
    4
    SELECT s.sno, c.cname, s.degree
    FROM scores AS s
    LEFT JOIN courses AS c ON s.cno = c.cno
    ORDER BY sno;
  16. ★★★查询所有学生的sname、cname和degree列。

    解答:外部联结

    1
    2
    3
    4
    5
    SELECT st.sname, c.cname, sc.degree
    FROM students AS st
    LEFT JOIN scores AS sc ON st.sno = sc.sno
    LEFT JOIN courses AS c ON sc.cno = c.cno
    ORDER BY sname;
  17. ★★★查询“95033”班所选课程的平均分。

    解答:

    1
    2
    3
    4
    5
    6
    7
    SELECT cname, AVG(degree)
    FROM students AS st
    INNER JOIN scores AS sc ON st.sno = sc.sno
    INNER JOIN courses AS c ON sc.cno = c.cno
    WHERE class = '95033'
    GROUP BY c.cno
    ORDER BY cname;
  18. ★★★假设使用如下命令建立了一个grade表:

    1
    2
    3
    4
    5
    6
    7
    CREATE TABLE grade(low INT(3), upp INT(3), rank CHAR(1));
    INSERT INTO grade VALUES(90,100,'A');
    INSERT INTO grade VALUES(80,89,'B');
    INSERT INTO grade VALUES(70,79,'C');
    INSERT INTO grade VALUES(60,69,'D');
    INSERT INTO grade VALUES(0,59,'E');
    COMMIT;

    现查询所有同学的sno、cno和rank列。

    解答:

    1
    2
    3
    4
    SELECT sno, cno, rank
    FROM scores INNER JOIN grade
    ON scores.degree BETWEEN grade.low AND grade.upp
    ORDER BY sno;
  19. ★★★查询选修“3-105”课程成绩高于“109”号同学成绩的所有同学的记录。

    解答:

    • 自联结
    1
    2
    3
    4
    5
    6
    SELECT sname, s1.degree
    FROM scores AS s1
    INNER JOIN scores AS s2 ON s1.cno = s2.cno AND s1.degree > s2.degree
    INNER JOIN students AS st ON s1.sno = st.sno
    WHERE s1.cno = '3-105' AND s2.sno = '109'
    ORDER BY s1.degree;
  20. ★★★查询scores中选学一门以上课程的同学中分数为非最高分成绩的记录。

    解答:

    • 条件:
      1. 选学一门以上课程
      2. 列出这些同学的所有非最高分成绩
    1
    2
    3
    4
    5
    6
    7
    8
    SELECT scores.sno, cno, degree, max_degree
    FROM scores INNER JOIN
    (SELECT sno, MAX(degree) AS max_degree
    FROM scores
    GROUP BY sno
    HAVING COUNT(*) > 1) AS max
    ON scores.sno = max.sno AND degree < max_degree
    ORDER BY sno;
  21. 查询成绩高于学号为“109”、课程号为“3-105”的成绩的所有记录。

    解答:

    1
    2
    3
    4
    5
    6
    7
    SELECT st.sname, s1.cno, s1.degree
    FROM scores AS s1 INNER JOIN scores AS s2
    ON s1.cno = s2.cno AND s1.degree > s2.degree
    INNER JOIN students AS st
    ON s1.sno = st.sno
    WHERE s1.cno = '3-105' AND s2.sno = '109'
    ORDER BY s1.degree;
  22. ★★★查询和学号为108的同学同年出生的所有学生的sno、sname和sbirthday列。

    解答:函数YEAR(d)

    1
    2
    3
    4
    5
    SELECT s1.sno, s1.sname, s1.sbirthday
    FROM students AS s1 INNER JOIN students AS s2
    ON YEAR(s1.sbirthday) = YEAR(s2.sbirthday)
    WHERE s2.sno = '108'
    ORDER BY sbirthday;
  23. 查询“张旭“教师任课的学生成绩。

    解答:

    1
    2
    3
    4
    5
    6
    7
    SELECT sno, degree
    FROM scores INNER JOIN courses
    ON scores.cno = courses.cno
    INNER JOIN teachers
    ON courses.tno = teachers.tno
    WHERE teachers.tname = '张旭'
    ORDER BY degree;
  24. 查询选修某课程的同学人数多于5人教师姓名

    解答:

    1
    2
    3
    4
    5
    6
    7
    SELECT tname
    FROM scores AS s INNER JOIN courses AS c
    ON s.cno = c.cno
    INNER JOIN teachers AS t
    ON t.tno = c.tno
    GROUP BY c.cno
    HAVING COUNT(c.cno) > 5;
  25. 查询95033班和95031班全体学生的记录

    解答:

    1
    2
    3
    4
    SELECT *
    FROM students
    WHERE class IN ('95033', '95031')
    ORDER BY class;
  26. 查询有85分以上成绩的课程cno。

    解答:DISTINCT关键字

    1
    2
    3
    SELECT DISTINCT c.cno
    FROM scores
    WHERE degree > 85;
  27. 查询出“计算机系“教师所教课程的成绩表。

    解答:

    1
    2
    3
    4
    5
    6
    7
    SELECT t.tname, s.cno, cname, sno, degree
    FROM scores AS s INNER JOIN courses AS c
    ON s.cno = c.cno
    INNER JOIN teachers AS t
    ON t.tno = c.tno
    WHERE t.depart = '计算机系'
    ORDER BY t.tname, cname, degree DESC;
  28. 查询“计算机系”中与“电子工程系“的教师不同职称教师的tname和prof。

    解答:

    1
    2
    3
    4
    5
    6
    SELECT tname, prof
    FROM teachers
    WHERE depart = '计算机系' AND prof NOT IN
    (SELECT prof
    FROM teachers
    WHERE depart = '电子工程系');
  29. 查询选修编号为“3-105”课程且成绩至少高于任意选修编号为“3-245”的同学的成绩的cno、sno和degree。

    解答:

    1
    2
    3
    4
    5
    6
    SELECT cno, sno, degree
    FROM scores
    WHERE cno = '3-105' AND degree > (SELECT MIN(degree)
    FROM scores
    WHERE cno = '3-245')
    ORDER BY degree DESC;
  30. 查询选修编号为”3-105“且成绩高于所有选修编号为”3-245“课程的同学的cno、sno和degree。

    解答:

    1
    2
    3
    4
    5
    6
    SELECT cno, sno, degree
    FROM scores
    WHERE cno = '3-105' AND degree > (SELECT MAX(degree)
    FROM scores
    WHERE cno = '3-245')
    ORDER BY degree DESC;
  31. 查询所有教师和同学的name、sex和birthday。

    解答:组合查询UNION

    • UNION中每个查询必须包含相同的列、表达式或聚集函数(不过各个列不需要以相同的次序列出)。
    • 列数据类型必须兼容。类型不必完全相同,但必须是DBMS可以隐含地转换的类型(如不同的数值类型不同的日期类型)。
    1
    2
    3
    4
    5
    6
    SELECT tname AS name, tsex AS sex, tbirthday AS birthday
    FROM teachers
    UNION
    SELECT sname AS name, ssex AS sex, sbirthday AS birthday
    FROM students
    ORDER BY birthday;
  32. 查询所有“女”教师和“女”同学的name、sex和birthday。

    解答:

    1
    2
    3
    4
    5
    6
    7
    8
    SELECT tname AS name, tsex AS sex, tbirthday AS birthday
    FROM teachers AS t
    WHERE t.tsex = '女'
    UNION
    SELECT sname AS name, ssex AS sex, sbirthday AS birthday
    FROM students AS s
    WHERE s.ssex = '女'
    ORDER BY birthday;
  33. ★★★查询成绩比该课程平均成绩低的同学的成绩表。

    解答:

    1
    2
    3
    4
    5
    6
    SELECT s1.*, avg_degree
    FROM scores AS s1 INNER JOIN (
    SELECT cno, AVG(degree) AS avg_degree
    FROM scores
    GROUP BY cno) AS s2
    ON s1.cno = s2.cno AND s1.degree < s2.avg_degree;
  34. 查询所有任课教师的tname和depart。

    解答:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    SELECT DISTINCT tname, depart
    FROM courses AS c INNER JOIN teachers AS t
    ON c.tno = t.tno
    ORDER BY tname;

    SELECT tname, depart
    FROM teachers
    WHERE tno IN(
    SELECT tno
    FROM courses)
    ORDER BY tname;
  35. ★查询所有未讲课的教师的Tname和Depart。

    解答:

    1
    2
    3
    4
    5
    6
    SELECT tname, depart
    FROM teachers
    WHERE tno NOT IN(
    SELECT tno
    FROM courses)
    ORDER BY tname;
  36. ★★查询至少有2名男生的班号。

    解答:

    1
    2
    3
    4
    5
    SELECT class
    FROM students
    WHERE ssex = '男'
    GROUP BY class
    HAVING COUNT(*) >= 2;
  37. ★查询students表中不姓“王”的同学记录。

    解答:

    • LIKE操作符
    • 通配符%
    1
    2
    3
    SELECT *
    FROM students
    WHERE sname NOT LIKE '王%';
  38. ★★查询students表中每个学生的姓名和年龄

    解答:

    • 当前日期函数CURDATE()
    • 当前日期和时间函数NOW()
    1
    2
    SELECT sname, TIMESTAMPDIFF(YEAR, sbirthday, CURDATE()) AS age
    FROM students;
  39. ★★查询students表中最大最小的sbirthday日期值。

    解答:

    1
    2
    SELECT DATE(MAX(Sbirthday)) AS max_birthday, DATE(MIN(Sbirthday)) AS min_birthday,
    FROM Students;
  40. 班号年龄从大到小的顺序查询students表中的全部记录。

    解答:

    1
    2
    3
    SELECT *
    FROM students
    ORDER BY class DESC, TIMESTAMPDIFF(YEAR, sbirthday, CURDATE()) DESC;
  41. 查询“男”教师及其所上的课程

    解答:

    1
    2
    3
    4
    5
    SELECT tname, cname
    FROM teachers AS t INNER JOIN courses AS c
    ON t.tno = c.tno
    WHERE tsex='男'
    ORDER BY tname;
  42. ★查询最高分同学的sno、cno和degree列。

    解答:

    1
    2
    3
    4
    SELECT sno, cno, degree
    FROM scores
    GROUP BY cno
    HAVING degree=MAX(degree);
  43. 查询和“李军”同性别的所有同学的sname。

    解答:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    # 方法一 子查询
    SELECT sname
    FROM students
    WHERE ssex = (
    SELECT ssex
    FROM students
    WHERE sname = '李军');

    # 方法二 联结表
    SELECT s1.Sname
    FROM students AS s1 INNER JOIN students AS s2
    ON s1.ssex=s2.ssex
    WHERE s2.sname = '李军';
  44. ★★查询和“李军”同性别同班的同学sname。

    解答:

    1
    2
    3
    4
    SELECT sname
    FROM students AS s1 INNER JOIN students AS s2
    ON s1.ssex = s2.ssex AND s1.class = s2.class
    WHERE s2.sname = '李军';
  45. 查询所有选修“计算机导论”课程“男”同学的成绩表。

    解答:

    1
    2
    3
    4
    5
    6
    7
    SELECT sname, degree
    FROM scores AS s1 INNER JOIN courses AS c
    ON s1.cno = c.cno
    INNER JOIN students AS s2
    ON s1.sno = s2.sno
    WHERE c.cname = '计算机导论' AND s2.ssex = '男'
    ORDER BY degree DESC;

练习二

准备数据

建表语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
CREATE TABLE userinfo (
userId int(11) NOT NULL,
sex varchar(2),
birth date,
PRIMARY KEY (userId)
) ENGINE = InnoDB;

CREATE TABLE orderinfo (
orderId int(11) NOT NULL,
userId int(11) NOT NULL,
isPaid varchar(10),
price float(11, 2),
paidTime timestamp(0) NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP(0),
PRIMARY KEY (orderId)
) ENGINE = InnoDB;

导入数据

  • 获取链接:用户消费行为分析数据;提取码:yu63

  • 数据文件

    • user_info_utf.csv
    • order_info_utf.csv
  • 通过navicat,选择对应的数据库表,选择导入

    1. 导入类型:CSV File

      • 导入从:选择需要导入的数据文件
      • 编码:UTF-8(默认)
    2. 记录分隔符:CRLF(默认)

      • 字段名行:若数据文件不包含数据表的列名,则输入0(默认为第1行);
      • 第一个数据行:若文件不包含数据表的列名,则输入1(默认为第2行);
      • 日期排序:注意年月日的顺序
    3. 确认目标表

    4. 确认对应的目标字段

    5. 将记录添加到目标表/删除目标表中原数据,用导入的记录代替

    6. 开始执行


练习题

  1. 统计不同月份的下单人数

    1
    2
    3
    4
    SELECT MONTH(paidTime), COUNT(DISTINCT userId)
    FROM orderinfo
    WHERE isPaid = '已支付'
    GROUP BY MONTH(paidTime);
  2. 统计用户三月份复购率和回购率(三月份购买的用户,四月份也购买)

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    # 复购率
    SELECT COUNT(cnt) AS 下单用户数, COUNT(IF(cnt > 1, 1, NULL)) AS 复购用户数
    FROM (SELECT userId, COUNT(userId) as cnt
    FROM orderinfo
    WHERE isPaid = '已支付' AND MONTH(paidTime) = 3
    GROUP BY userId) AS t;

    # 3月份的回购率
    SELECT t1.m, COUNT(t1.m), COUNT(t2.m)
    FROM (SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') AS m
    FROM orderinfo
    WHERE isPaid = '已支付'
    GROUP BY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) AS t1
    LEFT JOIN (SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') AS m
    FROM orderinfo
    WHERE isPaid = '已支付'
    GROUP BY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) AS t2
    ON t1.userId = t2.userId AND t1.m = DATE_ADD(t2.m, INTERVAL -1 MONTH)
    GROUP BY t1.m;
  3. 统计男女用户的消费频次(平均数)是否有差异

1
2
3
4
5
6
7
8
9
SELECT sex, AVG(cnt)
FROM (SELECT o.userId, sex, COUNT(*) AS cnt # count(*)的作用为 统计消费次数
FROM orderinfo AS o
INNER JOIN (SELECT *
FROM userinfo
WHERE sex != '') AS t
ON o.userId = t.userId
GROUP BY o.userId, sex) AS t2
GROUP BY sex;
  1. 统计多次消费的用户,第一次和最后一次消费间隔是多少

    1
    2
    3
    4
    5
    SELECT userId, DATEDIFF(MAX(paidTime), MIN(paidTime)) 
    FROM orderinfo
    WHERE isPaid = '已支付'
    GROUP BY userId
    HAVING COUNT(*) > 1;
  2. 统计不同年龄段,用户的消费金额是否有差异?

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    SELECT ageGroup, AVG(cnt)
    FROM (SELECT o.userId, ageGroup, count(o.userId) AS cnt
    FROM orderinfo AS o
    INNER JOIN (SELECT userId, CEIL((YEAR(NOW()) - YEAR(birth)) / 10) AS ageGroup
    FROM userinfo
    WHERE birth > '1901-00-00') AS t
    ON o.userId = t.userId
    WHERE isPaid = '已支付'
    GROUP BY o.userId, ageGroup) AS t2
    GROUP BY ageGroup;
  3. 统计消费的二八法则,消费的top20%用户,贡献了多少额度

    1
    2
    3
    4
    5
    6
    7
    SELECT COUNT(userId), SUM(total)
    FROM (SELECT userId, SUM(price) AS total
    FROM orderinfo AS o
    WHERE isPaid = '已支付'
    GROUP BY userId
    ORDER BY total DESC
    LIMIT 17000) AS t;

练习三

准备数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
CREATE TABLE datafrog_test1
(userid VARCHAR(20),
changjing VARCHAR(20),
int_time VARCHAR(20)
);

INSERT INTO datafrog_test1
VALUES (1,1001,1400),
(2,1002,1401),
(1,1002,1402),
(1,1001,1402),
(2,1003,1403),
(2,1004,1404),
(3,1003,1400),
(4,1004,1402),
(4,1003,1403),
(4,1001,1403),
(4,1002,1404),
(5,1002,1402),
(5,1002,1403),
(5,1001,1404),
(5,1003,1405);

问题

求用户id对应的前两个不同场景。(场景重复的话,选场景的第一个访问时间,场景不足两个也输出其场景)


解答


Leetcode 上的SQL题



牛客网上的SQL题

  • 获取当前薪水第二多的员工的emp_no以及其对应的薪水salary

  • 不让使用ORDER BY排序时,利用子查询MAX()

  • 查找所有员工自入职以来的薪水涨幅情况

    • 嵌套查询
  • 对所有员工的薪水按照salary进行按照1-N的排名

    • 窗口函数
    • 排序细节
  • 获取所有非manager员工当前的薪水情况

    • 使用INNER JOIN而不是LEFT JOIN
    • 一个部门里可能有多个manager,所以用NOT IN比用!=更合理
  • 获取员工其当前的薪水比其manager当前薪水还高的相关信息

    • 创建两张表(一张记录当前所有员工的工资,另一张只记录部门经理的工资)进行比较
  • 给出每个员工每年薪水涨幅超过5000的员工编号emp_no

    • 题意模糊:应该为给出薪水与去年相比丈夫超过5000的员工编号
    • 使用INNER JOIN而不是LEFT JOIN
    • 时间线的判断方法很有参考价值
  • 查找描述信息中包含robot的电影对应的分类名称以及电影数目,而且还需要该分类对应电影数量>=5部

    • 筛选方法
  • 创建一个actor表,包含如下列信息

    • 创建表的时候,有些地方必须加括号
  • 批量插入数据,不使用replace操作

    • MySQL插入数据,如果数据已存在则忽略的语句是INSERT IGNORE table_name VALUES(...);
  • 对first_name创建唯一索引uniq_idx_firstname

    • 创建索引
  • 针对actor表创建视图actor_name_view

    • 创建视图
  • 针对上面的salaries表emp_no字段创建索引idx_emp_no

    • 强制索引(MySQL 为 FORCE INDEX)
  • 构造一个触发器audit_log

    • 此触发器必须按照AFTER INSERT执行,因为在BEFORE INSERT语句执行之前,新的行数据还没有生成
      • 通常,将BEFORE用于数据验证和净化
  • 删除emp_no重复的记录,只保留最小的id对应的记录

    •   DELETE FROM table_name 
        WHERE xxx
        
      1
      2
      3
      4
      5
      6
      7

      - [将所有to_date为9999-01-01的全部更新为NULL](https://www.nowcoder.com/practice/859f28f43496404886a77600ea68ef59?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - ```sql
      UPDATE table_name
      SET xxx
      WHERE xxx;
  • 将id=5以及emp_no=10001的行数据替换成id=5以及emp_no=10005

    • REPLACE(s, s1, s2)函数
  • 将titles_test表名修改为titles_2017

    • 修改表名的两种方式
      • RENAME TABLE table_name1 TO table_name2;
      • ALTER TABLE table_name1 RENAME TO table_name2;
  • 在audit表上创建外键约束,其emp_no对应employees_test表的主键id

    •   ALTER TABLE table_name
        ADD [CONSTRAINT fk_name] -- CONSTRAINT fk_name 定义外键名,可不定义
        FOREIGN KEY (xx) REFERENCES another_table (xx);
        
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82

      - [将所有获取奖金的员工当前的薪水增加10%](https://www.nowcoder.com/practice/d3b058dcc94147e09352eb76f93b3274?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - MySQL不支持`*=`这样的简化字符使用

      - [使用含有关键字exists查找未分配具体部门的员工的所有信息](https://www.nowcoder.com/practice/c39cbfbd111a4d92b221acec1c7c1484?tpId=82&tags=&title=&diffculty=0&judgeStatus=&rp=1)

      - EXISTS和IN的选择
      - EXISTS的用法

      - [获取有奖金的员工相关信息](https://www.nowcoder.com/practice/5cdbf1dcbe8d4c689020b6b2743820bf?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - CASE语句

      - [统计salary的累计和running_total](https://www.nowcoder.com/practice/58824cd644ea47d7b2b670c506a159a6?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - 窗口函数

      - [对于employees表中,给出奇数行的first_name](https://www.nowcoder.com/practice/e3cf1171f6cc426bac85fd4ffa786594?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - 窗口函数

      - [刷题通过的题目排名](https://www.nowcoder.com/practice/cd2e10a588dc4c1db0407d0bf63394f3?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - 窗口函数
      - 排序

      - [异常的邮件概率](https://www.nowcoder.com/practice/d6dd656483b545159d3aa89b4c26004e?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - COUNT函数非NULL的都会计数

      - [牛客每个人最近的登录日期(三)](https://www.nowcoder.com/practice/16d41af206cd4066a06a3a0aa585ad3d?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      - 在执行乘法`*`前,**注意括号的使用**
      - `WHERE XX IN XXX`**可以用于对一个组合的查询**

      - ⭐[牛客每个人最近的登录日期(四)](https://www.nowcoder.com/practice/e524dc7450234395aa21c75303a42b0a?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      - 解法一:LEFT JOIN + IFNULL函数
      - 笛卡尔积**如果WHERE不满足,整个所生成的元组就不会显示**,**如果要显示的话,就必须使用JOIN连接**
      - 解法二:窗口函数 + SUM函数

      - ⭐⭐[牛客每个人最近的登录日期(五)](https://www.nowcoder.com/practice/ea0c56cd700344b590182aad03cc61b8?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      - ⭐[牛客每个人最近的登录日期(六)](https://www.nowcoder.com/practice/572a027e52804c058e1f8b0c5e8a65b4?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)

      ---

      ## 最近登陆时间,增量更新:

      table_1是一个按天分区的 用户日志表,每次一个用户登陆都会新加一行:

      day timestamp uid

      2018-08-01 2018-08-01 11:00:00 1

      2018-08-01 2018-08-01 14:00:00 1



      需要根据table_1新建一个table_2:

      Day uid latest_login_time

      2018-08-01 1 2018-08-01 14:00:00



      每天新增一个昨日的分区的全量用户表,其中latest_login_time是自用户登录起最近一次登录的时间

      ```sql
      CREATE TABLE tabel2 (
      day DATE NOT NULL,
      uid INT NOT NULL,
      `latest login time` TIMESTAMP
      );

      INSERT INTO table2(day,
      uid,
      `latest login time`)
      SELECT day, uid, MAX(`timestamp`)
      FROM table_1
      WHERE DATEDIFF(CURDATE(), day) = 1 -- 昨天
      GROUP BY uid;

累计用户数计算

table a是一个用户注册时间的记录表,一个用户只有一条数据,a表如下:

create_time uid

2018-08-01 14:07:09 111

2018-08-02 14:07:09 134

需要计算8月份累计注册用户数(从8月1日开始累计计算,8月1日为8月1日注册用户数,8月2日为8月1日-2日两天的注册用户数),计算结果格式如下:

时间 累计用户数

2018-08-01 2000000

2018-08-02 2100000

…………….

2018-08-31 10000000

1
2
3
4
SELECT DATE(create_time) AS 时间, 
COUNT(uid) OVER (ORDER BY creat_time) AS 累计用户数
FROM `table a`
WHERE YEAR(create_time) = 2018 AND MONTH(create_time) = 8;

连续访问天数

table a 是一个用户登陆时间记录表,每登陆一次会记录一条记录,a表如下:

log_time uid

2018-07-01 12:00:00 123

2018-07-01 13:00:00 123

2018-07-02 14:08:09 456

需要计算出7月1日登陆的用户,在后续连续登陆7天,14天,30天的人数

计算结果格式如下:

7月1日登陆总用户数 连续登陆7天用户数 连续登陆14天用户数 。。。。

1000 500 200

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
SELECT COUNT(table_a.uid) AS '7月1日登录总用户数', COUNT(table_b.uid) AS '连续登录7天的用户数', COUNT(table_c.uid) AS '连续登录14天的用户数'
FROM (SELECT DISTINCT uid
FROM consecutive_task
WHERE DATE(login_time) = '2018-07-01') table_a
LEFT JOIN (SELECT uid, (login_date - rank_num) AS rnk
FROM (SELECT uid, login_date, (ROW_NUMBER() OVER (PARTITION BY uid
ORDER BY login_date)) AS rank_num
FROM (SELECT uid, DATE(login_time) AS login_date
FROM consecutive_task
WHERE DATE(login_time) BETWEEN '2018-07-01' AND '2018-07-07'
GROUP BY uid, login_date) b1 ) b2
GROUP BY uid, rnk
HAVING COUNT(rnk) = 7) table_b
ON table_a.uid = table_b.uid
LEFT JOIN (SELECT uid, (login_date - rank_num) AS rnk
FROM (SELECT uid, login_date, (ROW_NUMBER() OVER (PARTITION BY uid
ORDER BY login_date)) AS rank_num
FROM (SELECT uid, DATE(login_time) AS login_date
FROM consecutive_task
WHERE DATE(login_time) BETWEEN '2018-07-01' AND '2020-07-14'
GROUP BY uid, login_date) c1 ) c2
GROUP BY uid, rnk
HAVING COUNT(rnk) = 14) table_c
ON table_a.uid = table_c.uid;

新增用户的近7日留存率

table1:用户新增表,一个设备首次激活都新加一行:

timestamp,device 【新增的日期,新增的设备】

table2: 是一个按天分区的用户活跃表,每次一个用户登陆都会新加一行:

day,device 【活跃的日期,活跃的设备】

需要计算用户新增用户的留存数,留存率【1-7日】

计算结果格式如下:

新增日期 新增设备数 次日留存数 次日留存率 2日留存数 2日留存率 ….

timestamp 1000 500 50% 450 45%

1
2
3
4
5
6
7
SELECT a.first AS 新增日期, COUNT(DISTINCT a.device) AS 新增设备数, 
COUNT(DISTINCT IF(first - b.day = 1, a.device, NULL)) AS 次日留存数,
CONCAT(ROUND(COUNT(DISTINCT IF(first - b.day = 1, a.device, NULL)) / COUNT(DISTINCT a.device) * 100, 0), '%') AS 次日留存率
FROM (SELECT a.device, DATE(a.timestamp) AS first, b.day
FROM table1 a LEFT JOIN table2 b
ON a.device = b.device) c
GROUP BY a.first; -- 按 新增用户的日期 分组

计算日活用户签到,开宝箱,阅读行为的用户各自占比

table1: 是一个按天分区的用户活跃表,每次一个用户登陆都会新加一行:

day,uid 【活跃的日期,活跃的用户id】

table2:是一个按天分区的用户行为表,每一种行为都会新加一行:

day ,uid,type 【日期,用户id,type类型:1表示签到,2表示开宝箱,3表示阅读】

需要计算,签到占日活的比例,开宝箱占日活的比例,阅读占日活的比例

计算结果格式如下:

日期 活跃用户数 签到占日活的比例 开宝箱占日活的比例 阅读占日活的比例

1
2
3
4
5
6
7
8
SELECT day AS 日期, COUNT(DISTINCT uid) AS 活跃用户数, 
COUNT(IF(type = 1, 1, NULL)) / COUNT(uid) AS 签到占日活的比例,
COUNT(IF(type = 2, 1, NULL)) / COUNT(uid) AS 开宝箱占日活的比例,
COUNT(IF(type = 3, 1, NULL)) / COUNT(uid) AS 阅读占日活的比例
FROM (SELECT a.*, b.type
FROM table_1 AS a LEFT JOIN table_2 AS b
ON a.uid = b.uid AND a.day = b.day) c
GROUP BY day;

练习四

准备数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
CREATE TABLE Employee 
(
id INT(20),
name VARCHAR(20),
salary INT(20),
departmentid INT(20)
);

INSERT INTO Employee
VALUES (1,"Joe",70000,1),
(2,"Henry",80000,2),
(3,"Sam",60000,2),
(4,"Max",90000,1);

CREATE TABLE Department
(
id INT(20),
name VARCHAR(20)
);

INSERT INTO Department
VALUES (1,"IT"),
(2,"Sales");

练习题

找出每个部门工资最高的员工

1
2
3
4
SELECT d.name Department, e.name Employee, MAX(Salary) Salary
FROM Employee e LEFT JOIN Department d
ON e.departmentid = d.id
GROUP BY d.id;

准备数据

1
2
3
4
5
6
7
8
9
10
CREATE TABLE customer
(
Id INT(10),
Email VARCHAR(20)
);

INSERT INTO customer
VALUES (1,'a@b.com'),
(2,'c@d.com'),
(3,'a@b.com');

练习题

查找 customer 表中所有重复的电子邮箱

1
2
3
4
SELECT Email
FROM customer
GROUP BY Email
HAVING COUNT(Email)>1;

准备数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
CREATE TABLE Customers
(
Id INT(10),
Name VARCHAR(20)
);

INSERT INTO Customers
VALUES (1,'Joe'),
(2,'Henry'),
(3,'Sam'),
(4,'Max');

CREATE TABLE Orders
(
Id INT(10),
CustomerId INT(10)
);

INSERT INTO Orders
VALUES (1,3),
(2,1);

练习题:找出所有从不订购任何东西的客户

1
2
3
4
5
6
7
8
9
10
11
-- 方法一
SELECT name AS customers
FROM customers c LEFT JOIN orders o
ON c.id = o.customerId
WHERE o.id IS NULL;

-- 方法二
SELECT name AS customers
FROM customers
WHERE id NOT IN (SELECT customerid
FROM orders);

准备数据

1
2
3
4
5
6
7
8
9
10
11
12
13
CREATE TABLE Scores
(
id VARCHAR(20),
score FLOAT(4,2)
);

INSERT INTO scores
VALUES (1,3.5),
(2,3.65),
(3,4.00),
(4,3.85),
(5,4.00),
(6,3.65);

练习题:通过查询实现分数排名

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
-- 窗口函数 ROW_NUMBER()
SELECT score, ROW_NUMBER() OVER (ORDER BY score DESC) AS `Rank`
FROM scores
ORDER BY score DESC;

-- ROW_NUMBER() 对应 使用变量 的做法
SELECT score, @curRank=@curRank+1 AS `Rank`
FROM scores, (SELECT @RANK:=0) r
ORDER BY score DESC;

-- RANK() (有并列名次的行,会占用下一名次的位置)对应使用变量的做法
SELECT score, `rank`
FROM (SELECT score, @curRank = IF(@preScore = score, @curRank, @incRank) AS `rank`, -- 同分同排名,不同分下一排名
@incRank:=@incRank+1; -- 作用等同于 ROW_NUMBER()
@preScore:=score
FROM scores, (SELECT @curRank:=0, @preScore:=NULL, @incRank:=1) r
ORDER BY score DESC) s;

-- DENSE_RANK() 对应使用变量的做法

SELECT score, `rank`
FROM (SELECT score, @curRank=IF(@preScore=score, @curRank, @curRank+1) AS `rank`,
@preScore=score
FROM scores, (SELECT @curRank:=0, @preScore:=NULL) r
ORDER BY socre DESC) s;

-- 使用CASE WHEN更简洁
SELECT score, CASE
WHEN score=@preScore THEN @curRank
WHEN @preScore:=score THEN @curRank:=@curRank+1 -- @preScore:=score 赋值语句必为true
END AS `rank`
FROM scores, (SELECT @curRank:=0, @preScore:=NULL) r
ORDER BY score DESC;

准备数据

1
2
3
4
5
6
7
8
9
10
11
12
CREATE TABLE seat 
(
id INT(20),
student VARCHAR(20)
);

INSERT INTO seat
VALUES (1,'Abbot'),
(2,'Doris'),
(3,'Emerson'),
(4,'Green'),
(5,'Jeames');

练习题:座位id 是连续递增的,改变相邻俩学生的座位。
要求:

  • 如果学生人数是奇数,则不需要改变最后一个同学的座位。
1
2
3
4
5
6
7
8
SELECT (CASE
WHEN id % 2 = 1 AND id != cnt THEN id+1 -- 奇数且不是最后一个
WHEN id % 2 = 1 AND id = cnt THEN id
ELSE id - 1 -- 偶数
END) AS id, student
FROM seat, (SELECT COUNT(*) AS cnt -- 用于判断当前是否是最后一个学生(奇数的特殊判断)
FROM seat) t
ORDER BY id;

准备数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
DROP TABLE IF EXISTS Employee;
CREATE TABLE Employee
(
Id INT(10),
Name VARCHAR(20),
Salary INT(20),
ManagerId INT(10)
);

INSERT INTO Employee
VALUES (1, 'Joe', 70000, 3),
(2, 'Henry', 80000, 4),
(3, 'Sam', 60000, NULL),
(4, 'Max', 90000, NULL);

练习题:编写一个 SQL 查询,获取收入超过他们经理的员工的姓名。

1
2
3
4
SELECT e1.Name
FROM Employee e1 LEFT JOIN Employee e2
ON e1.ManagerId = e2.Id
WHERE e1.Salary > e2.Salary;

练习五

题目:X 市建了一个新的体育馆,每日人流量信息被记录在这三列信息中:序号 (id)、日期 (visit_date)、 人流量 (people)。
请编写一个查询语句,找出
人流量的高峰期

要求:

  • 高峰期时,至少连续三行记录中的人流量不少于100
  • 每天只有一行记录,日期随着 id 的增加而增加
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
CREATE TABLE stadium 
(
id INT(20),
visit_date DATE,
people INT(20)
);

INSERT INTO stadium
VALUES (1,'2017-01-01',10),
(2,'2017-01-02',109),
(3,'2017-01-03',150),
(4,'2017-01-04',99),
(5,'2017-01-05',145),
(6,'2017-01-06',1455),
(7,'2017-01-07',199),
(8,'2017-01-08',188);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
-- 方法一:朴实的自联结
SELECT DISTINCT t1.*
FROM stadium t1, stadium t2, stadium t3
WHERE (t1.people >= 100 -- 筛选表中 人流量>=100 的行
AND t2.people >= 100
AND t3.people >= 100
AND ( (t1.id - t2.id = 1 -- 错位联结,从t1的id开始连续3天有人流高峰
AND t1.id - t3.id = 2
AND t2.id - t3.id = 1
)
OR (t2.id - t1.id = 1 -- 补充 t1之后只有1天人流高峰,但之前也有连续高峰 的情况
AND t2.id - t3.id = 2
AND t1.id - t3.id = 1)
OR (t3.id - t2.id = 1 -- 补充t1为人流高峰最后一天 的情况
AND t2.id - t1.id = 1
AND t3.id - t1.id = 2)
)
)
ORDER BY t1.id;


-- 方法二:MySQL 8.0 之后开始支持 WITH AS 语句(子查询部分,作用类似于一个视图,但with as 属于一次性的,而且必须和其他sql一起使用才行) WITH子句的查询必须用括号括起来
WITH t1 AS
(SELECT id, visit_date, people,
(id - ROW_NUMBER() OVER (ORDER BY id)) rk
FROM stadium
WHERE people >= 100)

SELECT id, visit_date, people
FROM t1
WHERE rk IN (SELECT rk
FROM t1
GROUP BY rk
HAVING COUNT(*) >= 3);

挑战题

1
2
3
4
5
6
7
8
9
10
11
SELECT 日期, COUNT(DISTINCT a.uid) 活跃用户数, COUNT(DISTINCT IF(day-first=1, a.uid, NULL)) 次日留存用户数,
COUNT(DISTINCT IF(day-first=3), a.uid, NULL) 三日留存用户数,
COUNT(DISTINCT IF(day-first=7), a.uid, NULL) 七日留存用户数,
CONCAT(ROUND(COUNT(DISTINCT IF(day-first=1, a.uid, NULL)) / COUNT(DISTINCT a.uid), 2), '%') 次日留存率, CONCAT(ROUND(COUNT(DISTINCT IF(day-first=3, a.uid, NULL)) / COUNT(DISTINCT a.uid), 2), '%') 三日留存率, CONCAT(ROUND(COUNT(DISTINCT IF(day-first=7, a.uid, NULL)) / COUNT(DISTINCT a.uid), 2), '%') 七日留存率
FROM (SELECT uid, DATE_FORMAT(dayno, '%Y-%m-%d') AS first
FROM act_user_info
WHERE app_name = '相机') a LEFT JOIN (SELECT uid, DATE_FORMAT(dayno, '%Y-%m-%d') AS day
FROM act_user_info
WHERE app_name = '相机') b
ON a.uid = b.uid
GROUP BY first;

PDD笔试题

2. 用户行为分析

表1——用户行为表tracking_log,大概字段有(user_id‘用户编号’,opr_id‘操作编号’,log_time‘操作时间’)

  • 统计每天符合以下条件的用户数A操作之后是B操作,AB操作必须相连
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
-- 使用窗口函数
SELECT DATE(log_time), COUNT(DISTINCT user_id)
FROM (SELECT DATE(log_time), user_id, opr_id,
LEAD(opr_id, 1) OVER() AS pre_opr
FROM tracking_log) table_1
WHERE opr_id='A' AND pre_opr='B'
GROUP BY DATE(log_time);


-- 使用用户变量
SELECT DATE(log_time), COUNT(DISTINCT user_id)
FROM (SELECT DATE(log_time), user_id,
(CASE
WHEN @pre_opr='A' AND opr_id='B' THEN @isJoin:=True AND @pre_opr:=opr_id
ELSE @pre_opr:=opr_id
END) isJoin
FROM tracking_log, (SELECT @pre_opr:=NULL, @isJoin:=FALSE) a) table_1
WHERE isJoin=True
GROUP BY DATE(log_time);

3. 用户新增留存分析

表1——用户登陆表user_log,大概字段有(user_id‘用户编号’,log_time‘登陆时间’)

  • 获取每日新增用户数,以及第2天、第30天的回访比例
1
2
3
4
5
6
7
8
SELECT COUNT(user_id) 新增用户数, 
(COUNT(DISTINCT IF(DATEDIFF(DATE(log_time), 注册日期)=1, u1.user_id, NULL)) / COUNT(user_id)) AS2天回访比例,
(COUNT(DISTINCT IF(DATEDIFF(DATE(log_time), 注册日期)=29, u1.user_id, NULL)) / COUNT(user_id)) AS30天回访比例
FROM (SELECT user_id, DATE(MIN(log_time)) AS 注册日期
FROM user_log
GROUP BY user_id) u1 LEFT JOIN user_log u2
ON u1.user_id = u2.user_id
GROUP BY 注册日期;

4. 贝叶斯公式的应用

数据分析笔试题(1) - 知乎


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
SELECT Partition_date, 
(CASE
WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)=1 THEN COUNT(DISTINCT t1.user_id)
END) AS '流失1天',
(CASE
WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)=2 THEN COUNT(DISTINCT t1.user_id)
END) AS '流失2天',
(CASE
WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)=3 THEN COUNT(DISTINCT t1.user_id)
END) AS '流失3天',
(CASE
WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)>=30 THEN COUNT(DISTINCT t1.user_id)
END) AS '流失30天以上'
FROM (SELECT user_id, Partition_date
FROM user_active
WHERE daily_active_status_map=1) t1 LEFT JOIN (SELECT user_id, Partition_date
FROM usre_active
WHERE daily_active_status_map=1) t2
ON t1.user_id=t2.user_id
WHERE t2.user_id IS NULL
GROUP BY Partition_date
ORDER BY Partition_date DESC;