SQL练习题

练习一

准备数据

建表语句

CREATE TABLE students
(sno VARCHAR(3) NOT NULL, 
sname VARCHAR(4) NOT NULL,
ssex VARCHAR(2) NOT NULL, 
sbirthday DATETIME,
class VARCHAR(5));

CREATE TABLE courses
(cno VARCHAR(5) NOT NULL, 
cname VARCHAR(10) NOT NULL, 
tno VARCHAR(10) NOT NULL);

CREATE TABLE scores
(sno VARCHAR(3) NOT NULL,
cno VARCHAR(5) NOT NULL,
degree NUMERIC(10, 1) NOT NULL);

CREATE TABLE teachers 
(tno VARCHAR(3) NOT NULL, 
tname VARCHAR(4) NOT NULL, tsex VARCHAR(2) NOT NULL, 
tbirthday DATETIME NOT NULL, prof VARCHAR(6), 
depart VARCHAR(10) NOT NULL);

插入数据

INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (108 ,'曾华' ,'男' ,'1977-09-01',95033);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (105 ,'匡明' ,'男' ,'1975-10-02',95031);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (107 ,'王丽' ,'女' ,'1976-01-23',95033);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, classS) VALUES (101 ,'李军' ,'男' ,'1976-02-20',95033);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (109 ,'王芳' ,'女' ,'1975-02-10',95031);
INSERT INTO STUDENTS (sno, sname, ssex, sbrithday, class) VALUES (103 ,'陆君' ,'男' ,'1974-06-03',95031);

INSERT INTO COURSES(cno, cname, tno) VALUES ('3-105' ,'计算机导论',825);
INSERT INTO COURSES(cno, cname, tno) VALUES ('3-245' ,'操作系统' ,804);
INSERT INTO COURSES(cno, cname, tno) VALUES ('6-166' ,'数据电路' ,856);
INSERT INTO COURSES(cno, cname, tno) VALUES ('9-888' ,'高等数学' ,100);

INSERT INTO SCORES(sno, cno, degree) VALUES (103,'3-245',86);
INSERT INTO SCORES(sno, cno, degree) VALUES (105,'3-245',75);
INSERT INTO SCORES(sno, cno, degree) VALUES (109,'3-245',68);
INSERT INTO SCORES(sno, cno, degree) VALUES (103,'3-105',92);
INSERT INTO SCORES(sno, cno, degree) VALUES (105,'3-105',88);
INSERT INTO SCORES(sno, cno, degree) VALUES (109,'3-105',76);
INSERT INTO SCORES(sno, cno, degree) VALUES (101,'3-105',64);
INSERT INTO SCORES(sno, cno, degree) VALUES (107,'3-105',91);
INSERT INTO SCORES(sno, cno, degree) VALUES (108,'3-105',78);
INSERT INTO SCORES(sno, cno, degree) VALUES (101,'6-166',85);
INSERT INTO SCORES(sno, cno, degree) VALUES (107,'6-166',79);
INSERT INTO SCORES(sno, cno, degree) VALUES (108,'6-166',81);

INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (804,'李诚','男','1958-12-02','副教授','计算机系');
INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (856,'张旭','男','1969-03-12','讲师','电子工程系');
INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (825,'王萍','女','1972-05-05','助教','计算机系');
INSERT INTO TEACHERS(tno, tname, tsex, tbirthday, prof, depart) VALUES (831,'刘冰','女','1977-08-14','助教','电子工程系');

练习题

  1. 查询student表中的所有记录的sname、ssex和class列。

    解答:

    SELECT sname, ssex, class
    FROM students;
  2. 查询教师所有的单位,即不重复的depart列。

    解答:

    SELECT DISTINCT depart
    FROM teachers;
  3. 查询students表的所有记录。

    解答:

    SELECT *
    FROM students;
  4. 查询scores表中成绩在60到80之间的所有记录。

    解答:

    # 方法一
    SELECT *
    FROM scores
    WHERE degree >= 60 AND degree <= 80;
    
    # 方法二
    SELECT *
    FROM scores
    WHERE degree BETWEEN 60 AND 80;
  5. ★查询scores表中成绩为85,86或88的记录

    解答:

    # 方法一
    SELECT *
    FROM scores
    WHERE degree IN (85, 86, 88);
    
    # 方法二
    SELECT *
    FROM scores
    WHERE degree = 85 OR degree = 86 OR degree = 88;
  6. 查询students表中**“95031”班性别为“女”**的同学记录。

    解答:

    SELECT *
    FROM students
    WHERE class = '95031' OR ssex = '女';
  7. class降序查询students表的所有记录。

    解答:

    SELECT *
    FROM students
    ORDER BY class DESC;
  8. ★以cno升序degree降序查询scores表的所有记录。

    解答:

    SELECT *
    FROM scores
    ORDER BY con, degree DESC;
  9. 查询student表中“95031”班的学生人数

    解答:

    SELECT COUNT(*) AS stu_num
    FROM students
    WHERE class = '95031';
  10. ★★★查询scores表中的最高分的学生学号和课程号。

    解答:子查询

    SELECT sno, cno
    FROM scores
    WHERE degree = (SELECT MAX(degree)
                    FROM scores);
  11. ★查询scores表中‘3-105’号课程的平均分

    解答:

    SELECT AVG(degree)
    FROM scores
    WHERE cno = '3-105';
  12. ★★★查询scores表中至少有5名学生选修的并以3开头的课程平均分数

    解答:分组HAVING子句用于过滤分组)

    # 方法一
    SELECT cno, AVG(degree) AS avg_degree
    FROM scores
    WHERE cno LIKE '3%'
    GROUP BY cno
    HAVING COUNT(*) >= 5;
    
    # 方法二
    SELECT cno, AVG(degree) AS avg_degree
    FROM scores
    GROUP BY cno
    HAVING COUNT(*) >= 5 AND cno LIKE '3%';
  13. ★★★查询scores表中最低分大于70最高分小于90的sno列。

    解答:

    SELECT sno
    FROM scores
    GROUP BY sno
    HAVING MIN(degree) > 70 AND MAX(degree) < 90;
  14. ★★查询所有学生的sname、cno和degree列。

    解答:

    • 外部联结(OUTER JOIN, OUTER可省略)
    • 内部联结(INNER JOIN)
    # 方法一:外部联结, LEFT JOIN 左边的表 所有记录都会列出
    SELECT stu.sname, sco.cno, sco.degree
    FROM students AS stu LEFT JOIN scores AS sco
    ON stu.sno = sco.sno
    ORDER BY sname;
    
    # 方法二:内部联结, INNER JOIN 只返回两个表中联结字段相等的行
    SELECT stu.sname, sco.cno, sco.degree
    FROM students AS stu INNER JOIN scores AS sco
    ON stu.sno = sco.sno
    ORDER BY sname;
  15. ★查询所有学生的sno、cname和degree列。

    解答:外部联结

    SELECT s.sno, c.cname, s.degree
    FROM scores AS s
    LEFT JOIN courses AS c ON s.cno = c.cno
    ORDER BY sno;
  16. ★★★查询所有学生的sname、cname和degree列。

    解答:外部联结

    SELECT st.sname, c.cname, sc.degree
    FROM students AS st 
    LEFT JOIN scores AS sc ON st.sno = sc.sno
    LEFT JOIN courses AS c ON sc.cno = c.cno
    ORDER BY sname;
  17. ★★★查询“95033”班所选课程的平均分。

    解答:

    SELECT cname, AVG(degree)
    FROM students AS st
    INNER JOIN scores AS sc ON st.sno = sc.sno
    INNER JOIN courses AS c ON sc.cno = c.cno
    WHERE class = '95033'
    GROUP BY c.cno
    ORDER BY cname;
  18. ★★★假设使用如下命令建立了一个grade表:

    CREATE TABLE grade(low INT(3), upp INT(3), rank CHAR(1));
    INSERT INTO grade VALUES(90,100,'A');
    INSERT INTO grade VALUES(80,89,'B');
    INSERT INTO grade VALUES(70,79,'C');
    INSERT INTO grade VALUES(60,69,'D');
    INSERT INTO grade VALUES(0,59,'E');
    COMMIT;

    现查询所有同学的sno、cno和rank列。

    解答:

    SELECT sno, cno, rank
    FROM scores INNER JOIN grade
    ON scores.degree BETWEEN grade.low AND grade.upp
    ORDER BY sno;
  19. ★★★查询选修“3-105”课程成绩高于“109”号同学成绩的所有同学的记录。

    解答:

    • 自联结
    SELECT sname, s1.degree
    FROM scores AS s1
    INNER JOIN scores AS s2 ON s1.cno = s2.cno AND  s1.degree > s2.degree
    INNER JOIN students AS st ON s1.sno = st.sno
    WHERE s1.cno = '3-105' AND s2.sno = '109'
    ORDER BY s1.degree;
  20. ★★★查询scores中选学一门以上课程的同学中分数为非最高分成绩的记录。

    解答:

    • 条件:
      1. 选学一门以上课程
      2. 列出这些同学的所有非最高分成绩
    SELECT scores.sno, cno, degree, max_degree
    FROM scores INNER JOIN 
    			(SELECT sno, MAX(degree) AS max_degree 
                 FROM scores 
                 GROUP BY sno 
                 HAVING COUNT(*) > 1) AS max 
    ON scores.sno = max.sno AND degree < max_degree
    ORDER BY sno;
  21. 查询成绩高于学号为“109”、课程号为“3-105”的成绩的所有记录。

    解答:

    SELECT st.sname, s1.cno, s1.degree
    FROM scores AS s1 INNER JOIN scores AS s2
    ON s1.cno = s2.cno AND s1.degree > s2.degree
    INNER JOIN students AS st
    ON s1.sno = st.sno
    WHERE s1.cno = '3-105' AND s2.sno = '109'
    ORDER BY s1.degree;
  22. ★★★查询和学号为108的同学同年出生的所有学生的sno、sname和sbirthday列。

    解答:函数YEAR(d)

    SELECT s1.sno, s1.sname, s1.sbirthday
    FROM students AS s1 INNER JOIN students AS s2
    ON YEAR(s1.sbirthday) = YEAR(s2.sbirthday)
    WHERE s2.sno = '108'
    ORDER BY sbirthday;
  23. 查询“张旭“教师任课的学生成绩。

    解答:

    SELECT sno, degree
    FROM scores INNER JOIN courses
    ON scores.cno = courses.cno
    INNER JOIN teachers
    ON courses.tno = teachers.tno
    WHERE teachers.tname = '张旭'
    ORDER BY degree;
  24. 查询选修某课程的同学人数多于5人教师姓名

    解答:

    SELECT tname
    FROM scores AS s INNER JOIN courses AS c
    ON s.cno = c.cno
    INNER JOIN teachers AS t
    ON t.tno = c.tno
    GROUP BY c.cno
    HAVING COUNT(c.cno) > 5;
  25. 查询95033班和95031班全体学生的记录

    解答:

    SELECT *
    FROM students
    WHERE class IN ('95033', '95031')
    ORDER BY class;
  26. 查询有85分以上成绩的课程cno。

    解答:DISTINCT关键字

    SELECT DISTINCT c.cno
    FROM scores
    WHERE degree > 85;
  27. 查询出“计算机系“教师所教课程的成绩表。

    解答:

    SELECT t.tname, s.cno, cname, sno, degree
    FROM scores AS s INNER JOIN courses AS c
    ON s.cno = c.cno
    INNER JOIN teachers AS t
    ON t.tno = c.tno
    WHERE t.depart = '计算机系'
    ORDER BY t.tname, cname, degree DESC;
  28. 查询“计算机系”中与“电子工程系“的教师不同职称教师的tname和prof。

    解答:

    SELECT tname, prof
    FROM teachers
    WHERE depart = '计算机系' AND prof NOT IN 
    								(SELECT prof
                                    FROM teachers
                                    WHERE depart = '电子工程系');
  29. 查询选修编号为“3-105”课程且成绩至少高于任意选修编号为“3-245”的同学的成绩的cno、sno和degree。

    解答:

    SELECT cno, sno, degree
    FROM scores
    WHERE cno = '3-105' AND degree > (SELECT MIN(degree)
        							FROM scores
        							WHERE cno = '3-245')
    ORDER BY degree DESC;
  30. 查询选修编号为”3-105“且成绩高于所有选修编号为”3-245“课程的同学的cno、sno和degree。

    解答:

    SELECT cno, sno, degree
    FROM scores
    WHERE cno = '3-105' AND degree > (SELECT MAX(degree)
        							FROM scores
        							WHERE cno = '3-245')
    ORDER BY degree DESC;
  31. 查询所有教师和同学的name、sex和birthday。

    解答:组合查询UNION

    • UNION中每个查询必须包含相同的列、表达式或聚集函数(不过各个列不需要以相同的次序列出)。
    • 列数据类型必须兼容。类型不必完全相同,但必须是DBMS可以隐含地转换的类型(如不同的数值类型不同的日期类型)。
    SELECT tname AS name, tsex AS sex, tbirthday AS birthday
    FROM teachers
    UNION 
    SELECT sname AS name, ssex AS sex, sbirthday AS birthday
    FROM students
    ORDER BY birthday;
  32. 查询所有“女”教师和“女”同学的name、sex和birthday。

    解答:

    SELECT tname AS name, tsex AS sex, tbirthday AS birthday
    FROM teachers AS t
    WHERE t.tsex = '女'
    UNION 
    SELECT sname AS name, ssex AS sex, sbirthday AS birthday
    FROM students AS s
    WHERE s.ssex = '女'
    ORDER BY birthday;
  33. ★★★查询成绩比该课程平均成绩低的同学的成绩表。

    解答:

    SELECT s1.*, avg_degree
    FROM scores AS s1 INNER JOIN (
    	SELECT cno, AVG(degree) AS avg_degree
    	FROM scores
    	GROUP BY cno) AS s2
    ON s1.cno = s2.cno AND s1.degree < s2.avg_degree;
  34. 查询所有任课教师的tname和depart。

    解答:

    SELECT DISTINCT tname, depart
    FROM courses AS c INNER JOIN teachers AS t
    ON c.tno = t.tno
    ORDER BY tname;
    
    SELECT tname, depart
    FROM teachers
    WHERE tno IN(
    	SELECT tno
    	FROM courses)
    ORDER BY tname;
  35. ★查询所有未讲课的教师的Tname和Depart。

    解答:

    SELECT tname, depart
    FROM teachers
    WHERE tno NOT IN(
    	SELECT tno
    	FROM courses)
    ORDER BY tname;
  36. ★★查询至少有2名男生的班号。

    解答:

    SELECT class
    FROM students
    WHERE ssex = '男'
    GROUP BY class
    HAVING COUNT(*) >= 2;
  37. ★查询students表中**不姓“王”**的同学记录。

    解答:

    • LIKE操作符
    • 通配符%
    SELECT *
    FROM students
    WHERE sname NOT LIKE '王%';
  38. ★★查询students表中每个学生的姓名和年龄

    解答:

    • 当前日期函数CURDATE()
    • 当前日期和时间函数NOW()
    SELECT sname, TIMESTAMPDIFF(YEAR, sbirthday, CURDATE()) AS age
    FROM students;
  39. ★★查询students表中最大最小的sbirthday日期值。

    解答:

    SELECT DATE(MAX(Sbirthday)) AS max_birthday, DATE(MIN(Sbirthday)) AS min_birthday,
    FROM Students;
  40. 班号年龄从大到小的顺序查询students表中的全部记录。

    解答:

    SELECT *
    FROM students
    ORDER BY class DESC, TIMESTAMPDIFF(YEAR, sbirthday, CURDATE()) DESC;
  41. 查询“男”教师及其所上的课程

    解答:

    SELECT tname, cname
    FROM teachers AS t INNER JOIN courses AS c
    ON t.tno = c.tno
    WHERE tsex='男'
    ORDER BY tname;
  42. ★查询最高分同学的sno、cno和degree列。

    解答:

    SELECT sno, cno, degree
    FROM scores
    GROUP BY cno
    HAVING degree=MAX(degree);
  43. 查询和“李军”同性别的所有同学的sname。

    解答:

    # 方法一 子查询
    SELECT sname
    FROM students
    WHERE ssex = (
    			SELECT ssex
    			FROM students
    			WHERE sname = '李军');
    			
    # 方法二 联结表
    SELECT s1.Sname
    FROM students AS s1 INNER JOIN students AS s2
    ON s1.ssex=s2.ssex
    WHERE s2.sname = '李军';
  44. ★★查询和“李军”同性别同班的同学sname。

    解答:

    SELECT sname
    FROM students AS s1 INNER JOIN students AS s2
    ON s1.ssex = s2.ssex AND s1.class = s2.class
    WHERE s2.sname = '李军';
  45. 查询所有选修“计算机导论”课程的**“男”**同学的成绩表。

    解答:

    SELECT sname, degree
    FROM scores AS s1 INNER JOIN courses AS c
    ON s1.cno = c.cno
    INNER JOIN students AS s2
    ON s1.sno = s2.sno
    WHERE c.cname = '计算机导论' AND s2.ssex = '男'
    ORDER BY degree DESC;

练习二

准备数据

建表语句

CREATE TABLE userinfo (
  userId int(11) NOT NULL,
  sex varchar(2),
  birth date,
  PRIMARY KEY (userId)
) ENGINE = InnoDB;

CREATE TABLE orderinfo (
  orderId int(11) NOT NULL,
  userId int(11) NOT NULL,
  isPaid varchar(10),
  price float(11, 2),
  paidTime timestamp(0) NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP(0),
  PRIMARY KEY (orderId)
) ENGINE = InnoDB;

导入数据

  • 获取链接:用户消费行为分析数据;提取码:yu63

  • 数据文件

    • user_info_utf.csv
    • order_info_utf.csv
  • 通过navicat,选择对应的数据库表,选择导入

    1. 导入类型:CSV File

      • 导入从:选择需要导入的数据文件
      • 编码:UTF-8(默认)
    2. 记录分隔符:CRLF(默认)

      • 字段名行:若数据文件不包含数据表的列名,则输入0(默认为第1行);
      • 第一个数据行:若文件不包含数据表的列名,则输入1(默认为第2行);
      • 日期排序:注意年月日的顺序
    3. 确认目标表

    4. 确认对应的目标字段

    5. 将记录添加到目标表/删除目标表中原数据,用导入的记录代替

    6. 开始执行


练习题

  1. 统计不同月份的下单人数

    SELECT MONTH(paidTime), COUNT(DISTINCT userId)
    FROM orderinfo
    WHERE isPaid = '已支付'
    GROUP BY MONTH(paidTime);
  2. 统计用户三月份复购率和回购率(三月份购买的用户,四月份也购买)

    # 复购率
    SELECT COUNT(cnt) AS 下单用户数, COUNT(IF(cnt > 1, 1, NULL)) AS 复购用户数
    FROM (SELECT userId, COUNT(userId) as cnt
          FROM orderinfo
          WHERE isPaid = '已支付' AND MONTH(paidTime) = 3
          GROUP BY userId) AS t;
          
    # 3月份的回购率
    SELECT t1.m, COUNT(t1.m), COUNT(t2.m) 
    FROM (SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') AS m
    			FROM orderinfo
    			WHERE isPaid = '已支付'
    			GROUP BY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) AS t1
    LEFT JOIN (SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') AS m
    			FROM orderinfo
    			WHERE isPaid = '已支付'
    			GROUP BY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) AS t2
    ON t1.userId = t2.userId AND t1.m = DATE_ADD(t2.m, INTERVAL -1 MONTH)
    GROUP BY t1.m;
  3. 统计男女用户的消费频次(平均数)是否有差异

SELECT sex, AVG(cnt)
FROM (SELECT o.userId, sex, COUNT(*) AS cnt  # count(*)的作用为 统计消费次数
      FROM orderinfo AS o
      INNER JOIN (SELECT *
                  FROM userinfo
                  WHERE sex != '') AS t
      ON o.userId = t.userId
      GROUP BY o.userId, sex) AS t2
GROUP BY sex;
  1. 统计多次消费的用户,第一次和最后一次消费间隔是多少

    SELECT userId, DATEDIFF(MAX(paidTime), MIN(paidTime)) 
    FROM orderinfo
    WHERE isPaid = '已支付'
    GROUP BY userId
    HAVING COUNT(*) > 1;
  2. 统计不同年龄段,用户的消费金额是否有差异?

    SELECT ageGroup, AVG(cnt)
    FROM (SELECT o.userId, ageGroup, count(o.userId) AS cnt
    FROM orderinfo AS o
    INNER JOIN (SELECT userId, CEIL((YEAR(NOW()) - YEAR(birth)) / 10) AS ageGroup
    					FROM userinfo
    					WHERE birth > '1901-00-00') AS t
    ON o.userId = t.userId
    WHERE isPaid = '已支付'
    GROUP BY o.userId, ageGroup) AS t2
    GROUP BY ageGroup;
  3. 统计消费的二八法则,消费的top20%用户,贡献了多少额度

    SELECT COUNT(userId), SUM(total)
    FROM (SELECT userId, SUM(price) AS total
    	FROM orderinfo AS o
    	WHERE isPaid = '已支付'
    	GROUP BY userId
    	ORDER BY total DESC
    	LIMIT 17000) AS t;

练习三

准备数据

CREATE TABLE datafrog_test1
(userid VARCHAR(20),
changjing VARCHAR(20),
int_time VARCHAR(20)
);

INSERT INTO datafrog_test1 
VALUES 	(1,1001,1400),
		(2,1002,1401),
        (1,1002,1402),
        (1,1001,1402),
        (2,1003,1403),
        (2,1004,1404),
        (3,1003,1400),
        (4,1004,1402),
        (4,1003,1403),
        (4,1001,1403),
        (4,1002,1404),
        (5,1002,1402),
        (5,1002,1403),
        (5,1001,1404),
        (5,1003,1405);

问题

求用户id对应的前两个不同场景。(场景重复的话,选场景的第一个访问时间,场景不足两个也输出其场景)


解答


Leetcode 上的SQL题



牛客网上的SQL题

  • 获取当前薪水第二多的员工的emp_no以及其对应的薪水salary

  • 不让使用ORDER BY排序时,利用子查询MAX()

  • 查找所有员工自入职以来的薪水涨幅情况

    • 嵌套查询
  • 对所有员工的薪水按照salary进行按照1-N的排名

    • 窗口函数
    • 排序细节
  • 获取所有非manager员工当前的薪水情况

    • 使用INNER JOIN而不是LEFT JOIN
    • 一个部门里可能有多个manager,所以用NOT IN比用!=更合理
  • 获取员工其当前的薪水比其manager当前薪水还高的相关信息

    • 创建两张表(一张记录当前所有员工的工资,另一张只记录部门经理的工资)进行比较
  • 给出每个员工每年薪水涨幅超过5000的员工编号emp_no

    • 题意模糊:应该为给出薪水与去年相比丈夫超过5000的员工编号
    • 使用INNER JOIN而不是LEFT JOIN
    • 时间线的判断方法很有参考价值
  • 查找描述信息中包含robot的电影对应的分类名称以及电影数目,而且还需要该分类对应电影数量>=5部

    • 筛选方法
  • 创建一个actor表,包含如下列信息

    • 创建表的时候,有些地方必须加括号
  • 批量插入数据,不使用replace操作

    • MySQL插入数据,如果数据已存在则忽略的语句是INSERT IGNORE table_name VALUES(...);
  • 对first_name创建唯一索引uniq_idx_firstname

    • 创建索引
  • 针对actor表创建视图actor_name_view

    • 创建视图
  • 针对上面的salaries表emp_no字段创建索引idx_emp_no

    • 强制索引(MySQL 为 FORCE INDEX)
  • 构造一个触发器audit_log

    • 此触发器必须按照AFTER INSERT执行,因为在BEFORE INSERT语句执行之前,新的行数据还没有生成
      • 通常,将BEFORE用于数据验证和净化
  • 删除emp_no重复的记录,只保留最小的id对应的记录

    •   DELETE FROM table_name 
        WHERE xxx
        
      
      - [将所有to_date为9999-01-01的全部更新为NULL](https://www.nowcoder.com/practice/859f28f43496404886a77600ea68ef59?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      	- ```sql
      		UPDATE table_name
      		SET xxx
      		WHERE xxx;
  • 将id=5以及emp_no=10001的行数据替换成id=5以及emp_no=10005

    • REPLACE(s, s1, s2)函数
  • 将titles_test表名修改为titles_2017

    • 修改表名的两种方式
      • RENAME TABLE table_name1 TO table_name2;
      • ALTER TABLE table_name1 RENAME TO table_name2;
  • 在audit表上创建外键约束,其emp_no对应employees_test表的主键id

    •   ALTER TABLE table_name
        ADD [CONSTRAINT fk_name] -- CONSTRAINT fk_name 定义外键名,可不定义
        FOREIGN KEY (xx) REFERENCES another_table (xx);
        
      
      - [将所有获取奖金的员工当前的薪水增加10%](https://www.nowcoder.com/practice/d3b058dcc94147e09352eb76f93b3274?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      	- MySQL不支持`*=`这样的简化字符使用
      
      - [使用含有关键字exists查找未分配具体部门的员工的所有信息](https://www.nowcoder.com/practice/c39cbfbd111a4d92b221acec1c7c1484?tpId=82&tags=&title=&diffculty=0&judgeStatus=&rp=1)
      
      	- EXISTS和IN的选择
      	- EXISTS的用法
      
      - [获取有奖金的员工相关信息](https://www.nowcoder.com/practice/5cdbf1dcbe8d4c689020b6b2743820bf?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      	- CASE语句
      
      - [统计salary的累计和running_total](https://www.nowcoder.com/practice/58824cd644ea47d7b2b670c506a159a6?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      	- 窗口函数
      
      - [对于employees表中,给出奇数行的first_name](https://www.nowcoder.com/practice/e3cf1171f6cc426bac85fd4ffa786594?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      	- 窗口函数
      
      - [刷题通过的题目排名](https://www.nowcoder.com/practice/cd2e10a588dc4c1db0407d0bf63394f3?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      	- 窗口函数
      	- 排序
      
      - [异常的邮件概率](https://www.nowcoder.com/practice/d6dd656483b545159d3aa89b4c26004e?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      	
      	- COUNT函数非NULL的都会计数
      	
      - [牛客每个人最近的登录日期(三)](https://www.nowcoder.com/practice/16d41af206cd4066a06a3a0aa585ad3d?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      	- 在执行乘法`*`前,**注意括号的使用**
      	- `WHERE XX IN XXX`**可以用于对一个组合的查询**
      	
      - ⭐[牛客每个人最近的登录日期(四)](https://www.nowcoder.com/practice/e524dc7450234395aa21c75303a42b0a?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      	- 解法一:LEFT JOIN + IFNULL函数
      		- 笛卡尔积**如果WHERE不满足,整个所生成的元组就不会显示**,**如果要显示的话,就必须使用JOIN连接**
      	- 解法二:窗口函数 + SUM函数
      	
      - ⭐⭐[牛客每个人最近的登录日期(五)](https://www.nowcoder.com/practice/ea0c56cd700344b590182aad03cc61b8?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      - ⭐[牛客每个人最近的登录日期(六)](https://www.nowcoder.com/practice/572a027e52804c058e1f8b0c5e8a65b4?tpId=82&tags=&title=&diffculty=0&judgeStatus=0&rp=1)
      
      ---
      
      ## 最近登陆时间,增量更新:
      
      table_1是一个按天分区的 用户日志表,每次一个用户登陆都会新加一行:
      
      day           timestamp             uid
      
      2018-08-01     2018-08-01 11:00:00    1
      
      2018-08-01     2018-08-01 14:00:00    1
      
       
      
      需要根据table_1新建一个table_2:
      
      Day           uid        latest_login_time
      
      2018-08-01    1          2018-08-01 14:00:00 
      
       
      
      每天新增一个昨日的分区的全量用户表,其中latest_login_time是自用户登录起最近一次登录的时间
      
      ```sql
      CREATE TABLE tabel2 (
          day	DATE NOT NULL,
          uid	INT NOT NULL,
          `latest login time` TIMESTAMP
      );
      
      INSERT INTO table2(day,
                        uid,
                        `latest login time`)
      SELECT day, uid, MAX(`timestamp`)
      FROM table_1
      WHERE DATEDIFF(CURDATE(), day) = 1 -- 昨天
      GROUP BY uid;

累计用户数计算

table a是一个用户注册时间的记录表,一个用户只有一条数据,a表如下:

create_time uid

2018-08-01 14:07:09 111

2018-08-02 14:07:09 134

需要计算8月份累计注册用户数(从8月1日开始累计计算,8月1日为8月1日注册用户数,8月2日为8月1日-2日两天的注册用户数),计算结果格式如下:

时间 累计用户数

2018-08-01 2000000

2018-08-02 2100000

…………….

2018-08-31 10000000

SELECT DATE(create_time) AS 时间, 
		COUNT(uid) OVER (ORDER BY creat_time) AS 累计用户数
FROM `table a`
WHERE YEAR(create_time) = 2018 AND MONTH(create_time) = 8;

连续访问天数

table a 是一个用户登陆时间记录表,每登陆一次会记录一条记录,a表如下:

log_time uid

2018-07-01 12:00:00 123

2018-07-01 13:00:00 123

2018-07-02 14:08:09 456

需要计算出7月1日登陆的用户,在后续连续登陆7天,14天,30天的人数

计算结果格式如下:

7月1日登陆总用户数 连续登陆7天用户数 连续登陆14天用户数 。。。。

1000 500 200

SELECT COUNT(table_a.uid) AS '7月1日登录总用户数', COUNT(table_b.uid) AS '连续登录7天的用户数', COUNT(table_c.uid) AS '连续登录14天的用户数'
FROM (SELECT DISTINCT uid
			FROM consecutive_task
			WHERE DATE(login_time) = '2018-07-01') table_a
LEFT JOIN (SELECT uid, (login_date - rank_num) AS rnk
           FROM (SELECT uid, login_date, (ROW_NUMBER() OVER (PARTITION BY uid
                                                             ORDER BY login_date)) AS rank_num
                 FROM (SELECT uid, DATE(login_time) AS login_date
                       FROM consecutive_task
                       WHERE DATE(login_time) BETWEEN '2018-07-01' AND '2018-07-07'
                       GROUP BY uid, login_date) b1 ) b2
           GROUP BY uid, rnk
           HAVING COUNT(rnk) = 7) table_b									
ON table_a.uid = table_b.uid
LEFT JOIN (SELECT uid, (login_date - rank_num) AS rnk
           FROM (SELECT uid, login_date, (ROW_NUMBER() OVER (PARTITION BY uid
                                                             ORDER BY login_date)) AS rank_num
                 FROM (SELECT uid, DATE(login_time) AS login_date
                       FROM consecutive_task
                       WHERE DATE(login_time) BETWEEN '2018-07-01' AND '2020-07-14'
                       GROUP BY uid, login_date) c1 ) c2
           GROUP BY uid, rnk
           HAVING COUNT(rnk) = 14) table_c									
ON table_a.uid = table_c.uid;

新增用户的近7日留存率

table1:用户新增表,一个设备首次激活都新加一行:

timestamp,device 【新增的日期,新增的设备】

table2: 是一个按天分区的用户活跃表,每次一个用户登陆都会新加一行:

day,device 【活跃的日期,活跃的设备】

需要计算用户新增用户的留存数,留存率【1-7日】

计算结果格式如下:

新增日期 新增设备数 次日留存数 次日留存率 2日留存数 2日留存率 ….

timestamp 1000 500 50% 450 45%

SELECT a.first AS 新增日期, COUNT(DISTINCT a.device) AS 新增设备数, 
			COUNT(DISTINCT IF(first - b.day = 1, a.device, NULL)) AS 次日留存数, 
            	CONCAT(ROUND(COUNT(DISTINCT IF(first - b.day = 1, a.device, NULL)) / COUNT(DISTINCT a.device) * 100, 0), '%') AS 次日留存率 
FROM (SELECT a.device, DATE(a.timestamp) AS first, b.day 
      FROM table1 a LEFT JOIN table2 b 
      ON a.device = b.device) c
GROUP BY a.first; -- 按 新增用户的日期 分组

计算日活用户签到,开宝箱,阅读行为的用户各自占比

table1: 是一个按天分区的用户活跃表,每次一个用户登陆都会新加一行:

day,uid 【活跃的日期,活跃的用户id】

table2:是一个按天分区的用户行为表,每一种行为都会新加一行:

day ,uid,type 【日期,用户id,type类型:1表示签到,2表示开宝箱,3表示阅读】

需要计算,签到占日活的比例,开宝箱占日活的比例,阅读占日活的比例

计算结果格式如下:

日期 活跃用户数 签到占日活的比例 开宝箱占日活的比例 阅读占日活的比例

SELECT day AS 日期, COUNT(DISTINCT uid) AS 活跃用户数, 
			COUNT(IF(type = 1, 1, NULL)) / COUNT(uid) AS 签到占日活的比例,
			COUNT(IF(type = 2, 1, NULL)) / COUNT(uid) AS 开宝箱占日活的比例,
			COUNT(IF(type = 3, 1, NULL)) / COUNT(uid) AS 阅读占日活的比例
FROM (SELECT a.*, b.type 
      FROM table_1 AS a LEFT JOIN table_2 AS b 
      ON a.uid = b.uid AND a.day = b.day) c
GROUP BY day;

练习四

准备数据

CREATE TABLE Employee 
(
	id INT(20),
	name VARCHAR(20),
	salary INT(20),
	departmentid INT(20)
);

INSERT INTO Employee 
VALUES (1,"Joe",70000,1), 
		(2,"Henry",80000,2),
		(3,"Sam",60000,2),
		(4,"Max",90000,1);

CREATE TABLE Department 
(
    id INT(20),
    name VARCHAR(20)
);

INSERT INTO Department 
VALUES (1,"IT"),
		(2,"Sales");

练习题

找出每个部门工资最高的员工

SELECT d.name Department, e.name Employee, MAX(Salary) Salary
FROM Employee e LEFT JOIN Department d
ON e.departmentid = d.id
GROUP BY d.id;

准备数据

CREATE TABLE customer
(
    Id INT(10),
	Email VARCHAR(20)
);

INSERT INTO customer
VALUES (1,'a@b.com'),
		(2,'c@d.com'),
		(3,'a@b.com');

练习题

查找 customer 表中所有重复的电子邮箱

SELECT Email
FROM customer
GROUP BY Email
HAVING COUNT(Email)>1;

准备数据

CREATE TABLE Customers
( 
    Id INT(10),
    Name VARCHAR(20)
);

INSERT INTO Customers 
VALUES (1,'Joe'),
		(2,'Henry'),
		(3,'Sam'),
		(4,'Max');

CREATE TABLE Orders
(
	Id INT(10),
	CustomerId INT(10)
);

INSERT INTO Orders
VALUES (1,3),
		(2,1);

练习题:找出所有从不订购任何东西的客户

-- 方法一
SELECT name AS customers
FROM customers c LEFT JOIN orders o
ON c.id = o.customerId
WHERE o.id IS NULL;

-- 方法二
SELECT name AS customers
FROM customers
WHERE id NOT IN (SELECT customerid
								FROM orders);

准备数据

CREATE TABLE Scores
( 
    id VARCHAR(20), 
    score FLOAT(4,2)
);

INSERT INTO scores 
VALUES (1,3.5), 
		(2,3.65), 
		(3,4.00), 
		(4,3.85), 
		(5,4.00),
		(6,3.65);

练习题:通过查询实现分数排名

-- 窗口函数 ROW_NUMBER()
SELECT score, ROW_NUMBER() OVER (ORDER BY score DESC) AS `Rank`
FROM scores
ORDER BY score DESC;

-- ROW_NUMBER() 对应 使用变量 的做法
SELECT score, @curRank=@curRank+1 AS `Rank`
FROM scores, (SELECT @RANK:=0) r
ORDER BY score DESC;

-- RANK() (有并列名次的行,会占用下一名次的位置)对应使用变量的做法
SELECT score, `rank`
FROM (SELECT score, @curRank = IF(@preScore = score, @curRank, @incRank) AS `rank`, -- 同分同排名,不同分下一排名
                @incRank:=@incRank+1; -- 作用等同于 ROW_NUMBER()
                @preScore:=score
      FROM scores, (SELECT @curRank:=0, @preScore:=NULL, @incRank:=1) r
      ORDER BY score DESC) s;
      
-- DENSE_RANK() 对应使用变量的做法

SELECT score, `rank`
FROM (SELECT score, @curRank=IF(@preScore=score, @curRank, @curRank+1) AS `rank`,
				@preScore=score
	  FROM scores, (SELECT @curRank:=0, @preScore:=NULL) r
	  ORDER BY socre DESC) s;
	  
-- 使用CASE WHEN更简洁
SELECT score, CASE 
				WHEN score=@preScore THEN @curRank
                WHEN @preScore:=score THEN @curRank:=@curRank+1 -- @preScore:=score 赋值语句必为true
              END AS `rank`
FROM scores, (SELECT @curRank:=0, @preScore:=NULL) r
ORDER BY score DESC;

准备数据

CREATE TABLE seat 
( 
    id INT(20),
	student VARCHAR(20)
); 

INSERT INTO seat 
VALUES (1,'Abbot'),
		(2,'Doris'),
		(3,'Emerson'),
		(4,'Green'),
		(5,'Jeames');

练习题:座位id 是连续递增的,改变相邻俩学生的座位。
要求:

  • 如果学生人数是奇数,则不需要改变最后一个同学的座位。
SELECT (CASE
            WHEN id % 2 = 1 AND id != cnt THEN id+1 -- 奇数且不是最后一个
            WHEN id % 2 = 1 AND id = cnt THEN id
            ELSE id - 1 -- 偶数
        END) AS id, student
FROM seat, (SELECT COUNT(*) AS cnt -- 用于判断当前是否是最后一个学生(奇数的特殊判断)
            FROM seat) t
ORDER BY id;

准备数据

DROP TABLE IF EXISTS Employee;
CREATE TABLE Employee
( 
		Id INT(10), 
		Name VARCHAR(20),
		Salary INT(20), 
		ManagerId INT(10)
);

INSERT INTO Employee 
VALUES (1, 'Joe', 70000, 3), 
		(2, 'Henry', 80000, 4), 
		(3, 'Sam', 60000, NULL), 
		(4, 'Max', 90000, NULL);

练习题:编写一个 SQL 查询,获取收入超过他们经理的员工的姓名。

SELECT e1.Name
FROM Employee e1 LEFT JOIN Employee e2
ON e1.ManagerId = e2.Id
WHERE e1.Salary > e2.Salary;

练习五

题目:X 市建了一个新的体育馆,每日人流量信息被记录在这三列信息中:序号 (id)、日期 (visit_date)、 人流量 (people)。
请编写一个查询语句,找出
人流量的高峰期

要求:

  • 高峰期时,至少连续三行记录中的人流量不少于100
  • 每天只有一行记录,日期随着 id 的增加而增加
CREATE TABLE stadium 
( 
    id INT(20), 
	visit_date DATE, 
	people INT(20)
); 

INSERT INTO stadium 
VALUES (1,'2017-01-01',10), 
		(2,'2017-01-02',109), 
		(3,'2017-01-03',150), 
		(4,'2017-01-04',99), 
		(5,'2017-01-05',145), 
		(6,'2017-01-06',1455), 
		(7,'2017-01-07',199), 
		(8,'2017-01-08',188);
-- 方法一:朴实的自联结
SELECT DISTINCT t1.*
FROM stadium t1, stadium t2, stadium t3
WHERE (t1.people >= 100     -- 筛选表中 人流量>=100 的行
		AND t2.people >= 100
		AND t3.people >= 100
		AND ( (t1.id - t2.id = 1 -- 错位联结,从t1的id开始连续3天有人流高峰
				 AND t1.id - t3.id = 2
			 	 AND t2.id - t3.id = 1
                ) 
             	OR (t2.id - t1.id = 1 -- 补充 t1之后只有1天人流高峰,但之前也有连续高峰 的情况
					 AND t2.id - t3.id = 2
					 AND t1.id - t3.id = 1)
				OR (t3.id - t2.id = 1 -- 补充t1为人流高峰最后一天 的情况
					 AND t2.id - t1.id = 1
					 AND t3.id - t1.id = 2) 
             )
        )
ORDER BY t1.id; 
       
       
-- 方法二:MySQL 8.0 之后开始支持 WITH AS 语句(子查询部分,作用类似于一个视图,但with as 属于一次性的,而且必须和其他sql一起使用才行) WITH子句的查询必须用括号括起来
WITH t1 AS
(SELECT id, visit_date, people,
        (id - ROW_NUMBER() OVER (ORDER BY id)) rk
FROM stadium
WHERE people >= 100)

SELECT id, visit_date, people
FROM t1
WHERE rk IN (SELECT rk 
            FROM t1
            GROUP BY rk
            HAVING COUNT(*) >= 3);       

挑战题

SELECT 日期, COUNT(DISTINCT a.uid) 活跃用户数, COUNT(DISTINCT IF(day-first=1, a.uid, NULL)) 次日留存用户数,
			COUNT(DISTINCT IF(day-first=3), a.uid, NULL) 三日留存用户数,
			COUNT(DISTINCT IF(day-first=7), a.uid, NULL) 七日留存用户数,
			CONCAT(ROUND(COUNT(DISTINCT IF(day-first=1, a.uid, NULL)) / COUNT(DISTINCT a.uid), 2), '%') 次日留存率, CONCAT(ROUND(COUNT(DISTINCT IF(day-first=3, a.uid, NULL)) / COUNT(DISTINCT a.uid), 2), '%') 三日留存率, CONCAT(ROUND(COUNT(DISTINCT IF(day-first=7, a.uid, NULL)) / COUNT(DISTINCT a.uid), 2), '%') 七日留存率
FROM (SELECT uid, DATE_FORMAT(dayno, '%Y-%m-%d') AS first
      FROM act_user_info
      WHERE app_name = '相机') a LEFT JOIN (SELECT uid, DATE_FORMAT(dayno, '%Y-%m-%d') AS day
                                           FROM act_user_info
                                           WHERE app_name = '相机') b
ON a.uid = b.uid
GROUP BY first;

PDD笔试题

2. 用户行为分析

表1——用户行为表tracking_log,大概字段有(user_id‘用户编号’,opr_id‘操作编号’,log_time‘操作时间’)

  • 统计每天符合以下条件的用户数A操作之后是B操作,AB操作必须相连
-- 使用窗口函数
SELECT DATE(log_time), COUNT(DISTINCT user_id)
FROM (SELECT DATE(log_time), user_id, opr_id,
     			LEAD(opr_id, 1) OVER() AS pre_opr
     FROM tracking_log) table_1
WHERE opr_id='A' AND pre_opr='B'
GROUP BY DATE(log_time);


-- 使用用户变量
SELECT DATE(log_time), COUNT(DISTINCT user_id)
FROM (SELECT DATE(log_time), user_id,
     			(CASE
     				WHEN @pre_opr='A' AND opr_id='B' THEN @isJoin:=True AND @pre_opr:=opr_id
      				ELSE @pre_opr:=opr_id
      			END) isJoin
      FROM tracking_log, (SELECT @pre_opr:=NULL, @isJoin:=FALSE) a) table_1
WHERE isJoin=True
GROUP BY DATE(log_time);

3. 用户新增留存分析

表1——用户登陆表user_log,大概字段有(user_id‘用户编号’,log_time‘登陆时间’)

  • 获取每日新增用户数,以及第2天、第30天的回访比例
SELECT COUNT(user_id) 新增用户数, 
		(COUNT(DISTINCT IF(DATEDIFF(DATE(log_time), 注册日期)=1, u1.user_id, NULL)) / COUNT(user_id)) AS2天回访比例, 
		(COUNT(DISTINCT IF(DATEDIFF(DATE(log_time), 注册日期)=29, u1.user_id, NULL)) / COUNT(user_id)) AS30天回访比例
FROM (SELECT user_id, DATE(MIN(log_time)) AS 注册日期
      FROM user_log
      GROUP BY user_id) u1 LEFT JOIN user_log u2
ON u1.user_id = u2.user_id
GROUP BY 注册日期;

4. 贝叶斯公式的应用

数据分析笔试题(1) - 知乎


SELECT Partition_date, 
		(CASE
			WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)=1 THEN COUNT(DISTINCT t1.user_id) 
		END) AS '流失1天',
		(CASE
        	WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)=2 THEN COUNT(DISTINCT t1.user_id) 
        END) AS '流失2天',
		(CASE
        	WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)=3 THEN COUNT(DISTINCT t1.user_id) 
        END) AS '流失3天',
		(CASE
        	WHEN DATEDIFF(t2.Partition_date, t1.Partition_date)>=30 THEN COUNT(DISTINCT t1.user_id) 
        END) AS '流失30天以上'
FROM (SELECT user_id, Partition_date
      FROM user_active
      WHERE daily_active_status_map=1) t1 LEFT JOIN (SELECT user_id, Partition_date
                                                     FROM usre_active
                                                     WHERE daily_active_status_map=1) t2
ON t1.user_id=t2.user_id
WHERE t2.user_id IS NULL
GROUP BY Partition_date
ORDER BY Partition_date DESC;