# 掷几次骰子,并将结果存储在一个列表中 results = [] for roll_num inrange(100): result = die.roll() results.append(result) print(results)
分析结果
为分析掷一个D6骰子的结果,我们计算每个点数出现的次数:
1 2 3 4 5 6 7 8 9 10 11 12 13 14
--snip-- # 掷几次骰子,并将结果存储在一个列表中 results = [] for roll_num inrange(1000): result = die.roll() results.append(result) # 分析结果 frequencies = [] for value inrange(1, die.num_sides+1): frequency = results.count(value) frequencies.append(frequency) print(frequencies)
为分析结果,我们创建了空列表frequencies,用于存储每种点数出现的次数
绘制条形图(bar chart)
有了频率列表后,我们就可以绘制一个表示结果的条形图(bar chart),指出各种结果出现的频率。
利用pygal绘制
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
import pygal --snip--
# 分析结果 frequencies = [] for value inrange(1, die.num_sides+1): frequency = results.count(value) frequencies.append(frequency) # 对结果进行可视化 bar = pygal.Bar()
bar.title = "Results of rolling one D6 1000 times." bar.x_labels = ['1', '2', '3', '4', '5', '6'] bar.x_title = "Result" bar.y_title = "Frequency of Result"
# 掷骰子多次,并将结果存储到一个列表中 results = [] for roll_num inrange(50000): result = die_1.roll() + die_2.roll() results.append(result) # 分析结果 frequencies = [] max_result = die_1.num_sides + die_2.num_sides for value inrange(2, max_result+1): frequency = results.count(value) frequencies.append(frequency) # 可视化结果 bar = pygal.Bar()
bar.title = "Results of rolling a D6 and a D10 50,000 times." bar.x_labels = [str(value) for value inrange(2, 17)] bar.x_title = "Result" bar.y_title = "Frequency of Result"
Traceback (most recent call last): File "highs_lows.py", line 17, in <module> high = int(row[1]) ValueError: invalid literal for int() with base 10: ''
# 设置图形的格式 title = "Daily high and low temperatures - 2014" title += "\nSitka, AK and Death Valley, CA" plt.title(title, fontsize=20) plt.xlabel('', fontsize=16) plt.gcf().autofmt_xdate() plt.ylabel("Temperature (F)", fontsize=16) plt.tick_params(axis='both', which='major', labelsize=16) plt.ylim(10, 120)
# 打印每一天的信息 for btc_dict in btc_data: date = btc_dict['date'] month = int(btc_dict['month']) week = int(btc_dict['week']) weekday = btc_dict['weekday'] close = int(float(btc_dict['close'])) print("{} is month {} week {}, {}, the close price is {} RMB".format(date, month, week, weekday, close))
print("\nSelected information about each repository:") for repo_dict in repo_dicts: print('\nName:', repo_dict['name']) print('Owner:', repo_dict['owner']['login']) print('Stars:', repo_dict['stargazers_count']) print('Reposiory:', repo_dict['html_url']) print('Description:', repo_dict['description'])
# 方法一 SELECT* FROM scores WHERE degree >=60AND degree <=80;
# 方法二 SELECT* FROM scores WHERE degree BETWEEN60AND80;
★查询scores表中成绩为85,86或88的记录。
解答:
1 2 3 4 5 6 7 8 9
# 方法一 SELECT* FROM scores WHERE degree IN (85, 86, 88);
# 方法二 SELECT* FROM scores WHERE degree =85OR degree =86OR degree =88;
查询students表中“95031”班或性别为“女”的同学记录。
解答:
1 2 3
SELECT* FROM students WHERE class ='95031'OR ssex ='女';
以class降序查询students表的所有记录。
解答:
1 2 3
SELECT* FROM students ORDERBY class DESC;
★以cno升序、degree降序查询scores表的所有记录。
解答:
1 2 3
SELECT* FROM scores ORDERBY con, degree DESC;
查询student表中“95031”班的学生人数。
解答:
1 2 3
SELECTCOUNT(*) AS stu_num FROM students WHERE class ='95031';
★★★查询scores表中的最高分的学生学号和课程号。
解答:子查询
1 2 3 4
SELECT sno, cno FROM scores WHERE degree = (SELECTMAX(degree) FROM scores);
★查询scores表中‘3-105’号课程的平均分。
解答:
1 2 3
SELECTAVG(degree) FROM scores WHERE cno ='3-105';
★★★查询scores表中至少有5名学生选修的并以3开头的课程的平均分数。
解答:分组(HAVING子句用于过滤分组)
1 2 3 4 5 6 7 8 9 10 11 12
# 方法一 SELECT cno, AVG(degree) AS avg_degree FROM scores WHERE cno LIKE'3%' GROUPBY cno HAVINGCOUNT(*) >=5;
# 方法二 SELECT cno, AVG(degree) AS avg_degree FROM scores GROUPBY cno HAVINGCOUNT(*) >=5AND cno LIKE'3%';
★★★查询scores表中最低分大于70,最高分小于90的sno列。
解答:
1 2 3 4
SELECT sno FROM scores GROUPBY sno HAVINGMIN(degree) >70ANDMAX(degree) <90;
★★查询所有学生的sname、cno和degree列。
解答:
外部联结(OUTER JOIN, OUTER可省略)
内部联结(INNER JOIN)
1 2 3 4 5 6 7 8 9 10 11
# 方法一:外部联结, LEFTJOIN 左边的表 所有记录都会列出 SELECT stu.sname, sco.cno, sco.degree FROM students AS stu LEFTJOIN scores AS sco ON stu.sno = sco.sno ORDERBY sname;
# 方法二:内部联结, INNERJOIN 只返回两个表中联结字段相等的行 SELECT stu.sname, sco.cno, sco.degree FROM students AS stu INNERJOIN scores AS sco ON stu.sno = sco.sno ORDERBY sname;
★查询所有学生的sno、cname和degree列。
解答:外部联结
1 2 3 4
SELECT s.sno, c.cname, s.degree FROM scores AS s LEFTJOIN courses AS c ON s.cno = c.cno ORDERBY sno;
★★★查询所有学生的sname、cname和degree列。
解答:外部联结
1 2 3 4 5
SELECT st.sname, c.cname, sc.degree FROM students AS st LEFTJOIN scores AS sc ON st.sno = sc.sno LEFTJOIN courses AS c ON sc.cno = c.cno ORDERBY sname;
★★★查询“95033”班所选课程的平均分。
解答:
1 2 3 4 5 6 7
SELECT cname, AVG(degree) FROM students AS st INNERJOIN scores AS sc ON st.sno = sc.sno INNERJOIN courses AS c ON sc.cno = c.cno WHERE class ='95033' GROUPBY c.cno ORDERBY cname;
SELECT sno, cno, rank FROM scores INNERJOIN grade ON scores.degree BETWEEN grade.low AND grade.upp ORDERBY sno;
★★★查询选修“3-105”课程的成绩高于“109”号同学成绩的所有同学的记录。
解答:
自联结
1 2 3 4 5 6
SELECT sname, s1.degree FROM scores AS s1 INNERJOIN scores AS s2 ON s1.cno = s2.cno AND s1.degree > s2.degree INNERJOIN students AS st ON s1.sno = st.sno WHERE s1.cno ='3-105'AND s2.sno ='109' ORDERBY s1.degree;
★★★查询scores中选学一门以上课程的同学中分数为非最高分成绩的记录。
解答:
条件:
选学一门以上课程
列出这些同学的所有非最高分成绩
1 2 3 4 5 6 7 8
SELECT scores.sno, cno, degree, max_degree FROM scores INNERJOIN (SELECT sno, MAX(degree) AS max_degree FROM scores GROUPBY sno HAVINGCOUNT(*) >1) AS max ON scores.sno = max.sno AND degree < max_degree ORDERBY sno;
查询成绩高于学号为“109”、课程号为“3-105”的成绩的所有记录。
解答:
1 2 3 4 5 6 7
SELECT st.sname, s1.cno, s1.degree FROM scores AS s1 INNERJOIN scores AS s2 ON s1.cno = s2.cno AND s1.degree > s2.degree INNERJOIN students AS st ON s1.sno = st.sno WHERE s1.cno ='3-105'AND s2.sno ='109' ORDERBY s1.degree;
★★★查询和学号为108的同学同年出生的所有学生的sno、sname和sbirthday列。
解答:函数YEAR(d)
1 2 3 4 5
SELECT s1.sno, s1.sname, s1.sbirthday FROM students AS s1 INNERJOIN students AS s2 ONYEAR(s1.sbirthday) =YEAR(s2.sbirthday) WHERE s2.sno ='108' ORDERBY sbirthday;
查询“张旭“教师任课的学生成绩。
解答:
1 2 3 4 5 6 7
SELECT sno, degree FROM scores INNERJOIN courses ON scores.cno = courses.cno INNERJOIN teachers ON courses.tno = teachers.tno WHERE teachers.tname ='张旭' ORDERBY degree;
查询选修某课程的同学人数多于5人的教师姓名。
解答:
1 2 3 4 5 6 7
SELECT tname FROM scores AS s INNERJOIN courses AS c ON s.cno = c.cno INNERJOIN teachers AS t ON t.tno = c.tno GROUPBY c.cno HAVINGCOUNT(c.cno) >5;
查询95033班和95031班全体学生的记录。
解答:
1 2 3 4
SELECT* FROM students WHERE class IN ('95033', '95031') ORDERBY class;
查询有85分以上成绩的课程cno。
解答:DISTINCT关键字
1 2 3
SELECTDISTINCT c.cno FROM scores WHERE degree >85;
查询出“计算机系“教师所教课程的成绩表。
解答:
1 2 3 4 5 6 7
SELECT t.tname, s.cno, cname, sno, degree FROM scores AS s INNERJOIN courses AS c ON s.cno = c.cno INNERJOIN teachers AS t ON t.tno = c.tno WHERE t.depart ='计算机系' ORDERBY t.tname, cname, degree DESC;
查询“计算机系”中与“电子工程系“的教师不同职称教师的tname和prof。
解答:
1 2 3 4 5 6
SELECT tname, prof FROM teachers WHERE depart ='计算机系'AND prof NOTIN (SELECT prof FROM teachers WHERE depart ='电子工程系');
SELECT tname AS name, tsex AS sex, tbirthday AS birthday FROM teachers UNION SELECT sname AS name, ssex AS sex, sbirthday AS birthday FROM students ORDERBY birthday;
查询所有“女”教师和“女”同学的name、sex和birthday。
解答:
1 2 3 4 5 6 7 8
SELECT tname AS name, tsex AS sex, tbirthday AS birthday FROM teachers AS t WHERE t.tsex ='女' UNION SELECT sname AS name, ssex AS sex, sbirthday AS birthday FROM students AS s WHERE s.ssex ='女' ORDERBY birthday;
★★★查询成绩比该课程平均成绩低的同学的成绩表。
解答:
1 2 3 4 5 6
SELECT s1.*, avg_degree FROM scores AS s1 INNERJOIN ( SELECT cno, AVG(degree) AS avg_degree FROM scores GROUPBY cno) AS s2 ON s1.cno = s2.cno AND s1.degree < s2.avg_degree;
查询所有任课教师的tname和depart。
解答:
1 2 3 4 5 6 7 8 9 10 11
SELECTDISTINCT tname, depart FROM courses AS c INNERJOIN teachers AS t ON c.tno = t.tno ORDERBY tname;
SELECT tname, depart FROM teachers WHERE tno IN( SELECT tno FROM courses) ORDERBY tname;
★查询所有未讲课的教师的Tname和Depart。
解答:
1 2 3 4 5 6
SELECT tname, depart FROM teachers WHERE tno NOTIN( SELECT tno FROM courses) ORDERBY tname;
★★查询至少有2名男生的班号。
解答:
1 2 3 4 5
SELECT class FROM students WHERE ssex ='男' GROUPBY class HAVINGCOUNT(*) >=2;
★查询students表中不姓“王”的同学记录。
解答:
LIKE操作符
通配符%
1 2 3
SELECT* FROM students WHERE sname NOTLIKE'王%';
★★查询students表中每个学生的姓名和年龄。
解答:
当前日期函数CURDATE()
当前日期和时间函数NOW()
1 2
SELECT sname, TIMESTAMPDIFF(YEAR, sbirthday, CURDATE()) AS age FROM students;
★★查询students表中最大和最小的sbirthday日期值。
解答:
1 2
SELECTDATE(MAX(Sbirthday)) AS max_birthday, DATE(MIN(Sbirthday)) AS min_birthday, FROM Students;
以班号和年龄从大到小的顺序查询students表中的全部记录。
解答:
1 2 3
SELECT* FROM students ORDERBY class DESC, TIMESTAMPDIFF(YEAR, sbirthday, CURDATE()) DESC;
查询“男”教师及其所上的课程。
解答:
1 2 3 4 5
SELECT tname, cname FROM teachers AS t INNERJOIN courses AS c ON t.tno = c.tno WHERE tsex='男' ORDERBY tname;
★查询最高分同学的sno、cno和degree列。
解答:
1 2 3 4
SELECT sno, cno, degree FROM scores GROUPBY cno HAVING degree=MAX(degree);
查询和“李军”同性别的所有同学的sname。
解答:
1 2 3 4 5 6 7 8 9 10 11 12 13
# 方法一 子查询 SELECT sname FROM students WHERE ssex = ( SELECT ssex FROM students WHERE sname ='李军'); # 方法二 联结表 SELECT s1.Sname FROM students AS s1 INNERJOIN students AS s2 ON s1.ssex=s2.ssex WHERE s2.sname ='李军';
★★查询和“李军”同性别并同班的同学sname。
解答:
1 2 3 4
SELECT sname FROM students AS s1 INNERJOIN students AS s2 ON s1.ssex = s2.ssex AND s1.class = s2.class WHERE s2.sname ='李军';
查询所有选修“计算机导论”课程的“男”同学的成绩表。
解答:
1 2 3 4 5 6 7
SELECT sname, degree FROM scores AS s1 INNERJOIN courses AS c ON s1.cno = c.cno INNERJOIN students AS s2 ON s1.sno = s2.sno WHERE c.cname ='计算机导论'AND s2.ssex ='男' ORDERBY degree DESC;
SELECTMONTH(paidTime), COUNT(DISTINCT userId) FROM orderinfo WHERE isPaid ='已支付' GROUPBYMONTH(paidTime);
统计用户三月份的复购率和回购率(三月份购买的用户,四月份也购买)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# 复购率 SELECTCOUNT(cnt) AS 下单用户数, COUNT(IF(cnt >1, 1, NULL)) AS 复购用户数 FROM (SELECT userId, COUNT(userId) as cnt FROM orderinfo WHERE isPaid ='已支付'ANDMONTH(paidTime) =3 GROUPBY userId) AS t; # 3月份的回购率 SELECT t1.m, COUNT(t1.m), COUNT(t2.m) FROM (SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') AS m FROM orderinfo WHERE isPaid ='已支付' GROUPBY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) AS t1 LEFTJOIN (SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') AS m FROM orderinfo WHERE isPaid ='已支付' GROUPBY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) AS t2 ON t1.userId = t2.userId AND t1.m = DATE_ADD(t2.m, INTERVAL-1MONTH) GROUPBY t1.m;
统计男女用户的消费频次(平均数)是否有差异
1 2 3 4 5 6 7 8 9
SELECT sex, AVG(cnt) FROM (SELECT o.userId, sex, COUNT(*) AS cnt # count(*)的作用为 统计消费次数 FROM orderinfo AS o INNERJOIN (SELECT* FROM userinfo WHERE sex !='') AS t ON o.userId = t.userId GROUPBY o.userId, sex) AS t2 GROUPBY sex;
统计多次消费的用户,第一次和最后一次消费间隔是多少
1 2 3 4 5
SELECT userId, DATEDIFF(MAX(paidTime), MIN(paidTime)) FROM orderinfo WHERE isPaid ='已支付' GROUPBY userId HAVINGCOUNT(*) >1;
统计不同年龄段,用户的消费金额是否有差异?
1 2 3 4 5 6 7 8 9 10
SELECT ageGroup, AVG(cnt) FROM (SELECT o.userId, ageGroup, count(o.userId) AS cnt FROM orderinfo AS o INNERJOIN (SELECT userId, CEIL((YEAR(NOW()) -YEAR(birth)) /10) AS ageGroup FROM userinfo WHERE birth >'1901-00-00') AS t ON o.userId = t.userId WHERE isPaid ='已支付' GROUPBY o.userId, ageGroup) AS t2 GROUPBY ageGroup;
统计消费的二八法则,消费的top20%用户,贡献了多少额度
1 2 3 4 5 6 7
SELECTCOUNT(userId), SUM(total) FROM (SELECT userId, SUM(price) AS total FROM orderinfo AS o WHERE isPaid ='已支付' GROUPBY userId ORDERBY total DESC LIMIT 17000) AS t;
SELECTDISTINCT Num AS ConsecutiveNums FROM (SELECT Num, COUNT(*) AS num_cnt FROM (SELECT id, num, (ROW_NUMBER() OVER (ORDERBY id) -ROW_NUMBER() OVER (PARTITIONBY num ORDERBY id)) AS orde FROM logs) AS W GROUPBY num, orde) AS S WHERE num_cnt >=3;
```sql CREATE TABLE tabel2 ( day DATE NOT NULL, uid INT NOT NULL, `latest login time` TIMESTAMP );
INSERT INTO table2(day, uid, `latest login time`) SELECT day, uid, MAX(`timestamp`) FROM table_1 WHERE DATEDIFF(CURDATE(), day) = 1 -- 昨天 GROUP BY uid;
SELECTCOUNT(table_a.uid) AS'7月1日登录总用户数', COUNT(table_b.uid) AS'连续登录7天的用户数', COUNT(table_c.uid) AS'连续登录14天的用户数' FROM (SELECTDISTINCT uid FROM consecutive_task WHEREDATE(login_time) ='2018-07-01') table_a LEFTJOIN (SELECT uid, (login_date - rank_num) AS rnk FROM (SELECT uid, login_date, (ROW_NUMBER() OVER (PARTITIONBY uid ORDERBY login_date)) AS rank_num FROM (SELECT uid, DATE(login_time) AS login_date FROM consecutive_task WHEREDATE(login_time) BETWEEN'2018-07-01'AND'2018-07-07' GROUPBY uid, login_date) b1 ) b2 GROUPBY uid, rnk HAVINGCOUNT(rnk) =7) table_b ON table_a.uid = table_b.uid LEFTJOIN (SELECT uid, (login_date - rank_num) AS rnk FROM (SELECT uid, login_date, (ROW_NUMBER() OVER (PARTITIONBY uid ORDERBY login_date)) AS rank_num FROM (SELECT uid, DATE(login_time) AS login_date FROM consecutive_task WHEREDATE(login_time) BETWEEN'2018-07-01'AND'2020-07-14' GROUPBY uid, login_date) c1 ) c2 GROUPBY uid, rnk HAVINGCOUNT(rnk) =14) table_c ON table_a.uid = table_c.uid;
新增用户的近7日留存率
table1:用户新增表,一个设备首次激活都新加一行:
timestamp,device 【新增的日期,新增的设备】
table2: 是一个按天分区的用户活跃表,每次一个用户登陆都会新加一行:
day,device 【活跃的日期,活跃的设备】
需要计算用户新增用户的留存数,留存率【1-7日】
计算结果格式如下:
新增日期 新增设备数 次日留存数 次日留存率 2日留存数 2日留存率 ….
timestamp 1000 500 50% 450 45%
1 2 3 4 5 6 7
SELECT a.first AS 新增日期, COUNT(DISTINCT a.device) AS 新增设备数, COUNT(DISTINCT IF(first- b.day =1, a.device, NULL)) AS 次日留存数, CONCAT(ROUND(COUNT(DISTINCT IF(first- b.day =1, a.device, NULL)) /COUNT(DISTINCT a.device) *100, 0), '%') AS 次日留存率 FROM (SELECT a.device, DATE(a.timestamp) ASfirst, b.day FROM table1 a LEFTJOIN table2 b ON a.device = b.device) c GROUPBY a.first; -- 按 新增用户的日期 分组
计算日活用户签到,开宝箱,阅读行为的用户各自占比
table1: 是一个按天分区的用户活跃表,每次一个用户登陆都会新加一行:
day,uid 【活跃的日期,活跃的用户id】
table2:是一个按天分区的用户行为表,每一种行为都会新加一行:
day ,uid,type 【日期,用户id,type类型:1表示签到,2表示开宝箱,3表示阅读】
需要计算,签到占日活的比例,开宝箱占日活的比例,阅读占日活的比例
计算结果格式如下:
日期 活跃用户数 签到占日活的比例 开宝箱占日活的比例 阅读占日活的比例
1 2 3 4 5 6 7 8
SELECTdayAS 日期, COUNT(DISTINCT uid) AS 活跃用户数, COUNT(IF(type =1, 1, NULL)) /COUNT(uid) AS 签到占日活的比例, COUNT(IF(type =2, 1, NULL)) /COUNT(uid) AS 开宝箱占日活的比例, COUNT(IF(type =3, 1, NULL)) /COUNT(uid) AS 阅读占日活的比例 FROM (SELECT a.*, b.type FROM table_1 AS a LEFTJOIN table_2 AS b ON a.uid = b.uid AND a.day = b.day) c GROUPBYday;
-- 窗口函数 ROW_NUMBER() SELECT score, ROW_NUMBER() OVER (ORDERBY score DESC) AS `Rank` FROM scores ORDERBY score DESC;
-- ROW_NUMBER() 对应 使用变量 的做法 SELECT score, @curRank=@curRank+1AS `Rank` FROM scores, (SELECT@RANK:=0) r ORDERBY score DESC;
-- RANK() (有并列名次的行,会占用下一名次的位置)对应使用变量的做法 SELECT score, `rank` FROM (SELECT score, @curRank= IF(@preScore= score, @curRank, @incRank) AS `rank`, -- 同分同排名,不同分下一排名 @incRank:=@incRank+1; -- 作用等同于 ROW_NUMBER() @preScore:=score FROM scores, (SELECT@curRank:=0, @preScore:=NULL, @incRank:=1) r ORDERBY score DESC) s; -- DENSE_RANK() 对应使用变量的做法
SELECT score, `rank` FROM (SELECT score, @curRank=IF(@preScore=score, @curRank, @curRank+1) AS `rank`, @preScore=score FROM scores, (SELECT@curRank:=0, @preScore:=NULL) r ORDERBY socre DESC) s; -- 使用CASE WHEN更简洁 SELECT score, CASE WHEN score=@preScoreTHEN@curRank WHEN@preScore:=score THEN@curRank:=@curRank+1-- @preScore:=score 赋值语句必为true ENDAS `rank` FROM scores, (SELECT@curRank:=0, @preScore:=NULL) r ORDERBY score DESC;
准备数据
1 2 3 4 5 6 7 8 9 10 11 12
CREATETABLE seat ( id INT(20), student VARCHAR(20) );
SELECT (CASE WHEN id %2=1AND id != cnt THEN id+1-- 奇数且不是最后一个 WHEN id %2=1AND id = cnt THEN id ELSE id -1-- 偶数 END) AS id, student FROM seat, (SELECTCOUNT(*) AS cnt -- 用于判断当前是否是最后一个学生(奇数的特殊判断) FROM seat) t ORDERBY id;
准备数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14
DROPTABLE IF EXISTS Employee; CREATETABLE Employee ( Id INT(10), Name VARCHAR(20), Salary INT(20), ManagerId INT(10) );
-- 方法一:朴实的自联结 SELECTDISTINCT t1.* FROM stadium t1, stadium t2, stadium t3 WHERE (t1.people >=100-- 筛选表中 人流量>=100 的行 AND t2.people >=100 AND t3.people >=100 AND ( (t1.id - t2.id =1-- 错位联结,从t1的id开始连续3天有人流高峰 AND t1.id - t3.id =2 AND t2.id - t3.id =1 ) OR (t2.id - t1.id =1-- 补充 t1之后只有1天人流高峰,但之前也有连续高峰 的情况 AND t2.id - t3.id =2 AND t1.id - t3.id =1) OR (t3.id - t2.id =1-- 补充t1为人流高峰最后一天 的情况 AND t2.id - t1.id =1 AND t3.id - t1.id =2) ) ) ORDERBY t1.id; -- 方法二:MySQL 8.0 之后开始支持 WITH AS 语句(子查询部分,作用类似于一个视图,但with as 属于一次性的,而且必须和其他sql一起使用才行) WITH子句的查询必须用括号括起来 WITH t1 AS (SELECT id, visit_date, people, (id -ROW_NUMBER() OVER (ORDERBY id)) rk FROM stadium WHERE people >=100)
SELECT id, visit_date, people FROM t1 WHERE rk IN (SELECT rk FROM t1 GROUPBY rk HAVINGCOUNT(*) >=3);
挑战题
1 2 3 4 5 6 7 8 9 10 11
SELECT 日期, COUNT(DISTINCT a.uid) 活跃用户数, COUNT(DISTINCT IF(day-first=1, a.uid, NULL)) 次日留存用户数, COUNT(DISTINCT IF(day-first=3), a.uid, NULL) 三日留存用户数, COUNT(DISTINCT IF(day-first=7), a.uid, NULL) 七日留存用户数, CONCAT(ROUND(COUNT(DISTINCT IF(day-first=1, a.uid, NULL)) /COUNT(DISTINCT a.uid), 2), '%') 次日留存率, CONCAT(ROUND(COUNT(DISTINCT IF(day-first=3, a.uid, NULL)) /COUNT(DISTINCT a.uid), 2), '%') 三日留存率, CONCAT(ROUND(COUNT(DISTINCT IF(day-first=7, a.uid, NULL)) /COUNT(DISTINCT a.uid), 2), '%') 七日留存率 FROM (SELECT uid, DATE_FORMAT(dayno, '%Y-%m-%d') ASfirst FROM act_user_info WHERE app_name ='相机') a LEFTJOIN (SELECT uid, DATE_FORMAT(dayno, '%Y-%m-%d') ASday FROM act_user_info WHERE app_name ='相机') b ON a.uid = b.uid GROUPBYfirst;
for requested_topping in requested_toppings: if requested_topping == 'green peppers': print("Sorry, we are out of green peppers right now.") else: print("Adding " + requested_topping + '.') print("\nFinished making your pizza!")
确定列表不是空的
1 2 3 4 5 6 7 8
requested_toppings = []
if requested_toppings: for requested_topping in requested_toppings: print("Adding " + requested_topping + '.') print("\nFinished making your pizza!") else: print("Are you sure you want a plain pizza?")
for requested_topping in requested_toppings: if requested_topping in avaliable_toppings: print("Adding " + requested_topping + ".") else: print("Sorry, we don't have " + requested_topping + ".") print("\nFinished making your pizza!")
for name in favorite_languages.keys(): print(name.title())
遍历字典时,会默认遍历所有的键。因此for name in favorite_languages.keys():和for name in favorite_languages:效果相同。
下面遍历一下字典中的名字,但在名字为指定朋友的名字时,打印一条消息,指出其喜欢的语言:
1 2 3 4 5 6 7
friends = ['phil', 'sarah'] for name in favorite_languages.keys(): print(name.title()) if name in friends: print(" Hi " + name.title() + ", I see your favorite language is " + favorite_languages[name].title() + "!")
还可以使用keys()确定某个人是否接受了调查:
1 2
if'erin'notin favorite_languages.keys(): print("Erin, please take our poll!")
for name, languages in favorite_languages.items(): print("\n" + name.title() + " 's favorite languages are:") for language in languages: print("\t" + language.title())
defbuild_person(first_name, last_name, age=''): """返回一个字典,其中包含有关一个人的信息""" person = {'first': first_name, 'last': last_name} if age: person['age'] = age return person
defbuild_profile(first, last, **user_info): """创建一个字典,其中包含我们知道的有关用户的一切""" profile = {} profile['first_name'] = first profile['last_name'] = last for key, vaule in user_info.items(): profile[key] = value return profile
defmake_pizza(size, *toppings): """概述要制作的披萨""" print("\nMaking a " + str(size) + "-inch pizza with the following toppings:") for topping in toppings: print("- " + topping)
withopen(filename) as file_object: lines = file_object.readlines() for line in lines: print(line.rstrip())
方法readlines()从文件中读取每一行,并将其存储在一个列表中。
使用文件的内容
首先创建一个字符串,它包含文件中存储的所有数字,且没有任何空格:
1 2 3 4 5 6 7 8 9 10 11
filename = 'pi_digits.txt'
withopen(filename) as file_object: lines = file_object.readlines() pi_string = '' for line in lines: pi_string += line.rstrip() print(pi_string) print(len(pi_string))
withopen(filename) as file_object: lines = file_object.readlines() pi_string = '' for line in lines: pi_string += line.strip() birthday = input("Enter your birthday, in thje form mmddyy: ") if birthday in pi_string: print("Your birthday appears in the first million digits of pi!") else: print("Your birthday does not appear in the first million digits of pi.")
写入文件
保存数据最简单的方式之一是将其写入到文件中。
写入空文件
要将文本写入文件,在调用open()时需要提供另一个实参,告诉Python要写入打开的文件。
1 2 3 4
filename = 'programming.txt'
withopen(filename, 'w') as file_object: file_object.write("I love programming.")
withopen(filename, 'a') as file_object: file_object.write("I also love finding meaning in large datasets.\n") file_object.write("I love creating apps that can run in a browser.\n")
filename = 'numbers.json' withopen(filename, 'w') as f_obj: json.dump(numbers, f_obj)
先导入模块json,再创建一个数字列表。
通常使用文件扩展名.json来指出文件存储的数据为JSON格式。
使用函数json.dump()将数字列表存储到文件numbers.json中
使用 json.load()
1 2 3 4 5 6 7
import json
filename = 'numbers.json' withopen(filename) as f_obj: numbers = json.load(f_obj) print(numbers)
保存和读取用户生成的数据
来看这样一个例子:用户首次运行程序时被提示输入自己的名字,这样再次运行程序时就记住他了。
先存储用户的名字:
1 2 3 4 5 6 7 8
import json
username = input("What is your name? ")
filename = 'username.json' withopen(filename, 'w') as f_obj: json.dump(username, f_obj) print("We'll remember you when you come back, " + username + "!")
defget_stored_username(): """如果存储了用户名,就获取它""" filename = 'username.json' try: withopen(filename) as f_obj: username = json.load(f_obj) except FileNotFoundError: returnNone else: return username defget_new_username(): """提示用户输入用户名""" username = input("What is your name? ") filename = 'username.json' withopen(filename. 'w') as f_obj: json.dump(username, f_obj) defgreet_user(): """问候用户,并指出其名字""" username = get_stored_username() if username: print("Welcom back, " + username + "!") else: username = get_new_username() print("We'll remember you when you come back" + username + "!")
greet_user()
在这个版本中,每个函数都执行单一而清晰的任务。
测试代码
测试函数
下面是一个简单的函数,它接受名和姓并返回整洁的姓名:
1 2 3 4
defget_formatted_name(first, last): """生成整洁的姓名""" full_name = first + ' ' + last return full_name.title()
为核实函数像期望的那样工作,来编写一个使用这个函数的程序:
1 2 3 4 5 6 7 8 9 10 11 12 13
from name_function import get_formatted_name
print("Enter 'q' at any time to quit.") whileTrue: first = input("\nPlease give me a forst name: ") if first == 'q': break last = input("Please give me a last time: ") if last == 'q': break formatted_name = get_formatted_name(first, last) print("\tNeatly formatted name: " + formatted_name + '.')
defget_formatted_name(first, middle, last): """生成整洁的姓名""" full_name = first + ' ' + middle + ' ' + last return full_name.title()
这次运行测试代码,将会得到如下输出:
1 2 3 4 5 6 7 8 9 10 11
E ======================================================= ERROR: test_first_last_name (__main__.NamesTestCase) ------------------------------------------------------ ... ... ... ------------------------------------------------------ Ran 1 test in 0.000s
# 定义一个问题,并创建一个表示调查地AnonymousSurvey对象 question = "What language did you first learn to speak?" my_survey = AnonymousSurvey(question)
# 显示问题并存储答案 my_survey。show_question() print("Enter 'q' at any time to quit.\n") whileTrue: response = input("Language: ") if response == 'q': break my_survey.store_response(response) # 显示调查结果 print("\nThank you to everyone who participated in the survey!") my_survey.show_results()
import unittest from survey import AnonymousSurvey
classTestAnonymousSurvey(unittest.TestCase): """针对AnonymousSurvey类的测试""" deftest_store_single_response(self): """测试单个答案会被妥善地存储""" question = "What language did you first learn to speak?" my_survey = AnonymousSurvey(question) my_survey.store_response('English') self.assertIn('English', my_survey.responses) deftest_store_three_responses(self): """测试三个答案会被妥善地存储""" question = "What language did you first learn to speak?" my_survey = AnonymousSurvey(question) responses = ['English', 'Spanish', 'Mandarin'] for response in responses: my_survey.store_response(response) for response in responses: self.assertIn(response, my_survey.responses) unittest.main()
import unittest from survey import AnonymousSurvey
classTestAnonymousSurvey(unittest.TestCase): """针对AnonymousSurvey类的测试""" defsetUp(self): """ 创建一个调查对象和一组答案,供使用的测试方法使用 """ question = "What language did you first learn to speak?" self.my_survey = AnonymousSurvey(question) self.responses = ['English', 'Spanish', 'Mandarin'] deftest_store_single_response(self): """测试单个答案会被妥善地存储""" self.my_survey.store_response(self.responses[0]) self.assertIn(self.responses[0], self.my_survey.responses) deftest_store_three_responses(self): """测试三个答案会被妥善地存储""" for response in self.responses: self.my_survey.store_response(response) for response in self.responses: self.assertIn(response, self.my_survey.responses) unittest.main()
假设一个整点$P$的坐标是$(x,y)$,其中$0\leq x,y\leq Range$,那么可以令hash函数为H(P) = x * Range + y,这样对数据范围内的任意两个整点$P1$与$P2$,H(P1)都不会等于H(P2),就可以用H(P)来唯一地标识该整点P,接着便可以通过整数hash的方法进一步映射到较小的范围。
字符串hash是指将一个字符串S映射为一个整数,使得该整数可以尽可能唯一地代表字符串S。
为了讨论问题方便,假设字符串均由大写字母A~Z构成,不妨把 AZ 视为 025。接着按照26进制转换为10进制的思路,实现将字符串映射为整数的需求(转换成的整数最大为$26^{len}-1$,len为字符串长度)。
1 2 3 4 5 6
inthashFunc(char S[], int len){ //将字符串S转换为整数 int id = 0; for(int i = 0; i < len; i++) id = id * 26 + (S[i] - 'A'); //将26进制转换为10进制 return id; }
为了避免转换成的整数过大,需要注意 字符串长度len 不能太长。如果字符串中还有小写字母,可以把 AZ 作为 025,把 az 作为 2651,这样就变成了52进制转换为10进制的问题。
1 2 3 4 5 6 7 8 9 10 11
inthashFunc(char S[], int len){ int id = 0; for(int i = 0l i < len; i++) { if(isupper(S[i])) { id = id * 52 + (S[i] - 'A'); } elseif(islower(S[i])) { id = id * 52 + (S[i] - 'a'); } } return id; }
inthashFunc(char S[], int len){ int id = 0; for(int i = 0; i < len - 1; i++) id = id * 26 + (S[i] - 'A'); id = id * 10 + (S[len - 1] - '0'); return id; }
inthashFunc(char S[], int len){ //将字符串S转换为整数 int id = 0; for(int i = 0; i < len; i++) id = id * 26 + (S[i] - 'A'); //将26进制转换为10进制 return id; }
intmain(){ int N, M; int hashTable[26 * 26 * 26] = {0}; cin >> N >> M; string S[N]; for(int i = 0; i < N; i++) { cin >> S[i]; int id = hashFunc(S[i], 3); hashTable[id]++; //字符串出现的次数+1 } string tmp; for(int i = 0; i < M; i++) { cin >> tmp; int id = hashFunc(tmp, 3); cout << hashTable[id] << endl; //输出字符串出现的次数 } return0; }