推荐业务中自用的SQL用法总结

音乐数量

select date,count(tag_music) music
from
(
  select date,
(case 
when locate('zzz_Music_',events.content_tag)=0 
then 'None' 
else 
substr(events.content_tag,locate('zzz_Music_',events.content_tag)+10,instr(events.content_tag,'\"',locate('zzz_Music_',events.content_tag))-(locate('zzz_Music_',events.content_tag)+10))
end) tag_music
from events
where date between '2021-08-25' and '2021-08-31'
and event='imp_detail_page'
) a
group by date

人均曝光

select count(event)/count(distinct user_id) ratio
from events
where date between '2021-05-31' and '2021-08-31'
and event='imp_detail_page'

筛选各类别曝光前100

select id,cate,vv from (
select id,cate,vv,row_number() over(partition by cate order by vv desc) as rank1 from (
select id,cate,count(1) as vv 
from events 
group by id,cate) a ) b where rank1 <=100

一共三层嵌套:

第一层:通过groupby分类按照category计数vv
第二层:按category分根据vv排序
第三层:选取排序小于等于500的

二三实际上是为了一个目的:排序并筛选,只是sql比较傻,要向它解释排序筛选实际上是生成排序列然后截断

某一天中新用户中vv次数的比例分布

select vv,mount,count(vv) vvmount
from 
(
select c.*,count(distinct_id) over() as mount
from
(select distinct_id,count(1) as vv
from events
where event='imp_detail_page' and
date='2021-11-16' and
$is_first_day=1
group by distinct_id) c
) d
group by vv,mount
order by vv

用户前两首队列情况-refreshcount

select *
from
(
select distinct_id,content_id, time,rec_queue_name,row_number() over(PARTITION BY distinct_id ORDER BY time) as rank,refresh_count
from events
where event='imp_detail_page'
and date='2021-11-25'
and $is_first_day=1
and refresh_count in('1','2')
) a
where rank<17
order by time

视频完成度

select content_id,
  (case
when events.read_time/events.video_duration is not null and events.read_time/events.video_duration=events.read_time/events.video_duration
then
(case
when events.read_time/events.video_duration>1
then 1
when events.read_time/events.video_duration<0
then 0
else
events.read_time/events.video_duration
end)
else 0
end) as ratio
from events
where event='duration_detail_page'
and date='2021-11-19'

完成度VS完播率

select a.*,b.*
from(
select date,count(1) completeness80
from
(select date, content_id,
  (case
when events.read_time/events.video_duration is not null and events.read_time/events.video_duration=events.read_time/events.video_duration
then
(case
when events.read_time/events.video_duration>1
then 1
when events.read_time/events.video_duration<0
then 0
else
events.read_time/events.video_duration
end)
else 0
end) as ratio
from events
where event='duration_detail_page'
and date between '2021-11-24' and '2021-11-26') a
where ratio>0.8
group by date
) a
left join
(
select date,count(1)
from events
where event in ('video_play','video_play_80')
and date between '2021-11-24' and '2021-11-26'
group by date
) b
on a.date=b.date

查询不同用户快刷类型占比以及对应的浏览的视频分类的方差

select *
from
(
select id,shortviewnum,viewnum
from
(
select id,sum(shortview) as shortviewnum,count(play_duration) as viewnum
from
(
    select distinct_id as id, play_duration,if(play_duration<3000,1,0) as shortview
    from events
    where date between '2021-10-10' and '2021-10-12'
    and event='video_play_duration'
    and is_first_day=1
) a
group by id
) c
where viewnum>1
) everyoneview

left join

(
select id,variance(categorynum)
from
(
select id,content_category,count(1) as categorynum
from
(
    select distinct_id as id,content_category
    from events
    where date between '2021-10-10' and '2021-10-12'
    and event='duration_detail_page'
    andis_first_day=1
    and content_category is not null
    ) b1
group by id,content_category
) b2
group by id
) everyonevar

on everyonevar.id=everyoneview.id
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇