本文主要是介绍数仓学习笔记(6)——数仓搭建(DWS层),希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
目录
一、数仓搭建——DWS层
1、系统函数
1.1 nvl函数
1.2 日期处理函数
1.3 复杂数据类型定义
2、DWS层
2.1 用户主题
2.2 访客主题
2.3 商品主题
2.4 优惠卷主题
2.5 活动主题
2.6 地区主题
7.7 DWS层首日数据装载脚本
7.8 DWS层每日数据装载脚本
一、数仓搭建——DWS层
1、系统函数
1.1 nvl函数
1)基本语法
NVL(表达式1,表达式2)
如果表达式1为空值,NVL返回值为表达式2的值,否则返回表达式1的值。
该函数的目的是把一个空值(null)转换成一个实际的值。其表达式的值可以是数字型、字符型和日期型。但是表达式1和表达式2的数据类型必须为同一个类型。
2)案例实操
hive (gmall)> select nvl(1,0);
1
hive (gmall)> select nvl(null,"hello");
hello
1.2 日期处理函数
1.3 复杂数据类型定义
2、DWS层
DWS层的分区以及装载逻辑都是一致的,因此这里统一进行说明
2.1 用户主题
一般来说,用户是特指进行登录、注册了的,用user_id进行标识,而访客一般使用设备id进行标识,这里我们先说明用户主题
1)建表语句
DROP TABLE IF EXISTS dws_user_action_daycount;
CREATE EXTERNAL TABLE dws_user_action_daycount
(`user_id` STRING COMMENT '用户id',`login_count` BIGINT COMMENT '登录次数',`cart_count` BIGINT COMMENT '加入购物车次数',`favor_count` BIGINT COMMENT '收藏次数',`order_count` BIGINT COMMENT '下单次数',`order_activity_count` BIGINT COMMENT '订单参与活动次数',`order_activity_reduce_amount` DECIMAL(16,2) COMMENT '订单减免金额(活动)',`order_coupon_count` BIGINT COMMENT '订单用券次数',`order_coupon_reduce_amount` DECIMAL(16,2) COMMENT '订单减免金额(优惠券)',`order_original_amount` DECIMAL(16,2) COMMENT '订单单原始金额',`order_final_amount` DECIMAL(16,2) COMMENT '订单总金额',`payment_count` BIGINT COMMENT '支付次数',`payment_amount` DECIMAL(16,2) COMMENT '支付金额',`refund_order_count` BIGINT COMMENT '退单次数',`refund_order_num` BIGINT COMMENT '退单件数',`refund_order_amount` DECIMAL(16,2) COMMENT '退单金额',`refund_payment_count` BIGINT COMMENT '退款次数',`refund_payment_num` BIGINT COMMENT '退款件数',`refund_payment_amount` DECIMAL(16,2) COMMENT '退款金额',`coupon_get_count` BIGINT COMMENT '优惠券领取次数',`coupon_using_count` BIGINT COMMENT '优惠券使用(下单)次数',`coupon_used_count` BIGINT COMMENT '优惠券使用(支付)次数',`appraise_good_count` BIGINT COMMENT '好评数',`appraise_mid_count` BIGINT COMMENT '中评数',`appraise_bad_count` BIGINT COMMENT '差评数',`appraise_default_count` BIGINT COMMENT '默认评价数',`order_detail_stats` array<struct<sku_id:string,sku_num:bigint,order_count:bigint,activity_reduce_amount:decimal(16,2),coupon_reduce_amount:decimal(16,2),original_amount:decimal(16,2),final_amount:decimal(16,2)>> COMMENT '下单明细统计'
) COMMENT '每日用户行为'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_user_action_daycount/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
with
tmp_login as
(selectdt,user_id,count(*) login_countfrom dwd_page_logwhere user_id is not nulland last_page_id is nullgroup by dt,user_id
),
tmp_cf as
(selectdt,user_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom dwd_action_logwhere user_id is not nulland action_id in ('cart_add','favor_add')group by dt,user_id
),
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,count(*) order_count,sum(if(activity_reduce_amount>0,1,0)) order_activity_count,sum(if(coupon_reduce_amount>0,1,0)) order_coupon_count,sum(activity_reduce_amount) order_activity_reduce_amount,sum(coupon_reduce_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom dwd_order_infogroup by date_format(create_time,'yyyy-MM-dd'),user_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,user_id,count(*) payment_count,sum(payment_amount) payment_amountfrom dwd_payment_infogroup by date_format(callback_time,'yyyy-MM-dd'),user_id
),
tmp_ri as
(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom dwd_order_refund_infogroup by date_format(create_time,'yyyy-MM-dd'),user_id
),
tmp_rp as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,rp.user_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(rp.refund_amount) refund_payment_amountfrom(selectuser_id,order_id,sku_id,refund_amount,callback_timefrom dwd_refund_payment)rpleft join(selectuser_id,order_id,sku_id,refund_numfrom dwd_order_refund_info)rion rp.order_id=ri.order_idand rp.sku_id=rp.sku_idgroup by date_format(callback_time,'yyyy-MM-dd'),rp.user_id
),
tmp_coupon as
(selectcoalesce(coupon_get.dt,coupon_using.dt,coupon_used.dt) dt,coalesce(coupon_get.user_id,coupon_using.user_id,coupon_used.user_id) user_id,nvl(coupon_get_count,0) coupon_get_count,nvl(coupon_using_count,0) coupon_using_count,nvl(coupon_used_count,0) coupon_used_countfrom(selectdate_format(get_time,'yyyy-MM-dd') dt,user_id,count(*) coupon_get_countfrom dwd_coupon_usewhere get_time is not nullgroup by user_id,date_format(get_time,'yyyy-MM-dd'))coupon_getfull outer join(selectdate_format(using_time,'yyyy-MM-dd') dt,user_id,count(*) coupon_using_countfrom dwd_coupon_usewhere using_time is not nullgroup by user_id,date_format(using_time,'yyyy-MM-dd'))coupon_usingon coupon_get.dt=coupon_using.dtand coupon_get.user_id=coupon_using.user_idfull outer join(selectdate_format(used_time,'yyyy-MM-dd') dt,user_id,count(*) coupon_used_countfrom dwd_coupon_usewhere used_time is not nullgroup by user_id,date_format(used_time,'yyyy-MM-dd'))coupon_usedon nvl(coupon_get.dt,coupon_using.dt)=coupon_used.dtand nvl(coupon_get.user_id,coupon_using.user_id)=coupon_used.user_id
),
tmp_comment as
(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom dwd_comment_infogroup by date_format(create_time,'yyyy-MM-dd'),user_id
),
tmp_od as
(selectdt,user_id,collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',order_count,'activity_reduce_amount',activity_reduce_amount,'coupon_reduce_amount',coupon_reduce_amount,'original_amount',original_amount,'final_amount',final_amount)) order_detail_statsfrom(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,sku_id,sum(sku_num) sku_num,count(*) order_count,cast(sum(split_activity_amount) as decimal(16,2)) activity_reduce_amount,cast(sum(split_coupon_amount) as decimal(16,2)) coupon_reduce_amount,cast(sum(original_amount) as decimal(16,2)) original_amount,cast(sum(split_final_amount) as decimal(16,2)) final_amountfrom dwd_order_detailgroup by date_format(create_time,'yyyy-MM-dd'),user_id,sku_id)t1group by dt,user_id
)
insert overwrite table dws_user_action_daycount partition(dt)
selectcoalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id,tmp_od.user_id),nvl(login_count,0),nvl(cart_count,0),nvl(favor_count,0),nvl(order_count,0),nvl(order_activity_count,0),nvl(order_activity_reduce_amount,0),nvl(order_coupon_count,0),nvl(order_coupon_reduce_amount,0),nvl(order_original_amount,0),nvl(order_final_amount,0),nvl(payment_count,0),nvl(payment_amount,0),nvl(refund_order_count,0),nvl(refund_order_num,0),nvl(refund_order_amount,0),nvl(refund_payment_count,0),nvl(refund_payment_num,0),nvl(refund_payment_amount,0),nvl(coupon_get_count,0),nvl(coupon_using_count,0),nvl(coupon_used_count,0),nvl(appraise_good_count,0),nvl(appraise_mid_count,0),nvl(appraise_bad_count,0),nvl(appraise_default_count,0),order_detail_stats,coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt,tmp_comment.dt,tmp_coupon.dt,tmp_od.dt)
from tmp_login
full outer join tmp_cf
on tmp_login.user_id=tmp_cf.user_id
and tmp_login.dt=tmp_cf.dt
full outer join tmp_order
on coalesce(tmp_login.user_id,tmp_cf.user_id)=tmp_order.user_id
and coalesce(tmp_login.dt,tmp_cf.dt)=tmp_order.dt
full outer join tmp_pay
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id)=tmp_pay.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt)=tmp_pay.dt
full outer join tmp_ri
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id)=tmp_ri.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt)=tmp_ri.dt
full outer join tmp_rp
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id)=tmp_rp.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt)=tmp_rp.dt
full outer join tmp_comment
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id)=tmp_comment.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt)=tmp_comment.dt
full outer join tmp_coupon
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id)=tmp_coupon.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt,tmp_comment.dt)=tmp_coupon.dt
full outer join tmp_od
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id)=tmp_od.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt,tmp_comment.dt,tmp_coupon.dt)=tmp_od.dt;
(2)每日装载
with
tmp_login as
(selectuser_id,count(*) login_countfrom dwd_page_logwhere dt='2020-06-15'and user_id is not nulland last_page_id is nullgroup by user_id
),
tmp_cf as
(selectuser_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom dwd_action_logwhere dt='2020-06-15'and user_id is not nulland action_id in ('cart_add','favor_add')group by user_id
),
tmp_order as
(selectuser_id,count(*) order_count,sum(if(activity_reduce_amount>0,1,0)) order_activity_count,sum(if(coupon_reduce_amount>0,1,0)) order_coupon_count,sum(activity_reduce_amount) order_activity_reduce_amount,sum(coupon_reduce_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom dwd_order_infowhere (dt='2020-06-15'or dt='9999-99-99')and date_format(create_time,'yyyy-MM-dd')='2020-06-15'group by user_id
),
tmp_pay as
(selectuser_id,count(*) payment_count,sum(payment_amount) payment_amountfrom dwd_payment_infowhere dt='2020-06-15'group by user_id
),
tmp_ri as
(selectuser_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom dwd_order_refund_infowhere dt='2020-06-15'group by user_id
),
tmp_rp as
(selectrp.user_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(rp.refund_amount) refund_payment_amountfrom(selectuser_id,order_id,sku_id,refund_amountfrom dwd_refund_paymentwhere dt='2020-06-15')rpleft join(selectuser_id,order_id,sku_id,refund_numfrom dwd_order_refund_infowhere dt>=date_add('2020-06-15',-15))rion rp.order_id=ri.order_idand rp.sku_id=rp.sku_idgroup by rp.user_id
),
tmp_coupon as
(selectuser_id,sum(if(date_format(get_time,'yyyy-MM-dd')='2020-06-15',1,0)) coupon_get_count,sum(if(date_format(using_time,'yyyy-MM-dd')='2020-06-15',1,0)) coupon_using_count,sum(if(date_format(used_time,'yyyy-MM-dd')='2020-06-15',1,0)) coupon_used_countfrom dwd_coupon_usewhere (dt='2020-06-15' or dt='9999-99-99')and (date_format(get_time, 'yyyy-MM-dd') = '2020-06-15'or date_format(using_time,'yyyy-MM-dd')='2020-06-15'or date_format(used_time,'yyyy-MM-dd')='2020-06-15')group by user_id
),
tmp_comment as
(selectuser_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom dwd_comment_infowhere dt='2020-06-15'group by user_id
),
tmp_od as
(selectuser_id,collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',order_count,'activity_reduce_amount',activity_reduce_amount,'coupon_reduce_amount',coupon_reduce_amount,'original_amount',original_amount,'final_amount',final_amount)) order_detail_statsfrom(selectuser_id,sku_id,sum(sku_num) sku_num,count(*) order_count,cast(sum(split_activity_amount) as decimal(16,2)) activity_reduce_amount,cast(sum(split_coupon_amount) as decimal(16,2)) coupon_reduce_amount,cast(sum(original_amount) as decimal(16,2)) original_amount,cast(sum(split_final_amount) as decimal(16,2)) final_amountfrom dwd_order_detailwhere dt='2020-06-15'group by user_id,sku_id)t1group by user_id
)
insert overwrite table dws_user_action_daycount partition(dt='2020-06-15')
selectcoalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id,tmp_od.user_id),nvl(login_count,0),nvl(cart_count,0),nvl(favor_count,0),nvl(order_count,0),nvl(order_activity_count,0),nvl(order_activity_reduce_amount,0),nvl(order_coupon_count,0),nvl(order_coupon_reduce_amount,0),nvl(order_original_amount,0),nvl(order_final_amount,0),nvl(payment_count,0),nvl(payment_amount,0),nvl(refund_order_count,0),nvl(refund_order_num,0),nvl(refund_order_amount,0),nvl(refund_payment_count,0),nvl(refund_payment_num,0),nvl(refund_payment_amount,0),nvl(coupon_get_count,0),nvl(coupon_using_count,0),nvl(coupon_used_count,0),nvl(appraise_good_count,0),nvl(appraise_mid_count,0),nvl(appraise_bad_count,0),nvl(appraise_default_count,0),order_detail_stats
from tmp_login
full outer join tmp_cf on tmp_login.user_id=tmp_cf.user_id
full outer join tmp_order on coalesce(tmp_login.user_id,tmp_cf.user_id)=tmp_order.user_id
full outer join tmp_pay on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id)=tmp_pay.user_id
full outer join tmp_ri on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id)=tmp_ri.user_id
full outer join tmp_rp on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id)=tmp_rp.user_id
full outer join tmp_comment on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id)=tmp_comment.user_id
full outer join tmp_coupon on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id)=tmp_coupon.user_id
full outer join tmp_od on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id)=tmp_od.user_id;
2.2 访客主题
商品主题和用户主题思路类似,而访客主题略有点区别,因此这里先讲述访客主题
1)建表语句
DROP TABLE IF EXISTS dws_visitor_action_daycount;
CREATE EXTERNAL TABLE dws_visitor_action_daycount
(`mid_id` STRING COMMENT '设备id',`brand` STRING COMMENT '设备品牌',`model` STRING COMMENT '设备型号',`is_new` STRING COMMENT '是否首次访问',`channel` ARRAY<STRING> COMMENT '渠道',`os` ARRAY<STRING> COMMENT '操作系统',`area_code` ARRAY<STRING> COMMENT '地区ID',`version_code` ARRAY<STRING> COMMENT '应用版本',`visit_count` BIGINT COMMENT '访问次数',`page_stats` ARRAY<STRUCT<page_id:STRING,page_count:BIGINT,during_time:BIGINT>> COMMENT '页面访问统计'
) COMMENT '每日设备行为表'
PARTITIONED BY(`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_visitor_action_daycount'
TBLPROPERTIES ("parquet.compression"="lzo");
2)数据装载
insert overwrite table dws_visitor_action_daycount partition(dt='2020-06-14')
selectt1.mid_id,t1.brand,t1.model,t1.is_new,t1.channel,t1.os,t1.area_code,t1.version_code,t1.visit_count,t3.page_stats
from
(selectmid_id,brand,model,if(array_contains(collect_set(is_new),'0'),'0','1') is_new,--ods_page_log中,同一天内,同一设备的is_new字段,可能全部为1,可能全部为0,也可能部分为0,部分为1(卸载重装),故做该处理collect_set(channel) channel,collect_set(os) os,collect_set(area_code) area_code,collect_set(version_code) version_code,sum(if(last_page_id is null,1,0)) visit_countfrom dwd_page_logwhere dt='2020-06-14'and last_page_id is nullgroup by mid_id,model,brand
)t1
join
(selectmid_id,brand,model,collect_set(named_struct('page_id',page_id,'page_count',page_count,'during_time',during_time)) page_statsfrom(selectmid_id,brand,model,page_id,count(*) page_count,sum(during_time) during_timefrom dwd_page_logwhere dt='2020-06-14'group by mid_id,model,brand,page_id)t2group by mid_id,model,brand
)t3
on t1.mid_id=t3.mid_id
and t1.brand=t3.brand
and t1.model=t3.model;
2.3 商品主题
1)建表语句
DROP TABLE IF EXISTS dws_sku_action_daycount;
CREATE EXTERNAL TABLE dws_sku_action_daycount
(`sku_id` STRING COMMENT 'sku_id',`order_count` BIGINT COMMENT '被下单次数',`order_num` BIGINT COMMENT '被下单件数',`order_activity_count` BIGINT COMMENT '参与活动被下单次数',`order_coupon_count` BIGINT COMMENT '使用优惠券被下单次数',`order_activity_reduce_amount` DECIMAL(16,2) COMMENT '优惠金额(活动)',`order_coupon_reduce_amount` DECIMAL(16,2) COMMENT '优惠金额(优惠券)',`order_original_amount` DECIMAL(16,2) COMMENT '被下单原价金额',`order_final_amount` DECIMAL(16,2) COMMENT '被下单最终金额',`payment_count` BIGINT COMMENT '被支付次数',`payment_num` BIGINT COMMENT '被支付件数',`payment_amount` DECIMAL(16,2) COMMENT '被支付金额',`refund_order_count` BIGINT COMMENT '被退单次数',`refund_order_num` BIGINT COMMENT '被退单件数',`refund_order_amount` DECIMAL(16,2) COMMENT '被退单金额',`refund_payment_count` BIGINT COMMENT '被退款次数',`refund_payment_num` BIGINT COMMENT '被退款件数',`refund_payment_amount` DECIMAL(16,2) COMMENT '被退款金额',`cart_count` BIGINT COMMENT '被加入购物车次数',`favor_count` BIGINT COMMENT '被收藏次数',`appraise_good_count` BIGINT COMMENT '好评数',`appraise_mid_count` BIGINT COMMENT '中评数',`appraise_bad_count` BIGINT COMMENT '差评数',`appraise_default_count` BIGINT COMMENT '默认评价数'
) COMMENT '每日商品行为'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_sku_action_daycount/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
with
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,sku_id,count(*) order_count,sum(sku_num) order_num,sum(if(split_activity_amount>0,1,0)) order_activity_count,sum(if(split_coupon_amount>0,1,0)) order_coupon_count,sum(split_activity_amount) order_activity_reduce_amount,sum(split_coupon_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom dwd_order_detailgroup by date_format(create_time,'yyyy-MM-dd'),sku_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,sku_id,count(*) payment_count,sum(sku_num) payment_num,sum(split_final_amount) payment_amountfrom dwd_order_detail odjoin(selectorder_id,callback_timefrom dwd_payment_infowhere callback_time is not null)pi on pi.order_id=od.order_idgroup by date_format(callback_time,'yyyy-MM-dd'),sku_id
),
tmp_ri as
(selectdate_format(create_time,'yyyy-MM-dd') dt,sku_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom dwd_order_refund_infogroup by date_format(create_time,'yyyy-MM-dd'),sku_id
),
tmp_rp as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,rp.sku_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(refund_amount) refund_payment_amountfrom(selectorder_id,sku_id,refund_amount,callback_timefrom dwd_refund_payment)rpleft join(selectorder_id,sku_id,refund_numfrom dwd_order_refund_info)rion rp.order_id=ri.order_idand rp.sku_id=ri.sku_idgroup by date_format(callback_time,'yyyy-MM-dd'),rp.sku_id
),
tmp_cf as
(selectdt,item sku_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom dwd_action_logwhere action_id in ('cart_add','favor_add')group by dt,item
),
tmp_comment as
(selectdate_format(create_time,'yyyy-MM-dd') dt,sku_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom dwd_comment_infogroup by date_format(create_time,'yyyy-MM-dd'),sku_id
)
insert overwrite table dws_sku_action_daycount partition(dt)
selectsku_id,sum(order_count),sum(order_num),sum(order_activity_count),sum(order_coupon_count),sum(order_activity_reduce_amount),sum(order_coupon_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_num),sum(payment_amount),sum(refund_order_count),sum(refund_order_num),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_num),sum(refund_payment_amount),sum(cart_count),sum(favor_count),sum(appraise_good_count),sum(appraise_mid_count),sum(appraise_bad_count),sum(appraise_default_count),dt
from
(selectdt,sku_id,order_count,order_num,order_activity_count,order_coupon_count,order_activity_reduce_amount,order_coupon_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_orderunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_num,payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_payunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,refund_order_count,refund_order_num,refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_riunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,refund_payment_count,refund_payment_num,refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_rpunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,cart_count,favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_cfunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,appraise_good_count,appraise_mid_count,appraise_bad_count,appraise_default_countfrom tmp_comment
)t1
group by dt,sku_id;
(2)每日装载
with
tmp_order as
(selectsku_id,count(*) order_count,sum(sku_num) order_num,sum(if(split_activity_amount>0,1,0)) order_activity_count,sum(if(split_coupon_amount>0,1,0)) order_coupon_count,sum(split_activity_amount) order_activity_reduce_amount,sum(split_coupon_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom dwd_order_detailwhere dt='2020-06-15'group by sku_id
),
tmp_pay as
(selectsku_id,count(*) payment_count,sum(sku_num) payment_num,sum(split_final_amount) payment_amountfrom dwd_order_detailwhere (dt='2020-06-15'or dt=date_add('2020-06-15',-1))and order_id in(select order_id from dwd_payment_info where dt='2020-06-15')group by sku_id
),
tmp_ri as
(selectsku_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom dwd_order_refund_infowhere dt='2020-06-15'group by sku_id
),
tmp_rp as
(selectrp.sku_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(refund_amount) refund_payment_amountfrom(selectorder_id,sku_id,refund_amountfrom dwd_refund_paymentwhere dt='2020-06-15')rpleft join(selectorder_id,sku_id,refund_numfrom dwd_order_refund_infowhere dt>=date_add('2020-06-15',-15))rion rp.order_id=ri.order_idand rp.sku_id=ri.sku_idgroup by rp.sku_id
),
tmp_cf as
(selectitem sku_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom dwd_action_logwhere dt='2020-06-15'and action_id in ('cart_add','favor_add')group by item
),
tmp_comment as
(selectsku_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom dwd_comment_infowhere dt='2020-06-15'group by sku_id
)
insert overwrite table dws_sku_action_daycount partition(dt='2020-06-15')
selectsku_id,sum(order_count),sum(order_num),sum(order_activity_count),sum(order_coupon_count),sum(order_activity_reduce_amount),sum(order_coupon_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_num),sum(payment_amount),sum(refund_order_count),sum(refund_order_num),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_num),sum(refund_payment_amount),sum(cart_count),sum(favor_count),sum(appraise_good_count),sum(appraise_mid_count),sum(appraise_bad_count),sum(appraise_default_count)
from
(selectsku_id,order_count,order_num,order_activity_count,order_coupon_count,order_activity_reduce_amount,order_coupon_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_orderunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_num,payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_payunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,refund_order_count,refund_order_num,refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_riunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,refund_payment_count,refund_payment_num,refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_rpunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,cart_count,favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_cfunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,appraise_good_count,appraise_mid_count,appraise_bad_count,appraise_default_countfrom tmp_comment
)t1
group by sku_id;
2.4 优惠卷主题
1)建表语句
DROP TABLE IF EXISTS dws_coupon_info_daycount;
CREATE EXTERNAL TABLE dws_coupon_info_daycount(`coupon_id` STRING COMMENT '优惠券ID',`get_count` BIGINT COMMENT '被领取次数',`order_count` BIGINT COMMENT '被使用(下单)次数', `order_reduce_amount` DECIMAL(16,2) COMMENT '用券下单优惠金额',`order_original_amount` DECIMAL(16,2) COMMENT '用券订单原价金额',`order_final_amount` DECIMAL(16,2) COMMENT '用券下单最终金额',`payment_count` BIGINT COMMENT '被使用(支付)次数',`payment_reduce_amount` DECIMAL(16,2) COMMENT '用券支付优惠金额',`payment_amount` DECIMAL(16,2) COMMENT '用券支付总金额',`expire_count` BIGINT COMMENT '过期次数'
) COMMENT '每日活动统计'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_coupon_info_daycount/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
with
tmp_cu as
(selectcoalesce(coupon_get.dt,coupon_using.dt,coupon_used.dt,coupon_exprie.dt) dt,coalesce(coupon_get.coupon_id,coupon_using.coupon_id,coupon_used.coupon_id,coupon_exprie.coupon_id) coupon_id,nvl(get_count,0) get_count,nvl(order_count,0) order_count,nvl(payment_count,0) payment_count,nvl(expire_count,0) expire_countfrom(selectdate_format(get_time,'yyyy-MM-dd') dt,coupon_id,count(*) get_countfrom dwd_coupon_usegroup by date_format(get_time,'yyyy-MM-dd'),coupon_id)coupon_getfull outer join(selectdate_format(using_time,'yyyy-MM-dd') dt,coupon_id,count(*) order_countfrom dwd_coupon_usewhere using_time is not nullgroup by date_format(using_time,'yyyy-MM-dd'),coupon_id)coupon_usingon coupon_get.dt=coupon_using.dtand coupon_get.coupon_id=coupon_using.coupon_idfull outer join(selectdate_format(used_time,'yyyy-MM-dd') dt,coupon_id,count(*) payment_countfrom dwd_coupon_usewhere used_time is not nullgroup by date_format(used_time,'yyyy-MM-dd'),coupon_id)coupon_usedon nvl(coupon_get.dt,coupon_using.dt)=coupon_used.dtand nvl(coupon_get.coupon_id,coupon_using.coupon_id)=coupon_used.coupon_idfull outer join(selectdate_format(expire_time,'yyyy-MM-dd') dt,coupon_id,count(*) expire_countfrom dwd_coupon_usewhere expire_time is not nullgroup by date_format(expire_time,'yyyy-MM-dd'),coupon_id)coupon_exprieon coalesce(coupon_get.dt,coupon_using.dt,coupon_used.dt)=coupon_exprie.dtand coalesce(coupon_get.coupon_id,coupon_using.coupon_id,coupon_used.coupon_id)=coupon_exprie.coupon_id
),
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,coupon_id,sum(split_coupon_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom dwd_order_detailwhere coupon_id is not nullgroup by date_format(create_time,'yyyy-MM-dd'),coupon_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,coupon_id,sum(split_coupon_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom(selectorder_id,coupon_id,split_coupon_amount,split_final_amountfrom dwd_order_detailwhere coupon_id is not null)odjoin(selectorder_id,callback_timefrom dwd_payment_info)pion od.order_id=pi.order_idgroup by date_format(callback_time,'yyyy-MM-dd'),coupon_id
)
insert overwrite table dws_coupon_info_daycount partition(dt)
selectcoupon_id,sum(get_count),sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount),sum(expire_count),dt
from
(selectdt,coupon_id,get_count,order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,0 payment_reduce_amount,0 payment_amount,expire_countfrom tmp_cuunion allselectdt,coupon_id,0 get_count,0 order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amount,0 expire_countfrom tmp_orderunion allselectdt,coupon_id,0 get_count,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,payment_reduce_amount,payment_amount,0 expire_countfrom tmp_pay
)t1
group by dt,coupon_id;
(2)每日装载
with
tmp_cu as
(selectcoupon_id,sum(if(date_format(get_time,'yyyy-MM-dd')='2020-06-15',1,0)) get_count,sum(if(date_format(using_time,'yyyy-MM-dd')='2020-06-15',1,0)) order_count,sum(if(date_format(used_time,'yyyy-MM-dd')='2020-06-15',1,0)) payment_count,sum(if(date_format(expire_time,'yyyy-MM-dd')='2020-06-15',1,0)) expire_countfrom dwd_coupon_usewhere dt='9999-99-99'or dt='2020-06-15'group by coupon_id
),
tmp_order as
(selectcoupon_id,sum(split_coupon_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom dwd_order_detailwhere dt='2020-06-15'and coupon_id is not nullgroup by coupon_id
),
tmp_pay as
(selectcoupon_id,sum(split_coupon_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom dwd_order_detailwhere (dt='2020-06-15'or dt=date_add('2020-06-15',-1))and coupon_id is not nulland order_id in(select order_id from dwd_payment_info where dt='2020-06-15')group by coupon_id
)
insert overwrite table dws_coupon_info_daycount partition(dt='2020-06-15')
selectcoupon_id,sum(get_count),sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount),sum(expire_count)
from
(selectcoupon_id,get_count,order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,0 payment_reduce_amount,0 payment_amount,expire_countfrom tmp_cuunion allselectcoupon_id,0 get_count,0 order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amount,0 expire_countfrom tmp_orderunion allselectcoupon_id,0 get_count,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,payment_reduce_amount,payment_amount,0 expire_countfrom tmp_pay
)t1
group by coupon_id;
2.5 活动主题
1)建表语句
DROP TABLE IF EXISTS dws_activity_info_daycount;
CREATE EXTERNAL TABLE dws_activity_info_daycount(`activity_rule_id` STRING COMMENT '活动规则ID',`activity_id` STRING COMMENT '活动ID',`order_count` BIGINT COMMENT '参与某活动某规则下单次数', `order_reduce_amount` DECIMAL(16,2) COMMENT '参与某活动某规则下单减免金额',`order_original_amount` DECIMAL(16,2) COMMENT '参与某活动某规则下单原始金额',`order_final_amount` DECIMAL(16,2) COMMENT '参与某活动某规则下单最终金额',`payment_count` BIGINT COMMENT '参与某活动某规则支付次数',`payment_reduce_amount` DECIMAL(16,2) COMMENT '参与某活动某规则支付减免金额',`payment_amount` DECIMAL(16,2) COMMENT '参与某活动某规则支付金额'
) COMMENT '每日活动统计'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_activity_info_daycount/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
with
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,activity_rule_id,activity_id,count(*) order_count,sum(split_activity_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom dwd_order_detailwhere activity_id is not nullgroup by date_format(create_time,'yyyy-MM-dd'),activity_rule_id,activity_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,activity_rule_id,activity_id,count(*) payment_count,sum(split_activity_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom(selectactivity_rule_id,activity_id,order_id,split_activity_amount,split_final_amountfrom dwd_order_detailwhere activity_id is not null)odjoin(selectorder_id,callback_timefrom dwd_payment_info)pion od.order_id=pi.order_idgroup by date_format(callback_time,'yyyy-MM-dd'),activity_rule_id,activity_id
)
insert overwrite table dws_activity_info_daycount partition(dt)
selectactivity_rule_id,activity_id,sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount),dt
from
(selectdt,activity_rule_id,activity_id,order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amountfrom tmp_orderunion allselectdt,activity_rule_id,activity_id,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_reduce_amount,payment_amountfrom tmp_pay
)t1
group by dt,activity_rule_id,activity_id;
(2)每日装载
with
tmp_order as
(selectactivity_rule_id,activity_id,count(*) order_count,sum(split_activity_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom dwd_order_detailwhere dt='2020-06-15'and activity_id is not nullgroup by activity_rule_id,activity_id
),
tmp_pay as
(selectactivity_rule_id,activity_id,count(*) payment_count,sum(split_activity_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom dwd_order_detailwhere (dt='2020-06-15'or dt=date_add('2020-06-15',-1))and activity_id is not nulland order_id in(select order_id from dwd_payment_info where dt='2020-06-15')group by activity_rule_id,activity_id
)
insert overwrite table dws_activity_info_daycount partition(dt='2020-06-15')
selectactivity_rule_id,activity_id,sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount)
from
(selectactivity_rule_id,activity_id,order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amountfrom tmp_orderunion allselectactivity_rule_id,activity_id,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_reduce_amount,payment_amountfrom tmp_pay
)t1
group by activity_rule_id,activity_id;
2.6 地区主题
1)建表语句
DROP TABLE IF EXISTS dws_area_stats_daycount;
CREATE EXTERNAL TABLE dws_area_stats_daycount(`province_id` STRING COMMENT '地区编号',`visit_count` BIGINT COMMENT '访问次数',`login_count` BIGINT COMMENT '登录次数',`visitor_count` BIGINT COMMENT '访客人数',`user_count` BIGINT COMMENT '用户人数',`order_count` BIGINT COMMENT '下单次数',`order_original_amount` DECIMAL(16,2) COMMENT '下单原始金额',`order_final_amount` DECIMAL(16,2) COMMENT '下单最终金额',`payment_count` BIGINT COMMENT '支付次数',`payment_amount` DECIMAL(16,2) COMMENT '支付金额',`refund_order_count` BIGINT COMMENT '退单次数',`refund_order_amount` DECIMAL(16,2) COMMENT '退单金额',`refund_payment_count` BIGINT COMMENT '退款次数',`refund_payment_amount` DECIMAL(16,2) COMMENT '退款金额'
) COMMENT '每日地区统计表'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_area_stats_daycount/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)数据装载
(1)首日装载
with
tmp_vu as
(selectdt,id province_id,visit_count,login_count,visitor_count,user_countfrom(selectdt,area_code,count(*) visit_count,--访客访问次数count(user_id) login_count,--用户访问次数,等价于sum(if(user_id is not null,1,0))count(distinct(mid_id)) visitor_count,--访客人数count(distinct(user_id)) user_count--用户人数from dwd_page_logwhere last_page_id is nullgroup by dt,area_code)tmpleft join dim_base_province areaon tmp.area_code=area.area_code
),
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,province_id,count(*) order_count,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom dwd_order_infogroup by date_format(create_time,'yyyy-MM-dd'),province_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,province_id,count(*) payment_count,sum(payment_amount) payment_amountfrom dwd_payment_infogroup by date_format(callback_time,'yyyy-MM-dd'),province_id
),
tmp_ro as
(selectdate_format(create_time,'yyyy-MM-dd') dt,province_id,count(*) refund_order_count,sum(refund_amount) refund_order_amountfrom dwd_order_refund_infogroup by date_format(create_time,'yyyy-MM-dd'),province_id
),
tmp_rp as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,province_id,count(*) refund_payment_count,sum(refund_amount) refund_payment_amountfrom dwd_refund_paymentgroup by date_format(callback_time,'yyyy-MM-dd'),province_id
)
insert overwrite table dws_area_stats_daycount partition(dt)
selectprovince_id,sum(visit_count),sum(login_count),sum(visitor_count),sum(user_count),sum(order_count),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_amount),sum(refund_order_count),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_amount),dt
from
(selectdt,province_id,visit_count,login_count,visitor_count,user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_vuunion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,order_count,order_original_amount,order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_orderunion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,payment_count,payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_payunion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,refund_order_count,refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_rounion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,refund_payment_count,refund_payment_amountfrom tmp_rp
)t1
group by dt,province_id;
(2)每日装载
with
tmp_vu as
(selectid province_id,visit_count,login_count,visitor_count,user_countfrom(selectarea_code,count(*) visit_count,--访客访问次数count(user_id) login_count,--用户访问次数,等价于sum(if(user_id is not null,1,0))count(distinct(mid_id)) visitor_count,--访客人数count(distinct(user_id)) user_count--用户人数from dwd_page_logwhere dt='2020-06-15'and last_page_id is nullgroup by area_code)tmpleft join dim_base_province areaon tmp.area_code=area.area_code
),
tmp_order as
(selectprovince_id,count(*) order_count,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom dwd_order_infowhere dt='2020-06-15'or dt='9999-99-99'and date_format(create_time,'yyyy-MM-dd')='2020-06-15'group by province_id
),
tmp_pay as
(selectprovince_id,count(*) payment_count,sum(payment_amount) payment_amountfrom dwd_payment_infowhere dt='2020-06-15'group by province_id
),
tmp_ro as
(selectprovince_id,count(*) refund_order_count,sum(refund_amount) refund_order_amountfrom dwd_order_refund_infowhere dt='2020-06-15'group by province_id
),
tmp_rp as
(selectprovince_id,count(*) refund_payment_count,sum(refund_amount) refund_payment_amountfrom dwd_refund_paymentwhere dt='2020-06-15'group by province_id
)
insert overwrite table dws_area_stats_daycount partition(dt='2020-06-15')
selectprovince_id,sum(visit_count),sum(login_count),sum(visitor_count),sum(user_count),sum(order_count),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_amount),sum(refund_order_count),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_amount)
from
(selectprovince_id,visit_count,login_count,visitor_count,user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_vuunion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,order_count,order_original_amount,order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_orderunion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,payment_count,payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_payunion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,refund_order_count,refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_rounion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,refund_payment_count,refund_payment_amountfrom tmp_rp
)t1
group by province_id;
7.7 DWS层首日数据装载脚本
#!/bin/bashAPP=gmallif [ -n "$2" ] ;thendo_date=$2
else echo "请传入日期参数"exit
fidws_visitor_action_daycount="
insert overwrite table ${APP}.dws_visitor_action_daycount partition(dt='$do_date')
selectt1.mid_id,t1.brand,t1.model,t1.is_new,t1.channel,t1.os,t1.area_code,t1.version_code,t1.visit_count,t3.page_stats
from
(selectmid_id,brand,model,if(array_contains(collect_set(is_new),'0'),'0','1') is_new,--ods_page_log中,同一天内,同一设备的is_new字段,可能全部为1,可能全部为0,也可能部分为0,部分为1(卸载重装),故做该处理collect_set(channel) channel,collect_set(os) os,collect_set(area_code) area_code,collect_set(version_code) version_code,sum(if(last_page_id is null,1,0)) visit_countfrom ${APP}.dwd_page_logwhere dt='$do_date'and last_page_id is nullgroup by mid_id,model,brand
)t1
join
(selectmid_id,brand,model,collect_set(named_struct('page_id',page_id,'page_count',page_count,'during_time',during_time)) page_statsfrom(selectmid_id,brand,model,page_id,count(*) page_count,sum(during_time) during_timefrom ${APP}.dwd_page_logwhere dt='$do_date'group by mid_id,model,brand,page_id)t2group by mid_id,model,brand
)t3
on t1.mid_id=t3.mid_id
and t1.brand=t3.brand
and t1.model=t3.model;
"dws_area_stats_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp_vu as
(selectdt,id province_id,visit_count,login_count,visitor_count,user_countfrom(selectdt,area_code,count(*) visit_count,--访客访问次数count(user_id) login_count,--用户访问次数,等价于sum(if(user_id is not null,1,0))count(distinct(mid_id)) visitor_count,--访客人数count(distinct(user_id)) user_count--用户人数from ${APP}.dwd_page_logwhere last_page_id is nullgroup by dt,area_code)tmpleft join ${APP}.dim_base_province areaon tmp.area_code=area.area_code
),
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,province_id,count(*) order_count,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom ${APP}.dwd_order_infogroup by date_format(create_time,'yyyy-MM-dd'),province_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,province_id,count(*) payment_count,sum(payment_amount) payment_amountfrom ${APP}.dwd_payment_infogroup by date_format(callback_time,'yyyy-MM-dd'),province_id
),
tmp_ro as
(selectdate_format(create_time,'yyyy-MM-dd') dt,province_id,count(*) refund_order_count,sum(refund_amount) refund_order_amountfrom ${APP}.dwd_order_refund_infogroup by date_format(create_time,'yyyy-MM-dd'),province_id
),
tmp_rp as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,province_id,count(*) refund_payment_count,sum(refund_amount) refund_payment_amountfrom ${APP}.dwd_refund_paymentgroup by date_format(callback_time,'yyyy-MM-dd'),province_id
)
insert overwrite table ${APP}.dws_area_stats_daycount partition(dt)
selectprovince_id,sum(visit_count),sum(login_count),sum(visitor_count),sum(user_count),sum(order_count),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_amount),sum(refund_order_count),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_amount),dt
from
(selectdt,province_id,visit_count,login_count,visitor_count,user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_vuunion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,order_count,order_original_amount,order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_orderunion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,payment_count,payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_payunion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,refund_order_count,refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_rounion allselectdt,province_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,refund_payment_count,refund_payment_amountfrom tmp_rp
)t1
group by dt,province_id;
"dws_user_action_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp_login as
(selectdt,user_id,count(*) login_countfrom ${APP}.dwd_page_logwhere user_id is not nulland last_page_id is nullgroup by dt,user_id
),
tmp_cf as
(selectdt,user_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom ${APP}.dwd_action_logwhere user_id is not nulland action_id in ('cart_add','favor_add')group by dt,user_id
),
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,count(*) order_count,sum(if(activity_reduce_amount>0,1,0)) order_activity_count,sum(if(coupon_reduce_amount>0,1,0)) order_coupon_count,sum(activity_reduce_amount) order_activity_reduce_amount,sum(coupon_reduce_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom ${APP}.dwd_order_infogroup by date_format(create_time,'yyyy-MM-dd'),user_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,user_id,count(*) payment_count,sum(payment_amount) payment_amountfrom ${APP}.dwd_payment_infogroup by date_format(callback_time,'yyyy-MM-dd'),user_id
),
tmp_ri as
(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom ${APP}.dwd_order_refund_infogroup by date_format(create_time,'yyyy-MM-dd'),user_id
),
tmp_rp as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,rp.user_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(rp.refund_amount) refund_payment_amountfrom(selectuser_id,order_id,sku_id,refund_amount,callback_timefrom ${APP}.dwd_refund_payment)rpleft join(selectuser_id,order_id,sku_id,refund_numfrom ${APP}.dwd_order_refund_info)rion rp.order_id=ri.order_idand rp.sku_id=rp.sku_idgroup by date_format(callback_time,'yyyy-MM-dd'),rp.user_id
),
tmp_coupon as
(selectcoalesce(coupon_get.dt,coupon_using.dt,coupon_used.dt) dt,coalesce(coupon_get.user_id,coupon_using.user_id,coupon_used.user_id) user_id,nvl(coupon_get_count,0) coupon_get_count,nvl(coupon_using_count,0) coupon_using_count,nvl(coupon_used_count,0) coupon_used_countfrom(selectdate_format(get_time,'yyyy-MM-dd') dt,user_id,count(*) coupon_get_countfrom ${APP}.dwd_coupon_usewhere get_time is not nullgroup by user_id,date_format(get_time,'yyyy-MM-dd'))coupon_getfull outer join(selectdate_format(using_time,'yyyy-MM-dd') dt,user_id,count(*) coupon_using_countfrom ${APP}.dwd_coupon_usewhere using_time is not nullgroup by user_id,date_format(using_time,'yyyy-MM-dd'))coupon_usingon coupon_get.dt=coupon_using.dtand coupon_get.user_id=coupon_using.user_idfull outer join(selectdate_format(used_time,'yyyy-MM-dd') dt,user_id,count(*) coupon_used_countfrom ${APP}.dwd_coupon_usewhere used_time is not nullgroup by user_id,date_format(used_time,'yyyy-MM-dd'))coupon_usedon nvl(coupon_get.dt,coupon_using.dt)=coupon_used.dtand nvl(coupon_get.user_id,coupon_using.user_id)=coupon_used.user_id
),
tmp_comment as
(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom ${APP}.dwd_comment_infogroup by date_format(create_time,'yyyy-MM-dd'),user_id
),
tmp_od as
(selectdt,user_id,collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',order_count,'activity_reduce_amount',activity_reduce_amount,'coupon_reduce_amount',coupon_reduce_amount,'original_amount',original_amount,'final_amount',final_amount)) order_detail_statsfrom(selectdate_format(create_time,'yyyy-MM-dd') dt,user_id,sku_id,sum(sku_num) sku_num,count(*) order_count,cast(sum(split_activity_amount) as decimal(16,2)) activity_reduce_amount,cast(sum(split_coupon_amount) as decimal(16,2)) coupon_reduce_amount,cast(sum(original_amount) as decimal(16,2)) original_amount,cast(sum(split_final_amount) as decimal(16,2)) final_amountfrom ${APP}.dwd_order_detailgroup by date_format(create_time,'yyyy-MM-dd'),user_id,sku_id)t1group by dt,user_id
)
insert overwrite table ${APP}.dws_user_action_daycount partition(dt)
selectcoalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id,tmp_od.user_id),nvl(login_count,0),nvl(cart_count,0),nvl(favor_count,0),nvl(order_count,0),nvl(order_activity_count,0),nvl(order_activity_reduce_amount,0),nvl(order_coupon_count,0),nvl(order_coupon_reduce_amount,0),nvl(order_original_amount,0),nvl(order_final_amount,0),nvl(payment_count,0),nvl(payment_amount,0),nvl(refund_order_count,0),nvl(refund_order_num,0),nvl(refund_order_amount,0),nvl(refund_payment_count,0),nvl(refund_payment_num,0),nvl(refund_payment_amount,0),nvl(coupon_get_count,0),nvl(coupon_using_count,0),nvl(coupon_used_count,0),nvl(appraise_good_count,0),nvl(appraise_mid_count,0),nvl(appraise_bad_count,0),nvl(appraise_default_count,0),order_detail_stats,coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt,tmp_comment.dt,tmp_coupon.dt,tmp_od.dt)
from tmp_login
full outer join tmp_cf
on tmp_login.user_id=tmp_cf.user_id
and tmp_login.dt=tmp_cf.dt
full outer join tmp_order
on coalesce(tmp_login.user_id,tmp_cf.user_id)=tmp_order.user_id
and coalesce(tmp_login.dt,tmp_cf.dt)=tmp_order.dt
full outer join tmp_pay
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id)=tmp_pay.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt)=tmp_pay.dt
full outer join tmp_ri
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id)=tmp_ri.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt)=tmp_ri.dt
full outer join tmp_rp
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id)=tmp_rp.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt)=tmp_rp.dt
full outer join tmp_comment
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id)=tmp_comment.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt)=tmp_comment.dt
full outer join tmp_coupon
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id)=tmp_coupon.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt,tmp_comment.dt)=tmp_coupon.dt
full outer join tmp_od
on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id)=tmp_od.user_id
and coalesce(tmp_login.dt,tmp_cf.dt,tmp_order.dt,tmp_pay.dt,tmp_ri.dt,tmp_rp.dt,tmp_comment.dt,tmp_coupon.dt)=tmp_od.dt;
"dws_activity_info_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,activity_rule_id,activity_id,count(*) order_count,sum(split_activity_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom ${APP}.dwd_order_detailwhere activity_id is not nullgroup by date_format(create_time,'yyyy-MM-dd'),activity_rule_id,activity_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,activity_rule_id,activity_id,count(*) payment_count,sum(split_activity_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom(selectactivity_rule_id,activity_id,order_id,split_activity_amount,split_final_amountfrom ${APP}.dwd_order_detailwhere activity_id is not null)odjoin(selectorder_id,callback_timefrom ${APP}.dwd_payment_info)pion od.order_id=pi.order_idgroup by date_format(callback_time,'yyyy-MM-dd'),activity_rule_id,activity_id
)
insert overwrite table ${APP}.dws_activity_info_daycount partition(dt)
selectactivity_rule_id,activity_id,sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount),dt
from
(selectdt,activity_rule_id,activity_id,order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amountfrom tmp_orderunion allselectdt,activity_rule_id,activity_id,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_reduce_amount,payment_amountfrom tmp_pay
)t1
group by dt,activity_rule_id,activity_id;"dws_sku_action_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,sku_id,count(*) order_count,sum(sku_num) order_num,sum(if(split_activity_amount>0,1,0)) order_activity_count,sum(if(split_coupon_amount>0,1,0)) order_coupon_count,sum(split_activity_amount) order_activity_reduce_amount,sum(split_coupon_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom ${APP}.dwd_order_detailgroup by date_format(create_time,'yyyy-MM-dd'),sku_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,sku_id,count(*) payment_count,sum(sku_num) payment_num,sum(split_final_amount) payment_amountfrom ${APP}.dwd_order_detail odjoin(selectorder_id,callback_timefrom ${APP}.dwd_payment_infowhere callback_time is not null)pi on pi.order_id=od.order_idgroup by date_format(callback_time,'yyyy-MM-dd'),sku_id
),
tmp_ri as
(selectdate_format(create_time,'yyyy-MM-dd') dt,sku_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom ${APP}.dwd_order_refund_infogroup by date_format(create_time,'yyyy-MM-dd'),sku_id
),
tmp_rp as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,rp.sku_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(refund_amount) refund_payment_amountfrom(selectorder_id,sku_id,refund_amount,callback_timefrom ${APP}.dwd_refund_payment)rpleft join(selectorder_id,sku_id,refund_numfrom ${APP}.dwd_order_refund_info)rion rp.order_id=ri.order_idand rp.sku_id=ri.sku_idgroup by date_format(callback_time,'yyyy-MM-dd'),rp.sku_id
),
tmp_cf as
(selectdt,item sku_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom ${APP}.dwd_action_logwhere action_id in ('cart_add','favor_add')group by dt,item
),
tmp_comment as
(selectdate_format(create_time,'yyyy-MM-dd') dt,sku_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom ${APP}.dwd_comment_infogroup by date_format(create_time,'yyyy-MM-dd'),sku_id
)
insert overwrite table ${APP}.dws_sku_action_daycount partition(dt)
selectsku_id,sum(order_count),sum(order_num),sum(order_activity_count),sum(order_coupon_count),sum(order_activity_reduce_amount),sum(order_coupon_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_num),sum(payment_amount),sum(refund_order_count),sum(refund_order_num),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_num),sum(refund_payment_amount),sum(cart_count),sum(favor_count),sum(appraise_good_count),sum(appraise_mid_count),sum(appraise_bad_count),sum(appraise_default_count),dt
from
(selectdt,sku_id,order_count,order_num,order_activity_count,order_coupon_count,order_activity_reduce_amount,order_coupon_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_orderunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_num,payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_payunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,refund_order_count,refund_order_num,refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_riunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,refund_payment_count,refund_payment_num,refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_rpunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,cart_count,favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_cfunion allselectdt,sku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,appraise_good_count,appraise_mid_count,appraise_bad_count,appraise_default_countfrom tmp_comment
)t1
group by dt,sku_id;"dws_coupon_info_daycount="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp_cu as
(selectcoalesce(coupon_get.dt,coupon_using.dt,coupon_used.dt,coupon_exprie.dt) dt,coalesce(coupon_get.coupon_id,coupon_using.coupon_id,coupon_used.coupon_id,coupon_exprie.coupon_id) coupon_id,nvl(get_count,0) get_count,nvl(order_count,0) order_count,nvl(payment_count,0) payment_count,nvl(expire_count,0) expire_countfrom(selectdate_format(get_time,'yyyy-MM-dd') dt,coupon_id,count(*) get_countfrom ${APP}.dwd_coupon_usegroup by date_format(get_time,'yyyy-MM-dd'),coupon_id)coupon_getfull outer join(selectdate_format(using_time,'yyyy-MM-dd') dt,coupon_id,count(*) order_countfrom ${APP}.dwd_coupon_usewhere using_time is not nullgroup by date_format(using_time,'yyyy-MM-dd'),coupon_id)coupon_usingon coupon_get.dt=coupon_using.dtand coupon_get.coupon_id=coupon_using.coupon_idfull outer join(selectdate_format(used_time,'yyyy-MM-dd') dt,coupon_id,count(*) payment_countfrom ${APP}.dwd_coupon_usewhere used_time is not nullgroup by date_format(used_time,'yyyy-MM-dd'),coupon_id)coupon_usedon nvl(coupon_get.dt,coupon_using.dt)=coupon_used.dtand nvl(coupon_get.coupon_id,coupon_using.coupon_id)=coupon_used.coupon_idfull outer join(selectdate_format(expire_time,'yyyy-MM-dd') dt,coupon_id,count(*) expire_countfrom ${APP}.dwd_coupon_usewhere expire_time is not nullgroup by date_format(expire_time,'yyyy-MM-dd'),coupon_id)coupon_exprieon coalesce(coupon_get.dt,coupon_using.dt,coupon_used.dt)=coupon_exprie.dtand coalesce(coupon_get.coupon_id,coupon_using.coupon_id,coupon_used.coupon_id)=coupon_exprie.coupon_id
),
tmp_order as
(selectdate_format(create_time,'yyyy-MM-dd') dt,coupon_id,sum(split_coupon_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom ${APP}.dwd_order_detailwhere coupon_id is not nullgroup by date_format(create_time,'yyyy-MM-dd'),coupon_id
),
tmp_pay as
(selectdate_format(callback_time,'yyyy-MM-dd') dt,coupon_id,sum(split_coupon_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom(selectorder_id,coupon_id,split_coupon_amount,split_final_amountfrom ${APP}.dwd_order_detailwhere coupon_id is not null)odjoin(selectorder_id,callback_timefrom ${APP}.dwd_payment_info)pion od.order_id=pi.order_idgroup by date_format(callback_time,'yyyy-MM-dd'),coupon_id
)
insert overwrite table ${APP}.dws_coupon_info_daycount partition(dt)
selectcoupon_id,sum(get_count),sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount),sum(expire_count),dt
from
(selectdt,coupon_id,get_count,order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,0 payment_reduce_amount,0 payment_amount,expire_countfrom tmp_cuunion allselectdt,coupon_id,0 get_count,0 order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amount,0 expire_countfrom tmp_orderunion allselectdt,coupon_id,0 get_count,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,payment_reduce_amount,payment_amount,0 expire_countfrom tmp_pay
)t1
group by dt,coupon_id;
"case $1 in"dws_visitor_action_daycount" )hive -e "$dws_visitor_action_daycount";;"dws_user_action_daycount" )hive -e "$dws_user_action_daycount";;"dws_activity_info_daycount" )hive -e "$dws_activity_info_daycount";;"dws_area_stats_daycount" )hive -e "$dws_area_stats_daycount";;"dws_sku_action_daycount" )hive -e "$dws_sku_action_daycount";;"dws_coupon_info_daycount" )hive -e "$dws_coupon_info_daycount";;"all" )hive -e "$dws_visitor_action_daycount$dws_user_action_daycount$dws_activity_info_daycount$dws_area_stats_daycount$dws_sku_action_daycount$dws_coupon_info_daycount";;
esac
7.8 DWS层每日数据装载脚本
#!/bin/bashAPP=gmall
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$2" ] ;thendo_date=$2
else do_date=`date -d "-1 day" +%F`
fidws_visitor_action_daycount="insert overwrite table ${APP}.dws_visitor_action_daycount partition(dt='$do_date')
selectt1.mid_id,t1.brand,t1.model,t1.is_new,t1.channel,t1.os,t1.area_code,t1.version_code,t1.visit_count,t3.page_stats
from
(selectmid_id,brand,model,if(array_contains(collect_set(is_new),'0'),'0','1') is_new,--ods_page_log中,同一天内,同一设备的is_new字段,可能全部为1,可能全部为0,也可能部分为0,部分为1(卸载重装),故做该处理collect_set(channel) channel,collect_set(os) os,collect_set(area_code) area_code,collect_set(version_code) version_code,sum(if(last_page_id is null,1,0)) visit_countfrom ${APP}.dwd_page_logwhere dt='$do_date'and last_page_id is nullgroup by mid_id,model,brand
)t1
join
(selectmid_id,brand,model,collect_set(named_struct('page_id',page_id,'page_count',page_count,'during_time',during_time)) page_statsfrom(selectmid_id,brand,model,page_id,count(*) page_count,sum(during_time) during_timefrom ${APP}.dwd_page_logwhere dt='$do_date'group by mid_id,model,brand,page_id)t2group by mid_id,model,brand
)t3
on t1.mid_id=t3.mid_id
and t1.brand=t3.brand
and t1.model=t3.model;"dws_user_action_daycount="
with
tmp_login as
(selectuser_id,count(*) login_countfrom ${APP}.dwd_page_logwhere dt='$do_date'and user_id is not nulland last_page_id is nullgroup by user_id
),
tmp_cf as
(selectuser_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom ${APP}.dwd_action_logwhere dt='$do_date'and user_id is not nulland action_id in ('cart_add','favor_add')group by user_id
),
tmp_order as
(selectuser_id,count(*) order_count,sum(if(activity_reduce_amount>0,1,0)) order_activity_count,sum(if(coupon_reduce_amount>0,1,0)) order_coupon_count,sum(activity_reduce_amount) order_activity_reduce_amount,sum(coupon_reduce_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom ${APP}.dwd_order_infowhere (dt='$do_date'or dt='9999-99-99')and date_format(create_time,'yyyy-MM-dd')='$do_date'group by user_id
),
tmp_pay as
(selectuser_id,count(*) payment_count,sum(payment_amount) payment_amountfrom ${APP}.dwd_payment_infowhere dt='$do_date'group by user_id
),
tmp_ri as
(selectuser_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom ${APP}.dwd_order_refund_infowhere dt='$do_date'group by user_id
),
tmp_rp as
(selectrp.user_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(rp.refund_amount) refund_payment_amountfrom(selectuser_id,order_id,sku_id,refund_amountfrom ${APP}.dwd_refund_paymentwhere dt='$do_date')rpleft join(selectuser_id,order_id,sku_id,refund_numfrom ${APP}.dwd_order_refund_infowhere dt>=date_add('$do_date',-15))rion rp.order_id=ri.order_idand rp.sku_id=rp.sku_idgroup by rp.user_id
),
tmp_coupon as
(selectuser_id,sum(if(date_format(get_time,'yyyy-MM-dd')='$do_date',1,0)) coupon_get_count,sum(if(date_format(using_time,'yyyy-MM-dd')='$do_date',1,0)) coupon_using_count,sum(if(date_format(used_time,'yyyy-MM-dd')='$do_date',1,0)) coupon_used_countfrom ${APP}.dwd_coupon_usewhere (dt='$do_date' or dt='9999-99-99')and (date_format(get_time, 'yyyy-MM-dd') = '$do_date'or date_format(using_time,'yyyy-MM-dd')='$do_date'or date_format(used_time,'yyyy-MM-dd')='$do_date')group by user_id
),
tmp_comment as
(selectuser_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom ${APP}.dwd_comment_infowhere dt='$do_date'group by user_id
),
tmp_od as
(selectuser_id,collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',order_count,'activity_reduce_amount',activity_reduce_amount,'coupon_reduce_amount',coupon_reduce_amount,'original_amount',original_amount,'final_amount',final_amount)) order_detail_statsfrom(selectuser_id,sku_id,sum(sku_num) sku_num,count(*) order_count,cast(sum(split_activity_amount) as decimal(16,2)) activity_reduce_amount,cast(sum(split_coupon_amount) as decimal(16,2)) coupon_reduce_amount,cast(sum(original_amount) as decimal(16,2)) original_amount,cast(sum(split_final_amount) as decimal(16,2)) final_amountfrom ${APP}.dwd_order_detailwhere dt='$do_date'group by user_id,sku_id)t1group by user_id
)
insert overwrite table ${APP}.dws_user_action_daycount partition(dt='$do_date')
selectcoalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id,tmp_od.user_id),nvl(login_count,0),nvl(cart_count,0),nvl(favor_count,0),nvl(order_count,0),nvl(order_activity_count,0),nvl(order_activity_reduce_amount,0),nvl(order_coupon_count,0),nvl(order_coupon_reduce_amount,0),nvl(order_original_amount,0),nvl(order_final_amount,0),nvl(payment_count,0),nvl(payment_amount,0),nvl(refund_order_count,0),nvl(refund_order_num,0),nvl(refund_order_amount,0),nvl(refund_payment_count,0),nvl(refund_payment_num,0),nvl(refund_payment_amount,0),nvl(coupon_get_count,0),nvl(coupon_using_count,0),nvl(coupon_used_count,0),nvl(appraise_good_count,0),nvl(appraise_mid_count,0),nvl(appraise_bad_count,0),nvl(appraise_default_count,0),order_detail_stats
from tmp_login
full outer join tmp_cf on tmp_login.user_id=tmp_cf.user_id
full outer join tmp_order on coalesce(tmp_login.user_id,tmp_cf.user_id)=tmp_order.user_id
full outer join tmp_pay on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id)=tmp_pay.user_id
full outer join tmp_ri on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id)=tmp_ri.user_id
full outer join tmp_rp on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id)=tmp_rp.user_id
full outer join tmp_comment on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id)=tmp_comment.user_id
full outer join tmp_coupon on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id)=tmp_coupon.user_id
full outer join tmp_od on coalesce(tmp_login.user_id,tmp_cf.user_id,tmp_order.user_id,tmp_pay.user_id,tmp_ri.user_id,tmp_rp.user_id,tmp_comment.user_id,tmp_coupon.user_id)=tmp_od.user_id;
"dws_activity_info_daycount="
with
tmp_order as
(selectactivity_rule_id,activity_id,count(*) order_count,sum(split_activity_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom ${APP}.dwd_order_detailwhere dt='$do_date'and activity_id is not nullgroup by activity_rule_id,activity_id
),
tmp_pay as
(selectactivity_rule_id,activity_id,count(*) payment_count,sum(split_activity_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom ${APP}.dwd_order_detailwhere (dt='$do_date'or dt=date_add('$do_date',-1))and activity_id is not nulland order_id in(select order_id from ${APP}.dwd_payment_info where dt='$do_date')group by activity_rule_id,activity_id
)
insert overwrite table ${APP}.dws_activity_info_daycount partition(dt='$do_date')
selectactivity_rule_id,activity_id,sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount)
from
(selectactivity_rule_id,activity_id,order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amountfrom tmp_orderunion allselectactivity_rule_id,activity_id,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_reduce_amount,payment_amountfrom tmp_pay
)t1
group by activity_rule_id,activity_id;"dws_sku_action_daycount="
with
tmp_order as
(selectsku_id,count(*) order_count,sum(sku_num) order_num,sum(if(split_activity_amount>0,1,0)) order_activity_count,sum(if(split_coupon_amount>0,1,0)) order_coupon_count,sum(split_activity_amount) order_activity_reduce_amount,sum(split_coupon_amount) order_coupon_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom ${APP}.dwd_order_detailwhere dt='$do_date'group by sku_id
),
tmp_pay as
(selectsku_id,count(*) payment_count,sum(sku_num) payment_num,sum(split_final_amount) payment_amountfrom ${APP}.dwd_order_detailwhere (dt='$do_date'or dt=date_add('$do_date',-1))and order_id in(select order_id from ${APP}.dwd_payment_info where dt='$do_date')group by sku_id
),
tmp_ri as
(selectsku_id,count(*) refund_order_count,sum(refund_num) refund_order_num,sum(refund_amount) refund_order_amountfrom ${APP}.dwd_order_refund_infowhere dt='$do_date'group by sku_id
),
tmp_rp as
(selectrp.sku_id,count(*) refund_payment_count,sum(ri.refund_num) refund_payment_num,sum(refund_amount) refund_payment_amountfrom(selectorder_id,sku_id,refund_amountfrom ${APP}.dwd_refund_paymentwhere dt='$do_date')rpleft join(selectorder_id,sku_id,refund_numfrom ${APP}.dwd_order_refund_infowhere dt>=date_add('$do_date',-15))rion rp.order_id=ri.order_idand rp.sku_id=ri.sku_idgroup by rp.sku_id
),
tmp_cf as
(selectitem sku_id,sum(if(action_id='cart_add',1,0)) cart_count,sum(if(action_id='favor_add',1,0)) favor_countfrom ${APP}.dwd_action_logwhere dt='$do_date'and action_id in ('cart_add','favor_add')group by item
),
tmp_comment as
(selectsku_id,sum(if(appraise='1201',1,0)) appraise_good_count,sum(if(appraise='1202',1,0)) appraise_mid_count,sum(if(appraise='1203',1,0)) appraise_bad_count,sum(if(appraise='1204',1,0)) appraise_default_countfrom ${APP}.dwd_comment_infowhere dt='$do_date'group by sku_id
)
insert overwrite table ${APP}.dws_sku_action_daycount partition(dt='$do_date')
selectsku_id,sum(order_count),sum(order_num),sum(order_activity_count),sum(order_coupon_count),sum(order_activity_reduce_amount),sum(order_coupon_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_num),sum(payment_amount),sum(refund_order_count),sum(refund_order_num),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_num),sum(refund_payment_amount),sum(cart_count),sum(favor_count),sum(appraise_good_count),sum(appraise_mid_count),sum(appraise_bad_count),sum(appraise_default_count)
from
(selectsku_id,order_count,order_num,order_activity_count,order_coupon_count,order_activity_reduce_amount,order_coupon_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_orderunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,payment_num,payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_payunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,refund_order_count,refund_order_num,refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_riunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,refund_payment_count,refund_payment_num,refund_payment_amount,0 cart_count,0 favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_rpunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,cart_count,favor_count,0 appraise_good_count,0 appraise_mid_count,0 appraise_bad_count,0 appraise_default_countfrom tmp_cfunion allselectsku_id,0 order_count,0 order_num,0 order_activity_count,0 order_coupon_count,0 order_activity_reduce_amount,0 order_coupon_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_num,0 payment_amount,0 refund_order_count,0 refund_order_num,0 refund_order_amount,0 refund_payment_count,0 refund_payment_num,0 refund_payment_amount,0 cart_count,0 favor_count,appraise_good_count,appraise_mid_count,appraise_bad_count,appraise_default_countfrom tmp_comment
)t1
group by sku_id;"dws_coupon_info_daycount="
with
tmp_cu as
(selectcoupon_id,sum(if(date_format(get_time,'yyyy-MM-dd')='$do_date',1,0)) get_count,sum(if(date_format(using_time,'yyyy-MM-dd')='$do_date',1,0)) order_count,sum(if(date_format(used_time,'yyyy-MM-dd')='$do_date',1,0)) payment_count,sum(if(date_format(expire_time,'yyyy-MM-dd')='$do_date',1,0)) expire_countfrom ${APP}.dwd_coupon_usewhere dt='9999-99-99'or dt='$do_date'group by coupon_id
),
tmp_order as
(selectcoupon_id,sum(split_coupon_amount) order_reduce_amount,sum(original_amount) order_original_amount,sum(split_final_amount) order_final_amountfrom ${APP}.dwd_order_detailwhere dt='$do_date'and coupon_id is not nullgroup by coupon_id
),
tmp_pay as
(selectcoupon_id,sum(split_coupon_amount) payment_reduce_amount,sum(split_final_amount) payment_amountfrom ${APP}.dwd_order_detailwhere (dt='$do_date'or dt=date_add('$do_date',-1))and coupon_id is not nulland order_id in(select order_id from ${APP}.dwd_payment_info where dt='$do_date')group by coupon_id
)
insert overwrite table ${APP}.dws_coupon_info_daycount partition(dt='$do_date')
selectcoupon_id,sum(get_count),sum(order_count),sum(order_reduce_amount),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_reduce_amount),sum(payment_amount),sum(expire_count)
from
(selectcoupon_id,get_count,order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,payment_count,0 payment_reduce_amount,0 payment_amount,expire_countfrom tmp_cuunion allselectcoupon_id,0 get_count,0 order_count,order_reduce_amount,order_original_amount,order_final_amount,0 payment_count,0 payment_reduce_amount,0 payment_amount,0 expire_countfrom tmp_orderunion allselectcoupon_id,0 get_count,0 order_count,0 order_reduce_amount,0 order_original_amount,0 order_final_amount,0 payment_count,payment_reduce_amount,payment_amount,0 expire_countfrom tmp_pay
)t1
group by coupon_id;"dws_area_stats_daycount="
with
tmp_vu as
(selectid province_id,visit_count,login_count,visitor_count,user_countfrom(selectarea_code,count(*) visit_count,--访客访问次数count(user_id) login_count,--用户访问次数,等价于sum(if(user_id is not null,1,0))count(distinct(mid_id)) visitor_count,--访客人数count(distinct(user_id)) user_count--用户人数from ${APP}.dwd_page_logwhere dt='$do_date'and last_page_id is nullgroup by area_code)tmpleft join ${APP}.dim_base_province areaon tmp.area_code=area.area_code
),
tmp_order as
(selectprovince_id,count(*) order_count,sum(original_amount) order_original_amount,sum(final_amount) order_final_amountfrom ${APP}.dwd_order_infowhere dt='$do_date'or dt='9999-99-99'and date_format(create_time,'yyyy-MM-dd')='$do_date'group by province_id
),
tmp_pay as
(selectprovince_id,count(*) payment_count,sum(payment_amount) payment_amountfrom ${APP}.dwd_payment_infowhere dt='$do_date'group by province_id
),
tmp_ro as
(selectprovince_id,count(*) refund_order_count,sum(refund_amount) refund_order_amountfrom ${APP}.dwd_order_refund_infowhere dt='$do_date'group by province_id
),
tmp_rp as
(selectprovince_id,count(*) refund_payment_count,sum(refund_amount) refund_payment_amountfrom ${APP}.dwd_refund_paymentwhere dt='$do_date'group by province_id
)
insert overwrite table ${APP}.dws_area_stats_daycount partition(dt='$do_date')
selectprovince_id,sum(visit_count),sum(login_count),sum(visitor_count),sum(user_count),sum(order_count),sum(order_original_amount),sum(order_final_amount),sum(payment_count),sum(payment_amount),sum(refund_order_count),sum(refund_order_amount),sum(refund_payment_count),sum(refund_payment_amount)
from
(selectprovince_id,visit_count,login_count,visitor_count,user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_vuunion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,order_count,order_original_amount,order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_orderunion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,payment_count,payment_amount,0 refund_order_count,0 refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_payunion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,refund_order_count,refund_order_amount,0 refund_payment_count,0 refund_payment_amountfrom tmp_rounion allselectprovince_id,0 visit_count,0 login_count,0 visitor_count,0 user_count,0 order_count,0 order_original_amount,0 order_final_amount,0 payment_count,0 payment_amount,0 refund_order_count,0 refund_order_amount,refund_payment_count,refund_payment_amountfrom tmp_rp
)t1
group by province_id;"case $1 in"dws_visitor_action_daycount" )hive -e "$dws_visitor_action_daycount";;"dws_user_action_daycount" )hive -e "$dws_user_action_daycount";;"dws_activity_info_daycount" )hive -e "$dws_activity_info_daycount";;"dws_area_stats_daycount" )hive -e "$dws_area_stats_daycount";;"dws_sku_action_daycount" )hive -e "$dws_sku_action_daycount";;"dws_coupon_info_daycount" )hive -e "$dws_coupon_info_daycount";;"all" )hive -e "$dws_visitor_action_daycount$dws_user_action_daycount$dws_activity_info_daycount$dws_area_stats_daycount$dws_sku_action_daycount$dws_coupon_info_daycount";;
esac
这篇关于数仓学习笔记(6)——数仓搭建(DWS层)的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!